src/client.c | 216 ++++++++++++++++++++++++++++++++++++++++++-------
src/cmd.c | 192 ++++++++++++++++++++++++++++++++++++++++---
src/delta_lease.c | 59 ++++++++++++-
src/delta_lease.h | 7 +
src/direct.c | 49 +++++------
src/direct.h | 11 +-
src/direct_lib.c | 36 +++++++-
src/lockspace.c | 2
src/main.c | 60 +++++++++++--
src/paxos_lease.c | 32 +++++++
src/paxos_lease.h | 4
src/sanlock.8 | 41 ++++++---
src/sanlock_admin.h | 77 +++++++++++++++++
src/sanlock_direct.h | 18 ++++
src/sanlock_internal.h | 1
src/sanlock_sock.h | 40 ++++-----
16 files changed, 729 insertions(+), 116 deletions(-)
New commits:
commit 185bc1f850ead0092d20d6dacc647a40ebdd1205
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Nov 21 09:14:18 2012 -0600
sanlock: read and write apis for lockspace and resource
- write_lockspace includes an arg to specify the io timeout
written in the disk record.
- read_lockspace provides the io timeout from the disk record.
- read_lockspace/resource only require path and offset input.
If host_id is zero or unspecified, host_id 1 is used.
If other parameters are specified, they are used and verified.
# sanlock client init -s s1:0:/dev/vg/leases:0
# sanlock client init -r s1:r1:/dev/vg/leases:1048576
# sanlock client read -s ::/dev/vg/leases:0
s s1:1:/dev/vg/leases:0
io_timeout 10
# sanlock client read -r ::/dev/vg/leases:1048576
r s1:r1:/dev/vg/leases:1048576:0
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/client.c b/src/client.c
index 48e1505..ae73226 100644
--- a/src/client.c
+++ b/src/client.c
@@ -187,50 +187,157 @@ int sanlock_align(struct sanlk_disk *disk)
return rv;
}
-int sanlock_init(struct sanlk_lockspace *ls,
- struct sanlk_resource *res,
- int max_hosts, int num_hosts)
+int sanlock_read_lockspace(struct sanlk_lockspace *ls, uint32_t flags, uint32_t *io_timeout)
{
- int rv, fd, cmd, datalen;
+ struct sm_header h;
+ int rv, fd;
- if (!ls && !res)
+ if (!ls || !ls->host_id_disk.path[0])
return -EINVAL;
rv = connect_socket(&fd);
if (rv < 0)
return rv;
- if (ls && ls->host_id_disk.path[0]) {
- cmd = SM_CMD_INIT_LOCKSPACE;
- datalen = sizeof(struct sanlk_lockspace);
- } else {
- cmd = SM_CMD_INIT_RESOURCE;
- datalen = sizeof(struct sanlk_resource) +
- sizeof(struct sanlk_disk) * res->num_disks;
+ rv = send_header(fd, SM_CMD_READ_LOCKSPACE, flags,
+ sizeof(struct sanlk_lockspace),
+ 0, 0);
+ if (rv < 0)
+ goto out;
+
+ rv = send(fd, ls, sizeof(struct sanlk_lockspace), 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ /* receive result, io_timeout and ls struct */
+
+ memset(&h, 0, sizeof(struct sm_header));
+
+ rv = recv(fd, &h, sizeof(h), MSG_WAITALL);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
}
- rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts);
+ if (rv != sizeof(h)) {
+ rv = -1;
+ goto out;
+ }
+
+ rv = (int)h.data;
if (rv < 0)
goto out;
- if (ls) {
- rv = send(fd, ls, sizeof(struct sanlk_lockspace), 0);
- if (rv < 0) {
- rv = -errno;
- goto out;
- }
- } else {
- rv = send(fd, res, sizeof(struct sanlk_resource), 0);
- if (rv < 0) {
- rv = -errno;
- goto out;
- }
+ rv = recv(fd, ls, sizeof(struct sanlk_lockspace), MSG_WAITALL);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
- rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0);
- if (rv < 0) {
- rv = -errno;
- goto out;
- }
+ if (rv != sizeof(struct sanlk_lockspace)) {
+ rv = -1;
+ goto out;
+ }
+
+ *io_timeout = h.data2;
+ rv = (int)h.data;
+ out:
+ close(fd);
+ return rv;
+}
+
+int sanlock_read_resource(struct sanlk_resource *res, uint32_t flags)
+{
+ struct sm_header h;
+ int rv, fd;
+
+ if (!res || !res->num_disks || res->num_disks > SANLK_MAX_DISKS ||
+ !res->disks[0].path[0])
+ return -EINVAL;
+
+ rv = connect_socket(&fd);
+ if (rv < 0)
+ return rv;
+
+ rv = send_header(fd, SM_CMD_READ_RESOURCE, flags,
+ sizeof(struct sanlk_resource) +
+ sizeof(struct sanlk_disk) * res->num_disks,
+ 0, 0);
+ if (rv < 0)
+ goto out;
+
+ rv = send(fd, res, sizeof(struct sanlk_resource), 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ /* receive result and res struct */
+
+ memset(&h, 0, sizeof(struct sm_header));
+
+ rv = recv(fd, &h, sizeof(h), MSG_WAITALL);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ if (rv != sizeof(h)) {
+ rv = -1;
+ goto out;
+ }
+
+ rv = (int)h.data;
+ if (rv < 0)
+ goto out;
+
+ rv = recv(fd, res, sizeof(struct sanlk_resource), MSG_WAITALL);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ if (rv != sizeof(struct sanlk_resource)) {
+ rv = -1;
+ goto out;
+ }
+
+ rv = (int)h.data;
+ out:
+ close(fd);
+ return rv;
+}
+
+int sanlock_write_lockspace(struct sanlk_lockspace *ls, int max_hosts,
+ uint32_t flags, uint32_t io_timeout)
+{
+ int rv, fd;
+
+ if (!ls || !ls->host_id_disk.path[0])
+ return -EINVAL;
+
+ rv = connect_socket(&fd);
+ if (rv < 0)
+ return rv;
+
+ rv = send_header(fd, SM_CMD_WRITE_LOCKSPACE, flags,
+ sizeof(struct sanlk_lockspace),
+ max_hosts, io_timeout);
+ if (rv < 0)
+ goto out;
+
+ rv = send(fd, ls, sizeof(struct sanlk_lockspace), 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
}
rv = recv_result(fd);
@@ -239,6 +346,55 @@ int sanlock_init(struct sanlk_lockspace *ls,
return rv;
}
+int sanlock_write_resource(struct sanlk_resource *res,
+ int max_hosts, int num_hosts, uint32_t flags)
+{
+ int rv, fd;
+
+ if (!res || !res->num_disks || res->num_disks > SANLK_MAX_DISKS ||
+ !res->disks[0].path[0])
+ return -EINVAL;
+
+ rv = connect_socket(&fd);
+ if (rv < 0)
+ return rv;
+
+ rv = send_header(fd, SM_CMD_WRITE_RESOURCE, flags,
+ sizeof(struct sanlk_resource) +
+ sizeof(struct sanlk_disk) * res->num_disks,
+ max_hosts, num_hosts);
+ if (rv < 0)
+ goto out;
+
+ rv = send(fd, res, sizeof(struct sanlk_resource), 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ rv = recv_result(fd);
+ out:
+ close(fd);
+ return rv;
+}
+
+/*
+ * old api, see sanlock_write_lockspace/sanlock_write_resource
+ *
+ * ls is used only when it names a host_id_disk path; otherwise res is used,
+ * so a caller that passes an empty ls together with a res still gets the
+ * resource initialized, as it did before the api was split.
+ */
+int sanlock_init(struct sanlk_lockspace *ls,
+		 struct sanlk_resource *res,
+		 int max_hosts, int num_hosts)
+{
+	if (ls && ls->host_id_disk.path[0])
+		return sanlock_write_lockspace(ls, max_hosts, 0, 0);
+	else
+		return sanlock_write_resource(res, max_hosts, num_hosts, 0);
+}
+
/* src has colons unescaped, dst should have them escaped with backslash */
size_t sanlock_path_export(char *dst, const char *src, size_t dstlen)
diff --git a/src/cmd.c b/src/cmd.c
index d443e92..18b9db0 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -1106,23 +1106,180 @@ static void cmd_align(struct task *task GNUC_UNUSED, struct cmd_args *ca)
client_resume(ca->ci_in);
}
-static void cmd_init_lockspace(struct task *task, struct cmd_args *ca)
+/*
+ * SM_CMD_READ_LOCKSPACE: receive a sanlk_lockspace from the client, read the
+ * leader record for its host_id (host_id 1 when the client sent zero) from
+ * the disk it names, and reply with a header followed by the lockspace.
+ * The reply header carries the result in data and the io_timeout in data2.
+ */
+static void cmd_read_lockspace(struct task *task, struct cmd_args *ca)
+{
+	struct sm_header h;
+	struct sanlk_lockspace lockspace;
+	struct sync_disk sd;
+	uint64_t host_id;
+	int io_timeout = 0;
+	int fd, rv, result;
+
+	fd = client[ca->ci_in].fd;
+
+	/* the struct is sent back in the reply on every path, including a
+	   failed recv, so never leave it holding uninitialized stack bytes */
+	memset(&lockspace, 0, sizeof(struct sanlk_lockspace));
+
+	rv = recv(fd, &lockspace, sizeof(struct sanlk_lockspace), MSG_WAITALL);
+	if (rv != sizeof(struct sanlk_lockspace)) {
+		log_error("cmd_read_lockspace %d,%d recv %d %d",
+			  ca->ci_in, fd, rv, errno);
+		result = -ENOTCONN;
+		goto reply;
+	}
+
+	/* an unspecified host_id means the first record at offset */
+	if (!lockspace.host_id)
+		host_id = 1;
+	else
+		host_id = lockspace.host_id;
+
+	log_debug("cmd_read_lockspace %d,%d %llu %s:%llu",
+		  ca->ci_in, fd,
+		  (unsigned long long)host_id,
+		  lockspace.host_id_disk.path,
+		  (unsigned long long)lockspace.host_id_disk.offset);
+
+	if (!lockspace.host_id_disk.path[0]) {
+		result = -ENODEV;
+		goto reply;
+	}
+
+	memset(&sd, 0, sizeof(struct sync_disk));
+	memcpy(&sd, &lockspace.host_id_disk, sizeof(struct sanlk_disk));
+	sd.fd = -1;
+
+	rv = open_disk(&sd);
+	if (rv < 0) {
+		result = -ENODEV;
+		goto reply;
+	}
+
+	/* sets ls->name and io_timeout */
+	result = delta_read_lockspace(task, &sd, host_id, &lockspace,
+				      DEFAULT_IO_TIMEOUT, &io_timeout);
+	if (result == SANLK_OK)
+		result = 0;
+
+	close_disks(&sd, 1);
+ reply:
+	log_debug("cmd_read_lockspace %d,%d done %d", ca->ci_in, fd, result);
+
+	/* reply is the request header with result fields filled in,
+	   followed by the lockspace struct */
+	memcpy(&h, &ca->header, sizeof(struct sm_header));
+	h.data = result;
+	h.data2 = io_timeout;
+	h.length = sizeof(h) + sizeof(lockspace);
+	send(fd, &h, sizeof(h), MSG_NOSIGNAL);
+	send(fd, &lockspace, sizeof(lockspace), MSG_NOSIGNAL);
+	client_resume(ca->ci_in);
+}
+
+/*
+ * SM_CMD_READ_RESOURCE: receive a sanlk_resource and its disks from the
+ * client, read the paxos leader record from the first disk, and reply with
+ * a header (result in data) followed by the sanlk_resource.
+ */
+static void cmd_read_resource(struct task *task, struct cmd_args *ca)
+{
+	struct sm_header h;
+	struct sanlk_resource res;
+	struct token *token = NULL;
+	int token_len, disks_len;
+	int j, fd, rv, result;
+
+	fd = client[ca->ci_in].fd;
+
+	/* the struct is sent back in the reply on every path, including a
+	   failed recv, so never leave it holding uninitialized stack bytes */
+	memset(&res, 0, sizeof(struct sanlk_resource));
+
+	/* receiving and setting up token copied from cmd_acquire */
+
+	rv = recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL);
+	if (rv != sizeof(struct sanlk_resource)) {
+		log_error("cmd_read_resource %d,%d recv %d %d",
+			  ca->ci_in, fd, rv, errno);
+		result = -ENOTCONN;
+		goto reply;
+	}
+
+	if (!res.num_disks || res.num_disks > SANLK_MAX_DISKS) {
+		result = -ERANGE;
+		goto reply;
+	}
+
+	disks_len = res.num_disks * sizeof(struct sync_disk);
+	token_len = sizeof(struct token) + disks_len;
+
+	token = malloc(token_len);
+	if (!token) {
+		result = -ENOMEM;
+		goto reply;
+	}
+	memset(token, 0, token_len);
+	token->disks = (struct sync_disk *)&token->r.disks[0]; /* shorthand */
+	token->r.num_disks = res.num_disks;
+	memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
+	memcpy(token->r.name, res.name, SANLK_NAME_LEN);
+
+	/*
+	 * receive sanlk_disk's / sync_disk's
+	 *
+	 * WARNING: as a shortcut, this requires that sync_disk and
+	 * sanlk_disk match; this is the reason for the pad fields
+	 * in sanlk_disk (TODO: let these differ?)
+	 */
+
+	rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
+	if (rv != disks_len) {
+		result = -ENOTCONN;
+		goto reply;
+	}
+
+	/* zero out pad1 and pad2, see WARNING above */
+	for (j = 0; j < token->r.num_disks; j++) {
+		token->disks[j].sector_size = 0;
+		token->disks[j].fd = -1;
+	}
+
+	log_debug("cmd_read_resource %d,%d %.256s:%llu",
+		  ca->ci_in, fd,
+		  token->disks[0].path,
+		  (unsigned long long)token->r.disks[0].offset);
+
+	rv = open_disks(token->disks, token->r.num_disks);
+	if (rv < 0) {
+		result = rv;
+		goto reply;
+	}
+
+	token->io_timeout = DEFAULT_IO_TIMEOUT;
+
+	/* sets res.lockspace_name, res.name, res.lver */
+	result = paxos_read_resource(task, token, &res);
+	if (result == SANLK_OK)
+		result = 0;
+
+	close_disks(token->disks, token->r.num_disks);
+ reply:
+	if (token)
+		free(token);
+	log_debug("cmd_read_resource %d,%d done %d", ca->ci_in, fd, result);
+
+	/* reply is the request header with the result filled in,
+	   followed by the resource struct (without its disks) */
+	memcpy(&h, &ca->header, sizeof(struct sm_header));
+	h.data = result;
+	h.data2 = 0;
+	h.length = sizeof(h) + sizeof(res);
+	send(fd, &h, sizeof(h), MSG_NOSIGNAL);
+	send(fd, &res, sizeof(res), MSG_NOSIGNAL);
+	client_resume(ca->ci_in);
+}
+
+static void cmd_write_lockspace(struct task *task, struct cmd_args *ca)
{
struct sanlk_lockspace lockspace;
struct sync_disk sd;
int fd, rv, result;
+ int io_timeout = DEFAULT_IO_TIMEOUT;
fd = client[ca->ci_in].fd;
rv = recv(fd, &lockspace, sizeof(struct sanlk_lockspace), MSG_WAITALL);
if (rv != sizeof(struct sanlk_lockspace)) {
- log_error("cmd_init_lockspace %d,%d recv %d %d",
+ log_error("cmd_write_lockspace %d,%d recv %d %d",
ca->ci_in, fd, rv, errno);
result = -ENOTCONN;
goto reply;
}
- log_debug("cmd_init_lockspace %d,%d %.48s:%llu:%s:%llu",
+ log_debug("cmd_write_lockspace %d,%d %.48s:%llu:%s:%llu",
ca->ci_in, fd, lockspace.name,
(unsigned long long)lockspace.host_id,
lockspace.host_id_disk.path,
@@ -1143,17 +1300,20 @@ static void cmd_init_lockspace(struct task *task, struct cmd_args *ca)
goto reply;
}
- result = delta_lease_init(task, DEFAULT_IO_TIMEOUT, &sd, lockspace.name, ca->header.data);
+ if (ca->header.data2)
+ io_timeout = ca->header.data2;
+
+ result = delta_lease_init(task, io_timeout, &sd, lockspace.name, ca->header.data);
close_disks(&sd, 1);
reply:
- log_debug("cmd_init_lockspace %d,%d done %d", ca->ci_in, fd, result);
+ log_debug("cmd_write_lockspace %d,%d done %d", ca->ci_in, fd, result);
send_result(fd, &ca->header, result);
client_resume(ca->ci_in);
}
-static void cmd_init_resource(struct task *task, struct cmd_args *ca)
+static void cmd_write_resource(struct task *task, struct cmd_args *ca)
{
struct token *token = NULL;
struct sanlk_resource res;
@@ -1166,7 +1326,7 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca)
rv = recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL);
if (rv != sizeof(struct sanlk_resource)) {
- log_error("cmd_init_resource %d,%d recv %d %d",
+ log_error("cmd_write_resource %d,%d recv %d %d",
ca->ci_in, fd, rv, errno);
result = -ENOTCONN;
goto reply;
@@ -1211,7 +1371,7 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca)
token->disks[j].fd = -1;
}
- log_debug("cmd_init_resource %d,%d %.48s:%.48s:%.256s:%llu",
+ log_debug("cmd_write_resource %d,%d %.48s:%.48s:%.256s:%llu",
ca->ci_in, fd,
token->r.lockspace_name,
token->r.name,
@@ -1232,7 +1392,7 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca)
reply:
if (token)
free(token);
- log_debug("cmd_init_resource %d,%d done %d", ca->ci_in, fd, result);
+ log_debug("cmd_write_resource %d,%d done %d", ca->ci_in, fd, result);
send_result(fd, &ca->header, result);
client_resume(ca->ci_in);
@@ -1329,11 +1489,17 @@ void call_cmd_thread(struct task *task, struct cmd_args *ca)
case SM_CMD_ALIGN:
cmd_align(task, ca);
break;
- case SM_CMD_INIT_LOCKSPACE:
- cmd_init_lockspace(task, ca);
+ case SM_CMD_WRITE_LOCKSPACE:
+ cmd_write_lockspace(task, ca);
+ break;
+ case SM_CMD_WRITE_RESOURCE:
+ cmd_write_resource(task, ca);
+ break;
+ case SM_CMD_READ_LOCKSPACE:
+ cmd_read_lockspace(task, ca);
break;
- case SM_CMD_INIT_RESOURCE:
- cmd_init_resource(task, ca);
+ case SM_CMD_READ_RESOURCE:
+ cmd_read_resource(task, ca);
break;
case SM_CMD_EXAMINE_LOCKSPACE:
case SM_CMD_EXAMINE_RESOURCE:
diff --git a/src/delta_lease.c b/src/delta_lease.c
index bad5e63..e0fe372 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -148,6 +148,43 @@ static int verify_leader(struct sync_disk *disk,
return result;
}
+
+/*
+ * read the lockspace name and io_timeout given the disk location
+ *
+ * If ls->name is set, the leader record on disk is verified against it;
+ * if it is empty, the name found on disk is accepted.  On SANLK_OK,
+ * ls->name and *io_timeout_ret are filled in from the record and
+ * ls->host_id is set to host_id.
+ *
+ * Returns the read_sectors error if the read fails, otherwise the
+ * verify_leader result.
+ */
+
+int delta_read_lockspace(struct task *task,
+			 struct sync_disk *disk,
+			 uint64_t host_id,
+			 struct sanlk_lockspace *ls,
+			 int io_timeout,
+			 int *io_timeout_ret)
+{
+	struct leader_record leader;
+	char *space_name;
+	int rv, error;
+
+	/* host_id N is block offset N-1 */
+
+	memset(&leader, 0, sizeof(struct leader_record));
+
+	rv = read_sectors(disk, host_id - 1, 1, (char *)&leader, sizeof(struct leader_record),
+			  task, io_timeout, "read_lockspace");
+	if (rv < 0)
+		return rv;
+
+	/* verify against the caller's name when one was given; without the
+	   else branch space_name reached verify_leader uninitialized */
+	if (ls->name[0])
+		space_name = ls->name;
+	else
+		space_name = leader.space_name;
+
+	error = verify_leader(disk, space_name, host_id, &leader, "read_lockspace");
+
+	if (error == SANLK_OK) {
+		memcpy(ls->name, leader.space_name, SANLK_NAME_LEN);
+		ls->host_id = host_id;
+		*io_timeout_ret = leader.io_timeout;
+	}
+
+	return error;
+}
+
int delta_lease_leader_read(struct task *task, int io_timeout,
struct sync_disk *disk,
char *space_name,
@@ -618,6 +655,9 @@ int delta_lease_init(struct task *task,
if (!max_hosts)
max_hosts = DEFAULT_MAX_HOSTS;
+ if (!io_timeout)
+ io_timeout = DEFAULT_IO_TIMEOUT;
+
align_size = direct_align(disk);
if (align_size < 0)
return align_size;
@@ -647,16 +687,27 @@ int delta_lease_init(struct task *task,
leader->io_timeout = io_timeout;
strncpy(leader->space_name, space_name, NAME_ID_SIZE);
leader->checksum = leader_checksum(leader);
+
+ /* make the first record invalid so we can do a single atomic
+ write below to commit the whole thing */
+ if (!i)
+ leader->magic = 0;
}
rv = write_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task, io_timeout);
+ if (rv < 0)
+ goto out;
+
+ /* commit the whole lockspace by making the first record valid */
+
+ leader = (struct leader_record *)iobuf;
+ leader->magic = DELTA_DISK_MAGIC;
+ rv = write_iobuf(disk->fd, disk->offset, iobuf, disk->sector_size, task, io_timeout);
+ out:
if (rv != SANLK_AIO_TIMEOUT)
free(iobuf);
- if (rv < 0)
- return rv;
-
- return 0;
+ return rv;
}
diff --git a/src/delta_lease.h b/src/delta_lease.h
index 4c21267..f015d1e 100644
--- a/src/delta_lease.h
+++ b/src/delta_lease.h
@@ -48,4 +48,11 @@ int delta_lease_init(struct task *task,
char *space_name,
int max_hosts);
+int delta_read_lockspace(struct task *task,
+ struct sync_disk *disk,
+ uint64_t host_id,
+ struct sanlk_lockspace *ls,
+ int io_timeout,
+ int *io_timeout_ret);
+
#endif
diff --git a/src/direct.c b/src/direct.c
index ab39569..7308c82 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -91,6 +91,9 @@ static int do_paxos_action(int action, struct task *task, int io_timeout,
int disks_len, token_len;
int j, rv = 0;
+ if (!io_timeout)
+ io_timeout = DEFAULT_IO_TIMEOUT;
+
disks_len = res->num_disks * sizeof(struct sync_disk);
token_len = sizeof(struct token) + disks_len;
@@ -199,6 +202,9 @@ static int do_delta_action(int action,
memset(bitmap, 0, sizeof(bitmap));
+ if (!io_timeout)
+ io_timeout = DEFAULT_IO_TIMEOUT;
+
/* for log_space in delta functions */
memset(&space, 0, sizeof(space));
space.io_timeout = io_timeout;
@@ -392,36 +398,31 @@ int direct_align(struct sync_disk *disk)
return -EINVAL;
}
-/*
- * sanlock direct init [-s LOCKSPACE] [-r RESOURCE]
- *
- * Note: host_id not used for init, whatever is given in LOCKSPACE
- * is ignored
- */
-
-int direct_init(struct task *task,
- int io_timeout,
- struct sanlk_lockspace *ls,
- struct sanlk_resource *res,
- int max_hosts, int num_hosts)
+/* io_timeout is written to leader record and used for the write call itself */
+int direct_write_lockspace(struct task *task, struct sanlk_lockspace *ls,
+ int max_hosts, uint32_t io_timeout)
{
- int rv = -1;
+ if (!ls)
+ return -1;
- if (ls && ls->host_id_disk.path[0]) {
- rv = do_delta_action(ACT_DIRECT_INIT, task, io_timeout, ls, max_hosts, NULL, NULL);
+ return do_delta_action(ACT_DIRECT_INIT, task, io_timeout, ls,
+ max_hosts, NULL, NULL);
+}
- } else if (res) {
- if (!res->num_disks)
- return -ENODEV;
+int direct_write_resource(struct task *task, struct sanlk_resource *res,
+ int max_hosts, int num_hosts)
+{
+ if (!res)
+ return -1;
- if (!res->disks[0].path[0])
- return -ENODEV;
+ if (!res->num_disks)
+ return -ENODEV;
- rv = do_paxos_action(ACT_DIRECT_INIT, task, io_timeout, res,
- max_hosts, num_hosts, 0, 0, NULL);
- }
+ if (!res->disks[0].path[0])
+ return -ENODEV;
- return rv;
+ return do_paxos_action(ACT_DIRECT_INIT, task, 0, res,
+ max_hosts, num_hosts, 0, 0, NULL);
}
int direct_read_leader(struct task *task,
diff --git a/src/direct.h b/src/direct.h
index bd71096..f6371ed 100644
--- a/src/direct.h
+++ b/src/direct.h
@@ -45,10 +45,13 @@ int direct_live_id(struct task *task, int io_timeout,
int direct_align(struct sync_disk *disk);
-int direct_init(struct task *task, int io_timeout,
- struct sanlk_lockspace *ls,
- struct sanlk_resource *res,
- int max_hosts, int num_hosts);
+/* io_timeout is written in the leader record and used for the
+ write call itself */
+int direct_write_lockspace(struct task *task, struct sanlk_lockspace *ls,
+ int max_hosts, uint32_t io_timeout);
+
+int direct_write_resource(struct task *task, struct sanlk_resource *res,
+ int max_hosts, int num_hosts);
int direct_read_leader(struct task *task, int io_timeout,
struct sanlk_lockspace *ls,
diff --git a/src/direct_lib.c b/src/direct_lib.c
index 750ae71..c5b6dc0 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -123,6 +123,37 @@ int sanlock_direct_live_id(struct sanlk_lockspace *ls,
return rv;
}
+int sanlock_direct_write_lockspace(struct sanlk_lockspace *ls, int max_hosts,
+ uint32_t flags GNUC_UNUSED, uint32_t io_timeout)
+{
+ struct task task;
+ int rv;
+
+ setup_task_lib(&task, 1);
+
+ rv = direct_write_lockspace(&task, ls, max_hosts, io_timeout);
+
+ close_task_aio(&task);
+
+ return rv;
+}
+
+int sanlock_direct_write_resource(struct sanlk_resource *res,
+ int max_hosts, int num_hosts,
+ uint32_t flags GNUC_UNUSED)
+{
+ struct task task;
+ int rv;
+
+ setup_task_lib(&task, 1);
+
+ rv = direct_write_resource(&task, res, max_hosts, num_hosts);
+
+ close_task_aio(&task);
+
+ return rv;
+}
+
int sanlock_direct_init(struct sanlk_lockspace *ls,
struct sanlk_resource *res,
int max_hosts, int num_hosts, int use_aio)
@@ -132,7 +163,10 @@ int sanlock_direct_init(struct sanlk_lockspace *ls,
setup_task_lib(&task, use_aio);
- rv = direct_init(&task, DEFAULT_IO_TIMEOUT, ls, res, max_hosts, num_hosts);
+ if (ls)
+ rv = direct_write_lockspace(&task, ls, max_hosts, 0);
+ else
+ rv = direct_write_resource(&task, res, max_hosts, num_hosts);
close_task_aio(&task);
diff --git a/src/lockspace.c b/src/lockspace.c
index 469b66f..8e05fa5 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -384,7 +384,7 @@ static void *lockspace_thread(void *arg_in)
struct space *sp;
struct leader_record leader;
uint64_t delta_begin, last_success = 0;
- int rv, delta_length, renewal_interval;
+ int rv, delta_length, renewal_interval = 0;
int id_renewal_seconds, id_renewal_fail_seconds;
int acquire_result, delta_result, read_result;
int opened = 0;
diff --git a/src/main.c b/src/main.c
index 2ce26a3..37b20db 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1157,8 +1157,10 @@ static void process_connection(int ci)
case SM_CMD_EXAMINE_RESOURCE:
case SM_CMD_EXAMINE_LOCKSPACE:
case SM_CMD_ALIGN:
- case SM_CMD_INIT_LOCKSPACE:
- case SM_CMD_INIT_RESOURCE:
+ case SM_CMD_WRITE_LOCKSPACE:
+ case SM_CMD_WRITE_RESOURCE:
+ case SM_CMD_READ_LOCKSPACE:
+ case SM_CMD_READ_RESOURCE:
rv = client_suspend(ci);
if (rv < 0)
return;
@@ -1779,6 +1781,7 @@ static void print_usage(void)
printf("sanlock client log_dump\n");
printf("sanlock client shutdown [-f 0|1]\n");
printf("sanlock client init -s LOCKSPACE | -r RESOURCE\n");
+ printf("sanlock client read -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock client align -s LOCKSPACE\n");
printf("sanlock client add_lockspace -s LOCKSPACE\n");
printf("sanlock client inq_lockspace -s LOCKSPACE\n");
@@ -1902,6 +1905,10 @@ static int read_command_line(int argc, char *argv[])
com.action = ACT_CLIENT_ALIGN;
else if (!strcmp(act, "init"))
com.action = ACT_CLIENT_INIT;
+ else if (!strcmp(act, "write"))
+ com.action = ACT_CLIENT_INIT;
+ else if (!strcmp(act, "read"))
+ com.action = ACT_CLIENT_READ;
else {
log_tool("client action \"%s\" is unknown", act);
exit(EXIT_FAILURE);
@@ -2117,6 +2124,8 @@ static int do_client(void)
struct sanlk_resource **res_args = NULL;
struct sanlk_resource *res;
char *res_state = NULL;
+ char *res_str = NULL;
+ uint32_t io_timeout = 0;
int i, fd, rv = 0;
if (com.action == ACT_COMMAND || com.action == ACT_ACQUIRE) {
@@ -2271,14 +2280,45 @@ static int do_client(void)
case ACT_CLIENT_INIT:
log_tool("init");
if (com.lockspace.host_id_disk.path[0])
- rv = sanlock_init(&com.lockspace, NULL,
- com.max_hosts, com.num_hosts);
+ rv = sanlock_write_lockspace(&com.lockspace,
+ com.max_hosts, 0,
+ com.io_timeout_arg);
else
- rv = sanlock_init(NULL, com.res_args[0],
- com.max_hosts, com.num_hosts);
+ rv = sanlock_write_resource(com.res_args[0],
+ com.max_hosts,
+ com.num_hosts, 0);
log_tool("init done %d", rv);
break;
+ case ACT_CLIENT_READ:
+ if (com.lockspace.host_id_disk.path[0])
+ rv = sanlock_read_lockspace(&com.lockspace, 0, &io_timeout);
+ else
+ rv = sanlock_read_resource(com.res_args[0], 0);
+
+ if (rv < 0) {
+ log_tool("read error %d", rv);
+ break;
+ }
+
+ if (com.lockspace.host_id_disk.path[0]) {
+ log_tool("s %.48s:%llu:%s:%llu",
+ com.lockspace.name,
+ (unsigned long long)com.lockspace.host_id,
+ com.lockspace.host_id_disk.path,
+ (unsigned long long)com.lockspace.host_id_disk.offset);
+ log_tool("io_timeout %u", io_timeout);
+ } else {
+ rv = sanlock_res_to_str(com.res_args[0], &res_str);
+ if (rv < 0) {
+ log_tool("res_to_str error %d", rv);
+ break;
+ }
+ log_tool("r %s", res_str);
+ free(res_str);
+ }
+ break;
+
default:
log_tool("action not implemented");
rv = -1;
@@ -2299,8 +2339,12 @@ static int do_direct(void)
switch (com.action) {
case ACT_DIRECT_INIT:
- rv = direct_init(&main_task, com.io_timeout_arg, &com.lockspace,
- com.res_args[0], com.max_hosts, com.num_hosts);
+ if (com.lockspace.host_id_disk.path[0])
+ rv = direct_write_lockspace(&main_task, &com.lockspace,
+ com.max_hosts, com.io_timeout_arg);
+ else
+ rv = direct_write_resource(&main_task, com.res_args[0],
+ com.max_hosts, com.num_hosts);
log_tool("init done %d", rv);
break;
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 0561730..577ed0b 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -692,6 +692,38 @@ static int leaders_match(struct leader_record *a, struct leader_record *b)
return 0;
}
+/* read the lockspace name and resource name given the disk location */
+
+int paxos_read_resource(struct task *task,
+ struct token *token,
+ struct sanlk_resource *res)
+{
+ struct leader_record leader;
+ int rv;
+
+ memset(&leader, 0, sizeof(struct leader_record));
+
+ rv = read_leader(task, token, &token->disks[0], &leader);
+ if (rv < 0)
+ return rv;
+
+ if (!res->lockspace_name[0])
+ memcpy(token->r.lockspace_name, leader.space_name, NAME_ID_SIZE);
+
+ if (!res->name[0])
+ memcpy(token->r.name, leader.resource_name, NAME_ID_SIZE);
+
+ rv = verify_leader(token, &token->disks[0], &leader, "read_resource");
+
+ if (rv == SANLK_OK) {
+ memcpy(res->lockspace_name, leader.space_name, NAME_ID_SIZE);
+ memcpy(res->name, leader.resource_name, NAME_ID_SIZE);
+ res->lver = leader.lver;
+ }
+
+ return rv;
+}
+
static int _leader_read_one(struct task *task,
struct token *token,
struct leader_record *leader_ret,
diff --git a/src/paxos_lease.h b/src/paxos_lease.h
index 6e7e833..99df087 100644
--- a/src/paxos_lease.h
+++ b/src/paxos_lease.h
@@ -42,4 +42,8 @@ int paxos_lease_request_read(struct task *task, struct token *token,
int paxos_lease_request_write(struct task *task, struct token *token,
struct request_record *rr);
+int paxos_read_resource(struct task *task,
+ struct token *token,
+ struct sanlk_resource *res);
+
#endif
diff --git a/src/sanlock.8 b/src/sanlock.8
index a6bf6c4..59ef392 100644
--- a/src/sanlock.8
+++ b/src/sanlock.8
@@ -242,9 +242,6 @@ use mlockall (0 none, 1 current, 2 current and future)
.BR \-a " 0|1"
use async i/o
-.BI -o " sec"
-io timeout in seconds
-
.B "sanlock client"
.I action
[options]
@@ -273,11 +270,28 @@ command will be ignored if any lockspaces exist. With the force option
released, and lockspaces removed.
.BR "sanlock client init -s" " LOCKSPACE"
-.br
+
+Tell the sanlock daemon to initialize a lockspace on disk. The -o option
+can be used to specify the io timeout to be written in the host_id leases.
+(Also see sanlock direct init.)
+
.BR "sanlock client init -r" " RESOURCE"
-Tell the sanlock daemon to initialize storage for lease areas.
-(See sanlock direct init.)
+Tell the sanlock daemon to initialize a resource lease on disk.
+(Also see sanlock direct init.)
+
+.BR "sanlock client read -s" " LOCKSPACE"
+
+Tell the sanlock daemon to read a lockspace from disk. Only the
+LOCKSPACE path and offset are required. If host_id is zero, the first
+record at offset (host_id 1) is used. The complete LOCKSPACE and io
+timeout are printed. (Also see sanlock direct read_leader and read_id.)
+
+.BR "sanlock client read -r" " RESOURCE"
+
+Tell the sanlock daemon to read a resource lease from disk. Only the
+RESOURCE path and offset are required. The complete RESOURCE is printed.
+(Also see sanlock direct read_leader.)
.BR "sanlock client align -s" " LOCKSPACE"
@@ -287,11 +301,14 @@ path. Only path is used from the LOCKSPACE argument.
.BR "sanlock client add_lockspace -s" " LOCKSPACE"
Tell the sanlock daemon to acquire the specified host_id in the lockspace.
-This will allow resources to be acquired in the lockspace.
+This will allow resources to be acquired in the lockspace. The -o option
+can be used to specify the io timeout of the acquiring host, and will be
+written in the host_id lease.
.BR "sanlock client inq_lockspace -s" " LOCKSPACE"
-Ask to the sanlock daemon weather the lockspace is acquired or not.
+Inquire about the state of the lockspace in the sanlock daemon, whether
+it is being added or removed, or is joined.
.BR "sanlock client rem_lockspace -s" " LOCKSPACE"
@@ -359,7 +376,8 @@ lockspace, or initialize storage for one resource (paxos) lease. Both
options require 1MB of space. The host_id in the LOCKSPACE string is not
relevant to initialization, so the value is ignored. (The default of 2000
host_ids can be changed for special cases using the -n num_hosts and -m
-max_hosts options.)
+max_hosts options.) With -s, the -o option specifies the io timeout to be
+written in the host_id leases.
.BR "sanlock direct read_leader -s" " LOCKSPACE"
.br
@@ -374,9 +392,8 @@ the single sector of a delta lease, or the first sector of a paxos lease.
read_id reads a host_id and prints the owner. live_id reads a host_id
once a second until it the timestamp or owner change (prints live 1), or
-until host_dead_seconds (prints live 0). (host_dead_seconds is derived
-from the io_timeout option. The live 0|1 conclusion will not match the
-sanlock daemon's conclusion unless the configured timeouts match.)
+until host_dead_seconds (prints live 0). (host_dead_seconds is based on
+the owner's io timeout.)
./" .P
./" .BR "sanlock direct acquire_id -s" " LOCKSPACE"
./" .br
diff --git a/src/sanlock_admin.h b/src/sanlock_admin.h
index 4544c21..14b1e7a 100644
--- a/src/sanlock_admin.h
+++ b/src/sanlock_admin.h
@@ -70,10 +70,87 @@ int sanlock_align(struct sanlk_disk *disk);
* Use max_hosts = 0 for default value.
* Use num_hosts = 0 for default value.
* Provide either lockspace or resource, not both
+ *
+ * (Old api, see write_lockspace/resource)
*/
int sanlock_init(struct sanlk_lockspace *ls,
struct sanlk_resource *res,
int max_hosts, int num_hosts);
+/*
+ * write a lockspace to disk
+ *
+ * the sanlock daemon writes max_hosts lockspace leader records to disk
+ *
+ * the lockspace will support up to max_hosts using the lockspace at once
+ *
+ * use max_hosts = 0 for default value
+ *
+ * the first host_id (1) (the first record at offset) is the last
+ * leader record written, so read_lockspace of host_id 1 will fail
+ * until the entire write_lockspace is complete.
+ */
+
+int sanlock_write_lockspace(struct sanlk_lockspace *ls, int max_hosts,
+ uint32_t flags, uint32_t io_timeout);
+
+/*
+ * read one host's lockspace record from disk
+ *
+ * the sanlock daemon reads one lockspace leader record from disk
+ *
+ * the minimum input is path and offset
+ *
+ * if name is specified and does not match the leader record name,
+ * SANLK_LEADER_LOCKSPACE is returned
+ *
+ * if name is not specified, it is filled in with the value from disk
+ *
+ * if host_id is zero, host_id 1 is used (the first record at offset)
+ *
+ * if there is no delta lease magic number found at the host_id location,
+ * SANLK_LEADER_MAGIC is returned
+ *
+ * on success, zero is returned and
+ * io_timeout and the entire sanlk_lockspace struct are written to
+ */
+
+int sanlock_read_lockspace(struct sanlk_lockspace *ls,
+ uint32_t flags, uint32_t *io_timeout);
+
+/*
+ * format a resource lease area on disk
+ *
+ * the sanlock daemon writes a resource lease area to disk
+ *
+ * use max_hosts = 0 for default value
+ * use num_hosts = 0 for default value
+ */
+
+int sanlock_write_resource(struct sanlk_resource *res,
+ int max_hosts, int num_hosts, uint32_t flags);
+
+/*
+ * read a resource lease from disk
+ *
+ * the sanlock daemon reads the lease's leader record from disk
+ *
+ * the minimum input is one disk with path and offset
+ *
+ * if lockspace name is specified and does not match the leader record
+ * lockspace name, SANLK_LEADER_LOCKSPACE is returned
+ *
+ * if resource name is specified and does not match the leader record
+ * resource name, SANLK_LEADER_RESOURCE is returned
+ *
+ * if there is no paxos lease magic number found in the leader record,
+ * SANLK_LEADER_MAGIC is returned
+ *
+ * on success, zero is returned and
+ * the entire sanlk_resource struct is written to (res->disks is not changed)
+ */
+
+int sanlock_read_resource(struct sanlk_resource *res, uint32_t flags);
+
#endif
diff --git a/src/sanlock_direct.h b/src/sanlock_direct.h
index 0f0cf18..cbdec46 100644
--- a/src/sanlock_direct.h
+++ b/src/sanlock_direct.h
@@ -32,6 +32,8 @@ int sanlock_direct_live_id(struct sanlk_lockspace *ls,
* Use max_hosts = 0 for default value.
* Use num_hosts = 0 for default value.
* Provide either lockspace or resource, not both
+ *
+ * (Old api, see write_lockspace/resource)
*/
int sanlock_direct_init(struct sanlk_lockspace *ls,
@@ -39,6 +41,22 @@ int sanlock_direct_init(struct sanlk_lockspace *ls,
int max_hosts, int num_hosts, int use_aio);
/*
+ * write a lockspace to disk
+ * (also see sanlock_write_lockspace)
+ */
+
+int sanlock_direct_write_lockspace(struct sanlk_lockspace *ls, int max_hosts,
+ uint32_t flags, uint32_t io_timeout);
+
+/*
+ * format a resource lease area on disk
+ * (also see sanlock_write_resource)
+ */
+
+int sanlock_direct_write_resource(struct sanlk_resource *res,
+ int max_hosts, int num_hosts, uint32_t flags);
+
+/*
* Returns the alignment in bytes required by sanlock_direct_init()
* (1MB for disks with 512 sectors, 8MB for disks with 4096 sectors)
*/
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 65496ff..414ceb8 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -309,6 +309,7 @@ enum {
ACT_NEXT_FREE,
ACT_READ_LEADER,
ACT_CLIENT_INIT,
+ ACT_CLIENT_READ,
ACT_CLIENT_ALIGN,
ACT_EXAMINE,
};
diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h
index 5f8fcf0..fbabbb2 100644
--- a/src/sanlock_sock.h
+++ b/src/sanlock_sock.h
@@ -18,25 +18,27 @@
#define MAX_CLIENT_MSG (1024 * 1024) /* TODO: this is random */
enum {
- SM_CMD_REGISTER = 1,
- SM_CMD_ADD_LOCKSPACE = 2,
- SM_CMD_REM_LOCKSPACE = 3,
- SM_CMD_SHUTDOWN = 4,
- SM_CMD_STATUS = 5,
- SM_CMD_LOG_DUMP = 6,
- SM_CMD_ACQUIRE = 7,
- SM_CMD_RELEASE = 8,
- SM_CMD_INQUIRE = 9,
- SM_CMD_RESTRICT = 10,
- SM_CMD_REQUEST = 11,
- SM_CMD_ALIGN = 12,
- SM_CMD_INIT_LOCKSPACE = 13,
- SM_CMD_INIT_RESOURCE = 14,
- SM_CMD_EXAMINE_LOCKSPACE = 15,
- SM_CMD_EXAMINE_RESOURCE = 16,
- SM_CMD_HOST_STATUS = 17,
- SM_CMD_INQ_LOCKSPACE = 18,
- SM_CMD_KILLPATH = 19,
+ SM_CMD_REGISTER = 1,
+ SM_CMD_ADD_LOCKSPACE = 2,
+ SM_CMD_REM_LOCKSPACE = 3,
+ SM_CMD_SHUTDOWN = 4,
+ SM_CMD_STATUS = 5,
+ SM_CMD_LOG_DUMP = 6,
+ SM_CMD_ACQUIRE = 7,
+ SM_CMD_RELEASE = 8,
+ SM_CMD_INQUIRE = 9,
+ SM_CMD_RESTRICT = 10,
+ SM_CMD_REQUEST = 11,
+ SM_CMD_ALIGN = 12,
+ SM_CMD_EXAMINE_LOCKSPACE = 13,
+ SM_CMD_EXAMINE_RESOURCE = 14,
+ SM_CMD_HOST_STATUS = 15,
+ SM_CMD_INQ_LOCKSPACE = 16,
+ SM_CMD_KILLPATH = 17,
+ SM_CMD_WRITE_LOCKSPACE = 18,
+ SM_CMD_WRITE_RESOURCE = 19,
+ SM_CMD_READ_LOCKSPACE = 20,
+ SM_CMD_READ_RESOURCE = 21,
};
struct sm_header {