src/delta_lease.c | 4
src/direct.c | 241 +++++++++++++++++++++++++++----------------------
src/direct.h | 11 +-
src/main.c | 41 +++++++-
src/paxos_lease.c | 118 +++++++++++++++++------
src/sanlock_internal.h | 1
6 files changed, 267 insertions(+), 149 deletions(-)
New commits:
commit 7a63dd77ab564e2f34c54d2b53b44fae259c226a
Author: David Teigland <teigland(a)redhat.com>
Date: Thu May 5 16:39:58 2011 -0500
sanlock: paxos delays
Change two delays in the paxos code:
- sleep 1 instead of 2 between host_id liveness changes;
I was using 1 during the most successful test run, but
should go back and see if there's actually any difference
between using 1 and 2.
- sleep random time between 0 and 1 sec between ballot retries;
this should hopefully resolve the situation I saw where all
hosts were in fast ballot retry loops, always seeing larger
mbal values from other hosts (even if it would have resolved
itself eventually, it's not nice to hammer the system in a
loop like this for so long if we can avoid it.)
Also more comments and log message changes.
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 116823d..c93b4b3 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -183,6 +183,53 @@ static int read_request(struct timeout *ti,
}
#endif
+/*
+ * It's possible that we pick a bk_max from another host which has our own
+ * inp values in it, and we can end up committing our own inp values, copied
+ * from another host's dblock:
+ *
+ * host2 leader free
+ * host2 phase1 mbal 14002
+ * host2 writes dblock[1] mbal 14002
+ * host2 reads no higher mbal
+ * host2 choose own inp 2,1
+ * host2 phase2 mbal 14002 bal 14002 inp 2,1
+ * host2 writes dblock[1] bal 14002 inp 2,1
+ * host1 leader free
+ * host1 phase1 mbal 20001
+ * host1 writes dblock[0] mbal 20001
+ * host1 reads no higher mbal
+ * host1 choose dblock[1] bal 14002 inp 2,1
+ * host1 phase2 mbal 20001 bal 20001 inp 2,1
+ * host1 writes dblock[0] bal 20001 inp 2,1
+ * host2 reads dblock[0] mbal 20001 > 14002
+ * abort2, retry
+ * host2 leader free
+ * host2 phase1 mbal 16002
+ * host2 writes dblock[1] mbal 16002
+ * host2 reads dblock[0] mbal 20001 > 16002
+ * abort1 retry
+ * host2 leader free
+ * host2 phase1 mbal 18002
+ * host2 writes dblock[1] mbal 18002
+ * host2 reads dblock[0] mbal 20001 > 18002
+ * abort1 retry
+ * host2 leader free
+ * host2 phase1 mbal 20002
+ * host2 writes dblock[1] mbal 20002
+ * host2 reads no higher mbal
+ * host2 choose dblock[0] bal 20001 inp 2,1
+ * host1 reads dblock[1] mbal 20002 > 20001
+ * abort2 retry
+ * host2 phase2 mbal 20002 bal 20002 inp 2,1
+ * host2 writes dblock[1] bal 20002 inp 2,1
+ * host2 reads no higher mbal
+ * host2 commit inp 2,1
+ * host2 success
+ * host1 leader owner 2,1
+ * host1 fail
+ */
+
static int run_ballot(struct timeout *ti, struct token *token, int num_hosts,
uint64_t next_lver, uint64_t our_mbal,
struct paxos_dblock *dblock_out)
@@ -245,7 +292,7 @@ static int run_ballot(struct timeout *ti, struct token *token, int
num_hosts,
if (bk[q].lver > dblock.lver) {
/* I don't think this should happen */
- log_errot(token, "ballot %llu larger lver[%d] %llu",
+ log_errot(token, "ballot %llu larger1 lver[%d] %llu",
(unsigned long long)next_lver, q,
(unsigned long long)bk[q].lver);
return SANLK_DBLOCK_LVER;
@@ -254,7 +301,7 @@ static int run_ballot(struct timeout *ti, struct token *token, int
num_hosts,
/* see "It aborts the ballot" in comment above */
if (bk[q].mbal > dblock.mbal) {
- log_errot(token, "ballot %llu mbal %llu larger mbal[%d] %llu",
+ log_errot(token, "ballot %llu abort1 mbal %llu mbal[%d] %llu",
(unsigned long long)next_lver,
(unsigned long long)our_mbal, q,
(unsigned long long)bk[q].mbal);
@@ -317,7 +364,7 @@ static int run_ballot(struct timeout *ti, struct token *token, int
num_hosts,
if (bk_max.inp) {
/* not a problem, but interesting to see, so use log_error */
- log_errot(token, "ballot %llu bk_max[%d] lver %llu mbal %llu bal %llu inp %llu
%llu %llu",
+ log_errot(token, "ballot %llu choose bk_max[%d] lver %llu mbal %llu bal %llu inp
%llu %llu %llu",
(unsigned long long)next_lver, q_max,
(unsigned long long)bk_max.lver,
(unsigned long long)bk_max.mbal,
@@ -380,7 +427,7 @@ static int run_ballot(struct timeout *ti, struct token *token, int
num_hosts,
/* see "It aborts the ballot" in comment above */
if (bk[q].mbal > dblock.mbal) {
- log_errot(token, "ballot %llu mbal %llu larger2 mbal[%d] %llu",
+ log_errot(token, "ballot %llu abort2 mbal %llu mbal[%d] %llu",
(unsigned long long)next_lver,
(unsigned long long)our_mbal, q,
(unsigned long long)bk[q].mbal);
@@ -589,7 +636,7 @@ int paxos_lease_leader_read(struct timeout *ti,
}
if (!majority_disks(token, num_reads)) {
- log_errot(token, "%s leader_read error %d", caller, rv);
+ log_errot(token, "%s leader read error %d", caller, rv);
error = SANLK_LEADER_READ;
goto fail;
}
@@ -611,12 +658,12 @@ int paxos_lease_leader_read(struct timeout *ti,
}
if (!found) {
- log_errot(token, "%s leader_read inconsistent", caller);
+ log_errot(token, "%s leader inconsistent", caller);
error = SANLK_LEADER_DIFF;
goto fail;
}
- log_token(token, "%s leader_read %llu owner %llu %llu %llu", caller,
+ log_token(token, "%s leader %llu owner %llu %llu %llu", caller,
(unsigned long long)leader.lver,
(unsigned long long)leader.owner_id,
(unsigned long long)leader.owner_generation,
@@ -632,6 +679,13 @@ int paxos_lease_leader_read(struct timeout *ti,
return error;
}
+/*
+ * Return a random int between a and b inclusive; callers are expected to
+ * pass a <= b.
+ *
+ * The arithmetic is done in double.  Multiplying a float by random()
+ * converts the long to float, which rounds values near 2^31-1 up to 2^31
+ * and can make the result b + 1.
+ *
+ * The divisor is 2^31 because random() returns 0 .. 2^31-1, whereas
+ * RAND_MAX describes rand() and is not guaranteed to match.
+ */
+
+static int get_rand(int a, int b)
+{
+	const double random_span = 2147483648.0; /* 2^31 */
+	double width = (double)b - (double)a + 1.0;
+
+	/* random() / 2^31 is in [0, 1), so the scaled value is in [0, width) */
+	return a + (int)(width * ((double)random() / random_span));
+}
+
static int write_new_leader(struct timeout *ti, struct token *token,
struct leader_record *nl, const char *caller)
{
@@ -648,7 +702,11 @@ static int write_new_leader(struct timeout *ti, struct token *token,
}
if (!majority_disks(token, num_writes)) {
- log_errot(token, "%s write_new_leader no majority writes", caller);
+ log_errot(token, "%s write_new_leader error %d owner %llu %llu %llu",
+ caller, rv,
+ (unsigned long long)nl->owner_id,
+ (unsigned long long)nl->owner_generation,
+ (unsigned long long)nl->timestamp);
error = SANLK_LEADER_WRITE;
}
@@ -683,7 +741,7 @@ int paxos_lease_acquire(struct timeout *ti,
uint64_t next_lver;
uint64_t our_mbal = 0;
uint64_t last_timestamp = 0;
- int error, rv, d, num_reads, disk_open = 0;
+ int error, rv, d, us, num_reads, disk_open = 0;
log_token(token, "paxos_acquire begin acquire_lver %llu flags %x",
(unsigned long long)acquire_lver, flags);
@@ -705,7 +763,8 @@ int paxos_lease_acquire(struct timeout *ti,
}
if (cur_leader.timestamp == LEASE_FREE) {
- log_token(token, "paxos_acquire lease free");
+ log_token(token, "paxos_acquire leader %llu free",
+ (unsigned long long)cur_leader.lver);
goto run;
}
@@ -840,7 +899,8 @@ int paxos_lease_acquire(struct timeout *ti,
last_timestamp = host_id_leader.timestamp;
- sleep(2);
+ /* TODO: test with sleep(2) here */
+ sleep(1);
error = paxos_lease_leader_read(ti, token, &tmp_leader,
"paxos_acquire");
if (error < 0)
@@ -908,8 +968,12 @@ int paxos_lease_acquire(struct timeout *ti,
tmp_leader.owner_generation == token->host_generation) {
/* not a problem, but interesting to see, so use log_error */
- log_errot(token, "paxos_acquire %llu our id commited by %llu",
+ log_errot(token, "paxos_acquire %llu owner our inp "
+ "%llu %llu %llu commited by %llu",
(unsigned long long)next_lver,
+ (unsigned long long)tmp_leader.owner_id,
+ (unsigned long long)tmp_leader.owner_generation,
+ (unsigned long long)tmp_leader.timestamp,
(unsigned long long)tmp_leader.write_id);
memcpy(leader_ret, &tmp_leader, sizeof(struct leader_record));
@@ -930,8 +994,11 @@ int paxos_lease_acquire(struct timeout *ti,
&dblock);
if (error == SANLK_DBLOCK_MBAL) {
- log_token(token, "paxos_acquire %llu retry ballot",
- (unsigned long long)next_lver);
+ us = get_rand(0, 1000000);
+ /* not a problem, but interesting to see, so use log_error */
+ log_errot(token, "paxos_acquire %llu retry delay %d us",
+ (unsigned long long)next_lver, us);
+ usleep(us);
our_mbal += cur_leader.max_hosts;
goto retry_ballot;
}
@@ -965,7 +1032,7 @@ int paxos_lease_acquire(struct timeout *ti,
if (new_leader.owner_id != token->host_id) {
/* not a problem, but interesting to see, so use log_error */
- log_errot(token, "paxos_acquire %llu commit other owner %llu %llu %llu",
+ log_errot(token, "ballot %llu commit other owner %llu %llu %llu",
(unsigned long long)new_leader.lver,
(unsigned long long)new_leader.owner_id,
(unsigned long long)new_leader.owner_generation,
@@ -975,7 +1042,7 @@ int paxos_lease_acquire(struct timeout *ti,
goto out;
}
- log_token(token, "paxos_acquire %llu owner %llu %llu %llu done",
+ log_token(token, "ballot %llu commit self owner %llu %llu %llu",
(unsigned long long)next_lver,
(unsigned long long)new_leader.owner_id,
(unsigned long long)new_leader.owner_generation,
@@ -992,24 +1059,6 @@ int paxos_lease_acquire(struct timeout *ti,
}
#if 0
-int paxos_lease_leader_write(struct timeout *ti,
- struct token *token,
- struct leader_record *leader_new)
-{
- int error;
-
- log_token(token, "paxos_lease_leader_write begin");
-
- leader_new->checksum = leader_checksum(leader_new);
-
- error = write_new_leader(ti, token, leader_new);
-
- log_token(token, "paxos_lease_leader_write done %d", error);
- return error;
-}
-#endif
-
-#if 0
int paxos_lease_renew(struct timeout *ti,
struct token *token,
struct leader_record *leader_last,
commit c7c1ce615bd633f11520105b4352097c319a2998
Author: David Teigland <teigland(a)redhat.com>
Date: Thu May 5 12:23:51 2011 -0500
sanlock: direct read_leader
new command to directly read and print a leader_record
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 036d7dc..9b2a6d1 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -171,11 +171,9 @@ int delta_lease_leader_read(struct timeout *ti,
return SANLK_LEADER_READ;
error = verify_leader(disk, space_name, host_id, &leader, caller);
- if (error < 0)
- return error;
memcpy(leader_ret, &leader, sizeof(struct leader_record));
- return SANLK_OK;
+ return error;
}
int delta_lease_acquire(struct timeout *ti,
diff --git a/src/direct.c b/src/direct.c
index 1a8312b..ff1597e 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -31,6 +31,54 @@
#include "delta_lease.h"
#include "sanlock_direct.h"
+/*
+ * cli: sanlock direct init
+ * cli: sanlock direct read_leader
+ * cli: sanlock direct acquire
+ * cli: sanlock direct release
+ * lib: sanlock_direct_init()
+ *
+ * direct.c:
+ * direct_init()
+ * direct_read_leader()
+ * direct_acquire()
+ * direct_release()
+ * do_paxos_action()
+ * paxos_lease.c:
+ * paxos_lease_init()
+ * paxos_lease_leader_read()
+ * paxos_lease_acquire()
+ * paxos_lease_release()
+ *
+ * cli: sanlock direct init
+ * cli: sanlock direct read_leader
+ * cli: sanlock direct acquire_id
+ * cli: sanlock direct release_id
+ * cli: sanlock direct renew_id
+ * cli: sanlock direct read_id
+ * cli: sanlock direct live_id
+ * lib: sanlock_direct_read_id()
+ * lib: sanlock_direct_live_id()
+ * lib: sanlock_direct_init()
+ *
+ * direct.c:
+ * direct_init()
+ * direct_read_leader()
+ * direct_acquire_id()
+ * direct_release_id()
+ * direct_renew_id()
+ * direct_read_id()
+ * direct_live_id()
+ * do_delta_action()
+ * delta_lease.c:
+ * delta_lease_init()
+ * delta_lease_leader_read()
+ * delta_lease_acquire()
+ * delta_lease_release()
+ * delta_lease_renew()
+ */
+
+
/* TODO: include from sanlock_internal */
static struct timeout to_default = {
DEFAULT_USE_AIO,
@@ -40,16 +88,15 @@ static struct timeout to_default = {
DEFAULT_HOST_ID_RENEWAL_FAIL_SECONDS,
DEFAULT_HOST_ID_RENEWAL_WARN_SECONDS };
-static int do_paxos_action(int action,
- struct timeout *ti,
+static int do_paxos_action(int action, struct timeout *ti,
struct sanlk_resource *res,
- int max_hosts,
- int num_hosts,
+ int max_hosts, int num_hosts,
uint64_t local_host_id,
- uint64_t local_host_generation)
+ uint64_t local_host_generation,
+ struct leader_record *leader_ret)
{
struct token *token;
- struct leader_record leader_read, leader_ret;
+ struct leader_record leader;
int disks_len, token_len;
int num_opened;
int j, rv = 0;
@@ -76,49 +123,42 @@ static int do_paxos_action(int action,
}
num_opened = open_disks(token->disks, token->r.num_disks);
- if (!majority_disks(token, num_opened)) {
- log_tool("cannot open majority of disks");
- return -1;
- }
+ if (!majority_disks(token, num_opened))
+ return -ENODEV;
switch (action) {
case ACT_INIT:
rv = paxos_lease_init(ti, token, num_hosts, max_hosts);
- if (rv < 0) {
- log_tool("cannot initialize disks");
- goto exit_fail;
- }
break;
case ACT_ACQUIRE:
token->host_id = local_host_id;
token->host_generation = local_host_generation;
- rv = paxos_lease_acquire(ti, token, 0, &leader_ret, 0, num_hosts);
- if (rv < 0) {
- log_tool("cannot acquire lease on %s", token->r.name);
- goto exit_fail;
- }
+ rv = paxos_lease_acquire(ti, token, 0, leader_ret, 0, num_hosts);
break;
case ACT_RELEASE:
- rv = paxos_lease_leader_read(ti, token, &leader_read, "direct_release");
- if (rv < 0) {
- log_tool("cannot read lease on %s", token->r.name);
- goto exit_fail;
- }
- rv = paxos_lease_release(ti, token, &leader_read, &leader_ret);
- if (rv < 0) {
- log_tool("cannot release lease on %s", token->r.name);
- goto exit_fail;
- }
+ rv = paxos_lease_leader_read(ti, token, &leader, "direct_release");
+ if (rv < 0)
+ break;
+ rv = paxos_lease_release(ti, token, &leader, leader_ret);
+ break;
+
+ case ACT_READ_LEADER:
+ rv = paxos_lease_leader_read(ti, token, &leader, "direct_read_leader");
break;
}
-exit_fail:
close_disks(token->disks, token->r.num_disks);
free(token);
+ if (rv == SANLK_OK)
+ rv = 0;
+
+ if (leader_ret)
+ memcpy(leader_ret, &leader, sizeof(struct leader_record));
+
return rv;
}
@@ -131,21 +171,29 @@ int direct_acquire(struct timeout *ti,
struct sanlk_resource *res,
int num_hosts,
uint64_t local_host_id,
- uint64_t local_host_generation)
+ uint64_t local_host_generation,
+ struct leader_record *leader_ret)
{
- return do_paxos_action(ACT_ACQUIRE, ti, res, -1, num_hosts,
- local_host_id, local_host_generation);
+ return do_paxos_action(ACT_ACQUIRE, ti, res,
+ -1, num_hosts,
+ local_host_id, local_host_generation,
+ leader_ret);
}
int direct_release(struct timeout *ti,
- struct sanlk_resource *res)
+ struct sanlk_resource *res,
+ struct leader_record *leader_ret)
{
- return do_paxos_action(ACT_RELEASE, ti, res, -1, -1, 0, 0);
+ return do_paxos_action(ACT_RELEASE, ti, res,
+ -1, -1,
+ 0, 0,
+ leader_ret);
}
static int do_delta_action(int action,
struct timeout *ti,
struct sanlk_lockspace *ls,
+ int max_hosts,
struct leader_record *leader_ret)
{
struct leader_record leader;
@@ -153,32 +201,24 @@ static int do_delta_action(int action,
struct space space;
int rv;
- if (!ls->name[0])
- return -1;
-
- if (!ls->host_id_disk.path[0]) {
- log_tool("invalid disk path");
- return -1;
- }
-
- if (!ls->host_id) {
- log_tool("zero host_id");
- return -1;
- }
-
/* for log_space in delta functions */
memset(&space, 0, sizeof(space));
+ if (!ls->host_id_disk.path[0])
+ return -ENODEV;
+
memset(&sd, 0, sizeof(struct sync_disk));
memcpy(&sd, &ls->host_id_disk, sizeof(struct sanlk_disk));
rv = open_disks(&sd, 1);
- if (rv != 1) {
- log_tool("open_disk failed %d %s", rv, sd.path);
- return -1;
- }
+ if (rv != 1)
+ return -ENODEV;
switch (action) {
+ case ACT_INIT:
+ rv = delta_lease_init(ti, &sd, ls->name, max_hosts);
+ break;
+
case ACT_ACQUIRE_ID:
rv = delta_lease_acquire(ti, &space, &sd,
ls->name,
@@ -207,6 +247,7 @@ static int do_delta_action(int action,
&leader, &leader);
break;
case ACT_READ_ID:
+ case ACT_READ_LEADER:
rv = delta_lease_leader_read(ti, &sd,
ls->name,
ls->host_id,
@@ -215,6 +256,8 @@ static int do_delta_action(int action,
break;
}
+ close_disks(&sd, 1);
+
if (rv == SANLK_OK)
rv = 0;
@@ -233,17 +276,17 @@ static int do_delta_action(int action,
int direct_acquire_id(struct timeout *ti, struct sanlk_lockspace *ls)
{
- return do_delta_action(ACT_ACQUIRE_ID, ti, ls, NULL);
+ return do_delta_action(ACT_ACQUIRE_ID, ti, ls, -1, NULL);
}
int direct_release_id(struct timeout *ti, struct sanlk_lockspace *ls)
{
- return do_delta_action(ACT_RELEASE_ID, ti, ls, NULL);
+ return do_delta_action(ACT_RELEASE_ID, ti, ls, -1, NULL);
}
int direct_renew_id(struct timeout *ti, struct sanlk_lockspace *ls)
{
- return do_delta_action(ACT_RENEW_ID, ti, ls, NULL);
+ return do_delta_action(ACT_RENEW_ID, ti, ls, -1, NULL);
}
int direct_read_id(struct timeout *ti,
@@ -257,7 +300,7 @@ int direct_read_id(struct timeout *ti,
memset(&leader, 0, sizeof(struct leader_record));
- rv = do_delta_action(ACT_READ_ID, ti, ls, &leader);
+ rv = do_delta_action(ACT_READ_ID, ti, ls, -1, &leader);
*timestamp = leader.timestamp;
*owner_id = leader.owner_id;
@@ -290,7 +333,7 @@ int direct_live_id(struct timeout *ti,
time_t start;
int rv;
- rv = do_delta_action(ACT_READ_ID, ti, ls, &leader_begin);
+ rv = do_delta_action(ACT_READ_ID, ti, ls, -1, &leader_begin);
if (rv < 0)
return rv;
@@ -299,7 +342,7 @@ int direct_live_id(struct timeout *ti,
while (1) {
sleep(1);
- rv = do_delta_action(ACT_READ_ID, ti, ls, &leader);
+ rv = do_delta_action(ACT_READ_ID, ti, ls, -1, &leader);
if (rv < 0)
return rv;
@@ -355,61 +398,29 @@ int direct_init(struct timeout *ti,
struct sanlk_resource *res,
int max_hosts, int num_hosts)
{
- struct sync_disk sd;
- int num_opened;
- int rv;
+ int rv = -1;
if (ls && ls->host_id_disk.path[0]) {
- memset(&sd, 0, sizeof(struct sync_disk));
- memcpy(&sd, &ls->host_id_disk, sizeof(struct sanlk_disk));
-
- num_opened = open_disks(&sd, 1);
- if (num_opened != 1) {
- log_tool("cannot open disk %s", sd.path);
- return -1;
- }
+ rv = do_delta_action(ACT_INIT, ti, ls, max_hosts, NULL);
- rv = delta_lease_init(ti, &sd, ls->name, max_hosts);
- close_disks(&sd, 1);
+ } else if (res) {
+ if (!num_hosts)
+ return -EINVAL;
- if (rv < 0) {
- log_tool("lockspace init failed %d", rv);
- return -1;
- }
- }
+ if (num_hosts > max_hosts)
+ return SANLK_LEADER_NUMHOSTS;
- if (res) {
- if (!num_hosts) {
- log_tool("num_hosts option required for paxos lease init");
- return -1;
- }
+ if (!res->num_disks)
+ return -ENODEV;
- if (!max_hosts)
- max_hosts = DEFAULT_MAX_HOSTS;
+ if (!res->disks[0].path[0])
+ return -ENODEV;
- if (num_hosts > max_hosts) {
- log_tool("num_hosts cannot be greater than max_hosts");
- return -1;
- }
-
- if (!res->num_disks) {
- log_tool("num_disks zero");
- return -1;
- }
-
- if (!res->disks[0].path[0]) {
- log_tool("invalid disk path");
- return -1;
- }
-
- rv = do_paxos_action(ACT_INIT, ti, res, max_hosts, num_hosts, 0, 0);
- if (rv < 0) {
- log_tool("resource init failed %d", rv);
- return rv;
- }
+ rv = do_paxos_action(ACT_INIT, ti, res,
+ max_hosts, num_hosts, 0, 0, NULL);
}
- return 0;
+ return rv;
}
int sanlock_direct_init(struct sanlk_lockspace *ls,
@@ -425,6 +436,22 @@ int sanlock_direct_init(struct sanlk_lockspace *ls,
return direct_init(&ti, ls, res, max_hosts, num_hosts);
}
+/*
+ * Read a leader_record directly from disk.
+ *
+ * A lockspace with a non-empty host_id disk path is read through
+ * do_delta_action(); otherwise, if a resource is given, it is read
+ * through do_paxos_action().  Returns the result of whichever action
+ * ran, or -1 when neither a usable lockspace nor a resource was given.
+ *
+ * leader_ret is zeroed before anything else so that a caller which
+ * prints the record after a failure sees zeros rather than
+ * uninitialized memory (the actions can return before writing to it).
+ */
+int direct_read_leader(struct timeout *ti,
+		       struct sanlk_lockspace *ls,
+		       struct sanlk_resource *res,
+		       struct leader_record *leader_ret)
+{
+	int rv = -1;
+
+	if (leader_ret)
+		memset(leader_ret, 0, sizeof(struct leader_record));
+
+	if (ls && ls->host_id_disk.path[0]) {
+		rv = do_delta_action(ACT_READ_LEADER, ti, ls, -1, leader_ret);
+	} else if (res) {
+		rv = do_paxos_action(ACT_READ_LEADER, ti, res,
+				     -1, -1, 0, 0, leader_ret);
+	}
+
+	return rv;
+}
+
int direct_dump(struct timeout *ti, char *dump_path)
{
char *data;
@@ -448,10 +475,8 @@ int direct_dump(struct timeout *ti, char *dump_path)
strncpy(sd.path, dump_path, SANLK_PATH_LEN);
num_opened = open_disks(&sd, 1);
- if (num_opened != 1) {
- log_tool("cannot open disk %s", sd.path);
- return -1;
- }
+ if (num_opened != 1)
+ return -ENODEV;
data = malloc(sd.sector_size);
if (!data)
diff --git a/src/direct.h b/src/direct.h
index b73affc..e500638 100644
--- a/src/direct.h
+++ b/src/direct.h
@@ -13,10 +13,12 @@ int direct_acquire(struct timeout *ti,
struct sanlk_resource *res,
int num_hosts,
uint64_t local_host_id,
- uint64_t local_host_generation);
+ uint64_t local_host_generation,
+ struct leader_record *leader_ret);
int direct_release(struct timeout *ti,
- struct sanlk_resource *res);
+ struct sanlk_resource *res,
+ struct leader_record *leader_ret);
int direct_acquire_id(struct timeout *ti, struct sanlk_lockspace *ls);
int direct_release_id(struct timeout *ti, struct sanlk_lockspace *ls);
@@ -40,6 +42,11 @@ int direct_init(struct timeout *ti,
struct sanlk_resource *res,
int max_hosts, int num_hosts);
+int direct_read_leader(struct timeout *ti,
+ struct sanlk_lockspace *ls,
+ struct sanlk_resource *res,
+ struct leader_record *leader_ret);
+
int direct_dump(struct timeout *ti, char *dump_path);
#endif
diff --git a/src/main.c b/src/main.c
index 40cd20c..c9287d0 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2268,6 +2268,7 @@ static void print_usage(void)
printf("direct actions: read/write storage directly to:\n");
printf(" init initialize disk areas for host_id and resource leases\n");
printf(" dump print initialized leases\n");
+ printf(" read_leader print values in leader_record\n");
printf(" acquire acquire leases\n");
printf(" release release leases\n");
printf(" acquire_id acquire a host_id lease\n");
@@ -2334,6 +2335,9 @@ static void print_usage(void)
printf(" -D debug: print extra info for debugging\n");
printf(" -a <num> use async io (1 yes, 0 no)\n");
printf("\n");
+ printf("direct read_leader [-s LOCKSPACE] [-r RESOURCE]\n");
+ printf(" -a <num> use async io (1 yes, 0 no)\n");
+ printf("\n");
printf("direct acquire|release -i <num> -g <num> -r RESOURCE\n");
printf(" -a <num> use async io (1 yes, 0 no)\n");
printf(" -n <num_hosts> change num_hosts in leases when acquired\n");
@@ -2440,6 +2444,8 @@ static int read_command_line(int argc, char *argv[])
com.action = ACT_INIT;
else if (!strcmp(act, "dump"))
com.action = ACT_DUMP;
+ else if (!strcmp(act, "read_leader"))
+ com.action = ACT_READ_LEADER;
else if (!strcmp(act, "acquire"))
com.action = ACT_ACQUIRE;
else if (!strcmp(act, "release"))
@@ -2734,6 +2740,7 @@ static int do_client(void)
static int do_direct(void)
{
+ struct leader_record leader;
uint64_t timestamp, owner_id, owner_generation;
int live;
int rv;
@@ -2750,14 +2757,44 @@ static int do_direct(void)
log_tool("dump done %d", rv);
break;
+ case ACT_READ_LEADER:
+ rv = direct_read_leader(&to, &com.lockspace, com.res_args[0], &leader);
+ log_tool("read_leader done %d", rv);
+ log_tool("magic 0x%x", leader.magic);
+ log_tool("version 0x%x", leader.version);
+ log_tool("sector_size %u", leader.sector_size);
+ log_tool("num_hosts %llu",
+ (unsigned long long)leader.num_hosts);
+ log_tool("max_hosts %llu",
+ (unsigned long long)leader.max_hosts);
+ log_tool("owner_id %llu",
+ (unsigned long long)leader.owner_id);
+ log_tool("owner_generation %llu",
+ (unsigned long long)leader.owner_generation);
+ log_tool("lver %llu",
+ (unsigned long long)leader.lver);
+ log_tool("space_name %.48s", leader.space_name);
+ log_tool("resource_name %.48s", leader.resource_name);
+ log_tool("timestamp %llu",
+ (unsigned long long)leader.timestamp);
+ log_tool("checksum %u", leader.checksum);
+ log_tool("write_id %llu",
+ (unsigned long long)leader.write_id);
+ log_tool("write_generation %llu",
+ (unsigned long long)leader.write_generation);
+ log_tool("write_timestamp %llu",
+ (unsigned long long)leader.write_timestamp);
+ break;
+
case ACT_ACQUIRE:
rv = direct_acquire(&to, com.res_args[0], com.num_hosts,
- com.local_host_id, com.local_host_generation);
+ com.local_host_id, com.local_host_generation,
+ &leader);
log_tool("acquire done %d", rv);
break;
case ACT_RELEASE:
- rv = direct_release(&to, com.res_args[0]);
+ rv = direct_release(&to, com.res_args[0], &leader);
log_tool("release done %d", rv);
break;
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 1e84dc7..116823d 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -626,6 +626,7 @@ int paxos_lease_leader_read(struct timeout *ti,
return SANLK_OK;
fail:
+ memcpy(leader_ret, &leader, sizeof(struct leader_record));
free(leaders);
free(leader_reps);
return error;
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index f8c4e2f..5ad996a 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -310,6 +310,7 @@ enum {
ACT_LIVE_ID,
ACT_INIT,
ACT_DUMP,
+ ACT_READ_LEADER,
};
#endif