doc/migration.txt | 167 ++++++++++++++++++++++++++++++++++++++++
src/client_resource.c | 13 ++-
src/main.c | 200 ++++++++++++++++++++++++++++++++++++++-----------
src/paxos_lease.c | 3
src/sanlock_resource.h | 2
src/token_manager.c | 101 ++++++++++++++----------
src/token_manager.h | 3
7 files changed, 397 insertions(+), 92 deletions(-)
New commits:
commit 7265ecb04851ee7c7a4007097be604502e0ca8b2
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Feb 25 16:25:59 2011 -0600
sanlock: migration.txt describes libvirt/sanlock steps
diff --git a/doc/migration.txt b/doc/migration.txt
new file mode 100644
index 0000000..3b556df
--- /dev/null
+++ b/doc/migration.txt
@@ -0,0 +1,167 @@
+
+Migration 1 (no problems)
+
+- Source pid X is running and has leases on resources A and B
+
+- S libvirt calls plugin "migrate pid X's leases to D" (virLockDriverGetState)
+ . plugin calls sanlock_migrate(X, D)
+ . sanlock writes next_owner_id=D to leader block of leases A,B
+ [see cmd_migrate_thread() and migrate_token()]
+ . sanlock_migrate() returns 0
+ . sanlock returns string "leases=A,B ver=1,1 result=OK" to plugin
+ . plugin returns string "leases=A,B ver=1,1 result=OK" to libvirt
+
+- S libvirt sends "leases=A,B ver=1,1 result=OK" to D libvirt
+- S libvirt migration begins
+
+- D libvirt calls plugin New()
+- D libvirt calls plugin AddResource(A, path, offset)
+- D libvirt calls plugin AddResource(B, path, offset)
+- D libvirt forks pid Y
+- D libvirt calls plugin AcquireObject("leases=A,B ver=1,1 result=OK")
+ . plugin calls sanlock_acquire(A, B, INCOMING, "leases=A,B ver=1,1 result=OK")
+ . sanlock reads next_owner_id=D in leader block of leases A,B
+ [see cmd_acquire_thread() and receive_token()]
+ . sanlock_acquire() returns 0
+- D libvirt execs qemu -incoming in paused mode
+
+- S libvirt migration ends successfully
+- D libvirt migration ends successfully
+
+- D libvirt calls plugin "migration of pid Y complete" (no function yet)
+ . plugin calls sanlock_setowner(Y)
+ . sanlock writes owner_id=D, next_owner_id=0 to leader block of leases A,B
+ [see cmd_setowner_thread() and setowner_token()]
+ . sanlock_setowner() returns 0
+
+- D libvirt resumes vm pid Y
+- S qemu pid X exits
+- S sanlock cleans up pid X when it exits
+
+End result is success.
+
+--
+
+Migration 2 (source loses storage connectivity, destination doesn't)
+
+- Source pid X is running and has leases on resources A and B
+
+- S libvirt calls plugin "migrate pid X's leases to D" (virLockDriverGetState)
+ . plugin calls sanlock_migrate(X, D)
+ . sanlock FAILS to write next_owner_id=D to leader block of leases A,B
+ . sanlock_migrate() returns 0 (migration can still proceed despite failure)
+ . sanlock returns string "leases=A,B ver=1,1 result=ERROR" to plugin
+ . plugin returns string "leases=A,B ver=1,1 result=ERROR" to libvirt
+
+- S libvirt sends "leases=A,B ver=1,1 result=ERROR" to D libvirt
+- S libvirt migration begins
+
+- D libvirt calls plugin New()
+- D libvirt calls plugin AddResource(A, path, offset)
+- D libvirt calls plugin AddResource(B, path, offset)
+- D libvirt forks pid Y
+- D libvirt calls plugin AcquireObject("leases=A,B ver=1,1 result=ERROR")
+ . plugin calls sanlock_acquire(A, B, INCOMING, "leases=A,B ver=1,1 result=ERROR")
+ . sanlock reads next_owner_id=0 in leader block of leases A,B
+ . sanlock sees ERROR which is consistent with no next_owner_id being set
+ . sanlock writes next_owner_id=D to leader block of leases A,B
+ (S was supposed to do this but failed, having D set next_owner_id is
+ probably unnecessary)
+ . sanlock_acquire() returns 0
+- D libvirt execs qemu -incoming in paused mode
+
+- S libvirt migration ends successfully
+- D libvirt migration ends successfully
+
+- D libvirt calls plugin "migration of pid Y complete" (no function yet)
+ . plugin calls sanlock_setowner(Y)
+ . sanlock writes owner_id=D, next_owner_id=0 to leader block of leases A,B
+ [see cmd_setowner_thread() and setowner_token()]
+ . sanlock_setowner() returns 0
+
+- D libvirt resumes vm pid Y
+- S qemu pid X exits
+- S sanlock cleans up pid X when it exits
+
+End result is success.
+
+--
+
+Migration 3 (source loses storage connectivity, destination also)
+
+- Source pid X is running and has leases on resources A and B
+
+- S libvirt calls plugin "migrate pid X's leases to D" (virLockDriverGetState)
+ . plugin calls sanlock_migrate(X, D)
+ . sanlock FAILS to write next_owner_id=D to leader block of leases A,B
+ . sanlock_migrate() returns 0 (migration can still proceed despite failure)
+ . sanlock returns string "leases=A,B ver=1,1 result=ERROR" to plugin
+ . plugin returns string "leases=A,B ver=1,1 result=ERROR" to libvirt
+
+- S libvirt sends "leases=A,B ver=1,1 result=ERROR" to D libvirt
+- S libvirt migration begins
+
+- D libvirt calls plugin New()
+- D libvirt calls plugin AddResource(A, path, offset)
+- D libvirt calls plugin AddResource(B, path, offset)
+- D libvirt forks pid Y
+- D libvirt calls plugin AcquireObject("leases=A,B ver=1,1 result=ERROR")
+ . plugin calls sanlock_acquire(A, B, INCOMING, "leases=A,B ver=1,1 result=ERROR")
+ . sanlock FAILS to read leader block of leases A,B
+ [see receive_token(); paxos_lease_leader_read() returns an error]
+ . sanlock_acquire() returns -1
+- D libvirt does not exec qemu
+
+- D libvirt migration fails
+- S libvirt migration fails
+
+- D pid Y exits
+- D sanlock cleans up pid Y when it exits
+
+- S qemu pid X exits
+- S sanlock cleans up pid X when it exits
+
+End result is the vm is not running on either the source or the
+destination. The disk leases that the vm held have owner_id=S.
+
+--
+
+Migration 4 (libvirt migration fails)
+
+- Source pid X is running and has leases on resources A and B
+
+- S libvirt calls plugin "migrate pid X's leases to D" (virLockDriverGetState)
+ . plugin calls sanlock_migrate(X, D)
+ . sanlock writes next_owner_id=D to leader block of leases A,B
+ [see cmd_migrate_thread() and migrate_token()]
+ . sanlock_migrate() returns 0
+ . sanlock returns string "leases=A,B ver=1,1 result=OK" to plugin
+ . plugin returns string "leases=A,B ver=1,1 result=OK" to libvirt
+
+- S libvirt sends "leases=A,B ver=1,1 result=OK" to D libvirt
+- S libvirt migration begins
+
+- D libvirt calls plugin New()
+- D libvirt calls plugin AddResource(A, path, offset)
+- D libvirt calls plugin AddResource(B, path, offset)
+- D libvirt forks pid Y
+- D libvirt calls plugin AcquireObject("leases=A,B ver=1,1 result=OK")
+ . plugin calls sanlock_acquire(A, B, INCOMING, "leases=A,B ver=1,1 result=OK")
+ . sanlock reads next_owner_id=D in leader block of leases A,B
+ [see cmd_acquire_thread() and receive_token()]
+ . sanlock_acquire() returns 0
+- D libvirt execs qemu -incoming in paused mode
+
+- S libvirt migration fails
+- D libvirt migration fails
+
+- D pid Y exits
+- D sanlock cleans up pid Y when it exits
+
+- S qemu pid X exits
+- S sanlock cleans up pid X when it exits
+ . writes owner=0, next_owner=0 in leader block of leases A,B
+
+End result is the vm is not running on either the source or the
+destination. The disk leases that the vm held are free.
+
commit 91192505c2597740a04757c2a5097493b0f54b8a
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Feb 25 16:21:50 2011 -0600
sanlock: return migration state
sanlock_migrate() needs to return a string with the state of leases.
libvirtd will send this to the migration destination where it is
input for sanlock_acquire(INCOMING).
(Have not yet tested the string validation in acquire-incoming/receive)
diff --git a/src/client_resource.c b/src/client_resource.c
index 7a7c53f..ab17b7e 100644
--- a/src/client_resource.c
+++ b/src/client_resource.c
@@ -148,7 +148,7 @@ int sanlock_acquire(int sock, int pid, int res_count,
return rv;
}
-int sanlock_migrate(int sock, int pid, uint64_t target_host_id)
+int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char **state)
{
struct sm_header h;
char *reply_str = NULL;
@@ -191,25 +191,30 @@ int sanlock_migrate(int sock, int pid, uint64_t target_host_id)
len = h.length - sizeof(h);
reply_str = malloc(len);
- if (!reply_str)
+ if (!reply_str) {
+ rv = -ENOMEM;
goto out;
+ }
rv = recv(fd, reply_str, len, MSG_WAITALL);
if (rv != len) {
+ free(reply_str);
rv = -errno;
goto out;
}
if (h.data) {
+ free(reply_str);
rv = (int)h.data;
goto out;
}
+
+ if (state)
+ *state = reply_str;
rv = 0;
out:
if (sock == -1)
close(fd);
- if (reply_str)
- free(reply_str);
return rv;
}
diff --git a/src/main.c b/src/main.c
index 3d49866..6402f19 100644
--- a/src/main.c
+++ b/src/main.c
@@ -492,20 +492,21 @@ static void client_recv_all(int ci, struct sm_header *h_recv, int pos)
log_debug("recv_all ci %d rem %d total %d", ci, rem, total);
}
-/* optstr format: "abc=123 def=456 ghi=780" */
+/* str format: "abc=123 def=456 ghi=780" */
-static int parse_key_val(char *optstr, const char *key_arg, char *val_arg,
- int len)
+static int parse_key_val(char *str, const char *key_arg, char *val_arg, int len)
{
int copy_key, copy_val, i, kvi;
char key[64], val[64];
+ memset(val_arg, 0, len);
+
copy_key = 1;
copy_val = 0;
kvi = 0;
- for (i = 0; i < strlen(optstr); i++) {
- if (optstr[i] == ' ') {
+ for (i = 0; i < strlen(str); i++) {
+ if (str[i] == ' ') {
if (!strcmp(key, key_arg)) {
strncpy(val_arg, val, len);
return 0;
@@ -518,7 +519,7 @@ static int parse_key_val(char *optstr, const char *key_arg, char *val_arg,
continue;
}
- if (optstr[i] == '=') {
+ if (str[i] == '=') {
copy_key = 0;
copy_val = 1;
kvi = 0;
@@ -526,9 +527,9 @@ static int parse_key_val(char *optstr, const char *key_arg, char *val_arg,
}
if (copy_key)
- key[kvi++] = optstr[i];
+ key[kvi++] = str[i];
else if (copy_val)
- val[kvi++] = optstr[i];
+ val[kvi++] = str[i];
if (kvi > 62) {
log_error("invalid timeout parameter");
@@ -544,6 +545,74 @@ static int parse_key_val(char *optstr, const char *key_arg, char *val_arg,
return -1;
}
+/*
+ * parse_migrate_state - extract one token's migration state from str.
+ *
+ * The state strings for multiple tokens all exist in the single input
+ * string.  There's no special separator between strings for separate
+ * tokens, so we expect that each token state string begins with
+ * "lockspace_name=.... resource_name=.... "
+ *
+ * token:          supplies the space_name/resource_name used to locate
+ *                 this token's section within str
+ * str:            concatenated state strings for all tokens
+ * migrate_result: set from the "migrate_result" key
+ * leader:         lver, timestamp, owner_id and next_owner_id are set
+ *                 from the corresponding "leader.*" keys; other fields
+ *                 are left untouched
+ *
+ * Returns 0 on success, -1 if no section for this token is found in str,
+ * or the negative parse_key_val() result if parsing any key fails.
+ */
+
+static int parse_migrate_state(struct token *token, char *str,
+			       int *migrate_result,
+			       struct leader_record *leader)
+{
+	char state[SANLK_STATE_MAXSTR];
+	char name[128];
+	char val_str[128];
+	char *p, *begin, *end;
+	int rv, i;
+
+	memset(name, 0, sizeof(name));
+
+	snprintf(name, sizeof(name), "lockspace_name=%s resource_name=%s",
+		 token->space_name, token->resource_name);
+
+	begin = strstr(str, name);
+	if (!begin)
+		return -1;
+
+	/* begin itself starts with "lockspace_name=", so the search for the
+	   start of the next token's section must start after our own header;
+	   searching from begin would match immediately and leave us with an
+	   empty section */
+
+	end = strstr(begin + strlen(name), "lockspace_name=");
+	if (!end)
+		end = str + strlen(str) + 1;
+
+	memset(state, 0, sizeof(state));
+
+	/* copy this token's section, bounded by the size of state */
+
+	for (i = 0, p = begin; p < end; p++) {
+		state[i++] = *p;
+
+		if (i == SANLK_STATE_MAXSTR)
+			break;
+	}
+	state[SANLK_STATE_MAXSTR-1] = '\0';
+
+	rv = parse_key_val(state, "migrate_result", val_str, sizeof(val_str));
+	if (rv < 0)
+		return rv;
+	*migrate_result = atoi(val_str);
+
+	rv = parse_key_val(state, "leader.lver", val_str, sizeof(val_str));
+	if (rv < 0)
+		return rv;
+	leader->lver = strtoull(val_str, NULL, 0);
+
+	rv = parse_key_val(state, "leader.timestamp", val_str, sizeof(val_str));
+	if (rv < 0)
+		return rv;
+	leader->timestamp = strtoull(val_str, NULL, 0);
+
+	rv = parse_key_val(state, "leader.owner_id", val_str, sizeof(val_str));
+	if (rv < 0)
+		return rv;
+	leader->owner_id = strtoull(val_str, NULL, 0);
+
+	rv = parse_key_val(state, "leader.next_owner_id", val_str, sizeof(val_str));
+	if (rv < 0)
+		return rv;
+	leader->next_owner_id = strtoull(val_str, NULL, 0);
+
+	return 0;
+}
+
static void *cmd_acquire_thread(void *args_in)
{
struct cmd_args *ca = args_in;
@@ -554,6 +623,7 @@ static void *cmd_acquire_thread(void *args_in)
struct token *new_tokens[SANLK_MAX_RESOURCES];
struct sanlk_resource res;
struct sanlk_options opt;
+ struct leader_record leader;
char *opt_str;
char num_hosts_str[16];
uint64_t reacquire_lver = 0;
@@ -562,7 +632,7 @@ static void *cmd_acquire_thread(void *args_in)
int fd, rv, i, j, disks_len, num_disks, empty_slots, opened;
int alloc_count = 0, add_count = 0, open_count = 0, acquire_count = 0;
int pos = 0, need_setowner = 0, pid_dead = 0;
- int new_tokens_count;
+ int new_tokens_count, migrate_result;
cl = &client[ca->ci_target];
fd = client[ca->ci_in].fd;
@@ -798,17 +868,31 @@ static void *cmd_acquire_thread(void *args_in)
for (i = 0; i < new_tokens_count; i++) {
token = new_tokens[i];
+
if (opt.flags & SANLK_FLG_INCOMING) {
- rv = receive_token(token, opt_str);
- } else {
- if (opt.flags & SANLK_FLG_REACQUIRE)
- reacquire_lver = token->prev_lver;
- rv = acquire_token(token, reacquire_lver, new_num_hosts);
+ migrate_result = 0;
+ memset(&leader, 0, sizeof(leader));
+ rv = parse_migrate_state(token, opt_str, &migrate_result, &leader);
+ if (rv < 0 || !migrate_result) {
+ log_errot(token, "cmd_acquire migrate state "
+ "bad %d len %zd", migrate_result,
+ strlen(opt_str));
+ goto fail_release;
+ }
+ } else if (opt.flags & SANLK_FLG_REACQUIRE) {
+ reacquire_lver = token->prev_lver;
}
+
+ if (opt.flags & SANLK_FLG_INCOMING)
+ rv = receive_token(token, migrate_result, &leader);
+ else
+ rv = acquire_token(token, reacquire_lver, new_num_hosts);
+
save_resource_leader(token);
if (rv < 0) {
- log_errot(token, "cmd_acquire lease %d", rv);
+ log_errot(token, "cmd_acquire lease %d flags %x",
+ rv, opt.flags);
goto fail_release;
}
acquire_count++;
@@ -1008,10 +1092,10 @@ static void *cmd_migrate_thread(void *args_in)
struct cmd_args *ca = args_in;
struct sm_header h;
struct token *token;
- struct token *tokens_reply;
+ char *reply_str;
struct client *cl;
uint64_t target_host_id = 0;
- int fd, rv, i, tokens_len, result = 0, total = 0, total2 = 0;
+ int fd, rv, i, reply_len, result = 0, total = 0, ret, pos;
cl = &client[ca->ci_target];
fd = client[ca->ci_in].fd;
@@ -1035,33 +1119,54 @@ static void *cmd_migrate_thread(void *args_in)
total++;
}
- tokens_len = total * sizeof(struct token);
- tokens_reply = malloc(tokens_len);
- if (!tokens_reply) {
+ reply_len = total * SANLK_STATE_MAXSTR;
+ reply_str = malloc(reply_len);
+ if (!reply_str) {
result = -ENOMEM;
total = 0;
goto reply;
}
- memset(tokens_reply, 0, tokens_len);
+ memset(reply_str, 0, reply_len);
+ pos = 0;
for (i = 0; i < SANLK_MAX_RESOURCES; i++) {
token = cl->tokens[i];
if (!token)
continue;
- rv = migrate_token(token, target_host_id);
- if (rv < 0 && !result)
- result = rv;
-
- /* TODO: would it be better to quit after one failure? */
-
- if (total2 == total) {
- log_error("cmd_migrate total %d changed", total);
- continue;
+ /* if migrate_token() fails it is not fatal, we can still
+ proceed with the migration; receive_token() will attempt
+ to set next_owner_id */
+
+ migrate_token(token, target_host_id);
+
+ ret = snprintf(reply_str + pos, reply_len - pos,
+ "lockspace_name=%s "
+ "resource_name=%s "
+ "token_id=%u "
+ "migrate_result=%d "
+ "leader.lver=%llu "
+ "leader.timestamp=%llu "
+ "leader.owner_id=%llu "
+ "leader.owner_generation=%llu "
+ "leader.next_owner_id=%llu ",
+ token->space_name,
+ token->resource_name,
+ token->token_id,
+ token->migrate_result,
+ (unsigned long long)token->leader.lver,
+ (unsigned long long)token->leader.timestamp,
+ (unsigned long long)token->leader.owner_id,
+ (unsigned long long)token->leader.owner_generation,
+ (unsigned long long)token->leader.next_owner_id);
+
+ if (ret >= reply_len - pos) {
+ log_errot(token, "cmd_migrate reply_str full");
+ result = -ENOMSG;
}
-
- memcpy(&tokens_reply[total2++], token, sizeof(struct token));
+ pos += ret;
}
+ reply_str[reply_len-1] = '\0';
reply:
/* TODO: for success I don't think we want to clear cmd_active
@@ -1071,16 +1176,17 @@ static void *cmd_migrate_thread(void *args_in)
if (result < 0)
set_cmd_active(ca->ci_target, 0);
- log_debug("cmd_migrate done %d", total);
-
- /* TODO: encode tokens_reply as a string to send back */
+ log_debug("cmd_migrate done result %d", result);
memcpy(&h, &ca->header, sizeof(struct sm_header));
- h.length = sizeof(h) + tokens_len;
+ h.length = sizeof(h) + strlen(reply_str)+1;
h.data = result;
send(fd, &h, sizeof(h), MSG_NOSIGNAL);
if (total)
- send(fd, tokens_reply, tokens_len, MSG_NOSIGNAL);
+ send(fd, reply_str, strlen(reply_str)+1, MSG_NOSIGNAL);
+
+ if (reply_str)
+ free(reply_str);
client_back(ca->ci_in, fd);
free(ca);
@@ -1302,17 +1408,24 @@ static int print_token_state(struct token *t, char *str)
snprintf(str, SANLK_STATE_MAXSTR-1,
"token_id=%u "
- "acquire_result=%d migrate_result=%d "
- "release_result=%d setowner_result=%d "
- "leader.lver=%llu leader.timestamp=%llu "
+ "acquire_result=%d "
+ "migrate_result=%d "
+ "release_result=%d "
+ "setowner_result=%d "
+ "leader.lver=%llu "
+ "leader.timestamp=%llu "
+ "leader.owner_id=%llu "
+ "leader.owner_generation=%llu "
"leader.next_owner_id=%llu",
+ t->token_id,
t->acquire_result,
t->migrate_result,
t->release_result,
t->setowner_result,
- t->token_id,
(unsigned long long)t->leader.lver,
(unsigned long long)t->leader.timestamp,
+ (unsigned long long)t->leader.owner_id,
+ (unsigned long long)t->leader.owner_generation,
(unsigned long long)t->leader.next_owner_id);
return strlen(str) + 1;
@@ -2491,6 +2604,7 @@ static int read_command_line(int argc, char *argv[])
static int do_client(void)
{
struct sanlk_options *opt = NULL;
+ char *state = NULL;
int fd, rv = 0;
switch (com.action) {
@@ -2568,7 +2682,9 @@ static int do_client(void)
log_tool("migrate %d to host_id %llu",
com.pid, (unsigned long long)com.target_host_id);
- rv = sanlock_migrate(-1, com.pid, com.target_host_id);
+ rv = sanlock_migrate(-1, com.pid, com.target_host_id, &state);
+ if (state)
+ printf("migrate state: %s\n", state);
break;
case ACT_SETOWNER:
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 48c0297..96e77cc 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -805,7 +805,8 @@ int paxos_lease_migrate(struct token *token,
log_errot(token, "paxos_migrate num_hosts %llu target_host_id %llu",
(unsigned long long)new_leader.num_hosts,
(unsigned long long)target_host_id);
- return DP_BAD_NUMHOSTS;
+ error = DP_BAD_NUMHOSTS;
+ goto out;
}
new_leader.next_owner_id = target_host_id;
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index 41c45a6..c07b414 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -26,7 +26,7 @@ int sanlock_acquire(int sock, int pid, int res_count,
struct sanlk_options *opt_in);
int sanlock_release(int sock, int pid, int res_count,
struct sanlk_resource *res_args[]);
-int sanlock_migrate(int sock, int pid, uint64_t target_host_id);
+int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char **state);
int sanlock_setowner(int sock, int pid);
#endif
diff --git a/src/token_manager.c b/src/token_manager.c
index c7b3481..5d09ef0 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -209,9 +209,11 @@ int acquire_token(struct token *token, uint64_t reacquire_lver,
return rv;
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
- return 1;
+ return rv; /* DP_OK */
}
+/* return < 0 on error, 1 on success */
+
int setowner_token(struct token *token)
{
struct leader_record leader_ret;
@@ -241,7 +243,7 @@ int setowner_token(struct token *token)
return rv;
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
- return 1;
+ return rv; /* DP_OK */
}
/* return < 0 on error, 1 on success */
@@ -269,10 +271,11 @@ int release_token(struct token *token)
return rv;
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
- return 1;
+ return rv; /* DP_OK */
}
/* migration source: writes leader_record.next_owner_id = target_host_id */
+/* return < 0 on error, 1 on success */
int migrate_token(struct token *token, uint64_t target_host_id)
{
@@ -289,66 +292,78 @@ int migrate_token(struct token *token, uint64_t target_host_id)
return rv;
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
- return 1;
+ return rv; /* DP_OK */
}
-/* migration target: verifies that the source wrote us as the next_owner_id */
-int receive_token(struct token *token, char *opt_str GNUC_UNUSED)
+/*
+ * migration target: verifies that the source wrote us as the next_owner_id
+ *
+ * When everything is working correctly, we just verify here that
+ * fields in leader_ret match what we see in leader_src
+ * (created from opt_str which was returned by sanlock_migrate()
+ * on the source).
+ *
+ * If we can't read the leader, return an error, and the migration
+ * needs to be aborted.
+ */
+/* return < 0 on error, 1 on success */
+
+int receive_token(struct token *token, int migrate_result,
+ struct leader_record *leader_src)
{
- struct leader_record leader_ret;
+ struct leader_record leader_read;
int rv;
- rv = paxos_lease_leader_read(token, &leader_ret);
+ rv = paxos_lease_leader_read(token, &leader_read);
if (rv < 0)
return rv;
- /* TODO: opt_str will be an encoding of a bunch of lease state
- * (full leader_record?) from the migration source. */
-#if 0
- /* token->leader is a copy of the leader_record that the source wrote
- in migrate_token(); it should not have changed between then and when
- we read it here. */
-
- if (memcmp(&token->leader, &leader_ret, sizeof(struct leader_record))) {
- log_errot(token, "receive leader_read mismatch");
- return -1;
- }
-#endif
-
- /* token->migrate_result is a copy of the paxos_lease_migrate() return
- value on the source; if it was successful on the source (1), then
- next_owner_id should equal our_host_id; if the source could not
- write to the lease, then next_owner_id should be 0, and we'll write
- next_owner_id = our_host_id for it. */
-
- if (token->migrate_result == 1) {
- if (leader_ret.next_owner_id != token->host_id) {
- log_errot(token, "receive wrong next_owner %llu",
- (unsigned long long)leader_ret.next_owner_id);
+ if (migrate_result == DP_OK) {
+ if (leader_src->next_owner_id == token->host_id &&
+ leader_read.next_owner_id == token->host_id &&
+ leader_src->lver == leader_read.lver &&
+ leader_src->timestamp == leader_read.timestamp) {
+ log_token(token, "receive_token all match");
+ return DP_OK;
+ } else {
+ log_errot(token, "receive_token mismatch "
+ "next_owner %llu %llu %llu "
+ "lver %llu %llu "
+ "timestamp %llu %llu",
+ (unsigned long long)token->host_id,
+ (unsigned long long)leader_src->next_owner_id,
+ (unsigned long long)leader_read.next_owner_id,
+ (unsigned long long)leader_src->lver,
+ (unsigned long long)leader_read.lver,
+ (unsigned long long)leader_src->timestamp,
+ (unsigned long long)leader_read.timestamp);
return -1;
}
- goto out;
}
- /* source failed to migrate this lease, so next_owner_id should still
- be zero */
-
- if (leader_ret.next_owner_id != 0) {
- log_errot(token, "receive expect zero next_owner %llu",
- (unsigned long long)leader_ret.next_owner_id);
+ /* migrate_result < 0, source could not write next_owner_id, so it
+ should still be 0 */
+
+ if (leader_src->owner_id != leader_read.owner_id ||
+ leader_src->timestamp != leader_read.timestamp ||
+ leader_read.next_owner_id != 0) {
+
+ log_errot(token, "receive_token mismatch migrate_result %d "
+ "next_owner %llu owner %llu %llu timestamp %llu %llu",
+ migrate_result,
+ (unsigned long long)leader_read.next_owner_id,
+ (unsigned long long)leader_src->owner_id,
+ (unsigned long long)leader_read.owner_id,
+ (unsigned long long)leader_src->timestamp,
+ (unsigned long long)leader_read.timestamp);
return -1;
}
- /* TODO: not sure about this */
/* since the source failed to write next_owner_id to be us, we do it
instead */
return migrate_token(token, token->host_id);
-
- out:
- memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
- return 1;
}
int create_token(int num_disks, struct token **token_out)
diff --git a/src/token_manager.h b/src/token_manager.h
index 3d8e6e3..7119936 100644
--- a/src/token_manager.h
+++ b/src/token_manager.h
@@ -13,7 +13,8 @@ int acquire_token(struct token *token, uint64_t reacquire_lver,
int new_num_hosts);
int release_token(struct token *token);
int migrate_token(struct token *token, uint64_t target_host_id);
-int receive_token(struct token *token, char *opt_str);
+int receive_token(struct token *token, int migrate_result,
+ struct leader_record *leader_src);
int setowner_token(struct token *token);
int create_token(int num_disks, struct token **token_out);