src/paxos_lease.c | 22 ++++++++++++++++++++++
src/resource.c | 3 +++
2 files changed, 25 insertions(+)
New commits:
commit fe03cafde177757ffe3c9e24d269c4ad5328f538
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Feb 24 16:25:51 2012 -0600
sanlock: fix problem in paxos
The special case in the comment should be recognized
and handled properly.
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 9fa62b1..d1243cb 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -1220,6 +1220,11 @@ int paxos_lease_acquire(struct task *task,
* current host_id and generation?
*/
+ /* This next_lver assignment is based on the original cur_leader, not a
+ re-reading of the leader here, i.e. we cannot just re-read the leader
+ here, and make next_lver one more than that. This is because another
+ node may have made us the owner of next_lver as it is now. */
+
next_lver = cur_leader.lver + 1;
if (!our_dblock.mbal)
@@ -1271,6 +1276,23 @@ int paxos_lease_acquire(struct task *task,
goto out;
}
+ if (tmp_leader.lver > next_lver) {
+ /*
+ * A case where this was observed: for next_lver 65 we abort1, and delay.
+ * While sleeping, the lease v65 (which was acquired during our abort1) is
+ * released and then reacquired as v66. When we goto retry_ballot, our
+ * next_lver is 65, but the current lver on disk is 66, causing us to
+ * fail in the larger1 check.
+ */
+ log_token(token, "paxos_acquire stale next_lver %llu now %llu owner %llu %llu %llu",
+ (unsigned long long)next_lver,
+ (unsigned long long)tmp_leader.lver,
+ (unsigned long long)tmp_leader.owner_id,
+ (unsigned long long)tmp_leader.owner_generation,
+ (unsigned long long)tmp_leader.timestamp);
+ goto restart;
+ }
+
error = run_ballot(task, token, cur_leader.num_hosts, next_lver, our_mbal,
&dblock);
commit 74383a19e5a1254fc661630b2c6bc1d9b3b098d6
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Feb 24 15:25:13 2012 -0600
sanlock: fix missing close_disks
which was leaking fd's
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/resource.c b/src/resource.c
index 0df17a4..a76b88d 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -349,17 +349,20 @@ static int _release_token(struct task *task, struct token *token, int opened,
if ((r->flags & R_SHARED) && !last_token) {
/* will release when final sh token is released */
log_token(token, "release_token more shared");
+ close_disks(token->disks, token->r.num_disks);
return SANLK_OK;
}
if (!last_token) {
/* should never happen */
log_errot(token, "release_token exclusive not last");
+ close_disks(token->disks, token->r.num_disks);
return SANLK_ERROR;
}
if (!lver) {
/* never acquired on disk so no need to release on disk */
+ close_disks(token->disks, token->r.num_disks);
rv = SANLK_OK;
goto out;
}
Show replies by date