src/host_id.c | 36 ++++++++++++++++++++++++++++++++++++
src/log.c | 2 +-
src/main.c | 2 +-
src/sanlock_internal.h | 1 +
src/sanlock_rv.h | 3 ++-
5 files changed, 41 insertions(+), 3 deletions(-)
New commits:
commit d852c084b7d46915dab20ab641945a6aa2c166b7
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Jun 30 12:37:58 2011 -0500
sanlock: fail host_id when corrupted
If we read our host_id for renewal and find it's corrupted,
there's no point in retrying for a while; we should treat it
as failed right away.
diff --git a/src/host_id.c b/src/host_id.c
index b2c64c2..142f61b 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -53,6 +53,7 @@ int print_space_state(struct space *sp, char *str)
"host_generation=%llu "
"space_dead=%d "
"killing_pids=%d "
+ "corrupt_result=%d "
"acquire_last_result=%d "
"renewal_last_result=%d "
"acquire_last_attempt=%llu "
@@ -63,6 +64,7 @@ int print_space_state(struct space *sp, char *str)
(unsigned long long)sp->host_generation,
sp->space_dead,
sp->killing_pids,
+ sp->lease_status.corrupt_result,
sp->lease_status.acquire_last_result,
sp->lease_status.renewal_last_result,
(unsigned long long)sp->lease_status.acquire_last_attempt,
@@ -171,12 +173,19 @@ int host_id_disk_info(char *name, struct sync_disk *disk)
int host_id_check(struct task *task, struct space *sp)
{
uint64_t last_success;
+ int corrupt_result;
int gap;
pthread_mutex_lock(&sp->mutex);
last_success = sp->lease_status.renewal_last_success;
+ corrupt_result = sp->lease_status.corrupt_result;
pthread_mutex_unlock(&sp->mutex);
+ if (corrupt_result) {
+ log_erros(sp, "host_id_check corrupt %d", corrupt_result);
+ return 0;
+ }
+
gap = time(NULL) - last_success;
if (gap >= task->id_renewal_fail_seconds) {
@@ -197,6 +206,27 @@ int host_id_check(struct task *task, struct space *sp)
return 1;
}
+/* Return result itself if it is one of the errors listed below, else 0.
+ A listed error means our delta lease has been corrupted, overwritten
+ or reinitialized out from under us, so we should stop using it
+ immediately; there is no point in retrying the renewal. */
+
+static int corrupt_result(int result)
+{
+ switch (result) {
+ case SANLK_RENEW_OWNER:
+ case SANLK_RENEW_DIFF:
+ case SANLK_LEADER_MAGIC:
+ case SANLK_LEADER_VERSION:
+ case SANLK_LEADER_SECTORSIZE:
+ case SANLK_LEADER_LOCKSPACE:
+ case SANLK_LEADER_CHECKSUM:
+ return result;
+ default:
+ return 0;
+ }
+}
+
static void *lockspace_thread(void *arg_in)
{
struct task task;
@@ -316,6 +346,12 @@ static void *lockspace_thread(void *arg_in)
log_erros(sp, "renewal error %d delta_length %d last_success %llu",
result, delta_length,
(unsigned long long)sp->lease_status.renewal_last_success);
+
+ if (!sp->lease_status.corrupt_result) {
+ sp->lease_status.corrupt_result = corrupt_result(result);
+ log_erros(sp, "renewal error %d is corruption",
+ sp->lease_status.corrupt_result);
+ }
}
stop = sp->thread_stop;
pthread_mutex_unlock(&sp->mutex);
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index c38ebf8..cb7cf60 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -94,6 +94,7 @@ struct token {
};
struct lease_status {
+ int corrupt_result;
int acquire_last_result;
int renewal_last_result;
uint64_t acquire_last_attempt;
diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h
index 9683477..8133efd 100644
--- a/src/sanlock_rv.h
+++ b/src/sanlock_rv.h
@@ -22,7 +22,8 @@
#define SANLK_DBLOCK_MBAL -213
#define SANLK_DBLOCK_CHECKSUM -214
-/* verify_leader, leader_read, leader_write (paxos or delta) */
+/* verify_leader, leader_read, leader_write (paxos or delta)
+ (when adding to list, check if it should be a corrupt_result()) */
#define SANLK_LEADER_READ -220
#define SANLK_LEADER_WRITE -221
commit c8130f398bdb1aa6bab088e1c9a4e5d745e0e9b7
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Jun 28 17:33:31 2011 -0500
sanlock: change a log_error to log_debug
for a non-error case that we shouldn't need to worry about
diff --git a/src/main.c b/src/main.c
index 5918c6c..70e2630 100644
--- a/src/main.c
+++ b/src/main.c
@@ -652,7 +652,7 @@ static void client_recv_all(int ci, struct sm_header *h_recv, int pos)
break;
}
- log_error("recv_all %d,%d,%d pos %d rv %d error %d rem %d total %d",
+ log_debug("recv_all %d,%d,%d pos %d rv %d error %d rem %d total %d",
ci, client[ci].fd, client[ci].pid, pos, rv, error, rem, total);
}
commit 06674b31739d84bf6bc8a484552f4e1de85cab90
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Jun 28 17:31:39 2011 -0500
sanlock: increase log line to 512 bytes
diff --git a/src/log.c b/src/log.c
index 884507e..e753ee6 100644
--- a/src/log.c
+++ b/src/log.c
@@ -25,7 +25,7 @@
#include "sanlock_internal.h"
#include "log.h"
-#define LOG_STR_LEN 256
+#define LOG_STR_LEN 512
static char log_str[LOG_STR_LEN];
static pthread_t thread_handle;