ldap/servers/slapd/back-ldbm/back-ldbm.h | 1
ldap/servers/slapd/back-ldbm/cache.c | 6 ++-
ldap/servers/slapd/back-ldbm/ldbm_delete.c | 46 ++++++++++++++++++++---------
3 files changed, 37 insertions(+), 16 deletions(-)
New commits:
commit 844d09d0700fca7aa64a2453d99e7c274a76e784
Author: Mark Reynolds <mreynolds(a)redhat.com>
Date: Tue Apr 8 14:39:47 2014 -0400
Ticket 47771 - Performing deletes during tombstone purging results in operation
errors
Bug Description: An operations error can occur when deleting entry while
tombstone purging is happening. The error occurs when it
tries the lock the parent entry, but the parent entry was
replaced in the cache before it could be locked.
Fix Description: Return a special error code when cache_lock_entry fails because
the entry was marked as deleted. Then try to grab the entry
again and lock it.
https://fedorahosted.org/389/ticket/47771
Reviewed by: rmeggins & nhosoi(Thanks!!)
diff --git a/ldap/servers/slapd/back-ldbm/back-ldbm.h
b/ldap/servers/slapd/back-ldbm/back-ldbm.h
index 26e081c..0834a1c 100644
--- a/ldap/servers/slapd/back-ldbm/back-ldbm.h
+++ b/ldap/servers/slapd/back-ldbm/back-ldbm.h
@@ -216,6 +216,7 @@ typedef unsigned short u_int16_t;
#define DEFAULT_IMPORT_INDEX_BUFFER_SIZE 0
#define SUBLEN 3
#define LDBM_CACHE_RETRY_COUNT 1000 /* Number of times we re-try a cache operation */
+#define RETRY_CACHE_LOCK 2 /* error code to signal a retry of the cache lock */
#define IDL_FETCH_RETRY_COUNT 5 /* Number of times we re-try idl_fetch if it returns
deadlock */
#define IMPORT_SUBCOUNT_HASHTABLE_SIZE 500 /* Number of buckets in hash used to
accumulate subcount for broody parents */
diff --git a/ldap/servers/slapd/back-ldbm/cache.c b/ldap/servers/slapd/back-ldbm/cache.c
index a73ae6a..7f3f025 100644
--- a/ldap/servers/slapd/back-ldbm/cache.c
+++ b/ldap/servers/slapd/back-ldbm/cache.c
@@ -1483,7 +1483,9 @@ void cache_unlock(struct cache *cache)
/* locks an entry so that it can be modified (you should have gotten the
* entry via cache_find_*).
- * returns 0 on success, 1 if the entry is scheduled for deletion.
+ * returns 0 on success,
+ * returns 1 if the entry lock could not be created
+ * returns 2 (RETRY_CACHE_LOCK) if the entry is scheduled for deletion.
*/
int cache_lock_entry(struct cache *cache, struct backentry *e)
{
@@ -1515,7 +1517,7 @@ int cache_lock_entry(struct cache *cache, struct backentry *e)
PR_Unlock(cache->c_mutex);
PR_ExitMonitor(e->ep_mutexp);
LOG("<= cache_lock_entry (DELETED)\n", 0, 0, 0);
- return 1;
+ return RETRY_CACHE_LOCK;
}
PR_Unlock(cache->c_mutex);
diff --git a/ldap/servers/slapd/back-ldbm/ldbm_delete.c
b/ldap/servers/slapd/back-ldbm/ldbm_delete.c
index 2b349f6..a858fa0 100644
--- a/ldap/servers/slapd/back-ldbm/ldbm_delete.c
+++ b/ldap/servers/slapd/back-ldbm/ldbm_delete.c
@@ -98,7 +98,6 @@ ldbm_back_delete( Slapi_PBlock *pb )
int opreturn = 0;
int free_delete_existing_entry = 0;
int not_an_error = 0;
- int updated_num = 0;
slapi_pblock_get( pb, SLAPI_BACKEND, &be);
slapi_pblock_get( pb, SLAPI_PLUGIN_PRIVATE, &li );
@@ -459,14 +458,37 @@ ldbm_back_delete( Slapi_PBlock *pb )
* (find_entry2modify_only_ext), a wrong parent could be found,
* and numsubordinate count could get confused.
*/
- ID pid = (ID)strtol(pid_str, (char **)NULL, 10);
+ ID pid;
+ int cache_retry_count = 0;
+ int cache_retry = 0;
+
+ pid = (ID)strtol(pid_str, (char **)NULL, 10);
slapi_ch_free_string(&pid_str);
- parent = id2entry(be, pid ,NULL, &retval);
- if (parent && cache_lock_entry(&inst->inst_cache, parent)) {
- /* Failed to obtain parent entry's entry lock */
- CACHE_RETURN(&(inst->inst_cache), &parent);
- retval = -1;
- goto error_return;
+
+ /*
+ * Its possible that the parent entry retrieved from the cache in id2entry
+ * could be removed before we lock it, because tombstone purging updated/replaced
+ * the parent. If we fail to lock the entry, just try again.
+ */
+ while(1){
+ parent = id2entry(be, pid ,NULL, &retval);
+ if (parent && (cache_retry = cache_lock_entry(&inst->inst_cache,
parent))) {
+ /* Failed to obtain parent entry's entry lock */
+ if(cache_retry == RETRY_CACHE_LOCK &&
+ cache_retry_count < LDBM_CACHE_RETRY_COUNT)
+ {
+ /* try again */
+ DS_Sleep(PR_MillisecondsToInterval(100));
+ cache_retry_count++;
+ continue;
+ }
+ retval = -1;
+ CACHE_RETURN(&(inst->inst_cache), &parent);
+ goto error_return;
+ } else {
+ /* entry locked, move on */
+ break;
+ }
}
}
if (NULL == parent) {
@@ -502,8 +524,7 @@ ldbm_back_delete( Slapi_PBlock *pb )
retval = -1;
goto error_return;
}
- /* MARK */
- updated_num = 1;
+
/*
* Replication urp_post_delete will delete the parent entry
* if it is a glue entry without any more children.
@@ -519,7 +540,6 @@ ldbm_back_delete( Slapi_PBlock *pb )
}
}
}
- slapi_sdn_done(&parentsdn);
if(create_tombstone_entry)
{
@@ -1291,14 +1311,12 @@ diskfull_return:
slapi_ch_free((void**)&errbuf);
slapi_sdn_done(&nscpEntrySDN);
slapi_ch_free_string(&e_uniqueid);
+ slapi_sdn_done(&parentsdn);
if (pb->pb_conn)
{
slapi_log_error (SLAPI_LOG_TRACE, "ldbm_back_delete", "leave
conn=%" NSPRIu64 " op=%d\n",
(long long unsigned int)pb->pb_conn->c_connid, operation->o_opid);
}
- if(!updated_num && ldap_result_code != 32){
- slapi_log_error (SLAPI_LOG_FATAL,"MARK", "Failed to update
numsubordinates\n");
- }
return rc;
}