ldap/servers/plugins/replication/repl5_inc_protocol.c | 38 ++++++++++++++++++
ldap/servers/plugins/replication/repl5_ruv.c | 32 +++++++++++++++
2 files changed, 70 insertions(+)
New commits:
commit 0c194eb79aa381bf4e4cd05082956218512115a4
Author: Rich Megginson <rmeggins(a)redhat.com>
Date: Wed May 15 19:39:24 2013 -0600
Ticket #47362 - ipa upgrade selinuxusermap data not replicating
https://fedorahosted.org/389/ticket/47362
Reviewed by: nhosoi (Thanks!)
Branch: 389-ds-base-1.2.11
Fix Description: Setting nsslapd-port to 0 causes the
replica purl to be "ldap://hostname:0". At startup, the MMR code looks to
see if this replica purl is in the RUV, by doing a string comparison of this
purl with the ruv replica purl. If it is not there, the MMR code wipes out
this ruv element. Later the code in replica_check_for_data_reload() uses
this RUV to see if it needs to reinit the changelog. Since the RUV doesn't
match the changelog RUV any more, the changelog is erased, which erases
any changes that were made in the meantime. The missing RUV element causes
the supplier to attempt to send over changes which may already exist on the
consumer. If one of these is an ADD, the urp code will correctly flag this
as an attempt to add an entry that already exists, and will turn this into
a replConflict entry. A subsequent attempt to replicate the same ADD will
cause an error in the urp code which will cause it to return err=53.
Replication will then become stuck on this update - it will keep trying to
send it over and over again, and will not be able to proceed.
The only workaround is to reinitialize the replica, which brings the database
RUV and changelog back into a consistent state.
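The fix: when the port is 0, a RUV element whose purl contains the local
hostname is accepted as the local replica's element, whatever port number
that purl already carries, so the element is no longer wiped out.
I've also added extra RUV debugging output when using the REPL log level.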
Platforms tested: RHEL6 x86_64
Flag Day: no
Doc impact: no
diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c
b/ldap/servers/plugins/replication/repl5_inc_protocol.c
index 743be57..82b121c 100644
--- a/ldap/servers/plugins/replication/repl5_inc_protocol.c
+++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c
@@ -1931,6 +1931,44 @@ repl5_inc_stop(Private_Repl_Protocol *prp)
agmt_get_long_name(prp->agmt),
PR_IntervalToSeconds(now-start));
}
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ if (NULL == prp->replica_object) {
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+ "%s: repl5_inc_stop: protocol replica_object is NULL\n",
+ agmt_get_long_name(prp->agmt));
+ } else {
+ Replica *replica;
+ object_acquire(prp->replica_object);
+ replica = object_get_data(prp->replica_object);
+ if (NULL == replica) {
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+ "%s: repl5_inc_stop: replica is NULL\n",
+ agmt_get_long_name(prp->agmt));
+ } else {
+ Object *ruv_obj = replica_get_ruv(replica);
+ if (NULL == ruv_obj) {
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+ "%s: repl5_inc_stop: ruv_obj is NULL\n",
+ agmt_get_long_name(prp->agmt));
+ } else {
+ RUV *ruv;
+ object_acquire(ruv_obj);
+ ruv = (RUV*)object_get_data (ruv_obj);
+ if (NULL == ruv) {
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+ "%s: repl5_inc_stop: ruv is NULL\n",
+ agmt_get_long_name(prp->agmt));
+
+ } else {
+ ruv_dump(ruv, "Database RUV", NULL);
+ }
+ object_release(ruv_obj);
+ }
+ }
+ object_release(prp->replica_object);
+ }
+
+ }
return return_value;
}
diff --git a/ldap/servers/plugins/replication/repl5_ruv.c
b/ldap/servers/plugins/replication/repl5_ruv.c
index b52dd49..8fbd89c 100644
--- a/ldap/servers/plugins/replication/repl5_ruv.c
+++ b/ldap/servers/plugins/replication/repl5_ruv.c
@@ -208,6 +208,9 @@ ruv_init_from_slapi_attr_and_check_purl(Slapi_Attr *attr, RUV **ruv,
ReplicaId *
Slapi_Value *value;
const struct berval *bval;
const char *purl = NULL;
+ char *localhost = get_localhost_DNS();
+ size_t localhostlen = localhost ? strlen(localhost) : 0;
+ int port = config_get_port();
return_value = RUV_SUCCESS;
@@ -236,16 +239,30 @@ ruv_init_from_slapi_attr_and_check_purl(Slapi_Attr *attr, RUV **ruv,
ReplicaId *
RUVElement *ruve = get_ruvelement_from_berval(bval);
if (NULL != ruve)
{
+ char *ptr;
/* Is the local purl already in the ruv ? */
if ( (*contain_purl==0) && ruve->replica_purl && purl
&& (strncmp(ruve->replica_purl, purl, strlen(purl))==0) )
{
*contain_purl = ruve->rid;
}
+ /* ticket 47362 - nsslapd-port: 0 causes replication to break */
+ else if ((*contain_purl==0) && ruve->replica_purl && (port ==
0) && localhost &&
+ (ptr = strstr(ruve->replica_purl, localhost)) && (ptr !=
ruve->replica_purl) &&
+ (*(ptr - 1) == '/') && (*(ptr+localhostlen) == ':'))
+ {
+ /* same hostname, but port number may have been temporarily set to 0
+ * just allow it with whatever port number is already in the replica_purl
+ * do not reset the port number, do not tell the configure_ruv code that there
+ * is anything wrong
+ */
+ *contain_purl = ruve->rid;
+ }
dl_add ((*ruv)->elements, ruve);
}
}
}
}
+ slapi_ch_free_string(&localhost);
}
}
return return_value;
@@ -1279,6 +1296,11 @@ ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV
*ruv2, const ch
const char *ruvbnames[] = {ruv2name, ruv1name};
const int nitems = 2;
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ ruv_dump(ruv1, (char *)ruv1name, NULL);
+ ruv_dump(ruv2, (char *)ruv2name, NULL);
+ }
+
/* compare replica generations first */
if (ruv1->replGen == NULL || ruv2->replGen == NULL) {
slapi_log_error(loglevel, repl_plugin_name,
@@ -1335,7 +1357,17 @@ ruv_compare_ruv(const RUV *ruv1, const char *ruv1name, const RUV
*ruv2, const ch
"than the max CSN [%s] from RUV [%s] for element
[%s]\n",
csnstrb, ruvbname, csnstra, ruvaname, ruvelem);
rc = RUV_COMP_CSN_DIFFERS;
+ } else {
+ csn_as_string(replicaa->csn, PR_FALSE, csnstra);
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+ "ruv_compare_ruv: the max CSN [%s] from RUV [%s]
is less than "
+ "or equal to the max CSN [%s] from RUV [%s] for
element [%s]\n",
+ csnstrb, ruvbname, csnstra, ruvaname, ruvelem);
}
+ } else {
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+ "ruv_compare_ruv: RUV [%s] has an empty
CSN\n",
+ ruvbname);
}
}
}