January 2016 - 389-commits - Fedora Mailing-Lists

2 commits - configure configure.ac ldap/ldif ldap/schema ldap/servers m4/systemd.m4 Makefile.am Makefile.in

by William Brown

Makefile.am | 13 +- Makefile.in | 26 ++-- configure | 96 +++++++++++++++ configure.ac | 4 ldap/ldif/template-dse.ldif.in | 1 ldap/schema/01core389.ldif | 1 ldap/servers/slapd/auditlog.c | 9 - ldap/servers/slapd/libglobs.c | 12 + ldap/servers/slapd/log.c | 256 +++++++++++++++++++++++++++++++++++----- ldap/servers/slapd/log.h | 1 ldap/servers/slapd/proto-slap.h | 8 - ldap/servers/slapd/slap.h | 14 ++ m4/systemd.m4 | 36 +++++ 13 files changed, 427 insertions(+), 50 deletions(-) New commits: commit aa206345f4fa29e16c2e4118d26b2fb4c93ddd51 Author: William Brown <firstyear(a)redhat.com> Date: Mon Dec 7 14:26:08 2015 +1030 Ticket 47968 - RFE send logs to journald Bug Description: Instead of writing the debug logs directly to the disk, it is possible to send them to journald, on platforms where journald is available. Fix Description: * This patch enables a configuration for selecting log backends, and defines the basic wrappers to route log entries based on the backend selection. The backends for syslog and journald have had identities reserved. * This enables the configure option --with-systemd which will allow us to link to the daemon and journald APIs from within slapd. * Allows all backends to log simultaneously. Configuration is a comma-separated set of values, e.g. "dirsrv-log,syslog,journald". Unknown or invalid values are ignored. At least one valid log backend must be in the list. https://fedorahosted.org/389/ticket/47968 Author: wibrown Review by: mreynolds (Thanks!) 
diff --git a/Makefile.am b/Makefile.am index 213922f..d90fb27 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,6 +13,7 @@ NQBUILDNUM := $(subst \,,$(subst $(QUOTE),,$(BUILDNUM))) DEBUG_DEFINES = @debug_defs@ GCCSEC_DEFINES = @gccsec_defs@ ASAN_DEFINES = @asan_defs@ +SYSTEMD_DEFINES = @systemd_defs@ # the -U undefines these symbols - should use the corresponding DS_ ones instead - see configure.ac DS_DEFINES = -DBUILD_NUM=$(BUILDNUM) -DVENDOR="\"$(vendor)\"" -DBRAND="\"$(brand)\"" -DCAPBRAND="\"$(capbrand)\"" \ -UPACKAGE_VERSION -UPACKAGE_TARNAME -UPACKAGE_STRING -UPACKAGE_BUGREPORT @@ -40,8 +41,8 @@ PATH_DEFINES = -DLOCALSTATEDIR="\"$(localstatedir)\"" -DSYSCONFDIR="\"$(sysconfd -DDATADIR="\"$(datadir)\"" -DDOCDIR="\"$(docdir)\"" \ -DSBINDIR="\"$(sbindir)\"" -DPLUGINDIR="\"$(serverplugindir)\"" -DTEMPLATEDIR="\"$(sampledatadir)\"" -AM_CPPFLAGS = $(DEBUG_DEFINES) $(GCCSEC_DEFINES) $(ASAN_DEFINES) $(DS_DEFINES) $(DS_INCLUDES) $(PATH_DEFINES) -PLUGIN_CPPFLAGS = $(AM_CPPFLAGS) @openldap_inc@ @ldapsdk_inc@ @nss_inc@ @nspr_inc@ +AM_CPPFLAGS = $(DEBUG_DEFINES) $(GCCSEC_DEFINES) $(ASAN_DEFINES) $(DS_DEFINES) $(DS_INCLUDES) $(PATH_DEFINES) $(SYSTEMD_DEFINES) +PLUGIN_CPPFLAGS = $(AM_CPPFLAGS) @openldap_inc@ @ldapsdk_inc@ @nss_inc@ @nspr_inc@ @systemd_inc@ # We need to make sure that libpthread is linked before libc on HP-UX. 
if HPUX AM_LDFLAGS = -lpthread @@ -77,6 +78,8 @@ NETSNMP_LINK = @netsnmp_lib@ @netsnmp_link@ PAM_LINK = -lpam KERBEROS_LINK = $(kerberos_lib) DLOPEN_LINK = -ldl +SYSTEMD_LINK = @systemd_lib@ + LIBSOCKET=@LIBSOCKET@ LIBNSL=@LIBNSL@ @@ -1028,7 +1031,7 @@ libslapd_la_CPPFLAGS = $(PLUGIN_CPPFLAGS) @sasl_inc@ @db_inc@ @svrcore_inc@ @ker if SPARC libslapd_la_SOURCES += ldap/servers/slapd/slapi_counter_sunos_sparcv9.S endif -libslapd_la_LIBADD = $(LDAPSDK_LINK) $(SASL_LINK) $(NUNC_STANS_LINK) $(SVRCORE_LINK) $(NSS_LINK) $(NSPR_LINK) $(KERBEROS_LINK) $(PCRE_LINK) $(THREADLIB) +libslapd_la_LIBADD = $(LDAPSDK_LINK) $(SASL_LINK) $(NUNC_STANS_LINK) $(SVRCORE_LINK) $(NSS_LINK) $(NSPR_LINK) $(KERBEROS_LINK) $(PCRE_LINK) $(THREADLIB) $(SYSTEMD_LINK) #//////////////////////////////////////////////////////////////// @@ -1706,9 +1709,9 @@ ns_slapd_SOURCES = ldap/servers/slapd/abandon.c \ $(GETSOCKETPEER) ns_slapd_CPPFLAGS = $(AM_CPPFLAGS) @sasl_inc@ @openldap_inc@ @ldapsdk_inc@ @nss_inc@ \ - @nspr_inc@ @svrcore_inc@ + @nspr_inc@ @svrcore_inc@ @systemd_inc@ ns_slapd_LDADD = libslapd.la libldaputil.a $(LDAPSDK_LINK) $(NSS_LINK) $(DLOPEN_LINK) \ - $(NSPR_LINK) $(SASL_LINK) $(SVRCORE_LINK) $(LIBNSL) $(LIBSOCKET) $(THREADLIB) + $(NSPR_LINK) $(SASL_LINK) $(SVRCORE_LINK) $(LIBNSL) $(LIBSOCKET) $(THREADLIB) $(SYSTEMD_LINK) # We need to link ns-slapd with the C++ compiler on HP-UX since we load # some C++ shared libraries (such as icu). 
if HPUX diff --git a/Makefile.in b/Makefile.in index 745d6a8..2ad44c2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -110,7 +110,7 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/icu.m4 $(top_srcdir)/m4/netsnmp.m4 \ $(top_srcdir)/m4/kerberos.m4 $(top_srcdir)/m4/pcre.m4 \ $(top_srcdir)/m4/selinux.m4 $(top_srcdir)/m4/nunc-stans.m4 \ - $(top_srcdir)/configure.ac + $(top_srcdir)/m4/systemd.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ @@ -713,7 +713,7 @@ libslapd_la_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_3) $(am__DEPENDENCIES_1) \ - $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__libslapd_la_SOURCES_DIST = ldap/servers/slapd/add.c \ ldap/servers/slapd/agtmmap.c ldap/servers/slapd/apibroker.c \ ldap/servers/slapd/attr.c ldap/servers/slapd/attrlist.c \ @@ -1003,7 +1003,8 @@ am_migratecred_bin_OBJECTS = ldap/servers/slapd/tools/migratecred_bin-migratecre migratecred_bin_OBJECTS = $(am_migratecred_bin_OBJECTS) migratecred_bin_DEPENDENCIES = libslapd.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ - $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) am_mmldif_bin_OBJECTS = \ ldap/servers/slapd/tools/mmldif_bin-mmldif.$(OBJEXT) mmldif_bin_OBJECTS = $(am_mmldif_bin_OBJECTS) @@ -1066,7 +1067,7 @@ ns_slapd_DEPENDENCIES = libslapd.la libldaputil.a \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ - $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_pwdhash_bin_OBJECTS = \ ldap/servers/slapd/tools/pwdhash_bin-pwenc.$(OBJEXT) pwdhash_bin_OBJECTS = 
$(am_pwdhash_bin_OBJECTS) @@ -1482,6 +1483,9 @@ sttyexec = @sttyexec@ svrcore_inc = @svrcore_inc@ svrcore_lib = @svrcore_lib@ sysconfdir = @sysconfdir@ +systemd_defs = @systemd_defs@ +systemd_inc = @systemd_inc@ +systemd_lib = @systemd_lib@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ @@ -1510,6 +1514,7 @@ NQBUILDNUM := $(subst \,,$(subst $(QUOTE),,$(BUILDNUM))) DEBUG_DEFINES = @debug_defs@ GCCSEC_DEFINES = @gccsec_defs@ ASAN_DEFINES = @asan_defs@ +SYSTEMD_DEFINES = @systemd_defs@ # the -U undefines these symbols - should use the corresponding DS_ ones instead - see configure.ac DS_DEFINES = -DBUILD_NUM=$(BUILDNUM) -DVENDOR="\"$(vendor)\"" -DBRAND="\"$(brand)\"" -DCAPBRAND="\"$(capbrand)\"" \ -UPACKAGE_VERSION -UPACKAGE_TARNAME -UPACKAGE_STRING -UPACKAGE_BUGREPORT @@ -1535,8 +1540,8 @@ PATH_DEFINES = -DLOCALSTATEDIR="\"$(localstatedir)\"" -DSYSCONFDIR="\"$(sysconfd -DDATADIR="\"$(datadir)\"" -DDOCDIR="\"$(docdir)\"" \ -DSBINDIR="\"$(sbindir)\"" -DPLUGINDIR="\"$(serverplugindir)\"" -DTEMPLATEDIR="\"$(sampledatadir)\"" -AM_CPPFLAGS = $(DEBUG_DEFINES) $(GCCSEC_DEFINES) $(ASAN_DEFINES) $(DS_DEFINES) $(DS_INCLUDES) $(PATH_DEFINES) -PLUGIN_CPPFLAGS = $(AM_CPPFLAGS) @openldap_inc@ @ldapsdk_inc@ @nss_inc@ @nspr_inc@ +AM_CPPFLAGS = $(DEBUG_DEFINES) $(GCCSEC_DEFINES) $(ASAN_DEFINES) $(DS_DEFINES) $(DS_INCLUDES) $(PATH_DEFINES) $(SYSTEMD_DEFINES) +PLUGIN_CPPFLAGS = $(AM_CPPFLAGS) @openldap_inc@ @ldapsdk_inc@ @nss_inc@ @nspr_inc@ @systemd_inc@ #AM_LDFLAGS = -Wl,-z,defs @HPUX_FALSE@AM_LDFLAGS = $(ASAN_DEFINES) # We need to make sure that libpthread is linked before libc on HP-UX. 
@@ -1566,6 +1571,7 @@ NETSNMP_LINK = @netsnmp_lib@ @netsnmp_link@ PAM_LINK = -lpam KERBEROS_LINK = $(kerberos_lib) DLOPEN_LINK = -ldl +SYSTEMD_LINK = @systemd_lib@ #------------------------ # Generated Sources @@ -2387,7 +2393,7 @@ libslapd_la_SOURCES = ldap/servers/slapd/add.c \ ldap/servers/slapd/value.c ldap/servers/slapd/valueset.c \ ldap/servers/slapd/vattr.c $(libavl_a_SOURCES) $(am__append_1) libslapd_la_CPPFLAGS = $(PLUGIN_CPPFLAGS) @sasl_inc@ @db_inc@ @svrcore_inc@ @kerberos_inc@ @pcre_inc@ -libslapd_la_LIBADD = $(LDAPSDK_LINK) $(SASL_LINK) $(NUNC_STANS_LINK) $(SVRCORE_LINK) $(NSS_LINK) $(NSPR_LINK) $(KERBEROS_LINK) $(PCRE_LINK) $(THREADLIB) +libslapd_la_LIBADD = $(LDAPSDK_LINK) $(SASL_LINK) $(NUNC_STANS_LINK) $(SVRCORE_LINK) $(NSS_LINK) $(NSPR_LINK) $(KERBEROS_LINK) $(PCRE_LINK) $(THREADLIB) $(SYSTEMD_LINK) #//////////////////////////////////////////////////////////////// # @@ -2982,7 +2988,7 @@ ldif_bin_LDADD = $(NSPR_LINK) $(NSS_LINK) $(LDAPSDK_LINK_NOTHR) $(SASL_LINK) #------------------------ migratecred_bin_SOURCES = ldap/servers/slapd/tools/migratecred.c migratecred_bin_CPPFLAGS = $(AM_CPPFLAGS) @openldap_inc@ @ldapsdk_inc@ @nss_inc@ @nspr_inc@ -migratecred_bin_LDADD = libslapd.la $(NSPR_LINK) $(NSS_LINK) $(SVRCORE_LINK) $(LDAPSDK_LINK) $(SASL_LINK) +migratecred_bin_LDADD = libslapd.la $(NSPR_LINK) $(NSS_LINK) $(SVRCORE_LINK) $(LDAPSDK_LINK) $(SASL_LINK) $(SYSTEMD_LINK) #------------------------ # mmldif @@ -3032,10 +3038,10 @@ ns_slapd_SOURCES = ldap/servers/slapd/abandon.c \ $(GETSOCKETPEER) ns_slapd_CPPFLAGS = $(AM_CPPFLAGS) @sasl_inc@ @openldap_inc@ @ldapsdk_inc@ @nss_inc@ \ - @nspr_inc@ @svrcore_inc@ + @nspr_inc@ @svrcore_inc@ @systemd_inc@ ns_slapd_LDADD = libslapd.la libldaputil.a $(LDAPSDK_LINK) $(NSS_LINK) $(DLOPEN_LINK) \ - $(NSPR_LINK) $(SASL_LINK) $(SVRCORE_LINK) $(LIBNSL) $(LIBSOCKET) $(THREADLIB) + $(NSPR_LINK) $(SASL_LINK) $(SVRCORE_LINK) $(LIBNSL) $(LIBSOCKET) $(THREADLIB) $(SYSTEMD_LINK) @HPUX_FALSE@ns_slapd_LINK = $(LINK) # We need 
to link ns-slapd with the C++ compiler on HP-UX since we load diff --git a/configure b/configure index e043593..c266be0 100755 --- a/configure +++ b/configure @@ -639,6 +639,9 @@ LTLIBOBJS vendor capbrand brand +systemd_defs +systemd_lib +systemd_inc localrundir nunc_stans_libdir nunc_stans_lib @@ -973,6 +976,7 @@ with_selinux with_nunc_stans with_nunc_stans_inc with_nunc_stans_lib +with_systemd ' ac_precious_vars='build_alias host_alias @@ -1744,6 +1748,7 @@ Optional Packages: nunc-stans include file directory --with-nunc-stans-lib=PATH nunc-stans library directory + --with-systemd Enable Systemd native integration. Some influential environment variables: CXX C++ compiler command @@ -21347,6 +21352,94 @@ $as_echo "no" >&6; } fi +# BEGIN COPYRIGHT BLOCK +# Copyright (C) 2015 Red Hat, Inc. +# All rights reserved. +# +# License: GPL (version 3 or any later version). +# See LICENSE for details. +# END COPYRIGHT BLOCK + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Systemd..." >&5 +$as_echo "$as_me: checking for Systemd..." >&6;} + +# check for --with-systemd +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-systemd" >&5 +$as_echo_n "checking for --with-systemd... " >&6; } + +# Check whether --with-systemd was given. +if test "${with_systemd+set}" = set; then : + withval=$with_systemd; + if test "$withval" = yes + then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: using systemd native features" >&5 +$as_echo "using systemd native features" >&6; } + with_systemd=yes + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +if test "$with_systemd" = yes; then + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_path_PKG_CONFIG+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +$as_echo "$PKG_CONFIG" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for Systemd with pkg-config" >&5 +$as_echo_n "checking for Systemd with pkg-config... " >&6; } + if test -n "$PKG_CONFIG" && $PKG_CONFIG --exists systemd libsystemd-journal libsystemd-daemon ; then + systemd_inc=`$PKG_CONFIG --cflags-only-I systemd libsystemd-journal libsystemd-daemon` + systemd_lib=`$PKG_CONFIG --libs-only-l systemd libsystemd-journal libsystemd-daemon` + systemd_defs="-DWITH_SYSTEMD" + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no Systemd pkg-config files" >&5 +$as_echo "no Systemd pkg-config files" >&6; } + fi + +fi + PACKAGE_BASE_VERSION=`echo $PACKAGE_VERSION | awk -F\. 
'{print $1"."$2}'` @@ -21441,6 +21534,9 @@ fi + + + # AC_DEFINE([USE_OLD_UNHASHED], [], [Use old unhashed code]) $as_echo "#define LDAP_DEBUG 1" >>confdefs.h diff --git a/configure.ac b/configure.ac index 468384c..d5045ef 100644 --- a/configure.ac +++ b/configure.ac @@ -717,6 +717,7 @@ m4_include(m4/kerberos.m4) m4_include(m4/pcre.m4) m4_include(m4/selinux.m4) m4_include(m4/nunc-stans.m4) +m4_include(m4/systemd.m4) PACKAGE_BASE_VERSION=`echo $PACKAGE_VERSION | awk -F\. '{print $1"."$2}'` AC_SUBST(PACKAGE_BASE_VERSION) @@ -778,6 +779,9 @@ AC_SUBST(nunc_stans_inc) AC_SUBST(nunc_stans_lib) AC_SUBST(nunc_stans_libdir) AC_SUBST(localrundir) +AC_SUBST(systemd_inc) +AC_SUBST(systemd_lib) +AC_SUBST(systemd_defs) AC_SUBST(brand) AC_SUBST(capbrand) diff --git a/ldap/ldif/template-dse.ldif.in b/ldap/ldif/template-dse.ldif.in index 1ec8009..6c62aaa 100644 --- a/ldap/ldif/template-dse.ldif.in +++ b/ldap/ldif/template-dse.ldif.in @@ -62,6 +62,7 @@ nsslapd-ndn-cache-enabled: on nsslapd-sasl-mapping-fallback: off nsslapd-dynamic-plugins: off nsslapd-allow-hashed-passwords: off +nsslapd-logging-backend: 1 dn: cn=features,cn=config objectclass: top diff --git a/ldap/schema/01core389.ldif b/ldap/schema/01core389.ldif index 42af40d..0b14c06 100644 --- a/ldap/schema/01core389.ldif +++ b/ldap/schema/01core389.ldif @@ -291,6 +291,7 @@ attributeTypes: ( 2.16.840.1.113730.3.1.2325 NAME 'nsslapd-auditfaillog-logging- attributeTypes: ( 2.16.840.1.113730.3.1.2326 NAME 'nsslapd-auditfaillog-logging-hide-unhashed-pw' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' ) attributeTypes: ( 2.16.840.1.113730.3.1.2327 NAME 'nsslapd-auditfaillog' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 SINGLE-VALUE X-ORIGIN 'Netscape Directory Server' ) attributeTypes: ( 2.16.840.1.113730.3.1.2328 NAME 'nsslapd-auditfaillog-list' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 
X-ORIGIN 'Netscape Directory Server' ) +attributeTypes: ( 2.16.840.1.113730.3.1.2330 NAME 'nsslapd-logging-backend' DESC 'Netscape defined attribute type' SYNTAX 1.3.6.1.4.1.1466.115.121.1.15 X-ORIGIN 'Netscape Directory Server' ) # # objectclasses # diff --git a/ldap/servers/slapd/auditlog.c b/ldap/servers/slapd/auditlog.c index 5b1389d..1461fee 100644 --- a/ldap/servers/slapd/auditlog.c +++ b/ldap/servers/slapd/auditlog.c @@ -309,10 +309,10 @@ write_audit_file( switch (logtype) { case SLAPD_AUDIT_LOG: - slapd_log_audit_proc (l->ls_buf, l->ls_len); + slapd_log_audit (l->ls_buf, l->ls_len); break; case SLAPD_AUDITFAIL_LOG: - slapd_log_auditfail_proc (l->ls_buf, l->ls_len); + slapd_log_auditfail (l->ls_buf, l->ls_len); break; default: /* Unsupported log type, we should make some noise */ diff --git a/ldap/servers/slapd/libglobs.c b/ldap/servers/slapd/libglobs.c index d108bf3..94098bf 100644 --- a/ldap/servers/slapd/libglobs.c +++ b/ldap/servers/slapd/libglobs.c @@ -169,6 +169,7 @@ static int invalid_sasl_mech(char *str); #define ENTRYUSN_IMPORT_INIT "0" #define DEFAULT_ALLOWED_TO_DELETE_ATTRS "nsslapd-listenhost nsslapd-securelistenhost nsslapd-defaultnamingcontext nsslapd-snmp-index" #define SALTED_SHA1_SCHEME_NAME "SSHA" +#define INIT_LOGGING_BACKEND_INTERNAL "dirsrv-log" /* CONFIG_ON_OFF */ slapi_onoff_t init_accesslog_rotationsync_enabled; @@ -1186,8 +1187,13 @@ static struct config_get_and_set { {CONFIG_AUDITFAILFILE_ATTRIBUTE, config_set_auditfaillog, NULL, 0, (void**)&global_slapdFrontendConfig.auditfaillog, - CONFIG_STRING_OR_EMPTY, NULL, NULL/* deletion is not allowed */} + CONFIG_STRING_OR_EMPTY, NULL, NULL/* deletion is not allowed */}, /* End audit fail log configuration */ + /* warning: initialization makes pointer from integer without a cast [enabled by default]. Why do we get this? 
*/ + {CONFIG_LOGGING_BACKEND, NULL, + log_set_backend, 0, + (void**)&global_slapdFrontendConfig.logging_backend, + CONFIG_STRING_OR_EMPTY, NULL, INIT_LOGGING_BACKEND_INTERNAL} }; /* @@ -8259,7 +8265,9 @@ remove_commas(char *str) static int invalid_sasl_mech(char *str) { - char *mech = NULL, *token = NULL, *next = NULL; + char *mech = NULL; + char *token = NULL; + char *next = NULL; int i; if(str == NULL){ diff --git a/ldap/servers/slapd/log.c b/ldap/servers/slapd/log.c index ef6e57b..cbfba54 100644 --- a/ldap/servers/slapd/log.c +++ b/ldap/servers/slapd/log.c @@ -300,6 +300,7 @@ void g_log_init(int log_enabled) loginfo.log_numof_auditfail_logs = 1; loginfo.log_auditfail_fdes = NULL; loginfo.log_auditfail_logchain = NULL; + loginfo.log_backend = LOGGING_BACKEND_INTERNAL; if ((loginfo.log_auditfail_rwlock =slapi_new_rwlock())== NULL ) { exit (-1); } @@ -388,6 +389,68 @@ log_set_logging(const char *attrname, char *value, int logtype, char *errorbuf, return LDAP_SUCCESS; } + +int +log_set_backend(const char *attrname, char *value, int logtype, char *errorbuf, int apply) { + + int retval = LDAP_SUCCESS; + int backend = 0; + char *backendstr = NULL; /* The backend we are looking at */ + char *token = NULL; /* String to tokenise, need to dup value */ + char *next = NULL; /* The next value */ + + + slapdFrontendConfig_t *slapdFrontendConfig = getFrontendConfig(); + + /* We don't need to bother checking log type ... */ + if ( !apply || !value || !*value ) { + return retval; + } + + + /* We have a comma seperated list. 
So split it up */ + token = slapi_ch_strdup(value); + for (backendstr = ldap_utf8strtok_r(token, ",", &next); + backendstr != NULL; + backendstr = ldap_utf8strtok_r(NULL, ",", &next)) + { + if(strlen(backendstr) == 0) { + /* Probably means someone did ",,"*/ + continue; + } else if (slapi_utf8ncasecmp(backendstr, "dirsrv-log", 10) ) { + backend |= LOGGING_BACKEND_INTERNAL; + } else if (slapi_utf8ncasecmp(backendstr, "syslog", 6) ) { + backend |= LOGGING_BACKEND_SYSLOG; +#ifdef WITH_SYSTEMD + } else if (slapi_utf8ncasecmp(backendstr, "journald", 8) ) { + backend |= LOGGING_BACKEND_JOURNALD; +#endif + } + } + slapi_ch_free_string(&token); + + if ( !( backend & LOGGING_BACKEND_INTERNAL) + && ! (backend & LOGGING_BACKEND_SYSLOG) +#ifdef WITH_SYSTEMD + && ! (backend & LOGGING_BACKEND_JOURNALD) +#endif + ) { + /* There is probably a better error here .... */ + retval = LDAP_OPERATIONS_ERROR; + } else { + /* We have a valid backend, set it */ + /* + * We just need to use any lock here, doesn't matter which. 
+ */ + LOG_ACCESS_LOCK_WRITE( ); + loginfo.log_backend = backend; + slapi_ch_free_string(&(slapdFrontendConfig->logging_backend)); + slapdFrontendConfig->logging_backend = slapi_ch_strdup(value); + LOG_ACCESS_UNLOCK_WRITE( ); + } + + return retval; +} /****************************************************************************** * Tell me the access log file name inc path ******************************************************************************/ @@ -1900,8 +1963,36 @@ auditfail_log_openf( char *pathname, int locked) /****************************************************************************** * write in the audit log ******************************************************************************/ + +int +slapd_log_audit ( + char *buffer, + int buf_len) +{ + /* We use this to route audit log entries to where they need to go */ + int retval = LDAP_SUCCESS; + int lbackend = loginfo.log_backend; /* We copy this to make these next checks atomic */ + if (lbackend & LOGGING_BACKEND_INTERNAL) { + retval = slapd_log_audit_internal(buffer, buf_len); + } + + if (retval != LDAP_SUCCESS) { + return retval; + } + if (lbackend & LOGGING_BACKEND_SYSLOG) { + /* This returns void, so we hope it worked */ + syslog(LOG_NOTICE, buffer); + } +#ifdef WITH_SYSTEMD + if (lbackend & LOGGING_BACKEND_JOURNALD) { + retval = sd_journal_print(LOG_NOTICE, buffer); + } +#endif + return retval; +} + int -slapd_log_audit_proc ( +slapd_log_audit_internal ( char *buffer, int buf_len) { @@ -1934,7 +2025,33 @@ slapd_log_audit_proc ( * write in the audit fail log ******************************************************************************/ int -slapd_log_auditfail_proc ( +slapd_log_auditfail ( + char *buffer, + int buf_len) +{ + /* We use this to route audit log entries to where they need to go */ + int retval = LDAP_SUCCESS; + int lbackend = loginfo.log_backend; /* We copy this to make these next checks atomic */ + if (lbackend & LOGGING_BACKEND_INTERNAL) { + retval = 
slapd_log_auditfail_internal(buffer, buf_len); + } + if (retval != LDAP_SUCCESS) { + return retval; + } + if (lbackend & LOGGING_BACKEND_SYSLOG) { + /* This returns void, so we hope it worked */ + syslog(LOG_NOTICE, buffer); + } +#ifdef WITH_SYSTEMD + if (lbackend & LOGGING_BACKEND_JOURNALD) { + retval = sd_journal_print(LOG_NOTICE, buffer); + } +#endif + return retval; +} + +int +slapd_log_auditfail_internal ( char *buffer, int buf_len) { @@ -1972,14 +2089,41 @@ slapd_log_error_proc( char *fmt, ... ) { - va_list ap_err; - va_list ap_file; - va_start( ap_err, fmt ); - va_start( ap_file, fmt ); - slapd_log_error_proc_internal(subsystem, fmt, ap_err, ap_file); - va_end(ap_err); - va_end(ap_file); - return 0; + int rc = LDAP_SUCCESS; + va_list ap_err; + va_list ap_file; + + if (loginfo.log_backend & LOGGING_BACKEND_INTERNAL) { + va_start( ap_err, fmt ); + va_start( ap_file, fmt ); + rc = slapd_log_error_proc_internal( subsystem, fmt, ap_err, ap_file ); + va_end(ap_file); + va_end(ap_err); + } + if (rc != LDAP_SUCCESS) { + return(rc); + } + if (loginfo.log_backend & LOGGING_BACKEND_SYSLOG) { + va_start( ap_err, fmt ); + /* va_start( ap_file, fmt ); */ + /* This returns void, so we hope it worked */ + vsyslog(LOG_ERROR, fmt, ap_err); + /* vsyslog(LOG_ERROR, fmt, ap_file); */ + /* va_end(ap_file); */ + va_end(ap_err); + } +#ifdef WITH_SYSTEMD + if (loginfo.log_backend & LOGGING_BACKEND_JOURNALD) { + va_start( ap_err, fmt ); + /* va_start( ap_file, fmt ); */ + /* This isn't handling RC nicely ... 
*/ + rc = sd_journal_printv(LOG_ERROR, fmt, ap_err); + /* rc = sd_journal_printv(LOG_ERROR, fmt, ap_file); */ + /* va_end(ap_file); */ + va_end(ap_err); + } +#endif + return rc; } static int @@ -1989,7 +2133,7 @@ slapd_log_error_proc_internal( va_list ap_err, va_list ap_file) { - int rc = 0; + int rc = LDAP_SUCCESS; if ( (loginfo.log_error_state & LOGGING_ENABLED) && (loginfo.log_error_file != NULL) ) { LOG_ERROR_LOCK_WRITE( ); @@ -2163,9 +2307,10 @@ vslapd_log_error( int slapi_log_error( int severity, char *subsystem, char *fmt, ... ) { - va_list ap1; - va_list ap2; - int rc; + va_list ap_err; + va_list ap_file; + int rc = LDAP_SUCCESS; + int lbackend = loginfo.log_backend; /* We copy this to make these next checks atomic */ if ( severity < SLAPI_LOG_MIN || severity > SLAPI_LOG_MAX ) { (void)slapd_log_error_proc( subsystem, @@ -2175,13 +2320,38 @@ slapi_log_error( int severity, char *subsystem, char *fmt, ... ) } if ( slapd_ldap_debug & slapi_log_map[ severity ] ) { - va_start( ap1, fmt ); - va_start( ap2, fmt ); - rc = slapd_log_error_proc_internal( subsystem, fmt, ap1, ap2 ); - va_end( ap1 ); - va_end( ap2 ); + if (lbackend & LOGGING_BACKEND_INTERNAL) { + va_start( ap_err, fmt ); + va_start( ap_file, fmt ); + rc = slapd_log_error_proc_internal( subsystem, fmt, ap_err, ap_file ); + va_end(ap_file); + va_end(ap_err); + } + if (rc != LDAP_SUCCESS) { + return(rc); + } + if (lbackend & LOGGING_BACKEND_SYSLOG) { + va_start( ap_err, fmt ); + /* va_start( ap_file, fmt ); */ + /* This returns void, so we hope it worked */ + vsyslog(LOG_ERROR, fmt, ap_err); + /* vsyslog(LOG_ERROR, fmt, ap_file); */ + /* va_end(ap_file); */ + va_end(ap_err); + } +#ifdef WITH_SYSTEMD + if (lbackend & LOGGING_BACKEND_JOURNALD) { + va_start( ap_err, fmt ); + /* va_start( ap_file, fmt ); */ + /* This isn't handling RC nicely ... 
*/ + rc = sd_journal_printv(LOG_ERROR, fmt, ap_err); + /* rc = sd_journal_printv(LOG_ERROR, fmt, ap_file); */ + /* va_end(ap_file); */ + va_end(ap_err); + } +#endif } else { - rc = 0; /* nothing to be logged --> always return success */ + rc = LDAP_SUCCESS; /* nothing to be logged --> always return success */ } return( rc ); @@ -2226,7 +2396,8 @@ static int vslapd_log_access(char *fmt, va_list ap) char sign; char buffer[SLAPI_LOG_BUFSIZ]; char vbuf[SLAPI_LOG_BUFSIZ]; - int blen, vlen; + int blen; + int vlen; /* info needed to keep us from calling localtime/strftime so often: */ static time_t old_time = 0; static char old_tbuf[SLAPI_LOG_BUFSIZ]; @@ -2278,7 +2449,7 @@ static int vslapd_log_access(char *fmt, va_list ap) log_append_buffer2(tnl, loginfo.log_access_buffer, buffer, blen, vbuf, vlen); - return( 0 ); + return( LDAP_SUCCESS ); } int @@ -2288,16 +2459,43 @@ slapi_log_access( int level, { va_list ap; int rc=0; + int lbackend = loginfo.log_backend; /* We copy this to make these next checks atomic */ if (!(loginfo.log_access_state & LOGGING_ENABLED)) { return 0; } - va_start( ap, fmt ); - if (( level & loginfo.log_access_level ) && - ( loginfo.log_access_fdes != NULL ) && (loginfo.log_access_file != NULL) ) { - rc = vslapd_log_access(fmt, ap); - } - va_end( ap ); + + if (( level & loginfo.log_access_level ) && + ( loginfo.log_access_fdes != NULL ) && (loginfo.log_access_file != NULL) ) { + /* How do we handle the RC? + * + * What we do is we log to the "best" backend first going down. + * "best" meaning most reliable. + * As we descend, if we encounter an issue, we bail before the "lesser" + * backends. 
+ */ + if (lbackend & LOGGING_BACKEND_INTERNAL) { + va_start( ap, fmt ); + rc = vslapd_log_access(fmt, ap); + va_end( ap ); + } + if (rc != LDAP_SUCCESS) { + return rc; + } + if (lbackend & LOGGING_BACKEND_SYSLOG) { + va_start( ap, fmt ); + /* This returns void, so we hope it worked */ + vsyslog(LOG_INFO, fmt, ap); + va_end( ap ); + } +#ifdef WITH_SYSTEMD + if (lbackend & LOGGING_BACKEND_JOURNALD) { + va_start (ap, fmt ); + rc = sd_journal_printv(LOG_INFO, fmt, ap); + va_end( ap ); + } +#endif + } return( rc ); } @@ -4869,6 +5067,8 @@ check_log_max_size( char *maxdiskspace_str, return rc; } + + /************************************************************************************/ /* E N D */ /************************************************************************************/ diff --git a/ldap/servers/slapd/log.h b/ldap/servers/slapd/log.h index 6c5f4f1..ff791e1 100644 --- a/ldap/servers/slapd/log.h +++ b/ldap/servers/slapd/log.h @@ -201,6 +201,7 @@ struct logging_opts { LogFileInfo *log_auditfail_logchain; /* all the logs info */ char *log_auditfailinfo_file; /* auditfail log rotation info file */ Slapi_RWLock *log_auditfail_rwlock; /* lock on auditfail */ + int log_backend; }; diff --git a/ldap/servers/slapd/proto-slap.h b/ldap/servers/slapd/proto-slap.h index f0a5257..2c6a7af 100644 --- a/ldap/servers/slapd/proto-slap.h +++ b/ldap/servers/slapd/proto-slap.h @@ -399,6 +399,8 @@ int config_set_mempool_maxfreelist( const char *attrname, char *value, char *err int config_set_maxsimplepaged_per_conn( const char *attrname, char *value, char *errorbuf, int apply ); +int log_set_backend(const char *attrname, char *value, int logtype, char *errorbuf, int apply); + int config_get_SSLclientAuth(); int config_get_ssl_check_hostname(); char *config_get_SSL3ciphers(); @@ -751,8 +753,10 @@ int slapi_log_access( int level, char *fmt, ... 
) #else ; #endif -int slapd_log_audit_proc(char *buffer, int buf_len); -int slapd_log_auditfail_proc(char *buffer, int buf_len); +int slapd_log_audit(char *buffer, int buf_len); +int slapd_log_audit_internal(char *buffer, int buf_len); +int slapd_log_auditfail(char *buffer, int buf_len); +int slapd_log_auditfail_internal(char *buffer, int buf_len); void log_access_flush(); diff --git a/ldap/servers/slapd/slap.h b/ldap/servers/slapd/slap.h index 633cc45..c4bae76 100644 --- a/ldap/servers/slapd/slap.h +++ b/ldap/servers/slapd/slap.h @@ -125,6 +125,11 @@ typedef struct symbol_t { #include <nunc-stans/nunc-stans.h> #endif +#ifdef WITH_SYSTEMD +#include <systemd/sd-journal.h> +#include <systemd/sd-daemon.h> +#endif + #if defined(OS_solaris) # include <thread.h> # define GET_THREAD_ID() thr_self() @@ -1886,6 +1891,12 @@ typedef struct _slapdEntryPoints { #define SLAPD_AUDIT_LOG 0x4 #define SLAPD_AUDITFAIL_LOG 0x8 +#define LOGGING_BACKEND_INTERNAL 0x1 +#define LOGGING_BACKEND_SYSLOG 0x2 +#ifdef WITH_SYSTEMD +#define LOGGING_BACKEND_JOURNALD 0x4 +#endif + #define CONFIG_DATABASE_ATTRIBUTE "nsslapd-database" #define CONFIG_PLUGIN_ATTRIBUTE "nsslapd-plugin" #define CONFIG_SIZELIMIT_ATTRIBUTE "nsslapd-sizelimit" @@ -2110,6 +2121,7 @@ typedef struct _slapdEntryPoints { #define CONFIG_CN_USES_DN_SYNTAX_IN_DNS "nsslapd-cn-uses-dn-syntax-in-dns" #define CONFIG_MAXSIMPLEPAGED_PER_CONN_ATTRIBUTE "nsslapd-maxsimplepaged-per-conn" +#define CONFIG_LOGGING_BACKEND "nsslapd-logging-backend" /* getenv alternative */ #define CONFIG_MALLOC_MXFAST "nsslapd-malloc-mxfast" @@ -2305,6 +2317,8 @@ typedef struct _slapdFrontendConfig { char *auditfaillog_exptimeunit; slapi_onoff_t auditfaillog_logging_hide_unhashed_pw; + char *logging_backend; + slapi_onoff_t return_exact_case; /* Return attribute names with the same case as they appear in at.conf */ diff --git a/m4/systemd.m4 b/m4/systemd.m4 new file mode 100644 index 0000000..4f9533b --- /dev/null +++ b/m4/systemd.m4 @@ -0,0 +1,36 @@ +# BEGIN 
COPYRIGHT BLOCK +# Copyright (C) 2015 Red Hat, Inc. +# All rights reserved. +# +# License: GPL (version 3 or any later version). +# See LICENSE for details. +# END COPYRIGHT BLOCK + +AC_CHECKING(for Systemd) + +# check for --with-systemd +AC_MSG_CHECKING(for --with-systemd) +AC_ARG_WITH(systemd, AS_HELP_STRING([--with-systemd],[Enable Systemd native integration.]), +[ + if test "$withval" = yes + then + AC_MSG_RESULT([using systemd native features]) + with_systemd=yes + else + AC_MSG_RESULT(no) + fi +], +AC_MSG_RESULT(no)) + +if test "$with_systemd" = yes; then + AC_PATH_PROG(PKG_CONFIG, pkg-config) + AC_MSG_CHECKING(for Systemd with pkg-config) + if test -n "$PKG_CONFIG" && $PKG_CONFIG --exists systemd libsystemd-journal libsystemd-daemon ; then + systemd_inc=`$PKG_CONFIG --cflags-only-I systemd libsystemd-journal libsystemd-daemon` + systemd_lib=`$PKG_CONFIG --libs-only-l systemd libsystemd-journal libsystemd-daemon` + systemd_defs="-DWITH_SYSTEMD" + else + AC_MSG_RESULT([no Systemd pkg-config files]) + fi + +fi commit 24c7896ec218068842cece9c9c7f732d58d3df27 Author: William Brown <firstyear(a)redhat.com> Date: Tue Jan 5 08:26:46 2016 +1000 Ticket 48398 - Coverity defect 13352 - Resource leak in auditlog.c Bug Description: config_get_auditfaillog allocates a char* ptr and returns it. This if statement doesn't free that. Fix Description: Create a new char* variable to allocate the result of config_get_auditfaillog into, and then ensure we free it after we are done. https://fedorahosted.org/389/ticket/48398 Author: wibrown Review by: mreynolds (Thanks!) 
diff --git a/ldap/servers/slapd/auditlog.c b/ldap/servers/slapd/auditlog.c index 45ef16e..5b1389d 100644 --- a/ldap/servers/slapd/auditlog.c +++ b/ldap/servers/slapd/auditlog.c @@ -91,6 +91,7 @@ write_auditfail_log_entry( Slapi_PBlock *pb ) int flag = 0; Operation *op; int pbrc = 0; + char *auditfail_config = NULL; /* if the audit log is not enabled, just skip all of this stuff */ @@ -129,13 +130,15 @@ write_auditfail_log_entry( Slapi_PBlock *pb ) curtime = current_time(); /* log the raw, unnormalized DN */ dn = slapi_sdn_get_udn(sdn); - if (config_get_auditfaillog() == NULL || strlen(config_get_auditfaillog()) == 0) { + auditfail_config = config_get_auditfaillog(); + if (auditfail_config == NULL || strlen(auditfail_config) == 0) { /* If no auditfail log write to audit log */ write_audit_file(SLAPD_AUDIT_LOG, operation_get_type(op), dn, change, flag, curtime, pbrc); } else { /* If we have our own auditfail log path */ write_audit_file(SLAPD_AUDITFAIL_LOG, operation_get_type(op), dn, change, flag, curtime, pbrc); } + slapi_ch_free_string(&auditfail_config); }

8 years, 3 months

1
0
0 / 0

Branch '389-ds-base-1.3.4' - ldap/servers

by Mark Reynolds

ldap/servers/slapd/connection.c | 46 +++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) New commits: commit cd45d032421b0ecf76d8cbb9b1c3aeef7680d9a2 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Fri Jan 15 11:35:16 2016 -0500 Ticket 48412 - worker threads do not detect abnormally closed connections Bug Description: If a connection is abnormally closed there can still be data in the connection buffer (bytes vs offset). This prevents the connection from being removed from the connection table. The worker thread then goes into a loop trying to read this data on an already closed connection. If there are enough abnormally closed connections, eventually all the worker threads are stuck, and new connections are not accepted. Fix Description: When checking whether there is more data in the buffer, also check whether the connection was closed, and if so return 0 (no more data). Also did a little code cleanup. https://fedorahosted.org/389/ticket/48412 Reviewed by: rmeggins(Thanks!) 
(cherry picked from commit 30c4852a3d9ca527b78c0f89df5909bc9a268392) diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c index a3d123e..3e435a7 100644 --- a/ldap/servers/slapd/connection.c +++ b/ldap/servers/slapd/connection.c @@ -1102,9 +1102,16 @@ connection_read_ldap_data(Connection *conn, PRInt32 *err) } static size_t -conn_buffered_data_avail_nolock(Connection *conn) +conn_buffered_data_avail_nolock(Connection *conn, int *conn_closed) { - return conn->c_private->c_buffer_bytes - conn->c_private->c_buffer_offset; + if ( (conn->c_sd == SLAPD_INVALID_SOCKET) || (conn->c_flags & CONN_FLAG_CLOSING) ) { + /* connection is closed - ignore the buffer */ + *conn_closed = 1; + return 0; + } else { + *conn_closed = 0; + return conn->c_private->c_buffer_bytes - conn->c_private->c_buffer_offset; + } } /* Upon returning from this function, we have either: @@ -1127,6 +1134,7 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i PRErrorCode err = 0; PRInt32 syserr = 0; size_t buffer_data_avail; + int conn_closed = 0; PR_EnterMonitor(conn->c_mutex); /* @@ -1142,7 +1150,7 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i *tag = LBER_DEFAULT; /* First check to see if we have buffered data from "before" */ - if ((buffer_data_avail = conn_buffered_data_avail_nolock(conn))) { + if ((buffer_data_avail = conn_buffered_data_avail_nolock(conn, &conn_closed))) { /* If so, use that data first */ if ( 0 != get_next_from_buffer( buffer + conn->c_private->c_buffer_offset, @@ -1157,7 +1165,7 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i while (*tag == LBER_DEFAULT) { int ioblocktimeout_waits = config_get_ioblocktimeout() / CONN_TURBO_TIMEOUT_INTERVAL; /* We should never get here with data remaining in the buffer */ - PR_ASSERT( !new_operation || 0 == conn_buffered_data_avail_nolock(conn) ); + PR_ASSERT( !new_operation || !conn_buffered_data_avail_nolock(conn, 
&conn_closed)); /* We make a non-blocking read call */ if (CONNECTION_BUFFER_OFF != conn->c_private->use_buffer) { ret = connection_read_ldap_data(conn,&err); @@ -1269,8 +1277,12 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i } } /* If there is remaining buffered data, set the flag to tell the caller */ - if (conn_buffered_data_avail_nolock(conn)) { + if (conn_buffered_data_avail_nolock(conn, &conn_closed)) { *remaining_data = 1; + } else if (conn_closed){ + /* connection closed */ + ret = CONN_DONE; + goto done; } if ( *tag != LDAP_TAG_MESSAGE ) { @@ -1521,7 +1533,7 @@ connection_threadmain() continue; case CONN_SHUTDOWN: LDAPDebug( LDAP_DEBUG_TRACE, - "op_thread received shutdown signal\n", 0, 0, 0 ); + "op_thread received shutdown signal\n", 0, 0, 0 ); g_decr_active_threadcnt(); return; case CONN_FOUND_WORK_TO_DO: @@ -1542,8 +1554,9 @@ connection_threadmain() Slapi_DN *anon_sdn = slapi_sdn_new_normdn_byref( anon_dn ); reslimit_update_from_dn( pb->pb_conn, anon_sdn ); slapi_sdn_free( &anon_sdn ); - if (slapi_reslimit_get_integer_limit(pb->pb_conn, pb->pb_conn->c_idletimeout_handle, - &idletimeout) + if (slapi_reslimit_get_integer_limit(pb->pb_conn, + pb->pb_conn->c_idletimeout_handle, + &idletimeout) == SLAPI_RESLIMIT_STATUS_SUCCESS) { pb->pb_conn->c_idletimeout = idletimeout; @@ -1581,7 +1594,7 @@ connection_threadmain() op = pb->pb_op; maxthreads = config_get_maxthreadsperconn(); more_data = 0; - ret = connection_read_operation(conn,op,&tag,&more_data); + ret = connection_read_operation(conn, op, &tag, &more_data); if ((ret == CONN_DONE) || (ret == CONN_TIMEDOUT)) { slapi_log_error(SLAPI_LOG_CONNS, "connection_threadmain", "conn %" NSPRIu64 " read not ready due to %d - thread_turbo_flag %d more_data %d " @@ -1614,7 +1627,8 @@ connection_threadmain() /* turn off turbo mode immediately if any pb waiting in global queue */ if (thread_turbo_flag && !WORK_Q_EMPTY) { thread_turbo_flag = 0; - LDAPDebug2Args(LDAP_DEBUG_CONNS,"conn 
%" NSPRIu64 " leaving turbo mode - pb_q is not empty %d\n",conn->c_connid,work_q_size); + LDAPDebug2Args(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " leaving turbo mode - pb_q is not empty %d\n", + conn->c_connid,work_q_size); } #endif @@ -1639,7 +1653,8 @@ connection_threadmain() * should call connection_make_readable after the op is removed * connection_make_readable(conn); */ - LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " leaving turbo mode due to %d\n",conn->c_connid,ret,0); + LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " leaving turbo mode due to %d\n", + conn->c_connid,ret,0); goto done; case CONN_SHUTDOWN: LDAPDebug( LDAP_DEBUG_TRACE, @@ -1695,7 +1710,8 @@ connection_threadmain() */ conn->c_idlesince = curtime; connection_activity(conn, maxthreads); - LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " queued because more_data\n",conn->c_connid,0,0); + LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " queued because more_data\n", + conn->c_connid,0,0); } else { /* keep count of how many times maxthreads has blocked an operation */ conn->c_maxthreadsblocked++; @@ -1770,13 +1786,15 @@ done: memset(pb, 0, sizeof(*pb)); } else { /* delete from connection operation queue & decr refcnt */ + int conn_closed = 0; PR_EnterMonitor(conn->c_mutex); connection_remove_operation_ext( pb, conn, op ); /* If we're in turbo mode, we keep our reference to the connection alive */ /* can't use the more_data var because connection could have changed in another thread */ - more_data = conn_buffered_data_avail_nolock(conn) ? 1 : 0; - LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " check more_data %d thread_turbo_flag %d\n",conn->c_connid,more_data,thread_turbo_flag); + more_data = conn_buffered_data_avail_nolock(conn, &conn_closed) ? 1 : 0; + LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " check more_data %d thread_turbo_flag %d\n", + conn->c_connid,more_data,thread_turbo_flag); if (!more_data) { if (!thread_turbo_flag) { /*

8 years, 3 months

1
0
0 / 0

Branch '389-ds-base-1.2.11' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5.h | 1 ldap/servers/plugins/replication/repl5_connection.c | 19 - ldap/servers/plugins/replication/repl5_inc_protocol.c | 180 ++++++++++-------- ldap/servers/plugins/replication/repl5_plugins.c | 60 +++++- ldap/servers/plugins/replication/urp.c | 2 5 files changed, 167 insertions(+), 95 deletions(-) New commits: commit 96b3a5b76d9e9f3aa77627198c50741796fbe44c Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Sun Jan 17 19:33:44 2016 -0500 Ticket 47788 - Supplier can skip a failing update, although it should retry Bug Description: If a replicated update fails on the consumer, the update is never retried. This is due to the replication async result thread missing the failure before another update is replicated and it succeeds. This second update that succeeds updates the consumer RUV. This makes it appear that the consumer is caught up, and the supplier never resends that original failed update. Fix Description: When a replicated update fails, and it's an error we cannot ignore, the connection is closed. This stops the replication session and prevents any further updates from coming in and updating the consumer RUV. This allows the supplier to correctly retry the operation that failed on the next replication session. https://fedorahosted.org/389/ticket/47788 Reviewed by: nhosoi, wibrown, and rmeggins (Thanks!!!) 
(cherry picked from commit 80c68e202ff2b50a60f35b6683ef26b41609bc56) diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index 66006f6..ee161ef 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -589,6 +589,7 @@ void replica_set_ruv_dirty (Replica *r); int replica_write_ruv (Replica *r); char *replica_get_dn(Replica *r); void replica_check_for_tasks(Replica*r, Slapi_Entry *e); +PRBool ignore_error_and_keep_going(int error); /* The functions below handles the state flag */ /* Current internal state flags */ diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index e080a3f..0360f07 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -467,17 +467,17 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda conn->last_ldap_error = rc; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } else if (IS_DISCONNECT_ERROR(err)) { conn->last_ldap_error = err; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } /* Got a result */ - if ((rc == LDAP_SUCCESS) && (err == LDAP_BUSY)) - return_value = CONN_BUSY; - else if (retoidp) + if (retoidp /* total update */) { if (!((rc == LDAP_SUCCESS) && (err == LDAP_BUSY))) { @@ -506,16 +506,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda } return_value = LDAP_SUCCESS == conn->last_ldap_error ? CONN_OPERATION_SUCCESS : CONN_OPERATION_FAILED; } - /* - * XXXggood do I need to free matched, referrals, - * anything else? Or can I pass NULL for the args - * I'm not interested in? - */ - /* Good question! Meanwhile, as RTM aproaches, let's free them... 
*/ - slapi_ch_free((void **) &errmsg); - slapi_ch_free((void **) &matched); - charray_free(referrals); conn->status = STATUS_CONNECTED; +done: + slapi_ch_free_string(&errmsg); + slapi_ch_free_string(&matched); + charray_free(referrals); } if (res) ldap_msgfree(res); PR_Unlock(conn->lock); /* release the conn lock */ diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c index 3268bfd..02b54b3 100644 --- a/ldap/servers/plugins/replication/repl5_inc_protocol.c +++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c @@ -174,7 +174,6 @@ static void protocol_sleep(Private_Repl_Protocol *prp, PRIntervalTime duration); static int send_updates(Private_Repl_Protocol *prp, RUV *ruv, PRUint32 *num_changes_sent); static void repl5_inc_backoff_expired(time_t timer_fire_time, void *arg); static int examine_update_vector(Private_Repl_Protocol *prp, RUV *ruv); -static PRBool ignore_error_and_keep_going(int error); static const char* state2name (int state); static const char* event2name (int event); static const char* op2string (int op); @@ -478,11 +477,13 @@ repl5_inc_flow_control_results(Repl_Agmt *agmt, result_data *rd) PR_Unlock(rd->lock); } -static void +static int repl5_inc_waitfor_async_results(result_data *rd) { int done = 0; int loops = 0; + int rc = UPDATE_NO_MORE_UPDATES; + /* Keep pulling results off the LDAP connection until we catch up to the last message id stored in the rd */ while (!done && !slapi_is_shutting_down()) { @@ -501,6 +502,10 @@ repl5_inc_waitfor_async_results(result_data *rd) { done = 1; /* no connection == no more results */ } + /* + * Return the last operation result + */ + rc = rd->result; PR_Unlock(rd->lock); /* If not then sleep a bit */ DS_Sleep(PR_SecondsToInterval(1)); @@ -516,6 +521,7 @@ repl5_inc_waitfor_async_results(result_data *rd) done = 1; } } + return rc; } /* @@ -1483,78 +1489,84 @@ static int repl5_inc_update_from_op_result(Private_Repl_Protocol *prp, ConnResult 
replay_crc, int connection_error, char *csn_str, char *uniqueid, ReplicaId replica_id, int* finished, PRUint32 *num_changes_sent) { int return_value = 0; - - /* Indentation is wrong here so we can get a sensible cvs diff */ - if (CONN_OPERATION_SUCCESS != replay_crc) - { - /* Figure out what to do next */ - if (CONN_OPERATION_FAILED == replay_crc) - { - /* Map ldap error code to return value */ - if (!ignore_error_and_keep_going(connection_error)) - { - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - } - else - { - agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); - } - slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - ldap_err2string(connection_error), connection_error, - *finished ? "Will retry later" : "Skipping"); - } - else if (CONN_NOT_CONNECTED == replay_crc) - { - /* We lost the connection - enter backoff state */ - return_value = UPDATE_CONNECTION_LOST; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " - "%s(%d). Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Connection lost", - connection_error); - } - else if (CONN_TIMEOUT == replay_crc) - { - return_value = UPDATE_TIMEOUT; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " - "%s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Timeout"); - } - else if (CONN_LOCAL_ERROR == replay_crc) - { - /* - * Something bad happened on the local server - enter - * backoff state. 
- */ - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Failed to replay change (uniqueid %s, CSN %s): " - "Local error. Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str); - } - - } - else - { - /* Positive response received */ - (*num_changes_sent)++; - agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); - } - return return_value; + if (CONN_OPERATION_SUCCESS != replay_crc) + { + /* Figure out what to do next */ + if (CONN_OPERATION_FAILED == replay_crc) + { + /* Map ldap error code to return value */ + if (!ignore_error_and_keep_going(connection_error)) + { + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + } + else + { + agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); + } + slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + ldap_err2string(connection_error), connection_error, + *finished ? "Will retry later" : "Skipping"); + } + else if (CONN_NOT_CONNECTED == replay_crc) + { + /* We lost the connection - enter backoff state */ + + return_value = UPDATE_CONNECTION_LOST; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " + "%s(%d). Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? ldap_err2string(connection_error) : "Connection lost", + connection_error); + } + else if (CONN_TIMEOUT == replay_crc) + { + return_value = UPDATE_TIMEOUT; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " + "%s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? 
ldap_err2string(connection_error) : "Timeout"); + } + else if (CONN_LOCAL_ERROR == replay_crc) + { + /* + * Something bad happened on the local server - enter + * backoff state. + */ + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Failed to replay change (uniqueid %s, CSN %s): " + "Local error. Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str); + } + if (*finished){ + /* + * A serious error has occurred, the consumer might have closed + * the connection already, but we need to close the conn on the + * supplier side to properly set the conn structure as closed. + */ + conn_disconnect(prp->conn); + } + } + else + { + /* Positive response received */ + (*num_changes_sent)++; + agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); + } + return return_value; } /* @@ -1572,7 +1584,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { CL5Entry entry; slapi_operation_parameters op; - int return_value; + int return_value = 0; int rc; CL5ReplayIterator *changelog_iterator; int message_id = 0; @@ -1937,8 +1949,22 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { /* We need to ensure that we wait until all the responses have been received from our operations */ if (return_value != UPDATE_CONNECTION_LOST) { - /* if connection was lost/closed, there will be nothing to read */ - repl5_inc_waitfor_async_results(rd); + /* + * If we already have an error, there is no need to check the + * async result thread anymore. + */ + if (return_value == UPDATE_NO_MORE_UPDATES) + { + /* + * We need to double check that an error hasn't popped up from + * the async result thread since our last check. 
+ */ + int final_result; + + if((final_result = repl5_inc_waitfor_async_results(rd))){ + return_value = final_result; + } + } } rc = repl5_inc_destroy_async_result_thread(rd); @@ -2228,7 +2254,7 @@ examine_update_vector(Private_Repl_Protocol *prp, RUV *remote_ruv) * We stop if there's some indication that the server just completely * failed to process the operation, e.g. LDAP_OPERATIONS_ERROR. */ -static PRBool +PRBool ignore_error_and_keep_going(int error) { int return_value = PR_FALSE; diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c index dddbf15..d49a666 100644 --- a/ldap/servers/plugins/replication/repl5_plugins.c +++ b/ldap/servers/plugins/replication/repl5_plugins.c @@ -1161,12 +1161,13 @@ write_changelog_and_ruv (Slapi_PBlock *pb) static int process_postop (Slapi_PBlock *pb) { - int rc = LDAP_SUCCESS; - Slapi_Operation *op; + Slapi_Operation *op; Slapi_Backend *be; - int is_replicated_operation = 0; + int is_replicated_operation = 0; CSN *opcsn = NULL; char sessionid[REPL_SESSION_ID_SIZE]; + int retval = LDAP_SUCCESS; + int rc = 0; /* we just let fixup operations through */ slapi_pblock_get( pb, SLAPI_OPERATION, &op ); @@ -1190,8 +1191,8 @@ process_postop (Slapi_PBlock *pb) get_repl_session_id (pb, sessionid, &opcsn); - slapi_pblock_get(pb, SLAPI_RESULT_CODE, &rc); - if (rc == LDAP_SUCCESS) + slapi_pblock_get(pb, SLAPI_RESULT_CODE, &retval); + if (retval == LDAP_SUCCESS) { agmtlist_notify_all(pb); } @@ -1233,6 +1234,55 @@ process_postop (Slapi_PBlock *pb) slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); } } + if (!ignore_error_and_keep_going(retval)){ + /* + * We have an error we can't ignore. Release the replica and close + * the connection to stop the replication session. 
+ */ + consumer_connection_extension *connext = NULL; + Slapi_Connection *conn = NULL; + char csn_str[CSN_STRSIZE] = {'\0'}; + PRUint64 connid = 0; + int opid = 0; + + slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); + slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); + slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); + if (conn) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "process_postop: Failed to apply update (%s) error (%d). " + "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", + csn_as_string(opcsn, PR_FALSE, csn_str), retval, + (long long unsigned int)connid, opid); + /* + * Release this replica so new sessions can begin + */ + connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); + if (connext && connext->replica_acquired) + { + int zero = 0; + Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); + + replica_relinquish_exclusive_access(r, connid, opid); + object_release ((Object*)connext->replica_acquired); + connext->replica_acquired = NULL; + connext->isreplicationsession = 0; + slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); + } + if (connext){ + consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); + } + + /* + * Close the connection to end the current session with the + * supplier. This prevents new updates from coming in and + * updating the consumer RUV - which would cause this failed + * update to be never be replayed. 
+ */ + slapi_disconnect_server(conn); + } + } if (NULL == opcsn) opcsn = operation_get_csn(op); if (opcsn) diff --git a/ldap/servers/plugins/replication/urp.c b/ldap/servers/plugins/replication/urp.c index 0182af3..527995c 100644 --- a/ldap/servers/plugins/replication/urp.c +++ b/ldap/servers/plugins/replication/urp.c @@ -146,7 +146,7 @@ urp_add_operation( Slapi_PBlock *pb ) slapi_log_error(slapi_log_urp, sessionid, "urp_add (%s): an entry with this uniqueid already exists.\n", slapi_entry_get_dn_const(existing_uniqueid_entry)); - op_result= LDAP_UNWILLING_TO_PERFORM; + op_result= LDAP_ALREADY_EXISTS; slapi_pblock_set(pb, SLAPI_RESULT_CODE, &op_result); rc= -1; /* Ignore this Operation */ PROFILE_POINT; /* Add Conflict; UniqueID Exists; Ignore */

8 years, 3 months

1
0
0 / 0

Branch '389-ds-base-1.3.1' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5.h | 1 ldap/servers/plugins/replication/repl5_connection.c | 19 - ldap/servers/plugins/replication/repl5_inc_protocol.c | 180 ++++++++++-------- ldap/servers/plugins/replication/repl5_plugins.c | 60 +++++- ldap/servers/plugins/replication/urp.c | 2 5 files changed, 167 insertions(+), 95 deletions(-) New commits: commit 80c68e202ff2b50a60f35b6683ef26b41609bc56 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Sun Jan 17 19:33:44 2016 -0500 Ticket 47788 - Supplier can skip a failing update, although it should retry Bug Description: If a replicated update fails on the consumer, the update is never retried. This is due to the replication async result thread missing the failure before another update is replicated and it succeeds. This second update that succeeds updates the consumer RUV. This makes it appear that the consumer is caught up, and the supplier never resends that original failed update. Fix Description: When a replicated update fails, and it's an error we cannot ignore, the connection is closed. This stops the replication session and prevents any further updates from coming in and updating the consumer RUV. This allows the supplier to correctly retry the operation that failed on the next replication session. https://fedorahosted.org/389/ticket/47788 Reviewed by: nhosoi, wibrown, and rmeggins (Thanks!!!) 
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index 7d85631..ac467eb 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -612,6 +612,7 @@ PRUint64 replica_get_backoff_min(Replica *r); PRUint64 replica_get_backoff_max(Replica *r); void replica_set_backoff_min(Replica *r, PRUint64 min); void replica_set_backoff_max(Replica *r, PRUint64 max); +PRBool ignore_error_and_keep_going(int error); /* The functions below handles the state flag */ /* Current internal state flags */ diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index 918c5ea..8f4a6b6 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -473,17 +473,17 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda conn->last_ldap_error = rc; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } else if (IS_DISCONNECT_ERROR(err)) { conn->last_ldap_error = err; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } /* Got a result */ - if ((rc == LDAP_SUCCESS) && (err == LDAP_BUSY)) - return_value = CONN_BUSY; - else if (retoidp) + if (retoidp /* total update */) { if (!((rc == LDAP_SUCCESS) && (err == LDAP_BUSY))) { @@ -512,16 +512,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda } return_value = LDAP_SUCCESS == conn->last_ldap_error ? CONN_OPERATION_SUCCESS : CONN_OPERATION_FAILED; } - /* - * XXXggood do I need to free matched, referrals, - * anything else? Or can I pass NULL for the args - * I'm not interested in? - */ - /* Good question! Meanwhile, as RTM aproaches, let's free them... 
*/ - slapi_ch_free((void **) &errmsg); - slapi_ch_free((void **) &matched); - charray_free(referrals); conn->status = STATUS_CONNECTED; +done: + slapi_ch_free_string(&errmsg); + slapi_ch_free_string(&matched); + charray_free(referrals); } if (res) ldap_msgfree(res); PR_Unlock(conn->lock); /* release the conn lock */ diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c index 5cf170c..e02e883 100644 --- a/ldap/servers/plugins/replication/repl5_inc_protocol.c +++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c @@ -174,7 +174,6 @@ static void protocol_sleep(Private_Repl_Protocol *prp, PRIntervalTime duration); static int send_updates(Private_Repl_Protocol *prp, RUV *ruv, PRUint32 *num_changes_sent); static void repl5_inc_backoff_expired(time_t timer_fire_time, void *arg); static int examine_update_vector(Private_Repl_Protocol *prp, RUV *ruv); -static PRBool ignore_error_and_keep_going(int error); static const char* state2name (int state); static const char* event2name (int event); static const char* op2string (int op); @@ -478,11 +477,13 @@ repl5_inc_flow_control_results(Repl_Agmt *agmt, result_data *rd) PR_Unlock(rd->lock); } -static void +static int repl5_inc_waitfor_async_results(result_data *rd) { int done = 0; int loops = 0; + int rc = UPDATE_NO_MORE_UPDATES; + /* Keep pulling results off the LDAP connection until we catch up to the last message id stored in the rd */ while (!done && !slapi_is_shutting_down()) { @@ -501,6 +502,10 @@ repl5_inc_waitfor_async_results(result_data *rd) { done = 1; /* no connection == no more results */ } + /* + * Return the last operation result + */ + rc = rd->result; PR_Unlock(rd->lock); /* If not then sleep a bit */ DS_Sleep(PR_SecondsToInterval(1)); @@ -516,6 +521,7 @@ repl5_inc_waitfor_async_results(result_data *rd) done = 1; } } + return rc; } /* @@ -1496,78 +1502,84 @@ static int repl5_inc_update_from_op_result(Private_Repl_Protocol *prp, ConnResult 
replay_crc, int connection_error, char *csn_str, char *uniqueid, ReplicaId replica_id, int* finished, PRUint32 *num_changes_sent) { int return_value = 0; - - /* Indentation is wrong here so we can get a sensible cvs diff */ - if (CONN_OPERATION_SUCCESS != replay_crc) - { - /* Figure out what to do next */ - if (CONN_OPERATION_FAILED == replay_crc) - { - /* Map ldap error code to return value */ - if (!ignore_error_and_keep_going(connection_error)) - { - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - } - else - { - agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); - } - slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - ldap_err2string(connection_error), connection_error, - *finished ? "Will retry later" : "Skipping"); - } - else if (CONN_NOT_CONNECTED == replay_crc) - { - /* We lost the connection - enter backoff state */ - return_value = UPDATE_CONNECTION_LOST; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " - "%s(%d). Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Connection lost", - connection_error); - } - else if (CONN_TIMEOUT == replay_crc) - { - return_value = UPDATE_TIMEOUT; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " - "%s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Timeout"); - } - else if (CONN_LOCAL_ERROR == replay_crc) - { - /* - * Something bad happened on the local server - enter - * backoff state. 
- */ - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Failed to replay change (uniqueid %s, CSN %s): " - "Local error. Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str); - } - - } - else - { - /* Positive response received */ - (*num_changes_sent)++; - agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); - } - return return_value; + if (CONN_OPERATION_SUCCESS != replay_crc) + { + /* Figure out what to do next */ + if (CONN_OPERATION_FAILED == replay_crc) + { + /* Map ldap error code to return value */ + if (!ignore_error_and_keep_going(connection_error)) + { + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + } + else + { + agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); + } + slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + ldap_err2string(connection_error), connection_error, + *finished ? "Will retry later" : "Skipping"); + } + else if (CONN_NOT_CONNECTED == replay_crc) + { + /* We lost the connection - enter backoff state */ + + return_value = UPDATE_CONNECTION_LOST; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " + "%s(%d). Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? ldap_err2string(connection_error) : "Connection lost", + connection_error); + } + else if (CONN_TIMEOUT == replay_crc) + { + return_value = UPDATE_TIMEOUT; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " + "%s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? 
ldap_err2string(connection_error) : "Timeout"); + } + else if (CONN_LOCAL_ERROR == replay_crc) + { + /* + * Something bad happened on the local server - enter + * backoff state. + */ + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Failed to replay change (uniqueid %s, CSN %s): " + "Local error. Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str); + } + if (*finished){ + /* + * A serious error has occurred, the consumer might have closed + * the connection already, but we need to close the conn on the + * supplier side to properly set the conn structure as closed. + */ + conn_disconnect(prp->conn); + } + } + else + { + /* Positive response received */ + (*num_changes_sent)++; + agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); + } + return return_value; } /* @@ -1585,7 +1597,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { CL5Entry entry; slapi_operation_parameters op; - int return_value; + int return_value = 0; int rc; CL5ReplayIterator *changelog_iterator; int message_id = 0; @@ -1912,8 +1924,22 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { /* We need to ensure that we wait until all the responses have been received from our operations */ if (return_value != UPDATE_CONNECTION_LOST) { - /* if connection was lost/closed, there will be nothing to read */ - repl5_inc_waitfor_async_results(rd); + /* + * If we already have an error, there is no need to check the + * async result thread anymore. + */ + if (return_value == UPDATE_NO_MORE_UPDATES) + { + /* + * We need to double check that an error hasn't popped up from + * the async result thread since our last check. 
+ */ + int final_result; + + if((final_result = repl5_inc_waitfor_async_results(rd))){ + return_value = final_result; + } + } } rc = repl5_inc_destroy_async_result_thread(rd); @@ -2202,7 +2228,7 @@ examine_update_vector(Private_Repl_Protocol *prp, RUV *remote_ruv) * We stop if there's some indication that the server just completely * failed to process the operation, e.g. LDAP_OPERATIONS_ERROR. */ -static PRBool +PRBool ignore_error_and_keep_going(int error) { int return_value = PR_FALSE; diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c index 5adcb6c..c2ca2e4 100644 --- a/ldap/servers/plugins/replication/repl5_plugins.c +++ b/ldap/servers/plugins/replication/repl5_plugins.c @@ -1208,12 +1208,13 @@ write_changelog_and_ruv (Slapi_PBlock *pb) static int process_postop (Slapi_PBlock *pb) { - int rc = LDAP_SUCCESS; - Slapi_Operation *op; + Slapi_Operation *op; Slapi_Backend *be; - int is_replicated_operation = 0; + int is_replicated_operation = 0; CSN *opcsn = NULL; char sessionid[REPL_SESSION_ID_SIZE]; + int retval = LDAP_SUCCESS; + int rc = 0; /* we just let fixup operations through */ slapi_pblock_get( pb, SLAPI_OPERATION, &op ); @@ -1237,8 +1238,8 @@ process_postop (Slapi_PBlock *pb) get_repl_session_id (pb, sessionid, &opcsn); - slapi_pblock_get(pb, SLAPI_RESULT_CODE, &rc); - if (rc == LDAP_SUCCESS) + slapi_pblock_get(pb, SLAPI_RESULT_CODE, &retval); + if (retval == LDAP_SUCCESS) { agmtlist_notify_all(pb); rc = SLAPI_PLUGIN_SUCCESS; @@ -1283,6 +1284,55 @@ process_postop (Slapi_PBlock *pb) slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); } } + if (!ignore_error_and_keep_going(retval)){ + /* + * We have an error we can't ignore. Release the replica and close + * the connection to stop the replication session. 
+ */ + consumer_connection_extension *connext = NULL; + Slapi_Connection *conn = NULL; + char csn_str[CSN_STRSIZE] = {'\0'}; + PRUint64 connid = 0; + int opid = 0; + + slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); + slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); + slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); + if (conn) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "process_postop: Failed to apply update (%s) error (%d). " + "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", + csn_as_string(opcsn, PR_FALSE, csn_str), retval, + (long long unsigned int)connid, opid); + /* + * Release this replica so new sessions can begin + */ + connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); + if (connext && connext->replica_acquired) + { + int zero = 0; + Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); + + replica_relinquish_exclusive_access(r, connid, opid); + object_release ((Object*)connext->replica_acquired); + connext->replica_acquired = NULL; + connext->isreplicationsession = 0; + slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); + } + if (connext){ + consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); + } + + /* + * Close the connection to end the current session with the + * supplier. This prevents new updates from coming in and + * updating the consumer RUV - which would cause this failed + * update to be never be replayed. 
+ */ + slapi_disconnect_server(conn); + } + } if (NULL == opcsn) opcsn = operation_get_csn(op); if (opcsn) diff --git a/ldap/servers/plugins/replication/urp.c b/ldap/servers/plugins/replication/urp.c index c7d60e2..3045e06 100644 --- a/ldap/servers/plugins/replication/urp.c +++ b/ldap/servers/plugins/replication/urp.c @@ -151,7 +151,7 @@ urp_add_operation( Slapi_PBlock *pb ) slapi_log_error(slapi_log_urp, sessionid, "urp_add (%s): an entry with this uniqueid already exists.\n", slapi_entry_get_dn_const(existing_uniqueid_entry)); - op_result= LDAP_UNWILLING_TO_PERFORM; + op_result= LDAP_ALREADY_EXISTS; slapi_pblock_set(pb, SLAPI_RESULT_CODE, &op_result); rc = SLAPI_PLUGIN_NOOP; /* Ignore this Operation */ PROFILE_POINT; /* Add Conflict; UniqueID Exists; Ignore */

8 years, 3 months

1
0
0 / 0

Branch '389-ds-base-1.3.2' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5_plugins.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) New commits: commit 33f8698eb0dac2ef7bcf66ede647243839b1946b Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Sun Jan 17 19:29:26 2016 -0500 Ticket 47788 - Fix logging warning from cherry-pick Description: In 1.3.2 we need to cast PRUint64 connection var. https://fedorahosted.org/389/ticket/47788 diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c index 277a03e..e286873 100644 --- a/ldap/servers/plugins/replication/repl5_plugins.c +++ b/ldap/servers/plugins/replication/repl5_plugins.c @@ -1337,7 +1337,7 @@ process_postop (Slapi_PBlock *pb) "process_postop: Failed to apply update (%s) error (%d). " "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", csn_as_string(opcsn, PR_FALSE, csn_str), retval, - connid, opid); + (long long unsigned int)connid, opid); /* * Release this replica so new sessions can begin */

8 years, 3 months

1
0
0 / 0

Branch '389-ds-base-1.3.2' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5.h | 1 ldap/servers/plugins/replication/repl5_connection.c | 19 - ldap/servers/plugins/replication/repl5_inc_protocol.c | 180 ++++++++++-------- ldap/servers/plugins/replication/repl5_plugins.c | 60 +++++- ldap/servers/plugins/replication/urp.c | 2 5 files changed, 167 insertions(+), 95 deletions(-) New commits: commit 1643bbddbc7bf84c98f47403d0603144ae61ff94 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Sun Jan 17 18:25:43 2016 -0500 Ticket 47788 - Supplier can skip a failing update, although it should retry Bug Description: If a replicated update fails on the consumer, the update is never retried. This is due to the replication async result thread missing the failure before another update is replicated and it succeeds. This second update that succeeds updates the consumer RUV. This makes it appear that the consumer is caught up, and the supplier never resends that original failed update. Fix Description: When a replicated update fails, and it's an error we cannot ignore, the connection is closed, which stops the replication session and prevents any further updates coming in and updating the consumer RUV. This allows the supplier to correctly retry the operation that failed on the next replication session. https://fedorahosted.org/389/ticket/47788 Reviewed by: nhosoi, wibrown, and rmeggins (Thanks!!!) 
(cherry picked from commit ab6501a963c94b2b6b5fa8d1924519ef1c26b0bd) diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index be71e93..b9818a7 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -617,6 +617,7 @@ PRUint64 replica_get_backoff_min(Replica *r); PRUint64 replica_get_backoff_max(Replica *r); void replica_set_backoff_min(Replica *r, PRUint64 min); void replica_set_backoff_max(Replica *r, PRUint64 max); +PRBool ignore_error_and_keep_going(int error); /* The functions below handles the state flag */ /* Current internal state flags */ diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index 2971025..6ed68c7 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -478,17 +478,17 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda conn->last_ldap_error = rc; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } else if (IS_DISCONNECT_ERROR(err)) { conn->last_ldap_error = err; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } /* Got a result */ - if ((rc == LDAP_SUCCESS) && (err == LDAP_BUSY)) - return_value = CONN_BUSY; - else if (retoidp) + if (retoidp /* total update */) { if (!((rc == LDAP_SUCCESS) && (err == LDAP_BUSY))) { @@ -517,16 +517,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda } return_value = LDAP_SUCCESS == conn->last_ldap_error ? CONN_OPERATION_SUCCESS : CONN_OPERATION_FAILED; } - /* - * XXXggood do I need to free matched, referrals, - * anything else? Or can I pass NULL for the args - * I'm not interested in? - */ - /* Good question! Meanwhile, as RTM aproaches, let's free them... 
*/ - slapi_ch_free((void **) &errmsg); - slapi_ch_free((void **) &matched); - charray_free(referrals); conn->status = STATUS_CONNECTED; +done: + slapi_ch_free_string(&errmsg); + slapi_ch_free_string(&matched); + charray_free(referrals); } if (res) ldap_msgfree(res); PR_Unlock(conn->lock); /* release the conn lock */ diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c index 5cf170c..e02e883 100644 --- a/ldap/servers/plugins/replication/repl5_inc_protocol.c +++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c @@ -174,7 +174,6 @@ static void protocol_sleep(Private_Repl_Protocol *prp, PRIntervalTime duration); static int send_updates(Private_Repl_Protocol *prp, RUV *ruv, PRUint32 *num_changes_sent); static void repl5_inc_backoff_expired(time_t timer_fire_time, void *arg); static int examine_update_vector(Private_Repl_Protocol *prp, RUV *ruv); -static PRBool ignore_error_and_keep_going(int error); static const char* state2name (int state); static const char* event2name (int event); static const char* op2string (int op); @@ -478,11 +477,13 @@ repl5_inc_flow_control_results(Repl_Agmt *agmt, result_data *rd) PR_Unlock(rd->lock); } -static void +static int repl5_inc_waitfor_async_results(result_data *rd) { int done = 0; int loops = 0; + int rc = UPDATE_NO_MORE_UPDATES; + /* Keep pulling results off the LDAP connection until we catch up to the last message id stored in the rd */ while (!done && !slapi_is_shutting_down()) { @@ -501,6 +502,10 @@ repl5_inc_waitfor_async_results(result_data *rd) { done = 1; /* no connection == no more results */ } + /* + * Return the last operation result + */ + rc = rd->result; PR_Unlock(rd->lock); /* If not then sleep a bit */ DS_Sleep(PR_SecondsToInterval(1)); @@ -516,6 +521,7 @@ repl5_inc_waitfor_async_results(result_data *rd) done = 1; } } + return rc; } /* @@ -1496,78 +1502,84 @@ static int repl5_inc_update_from_op_result(Private_Repl_Protocol *prp, ConnResult 
replay_crc, int connection_error, char *csn_str, char *uniqueid, ReplicaId replica_id, int* finished, PRUint32 *num_changes_sent) { int return_value = 0; - - /* Indentation is wrong here so we can get a sensible cvs diff */ - if (CONN_OPERATION_SUCCESS != replay_crc) - { - /* Figure out what to do next */ - if (CONN_OPERATION_FAILED == replay_crc) - { - /* Map ldap error code to return value */ - if (!ignore_error_and_keep_going(connection_error)) - { - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - } - else - { - agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); - } - slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - ldap_err2string(connection_error), connection_error, - *finished ? "Will retry later" : "Skipping"); - } - else if (CONN_NOT_CONNECTED == replay_crc) - { - /* We lost the connection - enter backoff state */ - return_value = UPDATE_CONNECTION_LOST; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " - "%s(%d). Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Connection lost", - connection_error); - } - else if (CONN_TIMEOUT == replay_crc) - { - return_value = UPDATE_TIMEOUT; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " - "%s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Timeout"); - } - else if (CONN_LOCAL_ERROR == replay_crc) - { - /* - * Something bad happened on the local server - enter - * backoff state. 
- */ - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Failed to replay change (uniqueid %s, CSN %s): " - "Local error. Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str); - } - - } - else - { - /* Positive response received */ - (*num_changes_sent)++; - agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); - } - return return_value; + if (CONN_OPERATION_SUCCESS != replay_crc) + { + /* Figure out what to do next */ + if (CONN_OPERATION_FAILED == replay_crc) + { + /* Map ldap error code to return value */ + if (!ignore_error_and_keep_going(connection_error)) + { + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + } + else + { + agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); + } + slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + ldap_err2string(connection_error), connection_error, + *finished ? "Will retry later" : "Skipping"); + } + else if (CONN_NOT_CONNECTED == replay_crc) + { + /* We lost the connection - enter backoff state */ + + return_value = UPDATE_CONNECTION_LOST; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " + "%s(%d). Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? ldap_err2string(connection_error) : "Connection lost", + connection_error); + } + else if (CONN_TIMEOUT == replay_crc) + { + return_value = UPDATE_TIMEOUT; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " + "%s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? 
ldap_err2string(connection_error) : "Timeout"); + } + else if (CONN_LOCAL_ERROR == replay_crc) + { + /* + * Something bad happened on the local server - enter + * backoff state. + */ + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Failed to replay change (uniqueid %s, CSN %s): " + "Local error. Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str); + } + if (*finished){ + /* + * A serious error has occurred, the consumer might have closed + * the connection already, but we need to close the conn on the + * supplier side to properly set the conn structure as closed. + */ + conn_disconnect(prp->conn); + } + } + else + { + /* Positive response received */ + (*num_changes_sent)++; + agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); + } + return return_value; } /* @@ -1585,7 +1597,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { CL5Entry entry; slapi_operation_parameters op; - int return_value; + int return_value = 0; int rc; CL5ReplayIterator *changelog_iterator; int message_id = 0; @@ -1912,8 +1924,22 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { /* We need to ensure that we wait until all the responses have been received from our operations */ if (return_value != UPDATE_CONNECTION_LOST) { - /* if connection was lost/closed, there will be nothing to read */ - repl5_inc_waitfor_async_results(rd); + /* + * If we already have an error, there is no need to check the + * async result thread anymore. + */ + if (return_value == UPDATE_NO_MORE_UPDATES) + { + /* + * We need to double check that an error hasn't popped up from + * the async result thread since our last check. 
+ */ + int final_result; + + if((final_result = repl5_inc_waitfor_async_results(rd))){ + return_value = final_result; + } + } } rc = repl5_inc_destroy_async_result_thread(rd); @@ -2202,7 +2228,7 @@ examine_update_vector(Private_Repl_Protocol *prp, RUV *remote_ruv) * We stop if there's some indication that the server just completely * failed to process the operation, e.g. LDAP_OPERATIONS_ERROR. */ -static PRBool +PRBool ignore_error_and_keep_going(int error) { int return_value = PR_FALSE; diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c index 9c97ea6..277a03e 100644 --- a/ldap/servers/plugins/replication/repl5_plugins.c +++ b/ldap/servers/plugins/replication/repl5_plugins.c @@ -1241,12 +1241,13 @@ write_changelog_and_ruv (Slapi_PBlock *pb) static int process_postop (Slapi_PBlock *pb) { - int rc = LDAP_SUCCESS; - Slapi_Operation *op; + Slapi_Operation *op; Slapi_Backend *be; - int is_replicated_operation = 0; + int is_replicated_operation = 0; CSN *opcsn = NULL; char sessionid[REPL_SESSION_ID_SIZE]; + int retval = LDAP_SUCCESS; + int rc = 0; /* we just let fixup operations through */ slapi_pblock_get( pb, SLAPI_OPERATION, &op ); @@ -1270,8 +1271,8 @@ process_postop (Slapi_PBlock *pb) get_repl_session_id (pb, sessionid, &opcsn); - slapi_pblock_get(pb, SLAPI_RESULT_CODE, &rc); - if (rc == LDAP_SUCCESS) + slapi_pblock_get(pb, SLAPI_RESULT_CODE, &retval); + if (retval == LDAP_SUCCESS) { agmtlist_notify_all(pb); rc = SLAPI_PLUGIN_SUCCESS; @@ -1316,6 +1317,55 @@ process_postop (Slapi_PBlock *pb) slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); } } + if (!ignore_error_and_keep_going(retval)){ + /* + * We have an error we can't ignore. Release the replica and close + * the connection to stop the replication session. 
+ */ + consumer_connection_extension *connext = NULL; + Slapi_Connection *conn = NULL; + char csn_str[CSN_STRSIZE] = {'\0'}; + PRUint64 connid = 0; + int opid = 0; + + slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); + slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); + slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); + if (conn) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "process_postop: Failed to apply update (%s) error (%d). " + "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", + csn_as_string(opcsn, PR_FALSE, csn_str), retval, + connid, opid); + /* + * Release this replica so new sessions can begin + */ + connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); + if (connext && connext->replica_acquired) + { + int zero = 0; + Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); + + replica_relinquish_exclusive_access(r, connid, opid); + object_release ((Object*)connext->replica_acquired); + connext->replica_acquired = NULL; + connext->isreplicationsession = 0; + slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); + } + if (connext){ + consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); + } + + /* + * Close the connection to end the current session with the + * supplier. This prevents new updates from coming in and + * updating the consumer RUV - which would cause this failed + * update to be never be replayed. 
+ */ + slapi_disconnect_server(conn); + } + } if (NULL == opcsn) opcsn = operation_get_csn(op); if (opcsn) diff --git a/ldap/servers/plugins/replication/urp.c b/ldap/servers/plugins/replication/urp.c index c7d60e2..3045e06 100644 --- a/ldap/servers/plugins/replication/urp.c +++ b/ldap/servers/plugins/replication/urp.c @@ -151,7 +151,7 @@ urp_add_operation( Slapi_PBlock *pb ) slapi_log_error(slapi_log_urp, sessionid, "urp_add (%s): an entry with this uniqueid already exists.\n", slapi_entry_get_dn_const(existing_uniqueid_entry)); - op_result= LDAP_UNWILLING_TO_PERFORM; + op_result= LDAP_ALREADY_EXISTS; slapi_pblock_set(pb, SLAPI_RESULT_CODE, &op_result); rc = SLAPI_PLUGIN_NOOP; /* Ignore this Operation */ PROFILE_POINT; /* Add Conflict; UniqueID Exists; Ignore */

8 years, 3 months

1
0
0 / 0

Branch '389-ds-base-1.3.3' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5.h | 1 ldap/servers/plugins/replication/repl5_connection.c | 19 - ldap/servers/plugins/replication/repl5_inc_protocol.c | 182 ++++++++++-------- ldap/servers/plugins/replication/repl5_plugins.c | 60 +++++ ldap/servers/plugins/replication/urp.c | 2 5 files changed, 168 insertions(+), 96 deletions(-) New commits: commit 50dd77fae8e1ba6be0d36898edf2fc80c975a00e Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Sun Jan 17 18:25:43 2016 -0500 Ticket 47788 - Supplier can skip a failing update, although it should retry Bug Description: If a replicated update fails on the consumer, the update is never retried. This is due to the replication async result thread missing the failure before another update is replicated and it succeeds. This second update that succeeds updates the consumer RUV. This makes it appear that the consumer is caught up, and the supplier never resends that original failed update. Fix Description: When a replicated update fails, and it's an error we cannot ignore, the connection is closed, which stops the replication session and prevents any further updates coming in and updating the consumer RUV. This allows the supplier to correctly retry the operation that failed on the next replication session. https://fedorahosted.org/389/ticket/47788 Reviewed by: nhosoi, wibrown, and rmeggins (Thanks!!!) 
(cherry picked from commit ab6501a963c94b2b6b5fa8d1924519ef1c26b0bd) diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index 59cc11b..9f943c9 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -640,6 +640,7 @@ void replica_incr_agmt_count(Replica *r); void replica_decr_agmt_count(Replica *r); PRUint64 replica_get_precise_purging(Replica *r); void replica_set_precise_purging(Replica *r, PRUint64 on_off); +PRBool ignore_error_and_keep_going(int error); /* The functions below handles the state flag */ /* Current internal state flags */ diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index 398ff98..b069742 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -509,17 +509,17 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda conn->last_ldap_error = rc; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } else if (IS_DISCONNECT_ERROR(err)) { conn->last_ldap_error = err; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } /* Got a result */ - if ((rc == LDAP_SUCCESS) && (err == LDAP_BUSY)) - return_value = CONN_BUSY; - else if (retoidp) + if (retoidp /* total update */) { if (!((rc == LDAP_SUCCESS) && (err == LDAP_BUSY))) { @@ -548,16 +548,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda } return_value = LDAP_SUCCESS == conn->last_ldap_error ? CONN_OPERATION_SUCCESS : CONN_OPERATION_FAILED; } - /* - * XXXggood do I need to free matched, referrals, - * anything else? Or can I pass NULL for the args - * I'm not interested in? - */ - /* Good question! Meanwhile, as RTM aproaches, let's free them... 
*/ - slapi_ch_free((void **) &errmsg); - slapi_ch_free((void **) &matched); - charray_free(referrals); conn->status = STATUS_CONNECTED; +done: + slapi_ch_free_string(&errmsg); + slapi_ch_free_string(&matched); + charray_free(referrals); } if (res) ldap_msgfree(res); PR_Unlock(conn->lock); /* release the conn lock */ diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c index 0d20b27..4c3d90a 100644 --- a/ldap/servers/plugins/replication/repl5_inc_protocol.c +++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c @@ -175,7 +175,6 @@ static void protocol_sleep(Private_Repl_Protocol *prp, PRIntervalTime duration); static int send_updates(Private_Repl_Protocol *prp, RUV *ruv, PRUint32 *num_changes_sent); static void repl5_inc_backoff_expired(time_t timer_fire_time, void *arg); static int examine_update_vector(Private_Repl_Protocol *prp, RUV *ruv); -static PRBool ignore_error_and_keep_going(int error); static const char* state2name (int state); static const char* event2name (int event); static const char* op2string (int op); @@ -479,11 +478,13 @@ repl5_inc_flow_control_results(Repl_Agmt *agmt, result_data *rd) PR_Unlock(rd->lock); } -static void +static int repl5_inc_waitfor_async_results(result_data *rd) { int done = 0; int loops = 0; + int rc = UPDATE_NO_MORE_UPDATES; + /* Keep pulling results off the LDAP connection until we catch up to the last message id stored in the rd */ while (!done && !slapi_is_shutting_down()) { @@ -499,6 +500,10 @@ repl5_inc_waitfor_async_results(result_data *rd) } else if (rd->abort && (rd->result == UPDATE_CONNECTION_LOST)) { done = 1; /* no connection == no more results */ } + /* + * Return the last operation result + */ + rc = rd->result; PR_Unlock(rd->lock); if (!done) { /* If not then sleep a bit */ @@ -516,6 +521,7 @@ repl5_inc_waitfor_async_results(result_data *rd) done = 1; } } + return rc; } /* @@ -1496,78 +1502,84 @@ static int 
repl5_inc_update_from_op_result(Private_Repl_Protocol *prp, ConnResult replay_crc, int connection_error, char *csn_str, char *uniqueid, ReplicaId replica_id, int* finished, PRUint32 *num_changes_sent) { int return_value = 0; - - /* Indentation is wrong here so we can get a sensible cvs diff */ - if (CONN_OPERATION_SUCCESS != replay_crc) - { - /* Figure out what to do next */ - if (CONN_OPERATION_FAILED == replay_crc) - { - /* Map ldap error code to return value */ - if (!ignore_error_and_keep_going(connection_error)) - { - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - } - else - { - agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); - } - slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - ldap_err2string(connection_error), connection_error, - *finished ? "Will retry later" : "Skipping"); - } - else if (CONN_NOT_CONNECTED == replay_crc) - { - /* We lost the connection - enter backoff state */ - return_value = UPDATE_CONNECTION_LOST; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " - "%s(%d). Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Connection lost", - connection_error); - } - else if (CONN_TIMEOUT == replay_crc) - { - return_value = UPDATE_TIMEOUT; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " - "%s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Timeout"); - } - else if (CONN_LOCAL_ERROR == replay_crc) - { - /* - * Something bad happened on the local server - enter - * backoff state. 
- */ - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Failed to replay change (uniqueid %s, CSN %s): " - "Local error. Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str); - } - - } - else - { - /* Positive response received */ - (*num_changes_sent)++; - agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); - } - return return_value; + if (CONN_OPERATION_SUCCESS != replay_crc) + { + /* Figure out what to do next */ + if (CONN_OPERATION_FAILED == replay_crc) + { + /* Map ldap error code to return value */ + if (!ignore_error_and_keep_going(connection_error)) + { + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + } + else + { + agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); + } + slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + ldap_err2string(connection_error), connection_error, + *finished ? "Will retry later" : "Skipping"); + } + else if (CONN_NOT_CONNECTED == replay_crc) + { + /* We lost the connection - enter backoff state */ + + return_value = UPDATE_CONNECTION_LOST; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " + "%s(%d). Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? ldap_err2string(connection_error) : "Connection lost", + connection_error); + } + else if (CONN_TIMEOUT == replay_crc) + { + return_value = UPDATE_TIMEOUT; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " + "%s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? 
ldap_err2string(connection_error) : "Timeout"); + } + else if (CONN_LOCAL_ERROR == replay_crc) + { + /* + * Something bad happened on the local server - enter + * backoff state. + */ + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Failed to replay change (uniqueid %s, CSN %s): " + "Local error. Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str); + } + if (*finished){ + /* + * A serious error has occurred, the consumer might have closed + * the connection already, but we need to close the conn on the + * supplier side to properly set the conn structure as closed. + */ + conn_disconnect(prp->conn); + } + } + else + { + /* Positive response received */ + (*num_changes_sent)++; + agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); + } + return return_value; } /* @@ -1585,7 +1597,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { CL5Entry entry; slapi_operation_parameters op; - int return_value; + int return_value = 0; int rc; CL5ReplayIterator *changelog_iterator; int message_id = 0; @@ -1950,9 +1962,23 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { /* We need to ensure that we wait until all the responses have been received from our operations */ if (return_value != UPDATE_CONNECTION_LOST) { - rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); - /* if connection was lost/closed, there will be nothing to read */ - repl5_inc_waitfor_async_results(rd); + /* + * If we already have an error, there is no need to check the + * async result thread anymore. + */ + if (return_value == UPDATE_NO_MORE_UPDATES) + { + /* + * We need to double check that an error hasn't popped up from + * the async result thread since our last check. 
+ */ + int final_result; + + rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); + if((final_result = repl5_inc_waitfor_async_results(rd))){ + return_value = final_result; + } + } } rc = repl5_inc_destroy_async_result_thread(rd); @@ -2241,7 +2267,7 @@ examine_update_vector(Private_Repl_Protocol *prp, RUV *remote_ruv) * We stop if there's some indication that the server just completely * failed to process the operation, e.g. LDAP_OPERATIONS_ERROR. */ -static PRBool +PRBool ignore_error_and_keep_going(int error) { int return_value = PR_FALSE; diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c index 84e4a07..dc7d926 100644 --- a/ldap/servers/plugins/replication/repl5_plugins.c +++ b/ldap/servers/plugins/replication/repl5_plugins.c @@ -1260,12 +1260,13 @@ write_changelog_and_ruv (Slapi_PBlock *pb) static int process_postop (Slapi_PBlock *pb) { - int rc = LDAP_SUCCESS; - Slapi_Operation *op; + Slapi_Operation *op; Slapi_Backend *be; - int is_replicated_operation = 0; + int is_replicated_operation = 0; CSN *opcsn = NULL; char sessionid[REPL_SESSION_ID_SIZE]; + int retval = LDAP_SUCCESS; + int rc = 0; /* we just let fixup operations through */ slapi_pblock_get( pb, SLAPI_OPERATION, &op ); @@ -1289,8 +1290,8 @@ process_postop (Slapi_PBlock *pb) get_repl_session_id (pb, sessionid, &opcsn); - slapi_pblock_get(pb, SLAPI_RESULT_CODE, &rc); - if (rc == LDAP_SUCCESS) + slapi_pblock_get(pb, SLAPI_RESULT_CODE, &retval); + if (retval == LDAP_SUCCESS) { agmtlist_notify_all(pb); rc = SLAPI_PLUGIN_SUCCESS; @@ -1335,6 +1336,55 @@ process_postop (Slapi_PBlock *pb) slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); } } + if (!ignore_error_and_keep_going(retval)){ + /* + * We have an error we can't ignore. Release the replica and close + * the connection to stop the replication session. 
+ */ + consumer_connection_extension *connext = NULL; + Slapi_Connection *conn = NULL; + char csn_str[CSN_STRSIZE] = {'\0'}; + PRUint64 connid = 0; + int opid = 0; + + slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); + slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); + slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); + if (conn) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "process_postop: Failed to apply update (%s) error (%d). " + "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", + csn_as_string(opcsn, PR_FALSE, csn_str), retval, + connid, opid); + /* + * Release this replica so new sessions can begin + */ + connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); + if (connext && connext->replica_acquired) + { + int zero = 0; + Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); + + replica_relinquish_exclusive_access(r, connid, opid); + object_release ((Object*)connext->replica_acquired); + connext->replica_acquired = NULL; + connext->isreplicationsession = 0; + slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); + } + if (connext){ + consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); + } + + /* + * Close the connection to end the current session with the + * supplier. This prevents new updates from coming in and + * updating the consumer RUV - which would cause this failed + * update to be never be replayed. 
+ */ + slapi_disconnect_server(conn); + } + } if (NULL == opcsn) opcsn = operation_get_csn(op); if (opcsn) diff --git a/ldap/servers/plugins/replication/urp.c b/ldap/servers/plugins/replication/urp.c index 032cc94..3872444 100644 --- a/ldap/servers/plugins/replication/urp.c +++ b/ldap/servers/plugins/replication/urp.c @@ -151,7 +151,7 @@ urp_add_operation( Slapi_PBlock *pb ) slapi_log_error(slapi_log_urp, sessionid, "urp_add (%s): an entry with this uniqueid already exists.\n", slapi_entry_get_dn_const(existing_uniqueid_entry)); - op_result= LDAP_UNWILLING_TO_PERFORM; + op_result= LDAP_ALREADY_EXISTS; slapi_pblock_set(pb, SLAPI_RESULT_CODE, &op_result); rc = SLAPI_PLUGIN_NOOP; /* Ignore this Operation */ PROFILE_POINT; /* Add Conflict; UniqueID Exists; Ignore */

8 years, 3 months

1
0
0 / 0

Branch '389-ds-base-1.3.4' - ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5.h | 1 ldap/servers/plugins/replication/repl5_connection.c | 19 - ldap/servers/plugins/replication/repl5_inc_protocol.c | 182 ++++++++++-------- ldap/servers/plugins/replication/repl5_plugins.c | 60 +++++ ldap/servers/plugins/replication/urp.c | 2 5 files changed, 168 insertions(+), 96 deletions(-) New commits: commit 407c545f07c06520f8378649fc0ac8fe20748dc7 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Sun Jan 17 18:25:43 2016 -0500 Ticket 47788 - Supplier can skip a failing update, although it should retry Bug Description: If a replicated update fails on the consumer, the update is never retried. This is due to the replication async result thread missing the failure before another update is replicated and it succeeds. This second update that succeeds updates the consumer RUV. This makes it appear that the consumer is caught up, and the supplier never resends that original failed update. Fix Description: When a replicated update fails, and it's an error we cannot ignore, the connection is closed, which stops the replication session and prevents any further updates coming in and updating the consumer RUV. This allows the supplier to correctly retry the operation that failed on the next replication session. https://fedorahosted.org/389/ticket/47788 Reviewed by: nhosoi, wibrown, and rmeggins (Thanks!!!) 
(cherry picked from commit ab6501a963c94b2b6b5fa8d1924519ef1c26b0bd) diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index df92ca0..307da82 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -608,6 +608,7 @@ void replica_incr_agmt_count(Replica *r); void replica_decr_agmt_count(Replica *r); PRUint64 replica_get_precise_purging(Replica *r); void replica_set_precise_purging(Replica *r, PRUint64 on_off); +PRBool ignore_error_and_keep_going(int error); /* The functions below handles the state flag */ /* Current internal state flags */ diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index 1515ca1..d193938 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -480,17 +480,17 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda conn->last_ldap_error = rc; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } else if (IS_DISCONNECT_ERROR(err)) { conn->last_ldap_error = err; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } /* Got a result */ - if ((rc == LDAP_SUCCESS) && (err == LDAP_BUSY)) - return_value = CONN_BUSY; - else if (retoidp) + if (retoidp /* total update */) { if (!((rc == LDAP_SUCCESS) && (err == LDAP_BUSY))) { @@ -519,16 +519,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda } return_value = LDAP_SUCCESS == conn->last_ldap_error ? CONN_OPERATION_SUCCESS : CONN_OPERATION_FAILED; } - /* - * XXXggood do I need to free matched, referrals, - * anything else? Or can I pass NULL for the args - * I'm not interested in? - */ - /* Good question! Meanwhile, as RTM aproaches, let's free them... 
*/ - slapi_ch_free((void **) &errmsg); - slapi_ch_free((void **) &matched); - charray_free(referrals); conn->status = STATUS_CONNECTED; +done: + slapi_ch_free_string(&errmsg); + slapi_ch_free_string(&matched); + charray_free(referrals); } if (res) ldap_msgfree(res); PR_Unlock(conn->lock); /* release the conn lock */ diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c index 244bbb2..927f835 100644 --- a/ldap/servers/plugins/replication/repl5_inc_protocol.c +++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c @@ -146,7 +146,6 @@ static void protocol_sleep(Private_Repl_Protocol *prp, PRIntervalTime duration); static int send_updates(Private_Repl_Protocol *prp, RUV *ruv, PRUint32 *num_changes_sent); static void repl5_inc_backoff_expired(time_t timer_fire_time, void *arg); static int examine_update_vector(Private_Repl_Protocol *prp, RUV *ruv); -static PRBool ignore_error_and_keep_going(int error); static const char* state2name (int state); static const char* event2name (int event); static const char* op2string (int op); @@ -450,11 +449,13 @@ repl5_inc_flow_control_results(Repl_Agmt *agmt, result_data *rd) PR_Unlock(rd->lock); } -static void +static int repl5_inc_waitfor_async_results(result_data *rd) { int done = 0; int loops = 0; + int rc = UPDATE_NO_MORE_UPDATES; + /* Keep pulling results off the LDAP connection until we catch up to the last message id stored in the rd */ while (!done && !slapi_is_shutting_down()) { @@ -470,6 +471,10 @@ repl5_inc_waitfor_async_results(result_data *rd) } else if (rd->abort && (rd->result == UPDATE_CONNECTION_LOST)) { done = 1; /* no connection == no more results */ } + /* + * Return the last operation result + */ + rc = rd->result; PR_Unlock(rd->lock); if (!done) { /* If not then sleep a bit */ @@ -487,6 +492,7 @@ repl5_inc_waitfor_async_results(result_data *rd) done = 1; } } + return rc; } /* @@ -1467,78 +1473,84 @@ static int 
repl5_inc_update_from_op_result(Private_Repl_Protocol *prp, ConnResult replay_crc, int connection_error, char *csn_str, char *uniqueid, ReplicaId replica_id, int* finished, PRUint32 *num_changes_sent) { int return_value = 0; - - /* Indentation is wrong here so we can get a sensible cvs diff */ - if (CONN_OPERATION_SUCCESS != replay_crc) - { - /* Figure out what to do next */ - if (CONN_OPERATION_FAILED == replay_crc) - { - /* Map ldap error code to return value */ - if (!ignore_error_and_keep_going(connection_error)) - { - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - } - else - { - agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); - } - slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - ldap_err2string(connection_error), connection_error, - *finished ? "Will retry later" : "Skipping"); - } - else if (CONN_NOT_CONNECTED == replay_crc) - { - /* We lost the connection - enter backoff state */ - return_value = UPDATE_CONNECTION_LOST; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " - "%s(%d). Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Connection lost", - connection_error); - } - else if (CONN_TIMEOUT == replay_crc) - { - return_value = UPDATE_TIMEOUT; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " - "%s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Timeout"); - } - else if (CONN_LOCAL_ERROR == replay_crc) - { - /* - * Something bad happened on the local server - enter - * backoff state. 
- */ - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Failed to replay change (uniqueid %s, CSN %s): " - "Local error. Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str); - } - - } - else - { - /* Positive response received */ - (*num_changes_sent)++; - agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); - } - return return_value; + if (CONN_OPERATION_SUCCESS != replay_crc) + { + /* Figure out what to do next */ + if (CONN_OPERATION_FAILED == replay_crc) + { + /* Map ldap error code to return value */ + if (!ignore_error_and_keep_going(connection_error)) + { + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + } + else + { + agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); + } + slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + ldap_err2string(connection_error), connection_error, + *finished ? "Will retry later" : "Skipping"); + } + else if (CONN_NOT_CONNECTED == replay_crc) + { + /* We lost the connection - enter backoff state */ + + return_value = UPDATE_CONNECTION_LOST; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " + "%s(%d). Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? ldap_err2string(connection_error) : "Connection lost", + connection_error); + } + else if (CONN_TIMEOUT == replay_crc) + { + return_value = UPDATE_TIMEOUT; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " + "%s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? 
ldap_err2string(connection_error) : "Timeout"); + } + else if (CONN_LOCAL_ERROR == replay_crc) + { + /* + * Something bad happened on the local server - enter + * backoff state. + */ + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Failed to replay change (uniqueid %s, CSN %s): " + "Local error. Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str); + } + if (*finished){ + /* + * A serious error has occurred, the consumer might have closed + * the connection already, but we need to close the conn on the + * supplier side to properly set the conn structure as closed. + */ + conn_disconnect(prp->conn); + } + } + else + { + /* Positive response received */ + (*num_changes_sent)++; + agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); + } + return return_value; } /* @@ -1556,7 +1568,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { CL5Entry entry; slapi_operation_parameters op; - int return_value; + int return_value = 0; int rc; CL5ReplayIterator *changelog_iterator; int message_id = 0; @@ -1929,9 +1941,23 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { /* We need to ensure that we wait until all the responses have been received from our operations */ if (return_value != UPDATE_CONNECTION_LOST) { - rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); - /* if connection was lost/closed, there will be nothing to read */ - repl5_inc_waitfor_async_results(rd); + /* + * If we already have an error, there is no need to check the + * async result thread anymore. + */ + if (return_value == UPDATE_NO_MORE_UPDATES) + { + /* + * We need to double check that an error hasn't popped up from + * the async result thread since our last check. 
+ */ + int final_result; + + rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); + if((final_result = repl5_inc_waitfor_async_results(rd))){ + return_value = final_result; + } + } } rc = repl5_inc_destroy_async_result_thread(rd); @@ -2220,7 +2246,7 @@ examine_update_vector(Private_Repl_Protocol *prp, RUV *remote_ruv) * We stop if there's some indication that the server just completely * failed to process the operation, e.g. LDAP_OPERATIONS_ERROR. */ -static PRBool +PRBool ignore_error_and_keep_going(int error) { int return_value = PR_FALSE; diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c index 8992055..c2fa214 100644 --- a/ldap/servers/plugins/replication/repl5_plugins.c +++ b/ldap/servers/plugins/replication/repl5_plugins.c @@ -1231,12 +1231,13 @@ write_changelog_and_ruv (Slapi_PBlock *pb) static int process_postop (Slapi_PBlock *pb) { - int rc = LDAP_SUCCESS; - Slapi_Operation *op; + Slapi_Operation *op; Slapi_Backend *be; - int is_replicated_operation = 0; + int is_replicated_operation = 0; CSN *opcsn = NULL; char sessionid[REPL_SESSION_ID_SIZE]; + int retval = LDAP_SUCCESS; + int rc = 0; /* we just let fixup operations through */ slapi_pblock_get( pb, SLAPI_OPERATION, &op ); @@ -1260,8 +1261,8 @@ process_postop (Slapi_PBlock *pb) get_repl_session_id (pb, sessionid, &opcsn); - slapi_pblock_get(pb, SLAPI_RESULT_CODE, &rc); - if (rc == LDAP_SUCCESS) + slapi_pblock_get(pb, SLAPI_RESULT_CODE, &retval); + if (retval == LDAP_SUCCESS) { agmtlist_notify_all(pb); rc = SLAPI_PLUGIN_SUCCESS; @@ -1306,6 +1307,55 @@ process_postop (Slapi_PBlock *pb) slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); } } + if (!ignore_error_and_keep_going(retval)){ + /* + * We have an error we can't ignore. Release the replica and close + * the connection to stop the replication session. 
+ */ + consumer_connection_extension *connext = NULL; + Slapi_Connection *conn = NULL; + char csn_str[CSN_STRSIZE] = {'\0'}; + PRUint64 connid = 0; + int opid = 0; + + slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); + slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); + slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); + if (conn) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "process_postop: Failed to apply update (%s) error (%d). " + "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", + csn_as_string(opcsn, PR_FALSE, csn_str), retval, + connid, opid); + /* + * Release this replica so new sessions can begin + */ + connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); + if (connext && connext->replica_acquired) + { + int zero = 0; + Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); + + replica_relinquish_exclusive_access(r, connid, opid); + object_release ((Object*)connext->replica_acquired); + connext->replica_acquired = NULL; + connext->isreplicationsession = 0; + slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); + } + if (connext){ + consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); + } + + /* + * Close the connection to end the current session with the + * supplier. This prevents new updates from coming in and + * updating the consumer RUV - which would cause this failed + * update to be never be replayed. 
+ */ + slapi_disconnect_server(conn); + } + } if (NULL == opcsn) opcsn = operation_get_csn(op); if (opcsn) diff --git a/ldap/servers/plugins/replication/urp.c b/ldap/servers/plugins/replication/urp.c index 5fe6f55..8d0d735 100644 --- a/ldap/servers/plugins/replication/urp.c +++ b/ldap/servers/plugins/replication/urp.c @@ -122,7 +122,7 @@ urp_add_operation( Slapi_PBlock *pb ) slapi_log_error(slapi_log_urp, sessionid, "urp_add (%s): an entry with this uniqueid already exists.\n", slapi_entry_get_dn_const(existing_uniqueid_entry)); - op_result= LDAP_UNWILLING_TO_PERFORM; + op_result= LDAP_ALREADY_EXISTS; slapi_pblock_set(pb, SLAPI_RESULT_CODE, &op_result); rc = SLAPI_PLUGIN_NOOP; /* Ignore this Operation */ PROFILE_POINT; /* Add Conflict; UniqueID Exists; Ignore */

8 years, 3 months

1
0
0 / 0

ldap/servers

by Mark Reynolds

ldap/servers/plugins/replication/repl5.h | 1 ldap/servers/plugins/replication/repl5_connection.c | 19 - ldap/servers/plugins/replication/repl5_inc_protocol.c | 182 ++++++++++-------- ldap/servers/plugins/replication/repl5_plugins.c | 60 +++++ ldap/servers/plugins/replication/urp.c | 2 5 files changed, 168 insertions(+), 96 deletions(-) New commits: commit ab6501a963c94b2b6b5fa8d1924519ef1c26b0bd Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Sun Jan 17 18:25:43 2016 -0500 Ticket 47788 - Supplier can skip a failing update, although it should retry Bug Description: If a replicated update fails on the consumer, the update is never retried. This is due to the replication async result thread missing the failure before another update is replicated and it succeeds. This second update that succeeds updates the consumer RUV. This makes it appear that the consumer is caught up, and the supplier never resends that original failed update. Fix Description: When a replicated update fails, and it's an error we cannot ignore, the connection is closed, which stops the replication session and prevents any further updates coming in and updating the consumer RUV. This allows the supplier to correctly retry the operation that failed on the next replication session. https://fedorahosted.org/389/ticket/47788 Reviewed by: nhosoi, wibrown, and rmeggins (Thanks!!!) 
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index df92ca0..307da82 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -608,6 +608,7 @@ void replica_incr_agmt_count(Replica *r); void replica_decr_agmt_count(Replica *r); PRUint64 replica_get_precise_purging(Replica *r); void replica_set_precise_purging(Replica *r, PRUint64 on_off); +PRBool ignore_error_and_keep_going(int error); /* The functions below handles the state flag */ /* Current internal state flags */ diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c index 1515ca1..d193938 100644 --- a/ldap/servers/plugins/replication/repl5_connection.c +++ b/ldap/servers/plugins/replication/repl5_connection.c @@ -480,17 +480,17 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda conn->last_ldap_error = rc; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } else if (IS_DISCONNECT_ERROR(err)) { conn->last_ldap_error = err; close_connection_internal(conn); /* we already have the lock */ return_value = CONN_NOT_CONNECTED; + goto done; } /* Got a result */ - if ((rc == LDAP_SUCCESS) && (err == LDAP_BUSY)) - return_value = CONN_BUSY; - else if (retoidp) + if (retoidp /* total update */) { if (!((rc == LDAP_SUCCESS) && (err == LDAP_BUSY))) { @@ -519,16 +519,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda } return_value = LDAP_SUCCESS == conn->last_ldap_error ? CONN_OPERATION_SUCCESS : CONN_OPERATION_FAILED; } - /* - * XXXggood do I need to free matched, referrals, - * anything else? Or can I pass NULL for the args - * I'm not interested in? - */ - /* Good question! Meanwhile, as RTM aproaches, let's free them... 
*/ - slapi_ch_free((void **) &errmsg); - slapi_ch_free((void **) &matched); - charray_free(referrals); conn->status = STATUS_CONNECTED; +done: + slapi_ch_free_string(&errmsg); + slapi_ch_free_string(&matched); + charray_free(referrals); } if (res) ldap_msgfree(res); PR_Unlock(conn->lock); /* release the conn lock */ diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c index 244bbb2..927f835 100644 --- a/ldap/servers/plugins/replication/repl5_inc_protocol.c +++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c @@ -146,7 +146,6 @@ static void protocol_sleep(Private_Repl_Protocol *prp, PRIntervalTime duration); static int send_updates(Private_Repl_Protocol *prp, RUV *ruv, PRUint32 *num_changes_sent); static void repl5_inc_backoff_expired(time_t timer_fire_time, void *arg); static int examine_update_vector(Private_Repl_Protocol *prp, RUV *ruv); -static PRBool ignore_error_and_keep_going(int error); static const char* state2name (int state); static const char* event2name (int event); static const char* op2string (int op); @@ -450,11 +449,13 @@ repl5_inc_flow_control_results(Repl_Agmt *agmt, result_data *rd) PR_Unlock(rd->lock); } -static void +static int repl5_inc_waitfor_async_results(result_data *rd) { int done = 0; int loops = 0; + int rc = UPDATE_NO_MORE_UPDATES; + /* Keep pulling results off the LDAP connection until we catch up to the last message id stored in the rd */ while (!done && !slapi_is_shutting_down()) { @@ -470,6 +471,10 @@ repl5_inc_waitfor_async_results(result_data *rd) } else if (rd->abort && (rd->result == UPDATE_CONNECTION_LOST)) { done = 1; /* no connection == no more results */ } + /* + * Return the last operation result + */ + rc = rd->result; PR_Unlock(rd->lock); if (!done) { /* If not then sleep a bit */ @@ -487,6 +492,7 @@ repl5_inc_waitfor_async_results(result_data *rd) done = 1; } } + return rc; } /* @@ -1467,78 +1473,84 @@ static int 
repl5_inc_update_from_op_result(Private_Repl_Protocol *prp, ConnResult replay_crc, int connection_error, char *csn_str, char *uniqueid, ReplicaId replica_id, int* finished, PRUint32 *num_changes_sent) { int return_value = 0; - - /* Indentation is wrong here so we can get a sensible cvs diff */ - if (CONN_OPERATION_SUCCESS != replay_crc) - { - /* Figure out what to do next */ - if (CONN_OPERATION_FAILED == replay_crc) - { - /* Map ldap error code to return value */ - if (!ignore_error_and_keep_going(connection_error)) - { - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - } - else - { - agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); - } - slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - ldap_err2string(connection_error), connection_error, - *finished ? "Will retry later" : "Skipping"); - } - else if (CONN_NOT_CONNECTED == replay_crc) - { - /* We lost the connection - enter backoff state */ - return_value = UPDATE_CONNECTION_LOST; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " - "%s(%d). Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Connection lost", - connection_error); - } - else if (CONN_TIMEOUT == replay_crc) - { - return_value = UPDATE_TIMEOUT; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " - "%s.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str, - connection_error ? ldap_err2string(connection_error) : "Timeout"); - } - else if (CONN_LOCAL_ERROR == replay_crc) - { - /* - * Something bad happened on the local server - enter - * backoff state. 
- */ - return_value = UPDATE_TRANSIENT_ERROR; - *finished = 1; - slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, - "%s: Failed to replay change (uniqueid %s, CSN %s): " - "Local error. Will retry later.\n", - agmt_get_long_name(prp->agmt), - uniqueid, csn_str); - } - - } - else - { - /* Positive response received */ - (*num_changes_sent)++; - agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); - } - return return_value; + if (CONN_OPERATION_SUCCESS != replay_crc) + { + /* Figure out what to do next */ + if (CONN_OPERATION_FAILED == replay_crc) + { + /* Map ldap error code to return value */ + if (!ignore_error_and_keep_going(connection_error)) + { + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + } + else + { + agmt_inc_last_update_changecount (prp->agmt, replica_id, 1 /*skipped*/); + } + slapi_log_error(*finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s (%d). %s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + ldap_err2string(connection_error), connection_error, + *finished ? "Will retry later" : "Skipping"); + } + else if (CONN_NOT_CONNECTED == replay_crc) + { + /* We lost the connection - enter backoff state */ + + return_value = UPDATE_CONNECTION_LOST; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer failed to replay change (uniqueid %s, CSN %s): " + "%s(%d). Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? ldap_err2string(connection_error) : "Connection lost", + connection_error); + } + else if (CONN_TIMEOUT == replay_crc) + { + return_value = UPDATE_TIMEOUT; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): " + "%s.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str, + connection_error ? 
ldap_err2string(connection_error) : "Timeout"); + } + else if (CONN_LOCAL_ERROR == replay_crc) + { + /* + * Something bad happened on the local server - enter + * backoff state. + */ + return_value = UPDATE_TRANSIENT_ERROR; + *finished = 1; + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "%s: Failed to replay change (uniqueid %s, CSN %s): " + "Local error. Will retry later.\n", + agmt_get_long_name(prp->agmt), + uniqueid, csn_str); + } + if (*finished){ + /* + * A serious error has occurred, the consumer might have closed + * the connection already, but we need to close the conn on the + * supplier side to properly set the conn structure as closed. + */ + conn_disconnect(prp->conn); + } + } + else + { + /* Positive response received */ + (*num_changes_sent)++; + agmt_inc_last_update_changecount (prp->agmt, replica_id, 0 /*replayed*/); + } + return return_value; } /* @@ -1556,7 +1568,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { CL5Entry entry; slapi_operation_parameters op; - int return_value; + int return_value = 0; int rc; CL5ReplayIterator *changelog_iterator; int message_id = 0; @@ -1929,9 +1941,23 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu { /* We need to ensure that we wait until all the responses have been received from our operations */ if (return_value != UPDATE_CONNECTION_LOST) { - rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); - /* if connection was lost/closed, there will be nothing to read */ - repl5_inc_waitfor_async_results(rd); + /* + * If we already have an error, there is no need to check the + * async result thread anymore. + */ + if (return_value == UPDATE_NO_MORE_UPDATES) + { + /* + * We need to double check that an error hasn't popped up from + * the async result thread since our last check. 
+ */ + int final_result; + + rd->WaitForAsyncResults = agmt_get_WaitForAsyncResults(prp->agmt); + if((final_result = repl5_inc_waitfor_async_results(rd))){ + return_value = final_result; + } + } } rc = repl5_inc_destroy_async_result_thread(rd); @@ -2220,7 +2246,7 @@ examine_update_vector(Private_Repl_Protocol *prp, RUV *remote_ruv) * We stop if there's some indication that the server just completely * failed to process the operation, e.g. LDAP_OPERATIONS_ERROR. */ -static PRBool +PRBool ignore_error_and_keep_going(int error) { int return_value = PR_FALSE; diff --git a/ldap/servers/plugins/replication/repl5_plugins.c b/ldap/servers/plugins/replication/repl5_plugins.c index 8992055..c2fa214 100644 --- a/ldap/servers/plugins/replication/repl5_plugins.c +++ b/ldap/servers/plugins/replication/repl5_plugins.c @@ -1231,12 +1231,13 @@ write_changelog_and_ruv (Slapi_PBlock *pb) static int process_postop (Slapi_PBlock *pb) { - int rc = LDAP_SUCCESS; - Slapi_Operation *op; + Slapi_Operation *op; Slapi_Backend *be; - int is_replicated_operation = 0; + int is_replicated_operation = 0; CSN *opcsn = NULL; char sessionid[REPL_SESSION_ID_SIZE]; + int retval = LDAP_SUCCESS; + int rc = 0; /* we just let fixup operations through */ slapi_pblock_get( pb, SLAPI_OPERATION, &op ); @@ -1260,8 +1261,8 @@ process_postop (Slapi_PBlock *pb) get_repl_session_id (pb, sessionid, &opcsn); - slapi_pblock_get(pb, SLAPI_RESULT_CODE, &rc); - if (rc == LDAP_SUCCESS) + slapi_pblock_get(pb, SLAPI_RESULT_CODE, &retval); + if (retval == LDAP_SUCCESS) { agmtlist_notify_all(pb); rc = SLAPI_PLUGIN_SUCCESS; @@ -1306,6 +1307,55 @@ process_postop (Slapi_PBlock *pb) slapi_ch_free((void **) &op_params->p.p_modrdn.modrdn_newsuperior_address.uniqueid); } } + if (!ignore_error_and_keep_going(retval)){ + /* + * We have an error we can't ignore. Release the replica and close + * the connection to stop the replication session. 
+ */ + consumer_connection_extension *connext = NULL; + Slapi_Connection *conn = NULL; + char csn_str[CSN_STRSIZE] = {'\0'}; + PRUint64 connid = 0; + int opid = 0; + + slapi_pblock_get(pb, SLAPI_CONNECTION, &conn); + slapi_pblock_get(pb, SLAPI_OPERATION_ID, &opid); + slapi_pblock_get(pb, SLAPI_CONN_ID, &connid); + if (conn) + { + slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name, + "process_postop: Failed to apply update (%s) error (%d). " + "Aborting replication session(conn=%" NSPRIu64 " op=%d)\n", + csn_as_string(opcsn, PR_FALSE, csn_str), retval, + connid, opid); + /* + * Release this replica so new sessions can begin + */ + connext = consumer_connection_extension_acquire_exclusive_access(conn, connid, opid); + if (connext && connext->replica_acquired) + { + int zero = 0; + Replica *r = (Replica*)object_get_data ((Object*)connext->replica_acquired); + + replica_relinquish_exclusive_access(r, connid, opid); + object_release ((Object*)connext->replica_acquired); + connext->replica_acquired = NULL; + connext->isreplicationsession = 0; + slapi_pblock_set( pb, SLAPI_CONN_IS_REPLICATION_SESSION, &zero ); + } + if (connext){ + consumer_connection_extension_relinquish_exclusive_access(conn, connid, opid, PR_FALSE); + } + + /* + * Close the connection to end the current session with the + * supplier. This prevents new updates from coming in and + * updating the consumer RUV - which would cause this failed + * update to be never be replayed. 
+ */ + slapi_disconnect_server(conn); + } + } if (NULL == opcsn) opcsn = operation_get_csn(op); if (opcsn) diff --git a/ldap/servers/plugins/replication/urp.c b/ldap/servers/plugins/replication/urp.c index 5fe6f55..8d0d735 100644 --- a/ldap/servers/plugins/replication/urp.c +++ b/ldap/servers/plugins/replication/urp.c @@ -122,7 +122,7 @@ urp_add_operation( Slapi_PBlock *pb ) slapi_log_error(slapi_log_urp, sessionid, "urp_add (%s): an entry with this uniqueid already exists.\n", slapi_entry_get_dn_const(existing_uniqueid_entry)); - op_result= LDAP_UNWILLING_TO_PERFORM; + op_result= LDAP_ALREADY_EXISTS; slapi_pblock_set(pb, SLAPI_RESULT_CODE, &op_result); rc = SLAPI_PLUGIN_NOOP; /* Ignore this Operation */ PROFILE_POINT; /* Add Conflict; UniqueID Exists; Ignore */

8 years, 3 months

1
0
0 / 0

ldap/servers

by Mark Reynolds

ldap/servers/slapd/connection.c | 46 +++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) New commits: commit 30c4852a3d9ca527b78c0f89df5909bc9a268392 Author: Mark Reynolds <mreynolds(a)redhat.com> Date: Fri Jan 15 11:35:16 2016 -0500 Ticket 48412 - worker threads do not detect abnormally closed connections Bug Description: If a connection is abnormally closed there can still be data in the connection buffer (bytes vs offset). This prevents the connection from being removed from the connection table. The worker thread then goes into a loop trying to read this data on an already closed connection. If there are enough abnormally closed connections, eventually all the worker threads are stuck, and new connections are not accepted. Fix Description: When looking if there is more data in the buffer check if the connection was closed, and return 0 (no more data). Also did a little code cleanup. https://fedorahosted.org/389/ticket/48412 Reviewed by: rmeggins (Thanks!) 
diff --git a/ldap/servers/slapd/connection.c b/ldap/servers/slapd/connection.c index c2cec85..718ba9e 100644 --- a/ldap/servers/slapd/connection.c +++ b/ldap/servers/slapd/connection.c @@ -1110,9 +1110,16 @@ connection_read_ldap_data(Connection *conn, PRInt32 *err) } static size_t -conn_buffered_data_avail_nolock(Connection *conn) +conn_buffered_data_avail_nolock(Connection *conn, int *conn_closed) { - return conn->c_private->c_buffer_bytes - conn->c_private->c_buffer_offset; + if ( (conn->c_sd == SLAPD_INVALID_SOCKET) || (conn->c_flags & CONN_FLAG_CLOSING) ) { + /* connection is closed - ignore the buffer */ + *conn_closed = 1; + return 0; + } else { + *conn_closed = 0; + return conn->c_private->c_buffer_bytes - conn->c_private->c_buffer_offset; + } } /* Upon returning from this function, we have either: @@ -1135,6 +1142,7 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i PRErrorCode err = 0; PRInt32 syserr = 0; size_t buffer_data_avail; + int conn_closed = 0; PR_EnterMonitor(conn->c_mutex); /* @@ -1150,7 +1158,7 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i *tag = LBER_DEFAULT; /* First check to see if we have buffered data from "before" */ - if ((buffer_data_avail = conn_buffered_data_avail_nolock(conn))) { + if ((buffer_data_avail = conn_buffered_data_avail_nolock(conn, &conn_closed))) { /* If so, use that data first */ if ( 0 != get_next_from_buffer( buffer + conn->c_private->c_buffer_offset, @@ -1165,7 +1173,7 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i while (*tag == LBER_DEFAULT) { int ioblocktimeout_waits = config_get_ioblocktimeout() / CONN_TURBO_TIMEOUT_INTERVAL; /* We should never get here with data remaining in the buffer */ - PR_ASSERT( !new_operation || 0 == conn_buffered_data_avail_nolock(conn) ); + PR_ASSERT( !new_operation || !conn_buffered_data_avail_nolock(conn, &conn_closed)); /* We make a non-blocking read call */ if 
(CONNECTION_BUFFER_OFF != conn->c_private->use_buffer) { ret = connection_read_ldap_data(conn,&err); @@ -1277,8 +1285,12 @@ int connection_read_operation(Connection *conn, Operation *op, ber_tag_t *tag, i } } /* If there is remaining buffered data, set the flag to tell the caller */ - if (conn_buffered_data_avail_nolock(conn)) { + if (conn_buffered_data_avail_nolock(conn, &conn_closed)) { *remaining_data = 1; + } else if (conn_closed){ + /* connection closed */ + ret = CONN_DONE; + goto done; } if ( *tag != LDAP_TAG_MESSAGE ) { @@ -1529,7 +1541,7 @@ connection_threadmain() continue; case CONN_SHUTDOWN: LDAPDebug( LDAP_DEBUG_TRACE, - "op_thread received shutdown signal\n", 0, 0, 0 ); + "op_thread received shutdown signal\n", 0, 0, 0 ); g_decr_active_threadcnt(); return; case CONN_FOUND_WORK_TO_DO: @@ -1550,8 +1562,9 @@ connection_threadmain() Slapi_DN *anon_sdn = slapi_sdn_new_normdn_byref( anon_dn ); reslimit_update_from_dn( pb->pb_conn, anon_sdn ); slapi_sdn_free( &anon_sdn ); - if (slapi_reslimit_get_integer_limit(pb->pb_conn, pb->pb_conn->c_idletimeout_handle, - &idletimeout) + if (slapi_reslimit_get_integer_limit(pb->pb_conn, + pb->pb_conn->c_idletimeout_handle, + &idletimeout) == SLAPI_RESLIMIT_STATUS_SUCCESS) { pb->pb_conn->c_idletimeout = idletimeout; @@ -1589,7 +1602,7 @@ connection_threadmain() op = pb->pb_op; maxthreads = config_get_maxthreadsperconn(); more_data = 0; - ret = connection_read_operation(conn,op,&tag,&more_data); + ret = connection_read_operation(conn, op, &tag, &more_data); if ((ret == CONN_DONE) || (ret == CONN_TIMEDOUT)) { slapi_log_error(SLAPI_LOG_CONNS, "connection_threadmain", "conn %" NSPRIu64 " read not ready due to %d - thread_turbo_flag %d more_data %d " @@ -1622,7 +1635,8 @@ connection_threadmain() /* turn off turbo mode immediately if any pb waiting in global queue */ if (thread_turbo_flag && !WORK_Q_EMPTY) { thread_turbo_flag = 0; - LDAPDebug2Args(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " leaving turbo mode - pb_q is not empty 
%d\n",conn->c_connid,work_q_size); + LDAPDebug2Args(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " leaving turbo mode - pb_q is not empty %d\n", + conn->c_connid,work_q_size); } #endif @@ -1647,7 +1661,8 @@ connection_threadmain() * should call connection_make_readable after the op is removed * connection_make_readable(conn); */ - LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " leaving turbo mode due to %d\n",conn->c_connid,ret,0); + LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " leaving turbo mode due to %d\n", + conn->c_connid,ret,0); goto done; case CONN_SHUTDOWN: LDAPDebug( LDAP_DEBUG_TRACE, @@ -1703,7 +1718,8 @@ connection_threadmain() */ conn->c_idlesince = curtime; connection_activity(conn, maxthreads); - LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " queued because more_data\n",conn->c_connid,0,0); + LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " queued because more_data\n", + conn->c_connid,0,0); } else { /* keep count of how many times maxthreads has blocked an operation */ conn->c_maxthreadsblocked++; @@ -1778,13 +1794,15 @@ done: memset(pb, 0, sizeof(*pb)); } else { /* delete from connection operation queue & decr refcnt */ + int conn_closed = 0; PR_EnterMonitor(conn->c_mutex); connection_remove_operation_ext( pb, conn, op ); /* If we're in turbo mode, we keep our reference to the connection alive */ /* can't use the more_data var because connection could have changed in another thread */ - more_data = conn_buffered_data_avail_nolock(conn) ? 1 : 0; - LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " check more_data %d thread_turbo_flag %d\n",conn->c_connid,more_data,thread_turbo_flag); + more_data = conn_buffered_data_avail_nolock(conn, &conn_closed) ? 1 : 0; + LDAPDebug(LDAP_DEBUG_CONNS,"conn %" NSPRIu64 " check more_data %d thread_turbo_flag %d\n", + conn->c_connid,more_data,thread_turbo_flag); if (!more_data) { if (!thread_turbo_flag) { /*

8 years, 3 months

1
0
0 / 0

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

389-commits January 2016