#!/usr/bin/env python
"""
Run a bunch of processes as root that call initgroups concurrently, with a
slight delay between them, to trigger a weird behaviour in sssd after a cold
start. It will print the number of bad answers from sssd.

Usage:

## log in as root (do not run any of these commands through sudo, because it
## would populate the sssd cache)

## check your number of secondary groups, using id for example
# id jbdenis
uid=21489(jbdenis) gid=110(sis) groups=110(sis),3044(CIB),19(floppy),1177(dump-projets),56(netadm),3125(vpn-ssl-admin)

Here I've got 5 secondary groups (sis is my primary group).

## clean up sssd state
/etc/init.d/sssd stop && rm -f /var/lib/sss/mc/* /var/lib/sss/db/* && /etc/init.d/sssd start

## run this program
# python initgroups.py jbdenis 110 5 24 200
wrong number of secondary groups in process 17145 : 0 instead of 5 (sleep 55ms)
wrong number of secondary groups in process 17149 : 0 instead of 5 (sleep 55ms)
2/24 failed

# first parameter is a login
# second parameter is your primary gid
# third parameter is your number of secondary groups
# fourth parameter is the number of processes you want to run concurrently
# the last parameter is the maximum delay in milliseconds before calling
#   initgroups; this delay is randomized up to this maximum

I've got good results with 24 processes and a randomized delay of 200ms
between startups. These parameters are somewhat dependent on the machine
you're running the script on, I guess. You may have to run this test
multiple times before triggering the bug. I'm unable to reproduce the bug
when I use a 0 delay.

!! DON'T FORGET TO CLEAN UP SSSD STATE BEFORE RUNNING THIS TEST AGAIN !!
"""
import sys
import os
import ctypes
import multiprocessing
import Queue
import time
import random


def initgroups(event, results, user, gid, sleep_ms):
    """
    Wrapper around initgroups.

    We will sleep up to sleep_ms milliseconds before calling the real libc
    initgroups. The results parameter is a queue which holds the results.
    """
    sleep = random.randint(0, sleep_ms)
    rsleep = sleep / 1000.0
    #event.wait()  # this synchronization is not necessary to trigger the bug
    time.sleep(rsleep)
    LIBC.initgroups(user, gid)
    # getgroups(0, NULL) returns the size of the supplementary group list;
    # initgroups also added the primary gid to that list, so subtract one
    # to count only the secondary groups.
    results.put((LIBC.getgroups(0, 0) - 1, os.getpid(), sleep))


def queue_to_list(queue):
    """
    Dummy helper: drain the results queue into an iterator.
    """
    try:
        while True:
            yield queue.get(block=False)
    except Queue.Empty:
        pass


if __name__ == '__main__':
    if len(sys.argv) != 6:
        print "usage:", sys.argv[0], "user primary_gid nb_secondary_groups nb_processes max_delay_ms"
        sys.exit(1)

    user = sys.argv[1]
    gid = int(sys.argv[2])
    secondary_groups_nb = int(sys.argv[3])
    processes = int(sys.argv[4])
    randsleep = int(sys.argv[5])

    LIBC = ctypes.cdll.LoadLibrary("libc.so.6")

    worker_processes = list()
    results = multiprocessing.Queue()
    start = multiprocessing.Event()

    # the parent process takes part in the test as well, once before and
    # once after spawning the workers
    initgroups(start, results, user, gid, randsleep)

    for _ in range(processes):
        worker_processes.append(
            multiprocessing.Process(target=initgroups,
                                    args=(start, results, user, gid, randsleep)))
        worker_processes[-1].start()

    initgroups(start, results, user, gid, randsleep)

    start.set()  # synchronization primitive not useful to reproduce the bug

    for proc in worker_processes:
        proc.join()

    miss = 0
    qsize = 0
    for (nb, pid, sleep) in queue_to_list(results):
        qsize += 1
        if nb != secondary_groups_nb:
            miss += 1
            print "wrong number of secondary groups in process %d : %d instead of %d (sleep %dms)" % (pid, nb, secondary_groups_nb, sleep)

    if miss > 0:
        print "%d/%d failed" % (miss, qsize)
        sys.exit(1)
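
# Optional cross-check (a minimal sketch, not part of the reproduction steps
# above): compute the expected number of secondary groups with the standard
# pwd/grp modules instead of reading it off the output of id, so the third
# command-line parameter can be double-checked. Caveat: grp.getgrall()
# relies on group enumeration, which is often disabled in sssd, so this
# count is only an approximation of what initgroups/getgroups will report.
import pwd
import grp


def count_secondary_groups(login):
    """Number of groups listing login as a member, primary group excluded."""
    primary_gid = pwd.getpwnam(login).pw_gid
    return len([group for group in grp.getgrall()
                if login in group.gr_mem and group.gr_gid != primary_gid])

# Example (login taken from the docstring above):
#     print count_secondary_groups("jbdenis")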