Nir Soffer has uploaded a new change for review.
Change subject: utils: Wait for terminated process ......................................................................
utils: Wait for terminated process
utils.terminating was not waiting for a terminated process, passing the process to zombiereaper. This behavior may be disastrous storage and mostly unwanted for other code.
When we run a child process touching shared storage, we must wait for the child process. If we kill the child process without waiting, engine may start the same operation on another host, while the original child process is still running, possibly in interruptible state, trying to write to storage.
To avoid this issue, we always invoke wait() after killing the process.
Special code that do not want to wait for the child process should not use this context manager.
Change-Id: Ida04e2c7ba092efdc2927ed9f460b0098ba2ad94 Signed-off-by: Nir Soffer nsoffer@redhat.com --- M lib/vdsm/utils.py M tests/utilsTests.py 2 files changed, 7 insertions(+), 34 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/24/65324/1
diff --git a/lib/vdsm/utils.py b/lib/vdsm/utils.py index a428c8e..e5c28b8 100644 --- a/lib/vdsm/utils.py +++ b/lib/vdsm/utils.py @@ -738,8 +738,7 @@ if proc.poll() is None: logging.debug('Terminating process pid=%d' % proc.pid) proc.kill() - if proc.poll() is None: - zombiereaper.autoReapPID(proc.pid) + proc.wait() except Exception: logging.exception('Failed to kill process %d' % proc.pid)
diff --git a/tests/utilsTests.py b/tests/utilsTests.py index 388a65d..6ff85d4 100644 --- a/tests/utilsTests.py +++ b/tests/utilsTests.py @@ -42,9 +42,8 @@ from vdsm import cmdutils from vdsm import commands from vdsm import panic -from vdsm.common import zombiereaper
-from monkeypatch import MonkeyPatch, MonkeyPatchScope +from monkeypatch import MonkeyPatch from vmTestsData import VM_STATUS_DUMP from monkeypatch import Patch from testlib import forked, online_cpus, namedTemporaryDir @@ -83,22 +82,11 @@ self.patch.revert()
-def wait_for_removal(path, timeout, wait=0.1): - deadline = utils.monotonic_time() + timeout - while True: - if not os.path.exists(path): - return True - if utils.monotonic_time() > deadline: - return False - time.sleep(wait) - - class TerminatingTests(TestCaseBase):
def setUp(self): self.proc = commands.execCmd([EXT_SLEEP, "2"], sync=False) self.kill_proc = self.proc.kill - self.reaped = set()
def tearDown(self): if self.proc.poll() is None: @@ -108,8 +96,7 @@ def test_terminating(self): with utils.terminating(self.proc): self.assertIsNone(self.proc.poll()) - proc_path = "/proc/%d" % self.proc.pid - self.assertTrue(wait_for_removal(proc_path, timeout=1)) + self.assertEqual(self.proc.returncode, -signal.SIGKILL)
def test_terminating_with_kill_exception(self): class FakeKillError(Exception): @@ -118,24 +105,11 @@ def fake_kill(): raise FakeKillError("fake kill exception")
- with MonkeyPatchScope([(zombiereaper, - 'autoReapPID', - self.reaped.add - )]): - self.proc.kill = fake_kill - with utils.terminating(self.proc): - self.assertIsNone(self.proc.poll()) - self.assertTrue(self.proc.pid not in self.reaped) + self.proc.kill = fake_kill + with utils.terminating(self.proc): + self.assertIsNone(self.proc.poll())
- def test_terminating_with_infected_kill(self): - with MonkeyPatchScope([(zombiereaper, - 'autoReapPID', - self.reaped.add - )]): - self.proc.kill = lambda: None - with utils.terminating(self.proc): - self.assertIsNone(self.proc.poll()) - self.assertTrue(self.proc.pid in self.reaped) + self.assertIsNone(self.proc.returncode)
class RetryTests(TestCaseBase):