Nir Soffer has uploaded a new change for review.
Change subject: utils: Wait for terminated process
......................................................................
utils: Wait for terminated process
utils.terminating was not waiting for a terminated process, passing the
process to zombiereaper. This behavior may be disastrous storage and
mostly unwanted for other code.
When we run a child process touching shared storage, we must wait for
the child process. If we kill the child process without waiting, engine
may start the same operation on another host, while the original child
process is still running, possibly in interruptible state, trying to
write to storage.
To avoid this issue, we always invoke wait() after killing the process.
Special code that do not want to wait for the child process should not
use this context manager.
Change-Id: Ida04e2c7ba092efdc2927ed9f460b0098ba2ad94
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
M lib/vdsm/utils.py
M tests/utilsTests.py
2 files changed, 7 insertions(+), 34 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/24/65324/1
diff --git a/lib/vdsm/utils.py b/lib/vdsm/utils.py
index a428c8e..e5c28b8 100644
--- a/lib/vdsm/utils.py
+++ b/lib/vdsm/utils.py
@@ -738,8 +738,7 @@
if proc.poll() is None:
logging.debug('Terminating process pid=%d' % proc.pid)
proc.kill()
- if proc.poll() is None:
- zombiereaper.autoReapPID(proc.pid)
+ proc.wait()
except Exception:
logging.exception('Failed to kill process %d' % proc.pid)
diff --git a/tests/utilsTests.py b/tests/utilsTests.py
index 388a65d..6ff85d4 100644
--- a/tests/utilsTests.py
+++ b/tests/utilsTests.py
@@ -42,9 +42,8 @@
from vdsm import cmdutils
from vdsm import commands
from vdsm import panic
-from vdsm.common import zombiereaper
-from monkeypatch import MonkeyPatch, MonkeyPatchScope
+from monkeypatch import MonkeyPatch
from vmTestsData import VM_STATUS_DUMP
from monkeypatch import Patch
from testlib import forked, online_cpus, namedTemporaryDir
@@ -83,22 +82,11 @@
self.patch.revert()
-def wait_for_removal(path, timeout, wait=0.1):
- deadline = utils.monotonic_time() + timeout
- while True:
- if not os.path.exists(path):
- return True
- if utils.monotonic_time() > deadline:
- return False
- time.sleep(wait)
-
-
class TerminatingTests(TestCaseBase):
def setUp(self):
self.proc = commands.execCmd([EXT_SLEEP, "2"], sync=False)
self.kill_proc = self.proc.kill
- self.reaped = set()
def tearDown(self):
if self.proc.poll() is None:
@@ -108,8 +96,7 @@
def test_terminating(self):
with utils.terminating(self.proc):
self.assertIsNone(self.proc.poll())
- proc_path = "/proc/%d" % self.proc.pid
- self.assertTrue(wait_for_removal(proc_path, timeout=1))
+ self.assertEqual(self.proc.returncode, -signal.SIGKILL)
def test_terminating_with_kill_exception(self):
class FakeKillError(Exception):
@@ -118,24 +105,11 @@
def fake_kill():
raise FakeKillError("fake kill exception")
- with MonkeyPatchScope([(zombiereaper,
- 'autoReapPID',
- self.reaped.add
- )]):
- self.proc.kill = fake_kill
- with utils.terminating(self.proc):
- self.assertIsNone(self.proc.poll())
- self.assertTrue(self.proc.pid not in self.reaped)
+ self.proc.kill = fake_kill
+ with utils.terminating(self.proc):
+ self.assertIsNone(self.proc.poll())
- def test_terminating_with_infected_kill(self):
- with MonkeyPatchScope([(zombiereaper,
- 'autoReapPID',
- self.reaped.add
- )]):
- self.proc.kill = lambda: None
- with utils.terminating(self.proc):
- self.assertIsNone(self.proc.poll())
- self.assertTrue(self.proc.pid in self.reaped)
+ self.assertIsNone(self.proc.returncode)
class RetryTests(TestCaseBase):
--
To view, visit
https://gerrit.ovirt.org/65324
To unsubscribe, visit
https://gerrit.ovirt.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ida04e2c7ba092efdc2927ed9f460b0098ba2ad94
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Nir Soffer <nsoffer(a)redhat.com>