[copr] master: Automatic commit of package [copr-backend] release [1.68-1]. (8062658)
by vgologuz@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit 8062658afa4a0cc6d6553185a4f0bb0ef43be595
Author: Valentin Gologuzov <vgologuz(a)redhat.com>
Date: Thu May 28 11:20:56 2015 +0200
Automatic commit of package [copr-backend] release [1.68-1].
>---------------------------------------------------------------
backend/copr-backend.spec | 7 ++++++-
rel-eng/packages/copr-backend | 2 +-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/backend/copr-backend.spec b/backend/copr-backend.spec
index a39883e..6f49836 100644
--- a/backend/copr-backend.spec
+++ b/backend/copr-backend.spec
@@ -3,7 +3,7 @@
%endif
Name: copr-backend
-Version: 1.67
+Version: 1.68
Release: 1%{?dist}
Summary: Backend for Copr
@@ -240,6 +240,11 @@ useradd -r -g copr -G lighttpd -s /bin/bash -c "COPR user" copr
%exclude %{_pkgdocdir}/playbooks
%changelog
+* Thu May 28 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.68-1
+- [backend] add config option for VM health check timeout
+- [backend] moved config parameters from Threshold class into the backend
+ config file
+
* Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.67-1
- [backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder
diff --git a/rel-eng/packages/copr-backend b/rel-eng/packages/copr-backend
index 49d1179..0380abb 100644
--- a/rel-eng/packages/copr-backend
+++ b/rel-eng/packages/copr-backend
@@ -1 +1 @@
-1.67-1 backend/
+1.68-1 backend/
8 years, 11 months
[copr] master: [backend] add config option for VM health check timeout (927d3b2)
by vgologuz@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit 927d3b25da662e5bea5c0272ae47bd47247cb6d4
Author: Valentin Gologuzov <vgologuz(a)redhat.com>
Date: Thu May 28 11:05:26 2015 +0200
[backend] add config option for VM health check timeout
>---------------------------------------------------------------
backend/backend/helpers.py | 3 +++
backend/backend/vm_manage/check.py | 2 +-
backend/tests/vm_manager/test_check.py | 1 +
3 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/backend/backend/helpers.py b/backend/backend/helpers.py
index 1315bce..08b7794 100644
--- a/backend/backend/helpers.py
+++ b/backend/backend/helpers.py
@@ -184,6 +184,9 @@ class BackendConfigReader(object):
opts.vm_cycle_timeout = _get_conf(
cp, "backend", "vm_cycle_timeout",
default=10, mode="int")
+ opts.vm_ssh_check_timeout = _get_conf(
+ cp, "backend", "vm_ssh_check_timeout",
+ default=5, mode="int")
opts.destdir = _get_conf(cp, "backend", "destdir", None, mode="path")
diff --git a/backend/backend/vm_manage/check.py b/backend/backend/vm_manage/check.py
index 4209e44..e5b08fa 100644
--- a/backend/backend/vm_manage/check.py
+++ b/backend/backend/vm_manage/check.py
@@ -33,7 +33,7 @@ def check_health(opts, vm_name, vm_ip):
pattern=vm_ip,
forks=1,
transport=opts.ssh.transport,
- timeout=2
+ timeout=opts.vm_ssh_check_timeout
)
connection = Runner(**runner_options)
connection.module_name = "shell"
diff --git a/backend/tests/vm_manager/test_check.py b/backend/tests/vm_manager/test_check.py
index dae61e9..e7d6172 100644
--- a/backend/tests/vm_manager/test_check.py
+++ b/backend/tests/vm_manager/test_check.py
@@ -94,6 +94,7 @@ class TestChecker(object):
do_sign=True,
timeout=1800,
results_baseurl="/tmp",
+ vm_ssh_check_timeout=2,
)
# self.try_spawn_args = '-c ssh {}'.format(self.spawn_pb_path)
8 years, 11 months
[copr] master: [backend] moved config parameters from Threshold class into the backend config file (5a9ce39)
by vgologuz@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit 5a9ce398430c390066f89710d32ed74916c2566c
Author: Valentin Gologuzov <vgologuz(a)redhat.com>
Date: Thu May 28 10:54:30 2015 +0200
[backend] moved config parameters from Threshold class into the backend config file
>---------------------------------------------------------------
backend/backend/daemons/vm_master.py | 16 ++++++++--------
backend/backend/helpers.py | 16 ++++++++++++++++
backend/backend/vm_manage/__init__.py | 11 -----------
backend/backend/vm_manage/event_handle.py | 8 +++++---
backend/tests/deamons/test_vm_master.py | 23 +++++++++++++++--------
backend/tests/vm_manager/test_event_handle.py | 5 +++--
6 files changed, 47 insertions(+), 32 deletions(-)
diff --git a/backend/backend/daemons/vm_master.py b/backend/backend/daemons/vm_master.py
index 7b397f6..78c7504 100644
--- a/backend/backend/daemons/vm_master.py
+++ b/backend/backend/daemons/vm_master.py
@@ -13,8 +13,8 @@ import traceback
import sys
import psutil
-from backend.constants import DEF_BUILD_TIMEOUT, JOB_GRAB_TASK_END_PUBSUB
-from backend.vm_manage import VmStates, Thresholds, KEY_VM_POOL_INFO
+from backend.constants import JOB_GRAB_TASK_END_PUBSUB
+from backend.vm_manage import VmStates, KEY_VM_POOL_INFO
from backend.vm_manage.event_handle import EventHandler
from ..helpers import get_redis_logger
@@ -109,7 +109,6 @@ class VmMaster(Process):
# VMM shouldn't do this
# check that process who acquired VMD still exists, otherwise release VM
- # TODO: fix 4 nested `if`. Ugly!
for vmd in self.vmm.get_vm_by_group_and_state_list(None, [VmStates.IN_USE]):
self.check_one_vm_for_dead_builder(vmd)
@@ -120,7 +119,8 @@ class VmMaster(Process):
for vmd in self.vmm.get_vm_by_group_and_state_list(None, states_to_check):
last_health_check = vmd.get_field(self.vmm.rc, "last_health_check")
- if not last_health_check or time.time() - float(last_health_check) > Thresholds.health_check_period:
+ check_period = self.opts.build_groups[vmd.group]["vm_health_check_period"]
+ if not last_health_check or time.time() - float(last_health_check) > check_period:
self.vmm.start_vm_check(vmd.vm_name)
def try_spawn_one(self, group):
@@ -205,12 +205,12 @@ class VmMaster(Process):
self.vmm.mark_server_start()
self.kill_received = False
- self.event_handler = EventHandler(self.vmm)
+ self.event_handler = EventHandler(self.opts, self.vmm)
self.event_handler.start()
self.log.info("VM master process started")
while not self.kill_received:
- time.sleep(Thresholds.cycle_timeout)
+ time.sleep(self.opts.vm_cycle_timeout)
try:
self.do_cycle()
except Exception as err:
@@ -230,7 +230,7 @@ class VmMaster(Process):
for vmd in self.vmm.get_vm_by_group_and_state_list(None, [VmStates.CHECK_HEALTH]):
time_elapsed = time.time() - float(vmd.get_field(self.vmm.rc, "last_health_check") or 0)
- if time_elapsed > Thresholds.health_check_max_time:
+ if time_elapsed > self.opts.build_groups[vmd.group]["vm_health_check_max_time"]:
self.log.info("VM marked with check fail state, "
"VM stayed too long in health check state, elapsed: {} VM: {}"
.format(time_elapsed, str(vmd)))
@@ -247,7 +247,7 @@ class VmMaster(Process):
for vmd in self.vmm.get_vm_by_group_and_state_list(None, [VmStates.TERMINATING]):
time_elapsed = time.time() - float(vmd.get_field(self.vmm.rc, "terminating_since") or 0)
- if time_elapsed > Thresholds.terminating_timeout:
+ if time_elapsed > self.opts.build_groups[vmd.group]["vm_terminating_timeout"]:
if len(self.vmm.lookup_vms_by_ip(vmd.vm_ip)) > 1:
self.log.info(
"Removing VM record: {}. There are more VM with the same ip, "
diff --git a/backend/backend/helpers.py b/backend/backend/helpers.py
index e6571d6..1315bce 100644
--- a/backend/backend/helpers.py
+++ b/backend/backend/helpers.py
@@ -166,9 +166,25 @@ class BackendConfigReader(object):
"vm_dirty_terminating_timeout": _get_conf(
cp, "backend", "group{}_vm_dirty_terminating_timeout".format(group_id),
default=120, mode="int"),
+ "vm_health_check_period": _get_conf(
+ cp, "backend", "group{}_vm_health_check_period".format(group_id),
+ default=120, mode="int"),
+ "vm_health_check_max_time": _get_conf(
+ cp, "backend", "group{}_vm_health_check_max_time".format(group_id),
+ default=300, mode="int"),
+ "vm_max_check_fails": _get_conf(
+ cp, "backend", "group{}_vm_max_check_fails".format(group_id),
+ default=2, mode="int"),
+ "vm_terminating_timeout": _get_conf(
+ cp, "backend", "group{}_vm_terminating_timeout".format(group_id),
+ default=600, mode="int"),
}
opts.build_groups.append(group)
+ opts.vm_cycle_timeout = _get_conf(
+ cp, "backend", "vm_cycle_timeout",
+ default=10, mode="int")
+
opts.destdir = _get_conf(cp, "backend", "destdir", None, mode="path")
opts.exit_on_worker = _get_conf(
diff --git a/backend/backend/vm_manage/__init__.py b/backend/backend/vm_manage/__init__.py
index d2f781a..3fb187f 100644
--- a/backend/backend/vm_manage/__init__.py
+++ b/backend/backend/vm_manage/__init__.py
@@ -37,14 +37,3 @@ KEY_SERVER_INFO = "copr:backend:server_info:hset::"
KEY_VM_INSTANCE = "copr:backend:vm_instance:hset::{vm_name}"
# hset to store VmDescriptor
-
-
-class Thresholds(object):
- """
- Time constants for VM manager,
- """
- health_check_period = 10 # [s.] how often health check is invoked
- health_check_max_time = 120 # [s.] if health check wasn't done for this time, mark check fail
- max_check_fails = 2 # maximum number of fails before starting VM termination
- terminating_timeout = 600 # [s.] time before we try to terminate VM again
- cycle_timeout = 10 # [s.] timeout for all periodical checks
diff --git a/backend/backend/vm_manage/event_handle.py b/backend/backend/vm_manage/event_handle.py
index 460440c..8c950b4 100644
--- a/backend/backend/vm_manage/event_handle.py
+++ b/backend/backend/vm_manage/event_handle.py
@@ -8,7 +8,7 @@ import sys
from backend.exceptions import VmDescriptorNotFound
from backend.helpers import format_tb, get_redis_logger
-from backend.vm_manage import Thresholds, VmStates, PUBSUB_MB, EventTopics, KEY_VM_INSTANCE
+from backend.vm_manage import VmStates, PUBSUB_MB, EventTopics
class Recycle(Thread):
@@ -66,8 +66,9 @@ class EventHandler(Process):
"""
:type vmm: VmManager
"""
- def __init__(self, vmm):
+ def __init__(self, opts, vmm):
super(EventHandler, self).__init__(name="EventHandler")
+ self.opts = opts
self.vmm = vmm
self.kill_received = False
@@ -104,7 +105,8 @@ class EventHandler(Process):
self.log.debug("recording check fail: {}".format(msg))
self.lua_scripts["record_failure"](keys=[vmd.vm_key])
fails_count = int(vmd.get_field(self.vmm.rc, "check_fails") or 0)
- if fails_count > Thresholds.max_check_fails and vmd.state != VmStates.IN_USE:
+ max_check_fails = self.opts.build_groups[vmd.group]["vm_max_check_fails"]
+ if fails_count > max_check_fails and vmd.state != VmStates.IN_USE:
self.log.info("check fail threshold reached: {}, terminating: {}"
.format(check_fails_count, msg))
self.vmm.start_vm_termination(vmd.vm_name)
diff --git a/backend/tests/deamons/test_vm_master.py b/backend/tests/deamons/test_vm_master.py
index d484bd1..e770828 100644
--- a/backend/tests/deamons/test_vm_master.py
+++ b/backend/tests/deamons/test_vm_master.py
@@ -16,7 +16,7 @@ import time
from multiprocessing import Queue
from backend import exceptions
from backend.constants import JOB_GRAB_TASK_END_PUBSUB
-from backend.exceptions import MockRemoteError, CoprSignError, BuilderError, VmError
+from backend.exceptions import VmError
import tempfile
import shutil
@@ -24,7 +24,7 @@ import os
import six
from backend.helpers import get_redis_connection
-from backend.vm_manage import VmStates, Thresholds
+from backend.vm_manage import VmStates
from backend.vm_manage.manager import VmManager
from backend.daemons.vm_master import VmMaster
@@ -92,22 +92,28 @@ class TestVmMaster(object):
"max_spawn_processes": 3,
"vm_spawn_min_interval": self.vm_spawn_min_interval,
"vm_dirty_terminating_timeout": 120,
+ "vm_health_check_period": 10,
+ "vm_health_check_max_time": 60,
+ "vm_terminating_timeout": 300,
},
1: {
"name": "arm",
"archs": ["armV7"],
"vm_spawn_min_interval": self.vm_spawn_min_interval,
"vm_dirty_terminating_timeout": 120,
+ "vm_health_check_period": 10,
+ "vm_health_check_max_time": 60,
+ "vm_terminating_timeout": 300,
}
},
fedmsg_enabled=False,
sleeptime=0.1,
+ vm_cycle_timeout=10,
)
-
self.queue = Queue()
self.vm_ip = "127.0.0.1"
@@ -271,7 +277,7 @@ class TestVmMaster(object):
self.vm_master.check_vms_health()
assert not self.vmm.start_vm_check.called
- mc_time.time.return_value = 1 + Thresholds.health_check_period
+ mc_time.time.return_value = 1 + self.opts.build_groups[0]["vm_health_check_period"]
self.vm_master.check_vms_health()
to_check = set(call[0][1] for call in self.vmm.start_vm_check.call_args_list)
assert set(['a1', 'a3', 'b1', 'b2']) == to_check
@@ -291,9 +297,10 @@ class TestVmMaster(object):
self.vmd_a3.store_field(self.rc, "state", VmStates.CHECK_HEALTH)
self.vmd_a2.store_field(self.rc, "last_health_check", 0)
- self.vmd_a3.store_field(self.rc, "last_health_check", Thresholds.health_check_max_time + 10 )
+ self.vmd_a3.store_field(self.rc, "last_health_check",
+ self.opts.build_groups[0]["vm_health_check_max_time"] + 10)
- mc_time.time.return_value = Thresholds.health_check_max_time + 11
+ mc_time.time.return_value = self.opts.build_groups[0]["vm_health_check_max_time"] + 11
self.vmm.mark_vm_check_failed = MagicMock()
self.vm_master.finalize_long_health_checks()
@@ -327,7 +334,7 @@ class TestVmMaster(object):
# case 3: one VM in terminating state with unique ip, time_elapsed > threshold
# start_vm_termination called, no remove_vm_from_pool
- mc_time.time.return_value = 1 + Thresholds.terminating_timeout
+ mc_time.time.return_value = 1 + self.opts.build_groups[0]["vm_terminating_timeout"]
self.vm_master.terminate_again()
assert not self.vmm.remove_vm_from_pool.called
@@ -347,7 +354,7 @@ class TestVmMaster(object):
# case 4: two VM with the same IP, one in terminating states, , time_elapsed > threshold
# no start_vm_termination, remove_vm_from_pool
- mc_time.time.return_value = 1 + Thresholds.terminating_timeout
+ mc_time.time.return_value = 1 + self.opts.build_groups[0]["vm_terminating_timeout"]
self.vm_master.terminate_again()
assert self.vmm.remove_vm_from_pool.called
assert self.vmm.remove_vm_from_pool.call_args[0][0] == self.vmd_a1.vm_name
diff --git a/backend/tests/vm_manager/test_event_handle.py b/backend/tests/vm_manager/test_event_handle.py
index b5bb82f..9db7f12 100644
--- a/backend/tests/vm_manager/test_event_handle.py
+++ b/backend/tests/vm_manager/test_event_handle.py
@@ -88,6 +88,7 @@ class TestEventHandle(object):
"terminate_playbook": self.terminate_pb_path,
"name": "base",
"archs": ["i386", "x86_64"],
+ "vm_max_check_fails": 2,
# "terminate_vars": ["vm_name", "ip"],
}
},
@@ -113,7 +114,7 @@ class TestEventHandle(object):
self.grl_patcher = mock.patch("{}.get_redis_logger".format(MODULE_REF))
self.grl_patcher.start()
- self.eh = EventHandler(self.vmm)
+ self.eh = EventHandler(self.opts, self.vmm)
self.eh.post_init()
self.vm_ip = "127.0.0.1"
@@ -135,7 +136,7 @@ class TestEventHandle(object):
self.erase_redis()
def test_post_init(self):
- test_eh = EventHandler(self.vmm)
+ test_eh = EventHandler(self.opts, self.vmm)
assert "on_health_check_success" not in test_eh.lua_scripts
test_eh.post_init()
assert test_eh.lua_scripts["on_health_check_success"]
8 years, 11 months
[copr] master: new logo (d0bfe42)
by asamalik@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit d0bfe42f64b0595a6504a4b0ee85a4b3ddc660b7
Author: Adam Samalik <asamalik(a)redhat.com>
Date: Fri May 22 11:45:34 2015 +0200
new logo
>---------------------------------------------------------------
frontend/coprs_frontend/coprs/static/copr.css | 1 +
frontend/coprs_frontend/coprs/static/copr_logo.png | Bin 3411 -> 8727 bytes
frontend/coprs_frontend/coprs/static/favicon.ico | Bin 3638 -> 4286 bytes
3 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/frontend/coprs_frontend/coprs/static/copr.css b/frontend/coprs_frontend/coprs/static/copr.css
index 7c7fb75..2110dcd 100644
--- a/frontend/coprs_frontend/coprs/static/copr.css
+++ b/frontend/coprs_frontend/coprs/static/copr.css
@@ -39,6 +39,7 @@ h4 {
#logo {
position: relative;
top: 8px;
+ height: 66px;
}
div.left-side-space {
diff --git a/frontend/coprs_frontend/coprs/static/copr_logo.png b/frontend/coprs_frontend/coprs/static/copr_logo.png
index 4576f78..255e00b 100644
Binary files a/frontend/coprs_frontend/coprs/static/copr_logo.png and b/frontend/coprs_frontend/coprs/static/copr_logo.png differ
diff --git a/frontend/coprs_frontend/coprs/static/favicon.ico b/frontend/coprs_frontend/coprs/static/favicon.ico
index 79d0ba9..ef67377 100644
Binary files a/frontend/coprs_frontend/coprs/static/favicon.ico and b/frontend/coprs_frontend/coprs/static/favicon.ico differ
8 years, 11 months
[copr] master: Automatic commit of package [copr-backend] release [1.67-1]. (687412f)
by vgologuz@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit 687412fe1ac6e04701807640a4f937352c249e0a
Author: Valentin Gologuzov <vgologuz(a)redhat.com>
Date: Thu May 21 15:32:57 2015 +0200
Automatic commit of package [copr-backend] release [1.67-1].
>---------------------------------------------------------------
backend/copr-backend.spec | 5 ++++-
rel-eng/packages/copr-backend | 2 +-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/backend/copr-backend.spec b/backend/copr-backend.spec
index 07c90f6..a39883e 100644
--- a/backend/copr-backend.spec
+++ b/backend/copr-backend.spec
@@ -3,7 +3,7 @@
%endif
Name: copr-backend
-Version: 1.66
+Version: 1.67
Release: 1%{?dist}
Summary: Backend for Copr
@@ -240,6 +240,9 @@ useradd -r -g copr -G lighttpd -s /bin/bash -c "COPR user" copr
%exclude %{_pkgdocdir}/playbooks
%changelog
+* Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.67-1
+- [backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder
+
* Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.66-1
- [backend] fix race condition in check for dead worker
diff --git a/rel-eng/packages/copr-backend b/rel-eng/packages/copr-backend
index 9f1dd4d..49d1179 100644
--- a/rel-eng/packages/copr-backend
+++ b/rel-eng/packages/copr-backend
@@ -1 +1 @@
-1.66-1 backend/
+1.67-1 backend/
8 years, 11 months
[copr] master: [backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder (31f3a0c)
by vgologuz@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit 31f3a0ce4d14363b619df3e29cb252bde37b35de
Author: Valentin Gologuzov <vgologuz(a)redhat.com>
Date: Thu May 21 15:06:46 2015 +0200
[backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder
>---------------------------------------------------------------
backend/backend/daemons/vm_master.py | 24 ++++++++++++++----------
1 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/backend/backend/daemons/vm_master.py b/backend/backend/daemons/vm_master.py
index 9af1163..7b397f6 100644
--- a/backend/backend/daemons/vm_master.py
+++ b/backend/backend/daemons/vm_master.py
@@ -84,16 +84,20 @@ class VmMaster(Process):
return
pid = int(pid)
- # here we can catch race condition: worker acquired VM but haven't set process title yet
- if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]:
- return
+ try:
+ # here we can catch race condition: worker acquired VM but haven't set process title yet
+ if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]:
+ return
- self.log.info("Process `{}` not exists anymore, doing second try. VM data: {}"
- .format(pid, vmd))
- # dirty hack: sleep and check again
- time.sleep(5)
- if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]:
- return
+ self.log.info("Process `{}` not exists anymore, doing second try. VM data: {}"
+ .format(pid, vmd))
+ # dirty hack: sleep and check again
+ time.sleep(5)
+ if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]:
+ return
+ except Exception:
+ self.log.exception("Failed do determine if process `{}` still alive for VM: {}, assuming dead"
+ .format(pid, vmd))
self.log.info("Process `{}` not exists anymore, terminating VM: {} ".format(pid, vmd.vm_name))
self.vmm.start_vm_termination(vmd.vm_name, allowed_pre_state=VmStates.IN_USE)
8 years, 11 months
[copr] master: Automatic commit of package [copr-backend] release [1.66-1]. (5f39857)
by vgologuz@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit 5f39857b1c5e5c8ebd79d9e3ed646d233b021f80
Author: Valentin Gologuzov <vgologuz(a)redhat.com>
Date: Thu May 21 14:46:55 2015 +0200
Automatic commit of package [copr-backend] release [1.66-1].
>---------------------------------------------------------------
backend/copr-backend.spec | 5 ++++-
rel-eng/packages/copr-backend | 2 +-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/backend/copr-backend.spec b/backend/copr-backend.spec
index 6240c5e..07c90f6 100644
--- a/backend/copr-backend.spec
+++ b/backend/copr-backend.spec
@@ -3,7 +3,7 @@
%endif
Name: copr-backend
-Version: 1.65
+Version: 1.66
Release: 1%{?dist}
Summary: Backend for Copr
@@ -240,6 +240,9 @@ useradd -r -g copr -G lighttpd -s /bin/bash -c "COPR user" copr
%exclude %{_pkgdocdir}/playbooks
%changelog
+* Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.66-1
+- [backend] fix race condition in check for dead worker
+
* Wed May 20 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.65-1
- [backend] Rescheduling unfinished builds before stop
- fix indentation
diff --git a/rel-eng/packages/copr-backend b/rel-eng/packages/copr-backend
index c29dfea..9f1dd4d 100644
--- a/rel-eng/packages/copr-backend
+++ b/rel-eng/packages/copr-backend
@@ -1 +1 @@
-1.65-1 backend/
+1.66-1 backend/
8 years, 11 months
[copr] master: [backend] fix race condition in check for dead worker (ab97f52)
by vgologuz@fedoraproject.org
Repository : http://git.fedorahosted.org/cgit/copr.git
On branch : master
>---------------------------------------------------------------
commit ab97f52e648d3144ff02c586accf0bb4d065a177
Author: Valentin Gologuzov <vgologuz(a)redhat.com>
Date: Thu May 21 14:46:09 2015 +0200
[backend] fix race condition in check for dead worker
>---------------------------------------------------------------
backend/backend/daemons/vm_master.py | 13 ++++++++++++-
backend/tests/deamons/test_vm_master.py | 9 +++++++--
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/backend/backend/daemons/vm_master.py b/backend/backend/daemons/vm_master.py
index c8ccb0e..9af1163 100644
--- a/backend/backend/daemons/vm_master.py
+++ b/backend/backend/daemons/vm_master.py
@@ -70,6 +70,8 @@ class VmMaster(Process):
# self.log.info("Failed to release VM: {}".format(vmd.vm_name))
def check_one_vm_for_dead_builder(self, vmd):
+ # TODO: builder should renew lease periodically
+ # and we should use that time instead of in_use_since and pid checks
in_use_since = vmd.get_field(self.vmm.rc, "in_use_since")
pid = vmd.get_field(self.vmm.rc, "used_by_pid")
@@ -82,12 +84,21 @@ class VmMaster(Process):
return
pid = int(pid)
+ # here we can catch race condition: worker acquired VM but haven't set process title yet
+ if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]:
+ return
+
+ self.log.info("Process `{}` not exists anymore, doing second try. VM data: {}"
+ .format(pid, vmd))
+ # dirty hack: sleep and check again
+ time.sleep(5)
if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]:
return
self.log.info("Process `{}` not exists anymore, terminating VM: {} ".format(pid, vmd.vm_name))
self.vmm.start_vm_termination(vmd.vm_name, allowed_pre_state=VmStates.IN_USE)
- self.request_build_reschedule(vmd)
+ # cause race condition
+ # self.request_build_reschedule(vmd)
def remove_vm_with_dead_builder(self):
# TODO: rewrite build manage at backend and move functionality there
diff --git a/backend/tests/deamons/test_vm_master.py b/backend/tests/deamons/test_vm_master.py
index be8c5f8..d484bd1 100644
--- a/backend/tests/deamons/test_vm_master.py
+++ b/backend/tests/deamons/test_vm_master.py
@@ -247,8 +247,13 @@ class TestVmMaster(object):
self.vm_master.remove_vm_with_dead_builder()
msg_list = self.rcv_from_ps_message_bus()
- print(self.vm_master.log.call_args_list)
- assert set(["2", "4"]) == set([json.loads(m["data"])["build_id"] for m in msg_list])
+ assert self.vmm.start_vm_termination.call_args_list == [
+ mock.call('a2', allowed_pre_state='in_use'),
+ mock.call('b2', allowed_pre_state='in_use'),
+ mock.call('b3', allowed_pre_state='in_use')
+ ]
+ # changed logic for the moment
+ # assert set(["2", "4"]) == set([json.loads(m["data"])["build_id"] for m in msg_list])
def test_check_vms_health(self, mc_time, add_vmd):
self.vmm.start_vm_check = types.MethodType(MagicMock(), self.vmm)
8 years, 11 months