May 2015 - copr-commits - Fedora Mailing-Lists

[copr] master: Automatic commit of package [copr-backend] release [1.68-1]. (8062658)

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit 8062658afa4a0cc6d6553185a4f0bb0ef43be595 Author: Valentin Gologuzov <vgologuz(a)redhat.com> Date: Thu May 28 11:20:56 2015 +0200 Automatic commit of package [copr-backend] release [1.68-1]. >--------------------------------------------------------------- backend/copr-backend.spec | 7 ++++++- rel-eng/packages/copr-backend | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/copr-backend.spec b/backend/copr-backend.spec index a39883e..6f49836 100644 --- a/backend/copr-backend.spec +++ b/backend/copr-backend.spec @@ -3,7 +3,7 @@ %endif Name: copr-backend -Version: 1.67 +Version: 1.68 Release: 1%{?dist} Summary: Backend for Copr @@ -240,6 +240,11 @@ useradd -r -g copr -G lighttpd -s /bin/bash -c "COPR user" copr %exclude %{_pkgdocdir}/playbooks %changelog +* Thu May 28 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.68-1 +- [backend] add config option for VM health check timeout +- [backend] moved config parameters from Threshold class into the backend + config file + * Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.67-1 - [backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder diff --git a/rel-eng/packages/copr-backend b/rel-eng/packages/copr-backend index 49d1179..0380abb 100644 --- a/rel-eng/packages/copr-backend +++ b/rel-eng/packages/copr-backend @@ -1 +1 @@ -1.67-1 backend/ +1.68-1 backend/

8 years, 11 months

1
0
0 / 0

[copr] master: [backend] add config option for VM health check timeout (927d3b2)

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit 927d3b25da662e5bea5c0272ae47bd47247cb6d4 Author: Valentin Gologuzov <vgologuz(a)redhat.com> Date: Thu May 28 11:05:26 2015 +0200 [backend] add config option for VM health check timeout >--------------------------------------------------------------- backend/backend/helpers.py | 3 +++ backend/backend/vm_manage/check.py | 2 +- backend/tests/vm_manager/test_check.py | 1 + 3 files changed, 5 insertions(+), 1 deletions(-) diff --git a/backend/backend/helpers.py b/backend/backend/helpers.py index 1315bce..08b7794 100644 --- a/backend/backend/helpers.py +++ b/backend/backend/helpers.py @@ -184,6 +184,9 @@ class BackendConfigReader(object): opts.vm_cycle_timeout = _get_conf( cp, "backend", "vm_cycle_timeout", default=10, mode="int") + opts.vm_ssh_check_timeout = _get_conf( + cp, "backend", "vm_ssh_check_timeout", + default=5, mode="int") opts.destdir = _get_conf(cp, "backend", "destdir", None, mode="path") diff --git a/backend/backend/vm_manage/check.py b/backend/backend/vm_manage/check.py index 4209e44..e5b08fa 100644 --- a/backend/backend/vm_manage/check.py +++ b/backend/backend/vm_manage/check.py @@ -33,7 +33,7 @@ def check_health(opts, vm_name, vm_ip): pattern=vm_ip, forks=1, transport=opts.ssh.transport, - timeout=2 + timeout=opts.vm_ssh_check_timeout ) connection = Runner(**runner_options) connection.module_name = "shell" diff --git a/backend/tests/vm_manager/test_check.py b/backend/tests/vm_manager/test_check.py index dae61e9..e7d6172 100644 --- a/backend/tests/vm_manager/test_check.py +++ b/backend/tests/vm_manager/test_check.py @@ -94,6 +94,7 @@ class TestChecker(object): do_sign=True, timeout=1800, results_baseurl="/tmp", + vm_ssh_check_timeout=2, ) # self.try_spawn_args = '-c ssh {}'.format(self.spawn_pb_path)

8 years, 11 months

1
0
0 / 0

[copr] master: [backend] moved config parameters from Threshold class into the backend config file (5a9ce39)

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit 5a9ce398430c390066f89710d32ed74916c2566c Author: Valentin Gologuzov <vgologuz(a)redhat.com> Date: Thu May 28 10:54:30 2015 +0200 [backend] moved config parameters from Threshold class into the backend config file >--------------------------------------------------------------- backend/backend/daemons/vm_master.py | 16 ++++++++-------- backend/backend/helpers.py | 16 ++++++++++++++++ backend/backend/vm_manage/__init__.py | 11 ----------- backend/backend/vm_manage/event_handle.py | 8 +++++--- backend/tests/deamons/test_vm_master.py | 23 +++++++++++++++-------- backend/tests/vm_manager/test_event_handle.py | 5 +++-- 6 files changed, 47 insertions(+), 32 deletions(-) diff --git a/backend/backend/daemons/vm_master.py b/backend/backend/daemons/vm_master.py index 7b397f6..78c7504 100644 --- a/backend/backend/daemons/vm_master.py +++ b/backend/backend/daemons/vm_master.py @@ -13,8 +13,8 @@ import traceback import sys import psutil -from backend.constants import DEF_BUILD_TIMEOUT, JOB_GRAB_TASK_END_PUBSUB -from backend.vm_manage import VmStates, Thresholds, KEY_VM_POOL_INFO +from backend.constants import JOB_GRAB_TASK_END_PUBSUB +from backend.vm_manage import VmStates, KEY_VM_POOL_INFO from backend.vm_manage.event_handle import EventHandler from ..helpers import get_redis_logger @@ -109,7 +109,6 @@ class VmMaster(Process): # VMM shouldn't do this # check that process who acquired VMD still exists, otherwise release VM - # TODO: fix 4 nested `if`. Ugly! for vmd in self.vmm.get_vm_by_group_and_state_list(None, [VmStates.IN_USE]): self.check_one_vm_for_dead_builder(vmd) @@ -120,7 +119,8 @@ class VmMaster(Process): for vmd in self.vmm.get_vm_by_group_and_state_list(None, states_to_check): last_health_check = vmd.get_field(self.vmm.rc, "last_health_check") - if not last_health_check or time.time() - float(last_health_check) > Thresholds.health_check_period: + check_period = self.opts.build_groups[vmd.group]["vm_health_check_period"] + if not last_health_check or time.time() - float(last_health_check) > check_period: self.vmm.start_vm_check(vmd.vm_name) def try_spawn_one(self, group): @@ -205,12 +205,12 @@ class VmMaster(Process): self.vmm.mark_server_start() self.kill_received = False - self.event_handler = EventHandler(self.vmm) + self.event_handler = EventHandler(self.opts, self.vmm) self.event_handler.start() self.log.info("VM master process started") while not self.kill_received: - time.sleep(Thresholds.cycle_timeout) + time.sleep(self.opts.vm_cycle_timeout) try: self.do_cycle() except Exception as err: @@ -230,7 +230,7 @@ class VmMaster(Process): for vmd in self.vmm.get_vm_by_group_and_state_list(None, [VmStates.CHECK_HEALTH]): time_elapsed = time.time() - float(vmd.get_field(self.vmm.rc, "last_health_check") or 0) - if time_elapsed > Thresholds.health_check_max_time: + if time_elapsed > self.opts.build_groups[vmd.group]["vm_health_check_max_time"]: self.log.info("VM marked with check fail state, " "VM stayed too long in health check state, elapsed: {} VM: {}" .format(time_elapsed, str(vmd))) @@ -247,7 +247,7 @@ class VmMaster(Process): for vmd in self.vmm.get_vm_by_group_and_state_list(None, [VmStates.TERMINATING]): time_elapsed = time.time() - float(vmd.get_field(self.vmm.rc, "terminating_since") or 0) - if time_elapsed > Thresholds.terminating_timeout: + if time_elapsed > self.opts.build_groups[vmd.group]["vm_terminating_timeout"]: if len(self.vmm.lookup_vms_by_ip(vmd.vm_ip)) > 1: self.log.info( "Removing VM record: {}. There are more VM with the same ip, " diff --git a/backend/backend/helpers.py b/backend/backend/helpers.py index e6571d6..1315bce 100644 --- a/backend/backend/helpers.py +++ b/backend/backend/helpers.py @@ -166,9 +166,25 @@ class BackendConfigReader(object): "vm_dirty_terminating_timeout": _get_conf( cp, "backend", "group{}_vm_dirty_terminating_timeout".format(group_id), default=120, mode="int"), + "vm_health_check_period": _get_conf( + cp, "backend", "group{}_vm_health_check_period".format(group_id), + default=120, mode="int"), + "vm_health_check_max_time": _get_conf( + cp, "backend", "group{}_vm_health_check_max_time".format(group_id), + default=300, mode="int"), + "vm_max_check_fails": _get_conf( + cp, "backend", "group{}_vm_max_check_fails".format(group_id), + default=2, mode="int"), + "vm_terminating_timeout": _get_conf( + cp, "backend", "group{}_vm_terminating_timeout".format(group_id), + default=600, mode="int"), } opts.build_groups.append(group) + opts.vm_cycle_timeout = _get_conf( + cp, "backend", "vm_cycle_timeout", + default=10, mode="int") + opts.destdir = _get_conf(cp, "backend", "destdir", None, mode="path") opts.exit_on_worker = _get_conf( diff --git a/backend/backend/vm_manage/__init__.py b/backend/backend/vm_manage/__init__.py index d2f781a..3fb187f 100644 --- a/backend/backend/vm_manage/__init__.py +++ b/backend/backend/vm_manage/__init__.py @@ -37,14 +37,3 @@ KEY_SERVER_INFO = "copr:backend:server_info:hset::" KEY_VM_INSTANCE = "copr:backend:vm_instance:hset::{vm_name}" # hset to store VmDescriptor - - -class Thresholds(object): - """ - Time constants for VM manager, - """ - health_check_period = 10 # [s.] how often health check is invoked - health_check_max_time = 120 # [s.] if health check wasn't done for this time, mark check fail - max_check_fails = 2 # maximum number of fails before starting VM termination - terminating_timeout = 600 # [s.] time before we try to terminate VM again - cycle_timeout = 10 # [s.] timeout for all periodical checks diff --git a/backend/backend/vm_manage/event_handle.py b/backend/backend/vm_manage/event_handle.py index 460440c..8c950b4 100644 --- a/backend/backend/vm_manage/event_handle.py +++ b/backend/backend/vm_manage/event_handle.py @@ -8,7 +8,7 @@ import sys from backend.exceptions import VmDescriptorNotFound from backend.helpers import format_tb, get_redis_logger -from backend.vm_manage import Thresholds, VmStates, PUBSUB_MB, EventTopics, KEY_VM_INSTANCE +from backend.vm_manage import VmStates, PUBSUB_MB, EventTopics class Recycle(Thread): @@ -66,8 +66,9 @@ class EventHandler(Process): """ :type vmm: VmManager """ - def __init__(self, vmm): + def __init__(self, opts, vmm): super(EventHandler, self).__init__(name="EventHandler") + self.opts = opts self.vmm = vmm self.kill_received = False @@ -104,7 +105,8 @@ class EventHandler(Process): self.log.debug("recording check fail: {}".format(msg)) self.lua_scripts["record_failure"](keys=[vmd.vm_key]) fails_count = int(vmd.get_field(self.vmm.rc, "check_fails") or 0) - if fails_count > Thresholds.max_check_fails and vmd.state != VmStates.IN_USE: + max_check_fails = self.opts.build_groups[vmd.group]["vm_max_check_fails"] + if fails_count > max_check_fails and vmd.state != VmStates.IN_USE: self.log.info("check fail threshold reached: {}, terminating: {}" .format(check_fails_count, msg)) self.vmm.start_vm_termination(vmd.vm_name) diff --git a/backend/tests/deamons/test_vm_master.py b/backend/tests/deamons/test_vm_master.py index d484bd1..e770828 100644 --- a/backend/tests/deamons/test_vm_master.py +++ b/backend/tests/deamons/test_vm_master.py @@ -16,7 +16,7 @@ import time from multiprocessing import Queue from backend import exceptions from backend.constants import JOB_GRAB_TASK_END_PUBSUB -from backend.exceptions import MockRemoteError, CoprSignError, BuilderError, VmError +from backend.exceptions import VmError import tempfile import shutil @@ -24,7 +24,7 @@ import os import six from backend.helpers import get_redis_connection -from backend.vm_manage import VmStates, Thresholds +from backend.vm_manage import VmStates from backend.vm_manage.manager import VmManager from backend.daemons.vm_master import VmMaster @@ -92,22 +92,28 @@ class TestVmMaster(object): "max_spawn_processes": 3, "vm_spawn_min_interval": self.vm_spawn_min_interval, "vm_dirty_terminating_timeout": 120, + "vm_health_check_period": 10, + "vm_health_check_max_time": 60, + "vm_terminating_timeout": 300, }, 1: { "name": "arm", "archs": ["armV7"], "vm_spawn_min_interval": self.vm_spawn_min_interval, "vm_dirty_terminating_timeout": 120, + "vm_health_check_period": 10, + "vm_health_check_max_time": 60, + "vm_terminating_timeout": 300, } }, fedmsg_enabled=False, sleeptime=0.1, + vm_cycle_timeout=10, ) - self.queue = Queue() self.vm_ip = "127.0.0.1" @@ -271,7 +277,7 @@ class TestVmMaster(object): self.vm_master.check_vms_health() assert not self.vmm.start_vm_check.called - mc_time.time.return_value = 1 + Thresholds.health_check_period + mc_time.time.return_value = 1 + self.opts.build_groups[0]["vm_health_check_period"] self.vm_master.check_vms_health() to_check = set(call[0][1] for call in self.vmm.start_vm_check.call_args_list) assert set(['a1', 'a3', 'b1', 'b2']) == to_check @@ -291,9 +297,10 @@ class TestVmMaster(object): self.vmd_a3.store_field(self.rc, "state", VmStates.CHECK_HEALTH) self.vmd_a2.store_field(self.rc, "last_health_check", 0) - self.vmd_a3.store_field(self.rc, "last_health_check", Thresholds.health_check_max_time + 10 ) + self.vmd_a3.store_field(self.rc, "last_health_check", + self.opts.build_groups[0]["vm_health_check_max_time"] + 10) - mc_time.time.return_value = Thresholds.health_check_max_time + 11 + mc_time.time.return_value = self.opts.build_groups[0]["vm_health_check_max_time"] + 11 self.vmm.mark_vm_check_failed = MagicMock() self.vm_master.finalize_long_health_checks() @@ -327,7 +334,7 @@ class TestVmMaster(object): # case 3: one VM in terminating state with unique ip, time_elapsed > threshold # start_vm_termination called, no remove_vm_from_pool - mc_time.time.return_value = 1 + Thresholds.terminating_timeout + mc_time.time.return_value = 1 + self.opts.build_groups[0]["vm_terminating_timeout"] self.vm_master.terminate_again() assert not self.vmm.remove_vm_from_pool.called @@ -347,7 +354,7 @@ class TestVmMaster(object): # case 4: two VM with the same IP, one in terminating states, , time_elapsed > threshold # no start_vm_termination, remove_vm_from_pool - mc_time.time.return_value = 1 + Thresholds.terminating_timeout + mc_time.time.return_value = 1 + self.opts.build_groups[0]["vm_terminating_timeout"] self.vm_master.terminate_again() assert self.vmm.remove_vm_from_pool.called assert self.vmm.remove_vm_from_pool.call_args[0][0] == self.vmd_a1.vm_name diff --git a/backend/tests/vm_manager/test_event_handle.py b/backend/tests/vm_manager/test_event_handle.py index b5bb82f..9db7f12 100644 --- a/backend/tests/vm_manager/test_event_handle.py +++ b/backend/tests/vm_manager/test_event_handle.py @@ -88,6 +88,7 @@ class TestEventHandle(object): "terminate_playbook": self.terminate_pb_path, "name": "base", "archs": ["i386", "x86_64"], + "vm_max_check_fails": 2, # "terminate_vars": ["vm_name", "ip"], } }, @@ -113,7 +114,7 @@ class TestEventHandle(object): self.grl_patcher = mock.patch("{}.get_redis_logger".format(MODULE_REF)) self.grl_patcher.start() - self.eh = EventHandler(self.vmm) + self.eh = EventHandler(self.opts, self.vmm) self.eh.post_init() self.vm_ip = "127.0.0.1" @@ -135,7 +136,7 @@ class TestEventHandle(object): self.erase_redis() def test_post_init(self): - test_eh = EventHandler(self.vmm) + test_eh = EventHandler(self.opts, self.vmm) assert "on_health_check_success" not in test_eh.lua_scripts test_eh.post_init() assert test_eh.lua_scripts["on_health_check_success"]

8 years, 11 months

1
0
0 / 0

[copr] master: new logo (d0bfe42)

by asamalik＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit d0bfe42f64b0595a6504a4b0ee85a4b3ddc660b7 Author: Adam Samalik <asamalik(a)redhat.com> Date: Fri May 22 11:45:34 2015 +0200 new logo >--------------------------------------------------------------- frontend/coprs_frontend/coprs/static/copr.css | 1 + frontend/coprs_frontend/coprs/static/copr_logo.png | Bin 3411 -> 8727 bytes frontend/coprs_frontend/coprs/static/favicon.ico | Bin 3638 -> 4286 bytes 3 files changed, 1 insertions(+), 0 deletions(-) diff --git a/frontend/coprs_frontend/coprs/static/copr.css b/frontend/coprs_frontend/coprs/static/copr.css index 7c7fb75..2110dcd 100644 --- a/frontend/coprs_frontend/coprs/static/copr.css +++ b/frontend/coprs_frontend/coprs/static/copr.css @@ -39,6 +39,7 @@ h4 { #logo { position: relative; top: 8px; + height: 66px; } div.left-side-space { diff --git a/frontend/coprs_frontend/coprs/static/copr_logo.png b/frontend/coprs_frontend/coprs/static/copr_logo.png index 4576f78..255e00b 100644 Binary files a/frontend/coprs_frontend/coprs/static/copr_logo.png and b/frontend/coprs_frontend/coprs/static/copr_logo.png differ diff --git a/frontend/coprs_frontend/coprs/static/favicon.ico b/frontend/coprs_frontend/coprs/static/favicon.ico index 79d0ba9..ef67377 100644 Binary files a/frontend/coprs_frontend/coprs/static/favicon.ico and b/frontend/coprs_frontend/coprs/static/favicon.ico differ

8 years, 11 months

1
0
0 / 0

[copr] tag 'copr-backend-1.67-1' created

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git New tag : copr-backend-1.67-1 Referencing: e41b5554fdb4b89ad15ca58b3f40b7a26499714e

8 years, 11 months

1
0
0 / 0

[copr] master: Automatic commit of package [copr-backend] release [1.67-1]. (687412f)

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit 687412fe1ac6e04701807640a4f937352c249e0a Author: Valentin Gologuzov <vgologuz(a)redhat.com> Date: Thu May 21 15:32:57 2015 +0200 Automatic commit of package [copr-backend] release [1.67-1]. >--------------------------------------------------------------- backend/copr-backend.spec | 5 ++++- rel-eng/packages/copr-backend | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/copr-backend.spec b/backend/copr-backend.spec index 07c90f6..a39883e 100644 --- a/backend/copr-backend.spec +++ b/backend/copr-backend.spec @@ -3,7 +3,7 @@ %endif Name: copr-backend -Version: 1.66 +Version: 1.67 Release: 1%{?dist} Summary: Backend for Copr @@ -240,6 +240,9 @@ useradd -r -g copr -G lighttpd -s /bin/bash -c "COPR user" copr %exclude %{_pkgdocdir}/playbooks %changelog +* Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.67-1 +- [backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder + * Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.66-1 - [backend] fix race condition in check for dead worker diff --git a/rel-eng/packages/copr-backend b/rel-eng/packages/copr-backend index 9f1dd4d..49d1179 100644 --- a/rel-eng/packages/copr-backend +++ b/rel-eng/packages/copr-backend @@ -1 +1 @@ -1.66-1 backend/ +1.67-1 backend/

8 years, 11 months

1
0
0 / 0

[copr] master: [backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder (31f3a0c)

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit 31f3a0ce4d14363b619df3e29cb252bde37b35de Author: Valentin Gologuzov <vgologuz(a)redhat.com> Date: Thu May 21 15:06:46 2015 +0200 [backend] Handle unexpected exception VmMaster::check_one_vm_for_dead_builder >--------------------------------------------------------------- backend/backend/daemons/vm_master.py | 24 ++++++++++++++---------- 1 files changed, 14 insertions(+), 10 deletions(-) diff --git a/backend/backend/daemons/vm_master.py b/backend/backend/daemons/vm_master.py index 9af1163..7b397f6 100644 --- a/backend/backend/daemons/vm_master.py +++ b/backend/backend/daemons/vm_master.py @@ -84,16 +84,20 @@ class VmMaster(Process): return pid = int(pid) - # here we can catch race condition: worker acquired VM but haven't set process title yet - if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]: - return + try: + # here we can catch race condition: worker acquired VM but haven't set process title yet + if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]: + return - self.log.info("Process `{}` not exists anymore, doing second try. VM data: {}" - .format(pid, vmd)) - # dirty hack: sleep and check again - time.sleep(5) - if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]: - return + self.log.info("Process `{}` not exists anymore, doing second try. VM data: {}" + .format(pid, vmd)) + # dirty hack: sleep and check again + time.sleep(5) + if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]: + return + except Exception: + self.log.exception("Failed do determine if process `{}` still alive for VM: {}, assuming dead" + .format(pid, vmd)) self.log.info("Process `{}` not exists anymore, terminating VM: {} ".format(pid, vmd.vm_name)) self.vmm.start_vm_termination(vmd.vm_name, allowed_pre_state=VmStates.IN_USE)

8 years, 11 months

1
0
0 / 0

[copr] tag 'copr-backend-1.66-1' created

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git New tag : copr-backend-1.66-1 Referencing: 55cf78a6dd5a8aa82d8ec070a36e7879ddf59ef1

8 years, 11 months

1
0
0 / 0

[copr] master: Automatic commit of package [copr-backend] release [1.66-1]. (5f39857)

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit 5f39857b1c5e5c8ebd79d9e3ed646d233b021f80 Author: Valentin Gologuzov <vgologuz(a)redhat.com> Date: Thu May 21 14:46:55 2015 +0200 Automatic commit of package [copr-backend] release [1.66-1]. >--------------------------------------------------------------- backend/copr-backend.spec | 5 ++++- rel-eng/packages/copr-backend | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/copr-backend.spec b/backend/copr-backend.spec index 6240c5e..07c90f6 100644 --- a/backend/copr-backend.spec +++ b/backend/copr-backend.spec @@ -3,7 +3,7 @@ %endif Name: copr-backend -Version: 1.65 +Version: 1.66 Release: 1%{?dist} Summary: Backend for Copr @@ -240,6 +240,9 @@ useradd -r -g copr -G lighttpd -s /bin/bash -c "COPR user" copr %exclude %{_pkgdocdir}/playbooks %changelog +* Thu May 21 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.66-1 +- [backend] fix race condition in check for dead worker + * Wed May 20 2015 Valentin Gologuzov <vgologuz(a)redhat.com> 1.65-1 - [backend] Rescheduling unfinished builds before stop - fix indentation diff --git a/rel-eng/packages/copr-backend b/rel-eng/packages/copr-backend index c29dfea..9f1dd4d 100644 --- a/rel-eng/packages/copr-backend +++ b/rel-eng/packages/copr-backend @@ -1 +1 @@ -1.65-1 backend/ +1.66-1 backend/

8 years, 11 months

1
0
0 / 0

[copr] master: [backend] fix race condition in check for dead worker (ab97f52)

by vgologuz＠fedoraproject.org

Repository : http://git.fedorahosted.org/cgit/copr.git On branch : master >--------------------------------------------------------------- commit ab97f52e648d3144ff02c586accf0bb4d065a177 Author: Valentin Gologuzov <vgologuz(a)redhat.com> Date: Thu May 21 14:46:09 2015 +0200 [backend] fix race condition in check for dead worker >--------------------------------------------------------------- backend/backend/daemons/vm_master.py | 13 ++++++++++++- backend/tests/deamons/test_vm_master.py | 9 +++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/backend/backend/daemons/vm_master.py b/backend/backend/daemons/vm_master.py index c8ccb0e..9af1163 100644 --- a/backend/backend/daemons/vm_master.py +++ b/backend/backend/daemons/vm_master.py @@ -70,6 +70,8 @@ class VmMaster(Process): # self.log.info("Failed to release VM: {}".format(vmd.vm_name)) def check_one_vm_for_dead_builder(self, vmd): + # TODO: builder should renew lease periodically + # and we should use that time instead of in_use_since and pid checks in_use_since = vmd.get_field(self.vmm.rc, "in_use_since") pid = vmd.get_field(self.vmm.rc, "used_by_pid") @@ -82,12 +84,21 @@ class VmMaster(Process): return pid = int(pid) + # here we can catch race condition: worker acquired VM but haven't set process title yet + if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]: + return + + self.log.info("Process `{}` not exists anymore, doing second try. VM data: {}" + .format(pid, vmd)) + # dirty hack: sleep and check again + time.sleep(5) if psutil.pid_exists(pid) and vmd.vm_name in psutil.Process(pid).cmdline[0]: return self.log.info("Process `{}` not exists anymore, terminating VM: {} ".format(pid, vmd.vm_name)) self.vmm.start_vm_termination(vmd.vm_name, allowed_pre_state=VmStates.IN_USE) - self.request_build_reschedule(vmd) + # cause race condition + # self.request_build_reschedule(vmd) def remove_vm_with_dead_builder(self): # TODO: rewrite build manage at backend and move functionality there diff --git a/backend/tests/deamons/test_vm_master.py b/backend/tests/deamons/test_vm_master.py index be8c5f8..d484bd1 100644 --- a/backend/tests/deamons/test_vm_master.py +++ b/backend/tests/deamons/test_vm_master.py @@ -247,8 +247,13 @@ class TestVmMaster(object): self.vm_master.remove_vm_with_dead_builder() msg_list = self.rcv_from_ps_message_bus() - print(self.vm_master.log.call_args_list) - assert set(["2", "4"]) == set([json.loads(m["data"])["build_id"] for m in msg_list]) + assert self.vmm.start_vm_termination.call_args_list == [ + mock.call('a2', allowed_pre_state='in_use'), + mock.call('b2', allowed_pre_state='in_use'), + mock.call('b3', allowed_pre_state='in_use') + ] + # changed logic for the moment + # assert set(["2", "4"]) == set([json.loads(m["data"])["build_id"] for m in msg_list]) def test_check_vms_health(self, mc_time, add_vmd): self.vmm.start_vm_check = types.MethodType(MagicMock(), self.vmm)

8 years, 11 months

1
0
0 / 0

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

copr-commits May 2015