This is an automated email from the git hooks/post-receive script.
praiskup pushed a change to branch master in repository copr/copr.
from d68d584 backend: depend on copr-messaging new 05e1341 backend: cleanup_vm_nova: more verbose output new d37d832 backend: cleanup_vm_nova.py: give the spawner 10 minutes new 770ef49 backend: cleanup_vm_nova: don't use format() for logging new 594f65f backend: cleanup_vm_nova: add --kill-also-unused option new be184b2 backend: cleanup_vm_nova: require 'copr' user
The 5 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
Summary of changes: backend/run/cleanup_vm_nova.py | 101 +++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 28 deletions(-)
This is an automated email from the git hooks/post-receive script.
praiskup pushed a commit to branch master in repository copr/copr.
commit 05e1341c74559a148ffc6aaedb513d0bab6a74dd Author: Pavel Raiskup praiskup@redhat.com AuthorDate: Fri Jul 26 15:23:40 2019 +0200
backend: cleanup_vm_nova: more verbose output
Make sure we see which VMs are checked and which VMs are actually terminated. Move the 'log' definition to the top of the file; since it is a global, we shouldn't pretend otherwise. --- backend/run/cleanup_vm_nova.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-)
diff --git a/backend/run/cleanup_vm_nova.py b/backend/run/cleanup_vm_nova.py index ea4f7f0..c80c8e4 100755 --- a/backend/run/cleanup_vm_nova.py +++ b/backend/run/cleanup_vm_nova.py @@ -25,6 +25,16 @@ except ImportError:
logging.getLogger("requests").setLevel(logging.ERROR)
+log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) +log_format = logging.Formatter('[%(asctime)s][%(thread)s][%(levelname)6s]: %(message)s') +hfile = logging.FileHandler('/var/log/copr-backend/cleanup_vms.log') +hfile.setLevel(logging.INFO) +hstderr = logging.StreamHandler() +hfile.setFormatter(log_format) +log.addHandler(hfile) +log.addHandler(hstderr) +
nova_cloud_vars_path = os.environ.get("NOVA_CLOUD_VARS", "/home/copr/provision/nova_cloud_vars.yml")
@@ -61,7 +71,6 @@ class Cleaner(object): def terminate(srv): try: srv.delete() - log.info("delete invoked for: {}".format(srv)) except Exception as err: log.exception("failed to request VM termination: {}".format(err))
@@ -69,23 +78,27 @@ class Cleaner(object): def old_enough(srv): dt_created = dt_parse(srv.created) delta = (utc_now() - dt_created).total_seconds() - # log.info("Server {} created {} now {}; delta: {}".format(srv, dt_created, utc_now(), delta)) - return delta > 60 * 5 # 5 minutes + if delta > 60 * 5: # 5 minutes + log.debug("Server {} created {} now {}; delta: {}".format(srv, dt_created, utc_now(), delta)) + return True + return False
def check_one(self, srv_id, vms_names): srv = self.nt.servers.get(srv_id) - log.info("checking vm: {}".format(srv)) + log.debug("checking vm '{}'".format(srv.name)) srv.get() if srv.status.lower().strip() == "error": - log.info("server {} got into the error state, terminating".format(srv)) + log.info("vm '{}' got into the error state, terminating".format(srv.name)) self.terminate(srv) elif self.old_enough(srv) and srv.human_id.lower() not in vms_names: - log.info("server {} not placed in our db, terminating".format(srv)) + log.info("vm '{}' not placed in our db, terminating".format(srv.name)) self.terminate(srv)
def main(self): """ - Terminate erred VM's and VM's with uptime > 10 minutes and which doesn't have associated process + Terminate + - errored VM's and + - VM's with uptime > 5 minutes and which don't have entry in redis DB """ start = time.time() log.info("Cleanup start") @@ -103,15 +116,7 @@ class Cleaner(object):
log.info("cleanup consumed: {} seconds".format(time.time() - start))
-if __name__ == "__main__": - logging.basicConfig( - filename="/var/log/copr-backend/cleanup_vms.log", - # filename="/tmp/cleanup_vms.log", - # stream=sys.stdout, - format='[%(asctime)s][%(thread)s][%(levelname)6s]: %(message)s', - level=logging.INFO) - - log = logging.getLogger(__name__)
+if __name__ == "__main__": cleaner = Cleaner(read_config()) cleaner.main()
This is an automated email from the git hooks/post-receive script.
praiskup pushed a commit to branch master in repository copr/copr.
commit d37d83209d123aec96591242b4dc4ba88f917403 Author: Pavel Raiskup praiskup@redhat.com AuthorDate: Tue Jul 30 08:37:06 2019 +0200
backend: cleanup_vm_nova.py: give the spawner 10 minutes --- backend/run/cleanup_vm_nova.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/backend/run/cleanup_vm_nova.py b/backend/run/cleanup_vm_nova.py index c80c8e4..32e80db 100755 --- a/backend/run/cleanup_vm_nova.py +++ b/backend/run/cleanup_vm_nova.py @@ -13,6 +13,9 @@ from dateutil.parser import parse as dt_parse import yaml from novaclient.client import Client
+# don't kill younger VMs than this (minutes) +SPAWN_TIMEOUT = 10 + sys.path.append("/usr/share/copr/")
from backend.helpers import BackendConfigReader @@ -78,7 +81,7 @@ class Cleaner(object): def old_enough(srv): dt_created = dt_parse(srv.created) delta = (utc_now() - dt_created).total_seconds() - if delta > 60 * 5: # 5 minutes + if delta > 60 * SPAWN_TIMEOUT: log.debug("Server {} created {} now {}; delta: {}".format(srv, dt_created, utc_now(), delta)) return True return False @@ -98,7 +101,8 @@ class Cleaner(object): """ Terminate - errored VM's and - - VM's with uptime > 5 minutes and which don't have entry in redis DB + - VM's with uptime > SPAWN_TIMEOUT minutes and which don't have entry in + redis DB """ start = time.time() log.info("Cleanup start")
This is an automated email from the git hooks/post-receive script.
praiskup pushed a commit to branch master in repository copr/copr.
commit 770ef4902567e93fcaa4a0807efe9c4ef134c7fd Author: Pavel Raiskup praiskup@redhat.com AuthorDate: Tue Jul 30 08:43:46 2019 +0200
backend: cleanup_vm_nova: don't use format() for logging --- backend/run/cleanup_vm_nova.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/backend/run/cleanup_vm_nova.py b/backend/run/cleanup_vm_nova.py index 32e80db..5ac2e0b 100755 --- a/backend/run/cleanup_vm_nova.py +++ b/backend/run/cleanup_vm_nova.py @@ -74,27 +74,28 @@ class Cleaner(object): def terminate(srv): try: srv.delete() - except Exception as err: - log.exception("failed to request VM termination: {}".format(err)) + except Exception: + log.exception("failed to request VM termination")
@staticmethod def old_enough(srv): dt_created = dt_parse(srv.created) delta = (utc_now() - dt_created).total_seconds() if delta > 60 * SPAWN_TIMEOUT: - log.debug("Server {} created {} now {}; delta: {}".format(srv, dt_created, utc_now(), delta)) + log.debug("Server '%s', created: %s, now: %s, delta: %s", + srv.name, dt_created, utc_now(), delta) return True return False
def check_one(self, srv_id, vms_names): srv = self.nt.servers.get(srv_id) - log.debug("checking vm '{}'".format(srv.name)) + log.debug("checking vm '%s'", srv.name) srv.get() if srv.status.lower().strip() == "error": - log.info("vm '{}' got into the error state, terminating".format(srv.name)) + log.info("vm '%s' got into the error state, terminating", srv.name) self.terminate(srv) elif self.old_enough(srv) and srv.human_id.lower() not in vms_names: - log.info("vm '{}' not placed in our db, terminating".format(srv.name)) + log.info("vm '%s' not placed in our db, terminating", srv.name) self.terminate(srv)
def main(self): @@ -118,7 +119,7 @@ class Cleaner(object): except Exception as exc: log.exception(exc)
- log.info("cleanup consumed: {} seconds".format(time.time() - start)) + log.info("cleanup consumed: %s seconds", time.time() - start)
if __name__ == "__main__":
This is an automated email from the git hooks/post-receive script.
praiskup pushed a commit to branch master in repository copr/copr.
commit 594f65f5df4c04dda5cefb4b783a22f2dab533e6 Author: Pavel Raiskup praiskup@redhat.com AuthorDate: Tue Jul 30 08:48:32 2019 +0200
backend: cleanup_vm_nova: add --kill-also-unused option
This is useful when we change the builder deployment configuration and want to re-spawn as many VMs as possible (while still not terminating working builders). --- backend/run/cleanup_vm_nova.py | 49 +++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-)
diff --git a/backend/run/cleanup_vm_nova.py b/backend/run/cleanup_vm_nova.py index 5ac2e0b..20bb8f4 100755 --- a/backend/run/cleanup_vm_nova.py +++ b/backend/run/cleanup_vm_nova.py @@ -5,6 +5,7 @@ import os import sys import time import logging +import argparse
from datetime import datetime from concurrent.futures import ThreadPoolExecutor, as_completed @@ -23,6 +24,7 @@ from backend.helpers import utc_now
try: from backend.vm_manage.manager import VmManager + from backend.vm_manage import VmStates except ImportError: VmManager = None
@@ -42,6 +44,18 @@ log.addHandler(hstderr) nova_cloud_vars_path = os.environ.get("NOVA_CLOUD_VARS", "/home/copr/provision/nova_cloud_vars.yml")
+def get_arg_parser(): + parser = argparse.ArgumentParser( + description="Delete all errored or copr-managed VMs from relevant " + "OpenStack tenant", + ) + + parser.add_argument('--kill-also-unused', action='store_true', + help='Delete also tracked, but unused VMs', + default=False) + return parser + + def read_config(): with open(nova_cloud_vars_path) as handle: conf = yaml.safe_load(handle.read()) @@ -56,12 +70,15 @@ def get_client(conf): return Client('2', username, password, tenant_name, auth_url)
-def get_managed_vms_names(): - result = [] +def get_managed_vms(): + result = {} if VmManager: opts = BackendConfigReader().read() vmm = VmManager(opts, log) - result.extend(vmd.vm_name.lower() for vmd in vmm.get_all_vm()) + for vmd in vmm.get_all_vm(): + result[vmd.vm_name.lower()] = { + 'unused': vmd.state == VmStates.READY, + } return result
@@ -87,32 +104,44 @@ class Cleaner(object): return True return False
- def check_one(self, srv_id, vms_names): + def check_one(self, srv_id, managed_vms, opts): srv = self.nt.servers.get(srv_id) log.debug("checking vm '%s'", srv.name) srv.get() + + managed = managed_vms.get(srv.human_id.lower()) + if srv.status.lower().strip() == "error": log.info("vm '%s' got into the error state, terminating", srv.name) self.terminate(srv) - elif self.old_enough(srv) and srv.human_id.lower() not in vms_names: - log.info("vm '%s' not placed in our db, terminating", srv.name) + elif not managed: + if self.old_enough(srv): # give the spawner some time + log.info("vm '%s' not placed in our db, terminating", srv.name) + self.terminate(srv) + elif opts.kill_also_unused and managed['unused']: + log.info("terminating unused vm %s", srv.name) self.terminate(srv)
- def main(self): + + def main(self, opts): """ Terminate - errored VM's and - VM's with uptime > SPAWN_TIMEOUT minutes and which don't have entry in redis DB + - when --kill-also-unused, we also terminate ready VMs """ start = time.time() log.info("Cleanup start")
self.nt = get_client(self.conf) srv_list = self.nt.servers.list(detailed=False) - vms_names = get_managed_vms_names() + managed_vms = get_managed_vms() with ThreadPoolExecutor(max_workers=20) as executor: - future_check = {executor.submit(self.check_one, srv.id, vms_names): srv.id for srv in srv_list} + future_check = { + executor.submit(self.check_one, srv.id, managed_vms, opts): + srv.id for srv in srv_list + } for future in as_completed(future_check): try: future.result() @@ -124,4 +153,4 @@ class Cleaner(object):
if __name__ == "__main__": cleaner = Cleaner(read_config()) - cleaner.main() + cleaner.main(get_arg_parser().parse_args())
This is an automated email from the git hooks/post-receive script.
praiskup pushed a commit to branch master in repository copr/copr.
commit be184b259f7978cf11a75f5e2170ff47e3ec0e25 Author: Pavel Raiskup praiskup@redhat.com AuthorDate: Tue Jul 30 08:58:40 2019 +0200
backend: cleanup_vm_nova: require 'copr' user
We plan to put this script into a cron job, and since the script writes to a log file, we would otherwise need to deal with root-owned log files very soon. --- backend/run/cleanup_vm_nova.py | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/backend/run/cleanup_vm_nova.py b/backend/run/cleanup_vm_nova.py index 20bb8f4..1b8498c 100755 --- a/backend/run/cleanup_vm_nova.py +++ b/backend/run/cleanup_vm_nova.py @@ -3,6 +3,7 @@
import os import sys +import getpass import time import logging import argparse @@ -131,6 +132,11 @@ class Cleaner(object): redis DB - when --kill-also-unused, we also terminate ready VMs """ + + if getpass.getuser() != 'copr': + log.error("This script needs to be executed as copr user") + sys.exit(1) + start = time.time() log.info("Cleanup start")
copr-commits@lists.fedorahosted.org