From 0e407910c4addee447b8859f9591e77cbb73e2e9 Mon Sep 17 00:00:00 2001 From: Andreas Plach <andreas.plach@univie.ac.at> Date: Tue, 22 Dec 2020 11:48:41 +0100 Subject: [PATCH] tools.py: added try/except query for ecaccess-job-list because calling this command sometimes results in an error; waiting helps --- Source/Python/Mods/tools.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/Source/Python/Mods/tools.py b/Source/Python/Mods/tools.py index d0274c5..74007cf 100644 --- a/Source/Python/Mods/tools.py +++ b/Source/Python/Mods/tools.py @@ -725,20 +725,40 @@ def submit_job_to_ecserver(target, jobname): # apl: start noEXEC = 99 noWAIT = 99 - waittime = 10 + noECTRANS = 2000 + waittime = 60 + import datetime as dt - while (noEXEC+noWAIT)>12: + while (noEXEC+noWAIT)>20: # do "ecaccess-job-list" and check the numbers of EXEC and # WAIT => # of active jobs - ps = subprocess.check_output(('ecaccess-job-list')) + try: + ps = subprocess.check_output(('ecaccess-job-list')) + except: + print('problem with ecaccess-job-list; waiting five minutes') + time.sleep(300) + continue noEXEC = str(ps).count('EXEC') noWAIT = str(ps).count('WAIT') + print(dt.datetime.now()) print('number of EXEC jobs: ', str(noEXEC)) print('number of WAIT jobs: ', str(noWAIT)) import time - print('waiting until less processes are active (checking every '+str(waittime)+'s)') - time.sleep(waittime) - # apl: end + # only wait if there are already a few active jobs (e.g., also + # waits to set new ./setup.sh jobs + if (noEXEC+noWAIT)>10: + print('waiting until less processes are active (checking every '+str(waittime)+'s)') + time.sleep(waittime) + + # add check for number of jobs in ectrans queue + while (noECTRANS)>1600: + ectrans_out = subprocess.check_output(('ecaccess-ectrans-list')) + noECTRANS = len(str(ectrans_out).split('\\n')) + if (noECTRANS)>1500: + print('waiting until less processes in ectrans queue (limit 1600); checking every '+str(waittime)+'s)') + time.sleep(waittime) + # ecaccess-ectrans-list | wc -l + # apl: end try: job_id = subprocess.check_output(['ecaccess-job-submit', '-queueName', -- GitLab