From f2616a333db26377c5a93a897e13f877b4118e5a Mon Sep 17 00:00:00 2001
From: Anne Philipp <anne.philipp@univie.ac.at>
Date: Fri, 14 Dec 2018 14:03:35 +0100
Subject: [PATCH] implemented a job split with a new parameter 'job_chunk' so
 that huge time periods can automatically be split

---
 source/python/classes/ControlFile.py |  3 +-
 source/python/mods/checks.py         | 23 +++++++++++++++
 source/python/mods/tools.py          |  3 ++
 source/python/submit.py              | 43 ++++++++++++++++++++++++----
 4 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/source/python/classes/ControlFile.py b/source/python/classes/ControlFile.py
index 5d11c65..00c316a 100644
--- a/source/python/classes/ControlFile.py
+++ b/source/python/classes/ControlFile.py
@@ -67,7 +67,7 @@ from mods.checks import (check_grid, check_area, check_levels, check_purefc,
                          check_basetime, check_public, check_acctype,
                          check_acctime, check_accmaxstep, check_time,
                          check_logicals_type, check_len_type_time_step,
-                         check_addpar)
+                         check_addpar, check_job_chunk)
 
 # ------------------------------------------------------------------------------
 # CLASS
@@ -382,6 +382,7 @@ class ControlFile(object):
 
         self.addpar = check_addpar(self.addpar)
 
+        self.job_chunk = check_job_chunk(self.job_chunk)
 
         return
 
diff --git a/source/python/mods/checks.py b/source/python/mods/checks.py
index 7e7d91f..7083339 100644
--- a/source/python/mods/checks.py
+++ b/source/python/mods/checks.py
@@ -795,3 +795,26 @@ def check_addpar(addpar):
 
     return addpar
 
+
+def check_job_chunk(job_chunk):
+    '''Validates the job chunk; raises ValueError if it is negative.
+
+    Parameters
+    ----------
+    job_chunk : :obj:`integer` or None
+        The number of days for a single job script.
+
+    Return
+    ------
+    job_chunk : :obj:`integer` or None
+        The number of days per job script; None if unset or zero.
+    '''
+    # An unset (None) or zero value means no job splitting. Test it first,
+    # because comparing None with an integer is not a valid range check.
+    if not job_chunk:
+        return None
+    if job_chunk < 0:
+        raise ValueError('ERROR: The number of job chunk is negative!\n'
+                         'It has to be a positive number!')
+
+    return job_chunk
diff --git a/source/python/mods/tools.py b/source/python/mods/tools.py
index 00c7b88..6f64903 100644
--- a/source/python/mods/tools.py
+++ b/source/python/mods/tools.py
@@ -127,6 +127,9 @@ def get_cmdline_args():
     parser.add_argument("--date_chunk", dest="date_chunk",
                         type=none_or_int, default=None,
                         help="# of days to be retrieved at once")
+    parser.add_argument("--job_chunk", dest="job_chunk",
+                        type=none_or_int, default=None,
+                        help="# of days to be retrieved within a single job")
     parser.add_argument("--controlfile", dest="controlfile",
                         type=none_or_str, default='CONTROL.temp',
                         help="file with CONTROL parameters")
diff --git a/source/python/submit.py b/source/python/submit.py
index 99b3202..98fb458 100755
--- a/source/python/submit.py
+++ b/source/python/submit.py
@@ -47,6 +47,7 @@ import sys
 import subprocess
 import inspect
 import collections
+from datetime import datetime, timedelta
 
 # software specific classes and modules from flex_extract
 import _config
@@ -139,9 +140,37 @@ def submit(jtemplate, c, queue):
         job_file = os.path.join(_config.PATH_JOBSCRIPTS,
                                 jtemplate[:-5] + '.ksh')
 
-        clist = c.to_list()
+        # divide the time period into chunks of the specified number of days
+        # to have multiple job scripts
+        if c.job_chunk:
+            start = datetime.strptime(c.start_date, '%Y%m%d')
+            end = datetime.strptime(c.end_date, '%Y%m%d')
+            chunk = timedelta(days=c.job_chunk)
 
-        mk_jobscript(jtemplate, job_file, clist)
+            while start <= end:
+                if (start + chunk) <= end:
+                    c.end_date = (start + chunk).strftime("%Y%m%d")
+                else:
+                    c.end_date = end.strftime("%Y%m%d")
+                print c.start_date +' bis ' + c.end_date
+
+                clist = c.to_list()
+
+                mk_jobscript(jtemplate, job_file, clist)
+
+                job_id = submit_job_to_ecserver(queue, job_file)
+                print('The job id is: ' + str(job_id.strip()))
+
+                start = start + chunk
+                c.start_date = start.strftime("%Y%m%d")
+        # submit a single job script
+        else:
+            clist = c.to_list()
+
+            mk_jobscript(jtemplate, job_file, clist)
+
+            job_id = submit_job_to_ecserver(queue, job_file)
+            print('The job id is: ' + str(job_id.strip()))
 
     else:
     # --------- create operational job script ----------------------------------
@@ -160,10 +189,11 @@ def submit(jtemplate, c, queue):
 
         mk_jobscript(jtemplate, job_file, clist)
 
-    # --------- submit the job_script to the ECMWF server
-    job_id = submit_job_to_ecserver(queue, job_file)
-    print('The job id is: ' + str(job_id.strip()))
-    print('You should get an email with subject flex.hostname.pid')
+        job_id = submit_job_to_ecserver(queue, job_file)
+        print('The job id is: ' + str(job_id.strip()))
+
+
+    print('You should get an email per job with subject flex.hostname.pid')
 
     return
 
@@ -222,5 +252,6 @@ def mk_jobscript(jtemplate, job_file, clist):
 
     return
 
+
 if __name__ == "__main__":
     main()
-- 
GitLab