From 5ca6244fae861f3864e362e169255bd1af541456 Mon Sep 17 00:00:00 2001
From: Gerhard Gonter <ggonter@gmail.com>
Date: Sun, 12 Jul 2020 13:12:11 +0200
Subject: [PATCH] Files eprints1.pl, lib/IRMA/db.pm: * refactored convenience
 functions get_any_db() and get_db_config() into the package IRMA::db

File eprints1.pl:
* add context information to sync db record and chat messages
---
 eprints1.pl    | 124 +++++++++++++++++++++++--------------------------
 lib/IRMA/db.pm |  44 ++++++++++++++++++
 2 files changed, 102 insertions(+), 66 deletions(-)

diff --git a/eprints1.pl b/eprints1.pl
index 4ce8e9f..313470d 100755
--- a/eprints1.pl
+++ b/eprints1.pl
@@ -93,8 +93,6 @@ my $config_fnm= '/etc/irma/eprints.json';
 # my $agent_name= 'irma-urn-othes-test';
 my $agent_name= 'irma-urn-othes-prod';
 
-my %KNOWN_DB_DRIVERS= map { $_ => 1 } qw(mysql mongodb);
-
 # querying mab records
 my $MAX_MAB_AGE= 86400*14;
 my $MAX_MAB_REQUESTS= 10_000;
@@ -492,31 +490,6 @@ sub reset_errors
   print "finished reset_errors\n";
 }
 
-=head2 get_db_config ($cnf, $db_name)
-
-get database config data from $cnf hash
-
-=cut
-
-sub get_db_config
-{
-  my $cnf= shift || die "no configuration from ".join (' ', caller());
-  my $db_name= shift;
-
-# print "get_db_config: cnf=", main::Dumper($cnf);
-  my $dbc= $cnf->{$db_name} || die "no database definition for db_name=[$db_name]";
-
-  my $driver= $dbc->{driver};
-  if (exists ($KNOWN_DB_DRIVERS{$driver}))
-  {
-    my $dbcc= $dbc->{$driver};
-    return ($driver, $dbcc);
-  }
-  else { die "database driver not known; allowed: ". join(', ', keys %KNOWN_DB_DRIVERS); }
-
-  return undef;
-}
-
 =head2 connect_db_interactive($cnf, $db_name)
 
 Connect to the database which logic name is searched in $cnf, the hash
@@ -531,7 +504,7 @@ sub connect_db_interactive
   my $action= shift || 'connect';
   my @tables= @_;
 
-  my ($driver, $dbcc)= get_db_config ($cnf, $db_name);
+  my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, $db_name);
 
   if ($driver eq 'mysql')
   {
@@ -566,7 +539,7 @@ sub get_mab_db
 {
   my $cnf= shift;
 
-  my ($driver, $dbcc)= main::get_db_config ($cnf, 'mab_database');
+  my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, 'mab_database');
   # print __LINE__, " mab: driver=[$driver] dbcc=", Dumper ($dbcc);
 
   my $mab= new IRMA::Mab ('driver' => $driver, 'dbcc' => $dbcc);
@@ -954,7 +927,7 @@ sub get_marc_db
 {
   my $cnf= shift;
 
-  my ($driver, $dbcc)= main::get_db_config ($cnf, 'marc_database');
+  my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, 'marc_database');
   # print __LINE__, " marc: driver=[$driver] dbcc=", Dumper ($dbcc);
 
   my $marc= new IRMA::db ('driver' => $driver, 'dbcc' => $dbcc);
@@ -968,20 +941,7 @@ sub get_marc_db
 sub get_marc_db
 {
   my $cnf= shift;
-  get_any_db ($cnf, 'marc_database');
-}
-
-sub get_any_db
-{
-  my $cnf= shift;
-  my $cnf_name= shift;
-
-  my ($driver, $dbcc)= main::get_db_config ($cnf, $cnf_name);
-  # print __LINE__, " marc: driver=[$driver] dbcc=", Dumper ($dbcc);
-
-  my $marc= new IRMA::db ('driver' => $driver, 'dbcc' => $dbcc);
-  # print __LINE__, " marc: ", Dumper ($marc);
-  $marc;
+  IRMA::db::get_any_db ($cnf, 'marc_database');
 }
 
 =head2 check_marc ($marc, @ac_numbers);
@@ -1417,7 +1377,7 @@ sub get_eprints_db
 {
   my $cnf= shift;
 
-  my ($driver, $dbcc)= main::get_db_config ($cnf, 'eprints_database');
+  my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, 'eprints_database');
 
   my $epr= new IRMA::eprints
   (
@@ -1595,7 +1555,7 @@ sub get_irma_na_db
 {
   my $cnf= shift;
 
-  my ($driver2, $dbcc2)= main::get_db_config ($cnf, 'irma_database');
+  my ($driver2, $dbcc2)= IRMA::db::get_db_config ($cnf, 'irma_database');
   # print "dbcc2: ", main::Dumper($dbcc2);
   my $irma_na= new IRMA::NA ('driver' => $driver2, 'dbcc' => $dbcc2);
 
@@ -2489,7 +2449,7 @@ Othes Migration Agent: listen to requests in MongoDB and perform them
 
 sub oma
 {
-  my $ot2ut= get_any_db($cnf, 'ot2ut_database');
+  my $ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database');
 
   my $col_sync= $ot2ut->get_collection('sync');
   my $col_msg= $ot2ut->get_collection('messages');
@@ -2516,18 +2476,25 @@ sub oma
       $MAX_SYNC= $bs;
 
       my $eprint_status= $row->{eprint_status};
-      if ($eprint_status eq 'buffer') { $ot2ut_eprint_status= 'buffer'; $no_doi= 1; $ignore_errors= 1; }
+
+      if ($eprint_status eq 'buffer')
+           { $ot2ut_eprint_status= 'buffer';  $no_doi= 1; $ignore_errors= 1; }
       else { $ot2ut_eprint_status= 'archive'; $no_doi= 0; $ignore_errors= 0; }
 
-      $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'working' }});
+      $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }});
+
+      send_message($col_msg, "send_batch: sending $bs objects in $ot2ut_eprint_status to $ot2ut_context");
       my ($synced, $res)= ot2ut();
       send_message($col_msg, "send_batch: $res");
+
       $new_status= 'done' if (@$synced);
     }
     elsif ($row->{action} eq 'send_ids')
     {
       $ignore_errors= 1;
-      $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'working' }});
+      $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }});
+      my $cnt= @{$row->{ids}};
+      send_message($col_msg, "send_ids: sending $cnt objects to $ot2ut_context");
       my ($synced, $res)= ot2ut(@{$row->{ids}});
       send_message($col_msg, "send_ids: $res");
       $new_status= 'done' if (@$synced);
@@ -2561,7 +2528,7 @@ sub ot2ut
   # my $irma_na= get_irma_na_db($cnf);
   my $epr= get_eprints_db($cnf);
 
-  my $ot2ut= get_any_db($cnf, 'ot2ut_database');
+  my $ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database');
   my $col_sync= $ot2ut->get_collection('sync');
   my $col_msg= $ot2ut->get_collection('messages');
 
@@ -2615,9 +2582,21 @@ sub ot2ut
 
     my $t_start= time();
 
-    my $sync_info= $col_sync->find_one({eprint_id => $eprint_id});
+    my $sync_info= $col_sync->find_one({ eprint_id => $eprint_id, context => $ot2ut_context });
+
+    if (defined ($sync_info))
+    {
+      if ($sync_info->{error_code} ne 'ok')
+      {
+        print __LINE__, " earlier sync attempt had errors, retrying...\n";
+        print __LINE__, " sync_info: ", Dumper($sync_info);
+        $col_sync->remove( { _id => $sync_info->{_id} } );
+        $sync_info= undef;
+      }
+    }
 
-    my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path)= generate_utheses_metadata($epr, $eprint_id);
+    my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path)=
+       generate_utheses_metadata($epr, $eprint_id);
     my ($eprint_status)= map { $row->{$_} } qw(eprint_status);
 
     print __LINE__, " sync_info=[$sync_info]\n";
@@ -2635,18 +2614,12 @@ sub ot2ut
         }
         else
         {
-          print __LINE__, " ERROR/NOT YET IMPLEMENTED: already synced but othes object was modified againg\n";
+          print __LINE__, " ERROR/NOT YET IMPLEMENTED: already synced but othes object was modified again\n";
           print __LINE__, " ut2ot=[", $sync_info->{lastmod}, "] othes=[", $lastmod, "]\n";
 
           next;
         }
       }
-      else
-      {
-        print __LINE__, " earlier sync attempt had errors, retrying...\n";
-        $col_sync->remove( { _id => $sync_info->{_id} } );
-        $sync_info= undef;
-      }
     }
 
     my $ts_upload= ts_ISO_gmt();
@@ -2664,9 +2637,18 @@ sub ot2ut
 
     if (@$errors && ! $ignore_errors)
     {
-      my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, context => $ot2ut_context, error_code => 'conversion_errors', error_cnt => scalar @$errors };
+      my $el=
+      {
+        eprint_id     => $eprint_id,
+        eprint_status => $eprint_status,
+        lastmod       => $lastmod,
+        ts_upload     => $ts_upload,
+        context       => $ot2ut_context,
+        error_code    => 'conversion_errors',
+        error_cnt     => scalar @$errors,
+        errors        => $errors,
+      };
       push (@synced, $el);
-      $el->{errors}= $errors;
       $col_sync->insert($el);
 
       send_message($col_msg, "upload error: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] [conversion errors]");
@@ -2712,13 +2694,23 @@ sub ot2ut
           # $result_data= from_json($upload_result);
           $result_data= Util::JSON::read_json_file($utheses_upload_result_json_path);
         };
+
         if ($@)
         {
           print __LINE__, " can't parse upload_result; error=[$@]\n";
           push (@$errors, { error => 'upload_error', error_info => $@ });
-          my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, context => $ot2ut_context, error_code => 'upload_error', 1 };
+          my $el=
+          {
+            eprint_id     => $eprint_id,
+            eprint_status => $eprint_status,
+            lastmod       => $lastmod,
+            ts_upload     => $ts_upload,
+            context       => $ot2ut_context,
+            error_code    => 'upload_error',
+            errors        => $errors,
+          };
+
           push (@synced, $el);
-          $el->{errors}= $errors;
           $col_sync->insert($el);
           sleep(2);
         }
@@ -2779,7 +2771,7 @@ old format 2019-11..2020-01
           push (@synced, $out_row);
           $col_sync->insert($out_row);
 
-          send_message($col_msg, "upload success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl");
+          # send_message($col_msg, "upload success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl");
         }
 
         sleep(2);
@@ -2798,7 +2790,7 @@ old format 2019-11..2020-01
   my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO());
   Util::Matrix::save_hash_as_csv(\@columns, \@synced, $fnm, "\t", '',  "\n", 1);
 
-  my $res= "synced $cnt_synced objects; $cnt_errors objects with errors";
+  my $res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors";
   print __LINE__, " $res, see [$fnm]\n";
   (\@synced, $res);
 }
diff --git a/lib/IRMA/db.pm b/lib/IRMA/db.pm
index 683d60d..d0b6192 100644
--- a/lib/IRMA/db.pm
+++ b/lib/IRMA/db.pm
@@ -2,6 +2,8 @@ package IRMA::db;
 
 use strict;
 
+my %KNOWN_DB_DRIVERS= map { $_ => 1 } qw(mysql mongodb);
+
 sub new
 {
   my $class= shift;
@@ -93,5 +95,47 @@ sub get_collection
   $col;
 }
 
+=head1 Convenience
+
+=cut
+
+sub get_any_db
+{
+  my $cnf= shift;
+  my $cnf_name= shift;
+
+  my ($driver, $dbcc)= get_db_config ($cnf, $cnf_name);
+  # print __LINE__, " marc: driver=[$driver] dbcc=", Dumper ($dbcc);
+
+  my $irma_db= new IRMA::db ('driver' => $driver, 'dbcc' => $dbcc);
+  # print __LINE__, " irma_db: ", Dumper ($irma_db);
+  $irma_db;
+}
+
+=head2 get_db_config ($cnf, $db_name)
+
+get database config data from $cnf hash
+
+=cut
+
+sub get_db_config
+{
+  my $cnf= shift || die "no configuration from ".join (' ', caller());
+  my $db_name= shift;
+
+# print "get_db_config: cnf=", main::Dumper($cnf);
+  my $dbc= $cnf->{$db_name} || die "no database definition for db_name=[$db_name]";
+
+  my $driver= $dbc->{driver};
+  if (exists ($KNOWN_DB_DRIVERS{$driver}))
+  {
+    my $dbcc= $dbc->{$driver};
+    return ($driver, $dbcc);
+  }
+  else { die "database driver not known; allowed: ". join(', ', keys %KNOWN_DB_DRIVERS); }
+
+  return undef;
+}
+
 1;
 
-- 
GitLab