From 5ca6244fae861f3864e362e169255bd1af541456 Mon Sep 17 00:00:00 2001 From: Gerhard Gonter <ggonter@gmail.com> Date: Sun, 12 Jul 2020 13:12:11 +0200 Subject: [PATCH] Files eprints1.pl, lib/IRMA/db.pm: * refactored convenience functions get_any_db() and get_db_config() into the package IRMA::db File eprints1.pl: * add context information to sync db record and chat messages --- eprints1.pl | 124 +++++++++++++++++++++++-------------------------- lib/IRMA/db.pm | 44 ++++++++++++++++++ 2 files changed, 102 insertions(+), 66 deletions(-) diff --git a/eprints1.pl b/eprints1.pl index 4ce8e9f..313470d 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -93,8 +93,6 @@ my $config_fnm= '/etc/irma/eprints.json'; # my $agent_name= 'irma-urn-othes-test'; my $agent_name= 'irma-urn-othes-prod'; -my %KNOWN_DB_DRIVERS= map { $_ => 1 } qw(mysql mongodb); - # querying mab records my $MAX_MAB_AGE= 86400*14; my $MAX_MAB_REQUESTS= 10_000; @@ -492,31 +490,6 @@ sub reset_errors print "finished reset_errors\n"; } -=head2 get_db_config ($cnf, $db_name) - -get database config data from $cnf hash - -=cut - -sub get_db_config -{ - my $cnf= shift || die "no configuration from ".join (' ', caller()); - my $db_name= shift; - -# print "get_db_config: cnf=", main::Dumper($cnf); - my $dbc= $cnf->{$db_name} || die "no database definition for db_name=[$db_name]"; - - my $driver= $dbc->{driver}; - if (exists ($KNOWN_DB_DRIVERS{$driver})) - { - my $dbcc= $dbc->{$driver}; - return ($driver, $dbcc); - } - else { die "database driver not known; allowed: ". join(', ', keys %KNOWN_DB_DRIVERS); } - - return undef; -} - =head2 connect_db_interactive($cnf, $db_name) Connect to the database which logic name is searched in $cnf, the hash @@ -531,7 +504,7 @@ sub connect_db_interactive my $action= shift || 'connect'; my @tables= @_; - my ($driver, $dbcc)= get_db_config ($cnf, $db_name); + my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, $db_name); if ($driver eq 'mysql') { @@ -566,7 +539,7 @@ sub get_mab_db { my $cnf= shift; - my ($driver, $dbcc)= main::get_db_config ($cnf, 'mab_database'); + my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, 'mab_database'); # print __LINE__, " mab: driver=[$driver] dbcc=", Dumper ($dbcc); my $mab= new IRMA::Mab ('driver' => $driver, 'dbcc' => $dbcc); @@ -954,7 +927,7 @@ sub get_marc_db { my $cnf= shift; - my ($driver, $dbcc)= main::get_db_config ($cnf, 'marc_database'); + my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, 'marc_database'); # print __LINE__, " marc: driver=[$driver] dbcc=", Dumper ($dbcc); my $marc= new IRMA::db ('driver' => $driver, 'dbcc' => $dbcc); @@ -968,20 +941,7 @@ sub get_marc_db sub get_marc_db { my $cnf= shift; - get_any_db ($cnf, 'marc_database'); -} - -sub get_any_db -{ - my $cnf= shift; - my $cnf_name= shift; - - my ($driver, $dbcc)= main::get_db_config ($cnf, $cnf_name); - # print __LINE__, " marc: driver=[$driver] dbcc=", Dumper ($dbcc); - - my $marc= new IRMA::db ('driver' => $driver, 'dbcc' => $dbcc); - # print __LINE__, " marc: ", Dumper ($marc); - $marc; + IRMA::db::get_any_db ($cnf, 'marc_database'); } =head2 check_marc ($marc, @ac_numbers); @@ -1417,7 +1377,7 @@ sub get_eprints_db { my $cnf= shift; - my ($driver, $dbcc)= main::get_db_config ($cnf, 'eprints_database'); + my ($driver, $dbcc)= IRMA::db::get_db_config ($cnf, 'eprints_database'); my $epr= new IRMA::eprints ( @@ -1595,7 +1555,7 @@ sub get_irma_na_db { my $cnf= shift; - my ($driver2, $dbcc2)= main::get_db_config ($cnf, 'irma_database'); + my ($driver2, $dbcc2)= IRMA::db::get_db_config ($cnf, 'irma_database'); # print "dbcc2: ", main::Dumper($dbcc2); my $irma_na= new IRMA::NA ('driver' => $driver2, 'dbcc' => $dbcc2); @@ -2489,7 +2449,7 @@ Othes Migration Agent: listen to requests in MongoDB and perform them sub oma { - my $ot2ut= get_any_db($cnf, 'ot2ut_database'); + my $ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database'); my $col_sync= $ot2ut->get_collection('sync'); my $col_msg= $ot2ut->get_collection('messages'); @@ -2516,18 +2476,25 @@ sub oma $MAX_SYNC= $bs; my $eprint_status= $row->{eprint_status}; - if ($eprint_status eq 'buffer') { $ot2ut_eprint_status= 'buffer'; $no_doi= 1; $ignore_errors= 1; } + + if ($eprint_status eq 'buffer') + { $ot2ut_eprint_status= 'buffer'; $no_doi= 1; $ignore_errors= 1; } else { $ot2ut_eprint_status= 'archive'; $no_doi= 0; $ignore_errors= 0; } - $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'working' }}); + $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }}); + + send_message($col_msg, "send_batch: sending $bs objects in $ot2ut_eprint_status to $ot2ut_context"); my ($synced, $res)= ot2ut(); send_message($col_msg, "send_batch: $res"); + $new_status= 'done' if (@$synced); } elsif ($row->{action} eq 'send_ids') { $ignore_errors= 1; - $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'working' }}); + $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }}); + my $cnt= @{$row->{ids}}; + send_message($col_msg, "send_ids: sending $cnt objects to $ot2ut_context"); my ($synced, $res)= ot2ut(@{$row->{ids}}); send_message($col_msg, "send_ids: $res"); $new_status= 'done' if (@$synced); @@ -2561,7 +2528,7 @@ sub ot2ut # my $irma_na= get_irma_na_db($cnf); my $epr= get_eprints_db($cnf); - my $ot2ut= get_any_db($cnf, 'ot2ut_database'); + my $ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database'); my $col_sync= $ot2ut->get_collection('sync'); my $col_msg= $ot2ut->get_collection('messages'); @@ -2615,9 +2582,21 @@ sub ot2ut my $t_start= time(); - my $sync_info= $col_sync->find_one({eprint_id => $eprint_id}); + my $sync_info= $col_sync->find_one({ eprint_id => $eprint_id, context => $ot2ut_context }); + + if (defined ($sync_info)) + { + if ($sync_info->{error_code} ne 'ok') + { + print __LINE__, " earlier sync attempt had errors, retrying...\n"; + print __LINE__, " sync_info: ", Dumper($sync_info); + $col_sync->remove( { _id => $sync_info->{_id} } ); + $sync_info= undef; + } + } - my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path)= generate_utheses_metadata($epr, $eprint_id); + my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path)= + generate_utheses_metadata($epr, $eprint_id); my ($eprint_status)= map { $row->{$_} } qw(eprint_status); print __LINE__, " sync_info=[$sync_info]\n"; @@ -2635,18 +2614,12 @@ sub ot2ut } else { - print __LINE__, " ERROR/NOT YET IMPLEMENTED: already synced but othes object was modified againg\n"; + print __LINE__, " ERROR/NOT YET IMPLEMENTED: already synced but othes object was modified again\n"; print __LINE__, " ut2ot=[", $sync_info->{lastmod}, "] othes=[", $lastmod, "]\n"; next; } } - else - { - print __LINE__, " earlier sync attempt had errors, retrying...\n"; - $col_sync->remove( { _id => $sync_info->{_id} } ); - $sync_info= undef; - } } my $ts_upload= ts_ISO_gmt(); @@ -2664,9 +2637,18 @@ sub ot2ut if (@$errors && ! $ignore_errors) { - my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, context => $ot2ut_context, error_code => 'conversion_errors', error_cnt => scalar @$errors }; + my $el= + { + eprint_id => $eprint_id, + eprint_status => $eprint_status, + lastmod => $lastmod, + ts_upload => $ts_upload, + context => $ot2ut_context, + error_code => 'conversion_errors', + error_cnt => scalar @$errors, + errors => $errors, + }; push (@synced, $el); - $el->{errors}= $errors; $col_sync->insert($el); send_message($col_msg, "upload error: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] [conversion errors]"); @@ -2712,13 +2694,23 @@ sub ot2ut # $result_data= from_json($upload_result); $result_data= Util::JSON::read_json_file($utheses_upload_result_json_path); }; + if ($@) { print __LINE__, " can't parse upload_result; error=[$@]\n"; push (@$errors, { error => 'upload_error', error_info => $@ }); - my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, context => $ot2ut_context, error_code => 'upload_error', 1 }; + my $el= + { + eprint_id => $eprint_id, + eprint_status => $eprint_status, + lastmod => $lastmod, + ts_upload => $ts_upload, + context => $ot2ut_context, + error_code => 'upload_error', + errors => $errors, + }; + push (@synced, $el); - $el->{errors}= $errors; $col_sync->insert($el); sleep(2); } @@ -2779,7 +2771,7 @@ old format 2019-11..2020-01 push (@synced, $out_row); $col_sync->insert($out_row); - send_message($col_msg, "upload success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl"); + # send_message($col_msg, "upload success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl"); } sleep(2); @@ -2798,7 +2790,7 @@ old format 2019-11..2020-01 my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO()); Util::Matrix::save_hash_as_csv(\@columns, \@synced, $fnm, "\t", '', "\n", 1); - my $res= "synced $cnt_synced objects; $cnt_errors objects with errors"; + my $res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors"; print __LINE__, " $res, see [$fnm]\n"; (\@synced, $res); } diff --git a/lib/IRMA/db.pm b/lib/IRMA/db.pm index 683d60d..d0b6192 100644 --- a/lib/IRMA/db.pm +++ b/lib/IRMA/db.pm @@ -2,6 +2,8 @@ package IRMA::db; use strict; +my %KNOWN_DB_DRIVERS= map { $_ => 1 } qw(mysql mongodb); + sub new { my $class= shift; @@ -93,5 +95,47 @@ sub get_collection $col; } +=head1 Convenience + +=cut + +sub get_any_db +{ + my $cnf= shift; + my $cnf_name= shift; + + my ($driver, $dbcc)= get_db_config ($cnf, $cnf_name); + # print __LINE__, " marc: driver=[$driver] dbcc=", Dumper ($dbcc); + + my $irma_db= new IRMA::db ('driver' => $driver, 'dbcc' => $dbcc); + # print __LINE__, " irma_db: ", Dumper ($irma_db); + $irma_db; +} + +=head2 get_db_config ($cnf, $db_name) + +get database config data from $cnf hash + +=cut + +sub get_db_config +{ + my $cnf= shift || die "no configuration from ".join (' ', caller()); + my $db_name= shift; + +# print "get_db_config: cnf=", main::Dumper($cnf); + my $dbc= $cnf->{$db_name} || die "no database definition for db_name=[$db_name]"; + + my $driver= $dbc->{driver}; + if (exists ($KNOWN_DB_DRIVERS{$driver})) + { + my $dbcc= $dbc->{$driver}; + return ($driver, $dbcc); + } + else { die "database driver not known; allowed: ". join(', ', keys %KNOWN_DB_DRIVERS); } + + return undef; +} + 1; -- GitLab