diff --git a/eprints1.pl b/eprints1.pl index fc37e44e775de064e362408ab64e3b92aae8ddbe..11ddae09b679c438f77809b357dd447ac0c47314 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -118,6 +118,7 @@ my $op_mode= 'unknown'; my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize # my $ot2ut_context= 'ot2ut-prod'; # TODO: parametrize my $oma_sleep_time= 10; +my $MAX_METABLOCK= 670; my %map_ot2ut_roles= ( @@ -451,7 +452,7 @@ my $db_ot2ut; my $col_msg; my $col_activity; my $last_activity= 0; -my $activity_period= 300; +my $activity_period= 60; my $cnf= Util::JSON::read_json_file ($config_fnm); @@ -2756,7 +2757,7 @@ sub send_message $col_msg= $db_ot2ut->get_collection('messages') unless (defined ($col_msg)); return undef unless (defined ($col_msg)); - print __LINE__, " sending message [$text]\n"; + print __LINE__, ' ', scalar localtime(time()), " sending message [$text]\n"; my $msg= { message => $text, @@ -2845,8 +2846,9 @@ sub ot2ut } my @synced= (); - my ($cnt_synced, $cnt_upload_ok, $cnt_errors_data, $cnt_errors_upload, $cnt_errors_ingest, $cnt_skipped)= (0, 0, 0, 0, 0, 0); - my ($cnt_att_synced, $cnt_att_ok, $cnt_att_errors_upload)= (0, 0, 0); + my ($cnt_synced, $cnt_upload_ok, $cnt_errors_data, $cnt_errors_upload, $cnt_errors_ingest, $cnt_errors_attachments, $cnt_skipped)= + (0, 0, 0, 0, 0, 0, 0); + my ($cnt_att_synced, $cnt_att_ok, $cnt_att_errors_upload)= (0, 0, 0); # counters for the whole batch my $cnt_eprint_ids= @eprint_ids; print __LINE__, " ot2ut: ot2ut_eprint_status=$ot2ut_eprint_status cnt_eprint_ids=$cnt_eprint_ids MAX_SYNC=$MAX_SYNC\n"; sleep(3); @@ -2988,6 +2990,8 @@ sub ot2ut $result_data= Util::JSON::read_json_file($utheses_upload_result_json_path); }; + my $out_row; # mongodb record in the sync database (or collection) + my ($upload_success, $td_start, $td_curl); if ($@) { print __LINE__, " can't parse upload_result; error=[$@]\n"; @@ -3015,11 +3019,10 @@ sub ot2ut my ($status, $import_status, $utheses_id1, $response_msg, $alerts)= map { $result_data->{$_} } qw(status importStatus uthesesId responseMsg alerts); print __LINE__, " status=[$status] response_msg=[$response_msg]\n"; - my $td_start= time()-$t_start; - my $td_curl= time()-$t_curl; + $td_start= time()-$t_start; + $td_curl= time()-$t_curl; - my $upload_success; - my $out_row= + $out_row= { eprint_id => $eprint_id, eprint_status => $eprint_status, @@ -3032,7 +3035,7 @@ sub ot2ut uploaded_fnm => $lfnm, upload_status => $status, response_msg => $response_msg, - attachement_cnt => scalar @docs, + attachment_count => scalar @docs, }; if (defined ($utheses_id1) && $status eq '200') @@ -3058,9 +3061,11 @@ sub ot2ut } push (@synced, $out_row); - $col_sync->insert($out_row); - send_message("upload $upload_success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl") unless ($silent_upload_success && $upload_success eq 'ok'); +# moved down +# $col_sync->insert($out_row); +# send_message("upload $upload_success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl") unless ($silent_upload_success && $upload_success eq 'ok'); + } # sleep(2); @@ -3068,7 +3073,7 @@ sub ot2ut if (defined ($utheses_id)) { # process remaining documents as attachments - my $attachment_number= 0; + my ($attachment_number, $attachment_ok, $attachment_error)= (0, 0, 0); my $attachment_pid; my $curl_status; while (my $attachment= shift(@docs)) @@ -3122,6 +3127,7 @@ sub ot2ut { print __LINE__, " can't parse upload_result; error=[$@]\n"; $cnt_att_errors_upload++; + $attachment_error++; $att_status= $ai->{error_code}= 'upload_error'; $ai->{errors}= [ { error => 'upload_error', error_info => $@ } ]; } @@ -3136,11 +3142,13 @@ sub ot2ut if ($curl_status1 eq '200') { $cnt_att_ok++; + $attachment_ok++; $att_status= $ai->{error_code}= 'ok'; } else { $cnt_att_errors_upload++; + $attachment_error++; $att_status= $ai->{error_code}= 'ingest_error'; } @@ -3194,6 +3202,23 @@ sub ot2ut $col_att->insert($ai); # NOTE/TODO: no effort is made to check for duplicate uploads of attachments; } # end of processing for one attachment + + # TODO 2020-11-09: check, if uploading attachments returned errors, record this fact in the objects sync record + if ($attachment_number > 0) # any attacments at all + { + # $out_row->{attachment_count}= $attachment_number; + $out_row->{attachment_ok}= $attachment_ok; + $out_row->{attachment_error}= $attachment_error; + + if ($attachment_error) + { + $out_row->{error_code}= $upload_success= 'attachment_error'; + $cnt_errors_attachments++; # global counter + } + } + + $col_sync->insert($out_row); + send_message("upload $upload_success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl") unless ($silent_upload_success && $upload_success eq 'ok'); } else { @@ -3208,7 +3233,7 @@ sub ot2ut my $res; if ($cnt_synced) { - $res= "synced $cnt_synced objects in context $ot2ut_context; data_errors: $cnt_errors_data; upload_errors: $cnt_errors_upload; ingest_errors: $cnt_errors_ingest"; + $res= "synced $cnt_synced objects in context $ot2ut_context; data_errors: $cnt_errors_data; upload_errors: $cnt_errors_upload; ingest_errors: $cnt_errors_ingest; attachment_errors: $cnt_errors_attachments"; my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO()); Util::Matrix::save_hash_as_csv(\@ot2ut_synced_columns, \@synced, $fnm, "\t", '', "\n", 1); print __LINE__, " $res, see [$fnm]\n"; @@ -3505,6 +3530,17 @@ the structure of json file has slightly changed # 2020-08-05: $thesis->{policies}->{fulltext_locked}= 0 if ($eprint_status eq 'archive' && $main_file->{security} eq 'public'); + # 2020-11-23: column formatdesc in table document contains notes by UBW staff for that particular document. + # For attachments, these are all but the first document, this is stored as "description" in the attachment's metadata. + # For the main document it self, this information was not stored anywhere. + # Feature Request: store that information into utheses_note_2, e.g. by concatinating it + if (defined ($main_file->{formatdesc}) && $main_file->{formatdesc} ne '') + { + $utp->{utheses_note_2}= (exists($utp->{utheses_note_2})) + ? join('; ', $utp->{utheses_note_2}, $main_file->{formatdesc}) + : $main_file->{formatdesc}; + } + $ut->public('thesis', $thesis); # Mon May 11 22:12:38 CEST 2020 asked nd about this, especially thesis_doc_added_date: @@ -3921,7 +3957,7 @@ mysql> select count(*), abstract_nicht_anzeigen from eprint group by abstract_ni # diese objekte sollten wir uns notieren. bitte stattdessen einen leeren string uebergeben. push (@warnings, { warning => 'date_app missing, can not assign assessment_date' }); - $thesis{assessment_date}= ''; + $thesis{assessment_date}= undef; } # print __LINE__, " thesis: ", Dumper (\%thesis); @@ -4565,7 +4601,7 @@ sub update_policies $db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut)); my $col_utp= $db_ot2ut->get_collection('utheses.policies'); - if (defined ($refresh_oldest_policies) && $refresh_oldest_policies > 0 && $refresh_oldest_policies <= 3000) + if (defined ($refresh_oldest_policies) && $refresh_oldest_policies > 0 && $refresh_oldest_policies <= 5000) { my $c1= $col_utp->find( {}, { '_id' => 1, eprint_id => 1, generated => 1 }); $c1->sort( { generated => 1 } ); @@ -4730,17 +4766,18 @@ sub policies_stats $eprint_ids[$eprint_id]->{$context}= $row_sync; - if (defined ($utheses_id)) + if (defined ($utheses_id) && $error_code eq 'ok') { $synced{$eprint_id}->{$context}= [ $lastmod, $utheses_id ]; + $totals{$context}->{cnt_ok}++; $blocks[$block_nr]->{$context}->{cnt_ok}++; $metablocks[$metablock_nr]->{$context}->{cnt_ok}++; } else { - # push (@upload_errors, $row_sync); push (@{$upload_errors{$error_code}}, $eprint_id); + $totals{$context}->{cnt_error}++; $blocks[$block_nr]->{$context}->{cnt_error}++; $metablocks[$metablock_nr]->{$context}->{cnt_error}++; @@ -5123,7 +5160,7 @@ EOX EOX my $block_start= $metablock_nr*100; - my $block_last= ($metablock_nr == 7) ? 750 : $block_start+99; + my $block_last= ($metablock_nr == 6) ? $MAX_METABLOCK : $block_start+99; for (my $block_nr= $block_start; $block_nr <= $block_last; $block_nr++) { next unless (defined ($blocks[$block_nr]));