diff --git a/eprints1.pl b/eprints1.pl index 2a94e197d700b8b9f4a19810dbf5d8683710f3e2..ce2b326a81e28dead102a353695130793c0221fc 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -206,6 +206,7 @@ my %bucketlist_column_descriptions= my %ot2ut_sync_anyway= map { $_ => 1 } qw(33905); # these should be synced, even if they were already marked as ok my %doc_embargo_dates; +my $base_path= '/var/www/ot2ut'; # TODO(maybe): get this from the config... # END OT2UT: Othes to Utheses # ====================================================================== @@ -439,7 +440,7 @@ elsif ($op_mode eq 'update-policies') } elsif ($op_mode eq 'policies-stats') { - policies_stats(); + policies_stats(@PARS); } elsif ($op_mode eq 'reset') # reset error conditions for given ac_numbers { @@ -2541,7 +2542,7 @@ sub oma { $ot2ut_eprint_status= 'buffer'; $ignore_errors= 1; } else { $ot2ut_eprint_status= 'archive'; $ignore_errors= 0; } - $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }}); + $col_req->update( { _id => $row->{_id} }, { '$set' => { status => 'finish', ts_start => Util::ts::ts_ISO_gmt() }} ); my $msg= "send_batch: sending $bs objects in $ot2ut_eprint_status to $ot2ut_context"; activity({ activity => 'send_batch', msg => $msg}); @@ -2558,20 +2559,22 @@ sub oma $silent_upload_success= 1; $ignore_errors= 0; - $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }}); + $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress', ts_start => Util::ts::ts_ISO_gmt() }}); my $msg= "send_block: sending objects of block $block to $ot2ut_context"; activity({ activity => 'send_batch', msg => $msg}); send_message($msg); my ($synced, $res)= ot2ut('block' => $block); - send_message("send_block: $res"); + send_message("send_block: block $block, result: $res"); $new_status= 'done' if (@$synced); + + policies_stats("processed block $block"); } elsif ($row->{action} eq 'send_ids') { - $ignore_errors= 1; - $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }}); + $ignore_errors= 0; + $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress', ts_start => Util::ts::ts_ISO_gmt() }}); my @ids; foreach my $id (@{$row->{ids}}) { push (@ids, $id) if ($id =~ m#^\d+$#); } my $cnt= @ids; @@ -2586,7 +2589,7 @@ sub oma $new_status= 'done' if (@$synced); } - $col_req->update({ _id => $row->{_id}}, { '$set' => { status => $new_status }}); + $col_req->update({ _id => $row->{_id}}, { '$set' => { status => $new_status, ts_finish => Util::ts::ts_ISO_gmt() }}); activity({ activity => 'listening'}); } } @@ -2636,8 +2639,19 @@ sub ot2ut # my $irma_na= get_irma_na_db($cnf); my $epr= get_eprints_db($cnf); + my $upload_cnf= $cnf->{$ot2ut_context}; + die "no valid ot2ut context" unless (defined ($upload_cnf)); + + my @extra_curl_parameters; + if (exists ($upload_cnf->{headers})) + { + @extra_curl_parameters= map { ('--header', $_) } @{$upload_cnf->{headers}}; + # push (@upload_cmd, @extra_curl_parameters) if (@extra_curl_parameters); + } + $db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut)); my $col_sync= $db_ot2ut->get_collection('sync'); + my $col_att= $db_ot2ut->get_collection('attachments'); my $col_policy_utheses= $db_ot2ut->get_collection('policy.utheses'); unless (defined ($utheses_faculty_map)) @@ -2685,8 +2699,8 @@ sub ot2ut } my @synced= (); - my $cnt_synced= 0; - my $cnt_errors= 0; + my ($cnt_synced, $cnt_upload_ok, $cnt_errors_data, $cnt_errors_upload, $cnt_errors_ingest, $cnt_skipped)= (0, 0, 0, 0, 0, 0); + my ($cnt_att_synced, $cnt_att_ok, $cnt_att_errors_upload)= (0, 0, 0); my $cnt_eprint_ids= @eprint_ids; print __LINE__, " ot2ut: ot2ut_eprint_status=$ot2ut_eprint_status cnt_eprint_ids=$cnt_eprint_ids MAX_SYNC=$MAX_SYNC\n"; sleep(3); @@ -2733,6 +2747,7 @@ sub ot2ut if ($sync_info->{lastmod} eq $lastmod) { print __LINE__, " eprint_id=[$eprint_id] already synced; skipping...\n"; + $cnt_skipped++; next; } else @@ -2781,7 +2796,7 @@ sub ot2ut my $utheses_errors_json_path= 'othes/utheses_json/errors/' . $eprint_id . '.json'; Util::JSON::write_json_file($utheses_errors_json_path, $errors); - $cnt_errors++; + $cnt_errors_data++; } else { # go ahead ... @@ -2789,9 +2804,6 @@ sub ot2ut print __LINE__, " files: ", Dumper($files); print __LINE__, " utheses_json_path=[$utheses_json_path]\n"; - my $upload_cnf= $cnf->{$ot2ut_context}; - die "no valid ot2ut context" unless (defined ($upload_cnf)); - my @docs= @{$docs->{documents}}; my $main_file= shift(@docs); my ($local_filename, $lfnm)= map { $main_file->{$_} } qw(path_doc main); @@ -2803,13 +2815,7 @@ sub ot2ut '-F', 'file=@"' . $local_filename . '";filename="' . $lfnm . '"', '-F', 'type=application/pdf', $upload_cnf->{import_url}, '-o' . $utheses_upload_result_json_path); - if (exists ($upload_cnf->{headers})) - { - foreach my $header (@{$upload_cnf->{headers}}) - { - push (@upload_cmd, '--header', $header ); - } - } + push (@upload_cmd, @extra_curl_parameters) if (@extra_curl_parameters); print __LINE__, " upload_cmd: [", join(' ', @upload_cmd), "]\n"; my $utheses_id; @@ -2842,42 +2848,13 @@ sub ot2ut push (@synced, $el); $col_sync->insert($el); + $cnt_errors_upload++; sleep(2); } else { print __LINE__, " result_data: ", Dumper($result_data); -=begin comment - -old format 2019-11..2020-01 - my ($status, $result)= map { $result_data->{$_} } qw(status result); - my ($ac, $ad, $ia)= map { $result->{$_} } qw(add_container add_document import_activate); - - my ($container_pid, $container_result, $utheses_id)= map { $ac->{result}->{$_} } qw(pid status uthesesId); - my ($document_pid, $document_result)= map { $ad->{result}->{$_} } qw(pid status); - - my $out_row= - { - eprint_id => $eprint_id, - lastmod => $lastmod, - ts_upload => $ts_upload, - context => $ot2ut_context, - error_code => 'ok', - error_cnt => 0, - utheses_id => $utheses_id, - container_pid => $container_pid, - container_result => $container_result, - document_pid => $document_pid, - document_result => $document_result, - activate_result => $ia->{status}, - import_code => $ia->{import_validate}->{import_code}, - import_note => $ia->{import_validate}->{import_note}, - }; - -=end comment -=cut - my ($status, $import_status, $utheses_id1, $response_msg, $alerts)= map { $result_data->{$_} } qw(status importStatus uthesesId responseMsg alerts); print __LINE__, " status=[$status] response_msg=[$response_msg]\n"; @@ -2898,6 +2875,7 @@ old format 2019-11..2020-01 uploaded_fnm => $lfnm, upload_status => $status, response_msg => $response_msg, + attachement_cnt => scalar @docs, }; if (defined ($utheses_id1) && $status eq '200') @@ -2905,10 +2883,12 @@ old format 2019-11..2020-01 $utheses_id= $utheses_id1; $out_row->{error_code}= $upload_success= 'ok'; $out_row->{error_cnt}= 0; + $cnt_upload_ok++; } else { - $out_row->{error_code}= $upload_success= 'error'; + $out_row->{error_code}= $upload_success= 'ingest_error'; + $cnt_errors_ingest++; } push (@synced, $out_row); @@ -2923,6 +2903,8 @@ old format 2019-11..2020-01 { # process remaining documents as attachments my $attachment_number= 0; + my $attachment_pid; + my $curl_status; while (my $attachment= shift(@docs)) { $attachment_number++; @@ -2947,7 +2929,7 @@ old format 2019-11..2020-01 }; $ai->{embargo_until_date}= join ('-', map { $attachment->{$_} } qw(date_embargo_year date_embargo_month date_embargo_day)) if (exists ($attachment->{date_embargo})); - $ai->{description}= $attachment->{formatdesc} if (defined ($attachment->{formatdate})); + $ai->{description}= $attachment->{formatdesc} if (defined ($attachment->{formatdesc})); Util::JSON::write_json_file($fnm_attachment_md, $attachment_md); @@ -2958,47 +2940,54 @@ old format 2019-11..2020-01 '-F', 'file=@"' . $attachment->{path_doc} . '"', $url1, '-o' . $fnm_attachment_res); - if (exists ($upload_cnf->{headers})) - { - foreach my $header (@{$upload_cnf->{headers}}) - { - push (@attachment_add_cmd, '--header', $header ); - } - } + push (@attachment_add_cmd, @extra_curl_parameters) if (@extra_curl_parameters); + my $att_status; print __LINE__, " attachment_add_cmd: [", join(' ', @attachment_add_cmd), "]\n"; if ($do_upload) { my $t_curl= time(); system(@attachment_add_cmd); + $cnt_att_synced++; my $result_data1; eval { $result_data1= Util::JSON::read_json_file($fnm_attachment_res); }; if ($@) { print __LINE__, " can't parse upload_result; error=[$@]\n"; + $cnt_att_errors_upload++; + $att_status= $ai->{error_code}= 'upload_error'; + $ai->{errors}= [ { error => 'upload_error', error_info => $@ } ]; } else { print __LINE__, " upload attchment [$attachment_number] result: ", Dumper($result_data1); - my ($attachment_pid, $curl_status, $response_msg)= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg); - - send_message("upload attachment success: eprint_id=[$eprint_id] context=[$ot2ut_context] utheses_id=[$utheses_id] attachment_number=[$attachment_number] attachment_pid=[$attachment_pid] curl_status=[$curl_status]"); # unless ($silent_upload_success); - + my ($attachment_pid1, $curl_status1, $response_msg)= map { $result_data1->{$_} } qw(attachmentPid status responseMsg); + $ai->{attachment_pid}= $attachment_pid= $attachment_pid1; + $ai->{upload_status}= $curl_status= $curl_status1; + $ai->{response_msg}= $response_msg; + + if ($curl_status1 eq '200') + { + $cnt_att_ok++; + $att_status= $ai->{error_code}= 'ok'; + } + else + { + $cnt_att_errors_upload++; + $att_status= $ai->{error_code}= 'ingest_error'; + } + + $ai->{security}= $attachment->{security}; if ($attachment->{security} eq 'public') { # set attachment status to Active (in Phaidra) only when this attachment is public + # curl -X POST http://localhost:3000/attachment/changeStatus/#attachmentPid -F "status=A" my $url2= join ('/', $upload_cnf->{api_url}, qw(attachment changeStatus), $attachment_pid); my @attachment_chg_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F status=A), $url2, '-o' . $fnm_attachment_chg); - if (exists ($upload_cnf->{headers})) - { - foreach my $header (@{$upload_cnf->{headers}}) - { - push (@attachment_chg_cmd, '--header', $header ); - } - } + push (@attachment_chg_cmd, @extra_curl_parameters) if (@extra_curl_parameters); print __LINE__, " attachment_chg_cmd: [", join(' ', @attachment_chg_cmd), "]\n"; my $t_curl= time(); @@ -3009,22 +2998,40 @@ old format 2019-11..2020-01 if ($@) { print __LINE__, " can't parse upload_result; error=[$@]\n"; + $ai->{activate}= 'error_curl'; } else { - print __LINE__, " change attchment [$attachment_number] result: ", Dumper($result_data2); - # my $attachment_pid= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg); + print __LINE__, " change attachment [$attachment_number] result: ", Dumper($result_data2); + my ($attachment_pid2, $curl_status2, $response_msg2)= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg); + $ai->{activate}= ($curl_status2 eq '200') ? 'ok' : 'error_activate'; + $ai->{activate_status}= $curl_status2; + $ai->{activate_response_msg}= $response_msg2; } } # if attachment is public } # when main document was uploaded successfully } # if ($do_upload) - } + else + { # attachment was not uploaded + $att_status= 'no_upload'; + } + + # finish processing of this one attachment + + send_message("upload attachment $att_status: eprint_id=[$eprint_id] context=[$ot2ut_context] utheses_id=[$utheses_id] attachment_number=[$attachment_number] attachment_pid=[$attachment_pid] curl_status=[$curl_status]"); # unless ($silent_upload_success); + + $ai->{eprint_id}= $eprint_id; + $ai->{context}= $ot2ut_context; + $ai->{utheses_id}= $utheses_id; + + $col_att->insert($ai); # NOTE/TODO: no effort is made to check for duplicate uploads of attachments; + } # end of processing for one attachment } else { - # no utheses_id defined + # no utheses_id defined, so upload must have gone wrong somehow } } } @@ -3035,7 +3042,7 @@ old format 2019-11..2020-01 my $res; if ($cnt_synced) { - $res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors"; + $res= "synced $cnt_synced objects in context $ot2ut_context; data_errors: $cnt_errors_data; upload_errors: $cnt_errors_upload; ingest_errors: $cnt_errors_ingest"; my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO()); Util::Matrix::save_hash_as_csv(\@ot2ut_synced_columns, \@synced, $fnm, "\t", '', "\n", 1); print __LINE__, " $res, see [$fnm]\n"; @@ -3045,6 +3052,10 @@ old format 2019-11..2020-01 print __LINE__, " $res\n"; $res= "synced no objects in context $ot2ut_context"; } + if ($cnt_skipped) + { + $res .= "; skipped $cnt_skipped objects"; + } (\@synced, $res); } @@ -3733,7 +3744,7 @@ sub get_names_for_role $vn =~ s/\s*$//; print __LINE__, " column_name=[$column_name] name=[$name] nn=[$nn] vn=[$vn]\n"; - if ($vn eq '' || !($vn =~ m#^\U\E[\w\-\x{2010} ]+\.?$#) || !($nn =~ m#^\U\E[\w\-\x{2010} ]+$#)) + if ($vn eq '' || !($vn =~ m#^\U\E[\w\-\x{2010}\. ]+?$#) || !($nn =~ m#^\U\E[\w\-\x{2010} ]+$#)) { # TODO: add option to flag this as a warning instead of as an error push (@errors, { error => 'bad_name', column_name => $column_name, name => $name } ); push (@result, { family_name => $name }); # fill everything in into family_name @@ -4098,13 +4109,14 @@ sub update_policies # $epr_db->show_query(1); my $search_term= "eprint_status IN ('archive', 'buffer')"; my $keys= $epr_db->get_all_x('eprint', [$search_term], join(',', @col_names)); + my @eprint_ids_in_mysql= map { $_."" } sort { $a <=> $b } keys %$keys; # mongodb stores eprint_id as string my $ts_start= Util::ts::ts_ISO_gmt(time()); my ($cnt_updated, $cnt_inserted, $cnt_unchanged)= (0, 0, 0, 0); my (@lst_updated, @lst_inserted, @lst_unchanged); - foreach my $eprint_id (keys %$keys) + foreach my $eprint_id (@eprint_ids_in_mysql) { last unless ($running); @@ -4185,6 +4197,9 @@ sub update_policies print __LINE__, ' ', '='x70, "\n"; } + my @removed_from_mysql= $col_utp->find( { eprint_id => { '$nin' => \@eprint_ids_in_mysql } }, { '_id' => 1, eprint_id => 1 })->all(); + print __LINE__, " removed from mysql ", Dumper(\@removed_from_mysql); + my %stats= ( agent => 'update_policies', @@ -4210,6 +4225,8 @@ sub update_policies sub policies_stats { + my $msg= shift; + $db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut)); # prepare: get info from sync database @@ -4218,25 +4235,43 @@ sub policies_stats my %synced; my (@upload_errors, %upload_errors); + my (%totals, @blocks, @metablocks, @eprint_ids); while ($running) { my $row_sync= $cur_sync->next(); last unless (defined ($row_sync)); # print __LINE__, " row_sync: ", Dumper($row_sync); last; - my ($eprint_id, $lastmod, $context, $utheses_id)= map { $row_sync->{$_} } qw(eprint_id lastmod context utheses_id); - unless (defined ($utheses_id)) - { - # push (@upload_errors, $row_sync); - push (@{$upload_errors{$row_sync->{error_code}}}, $eprint_id); - next; - } + my ($eprint_id, $lastmod, $context, $utheses_id, $error_code)= map { $row_sync->{$_} } qw(eprint_id lastmod context utheses_id error_code); if ($context eq 'ot2ut-test') { $context= 'test'; } elsif ($context eq 'ot2ut-entw') { $context= 'entw'; } elsif ($context eq 'ot2ut-prod') { $context= 'prod'; } + # else ... this should not happen + + my $block_nr= int($eprint_id/100); + my $metablock_nr= int($block_nr/100); + $totals{$context}->{cnt_total}++; + $blocks[$block_nr]->{$context}->{cnt_total}++; + $metablocks[$metablock_nr]->{$context}->{cnt_total}++; + + $eprint_ids[$eprint_id]->{$context}= $row_sync; - $synced{$eprint_id}->{$context}= [ $lastmod, $utheses_id ]; + if (defined ($utheses_id)) + { + $synced{$eprint_id}->{$context}= [ $lastmod, $utheses_id ]; + $totals{$context}->{cnt_ok}++; + $blocks[$block_nr]->{$context}->{cnt_ok}++; + $metablocks[$metablock_nr]->{$context}->{cnt_ok}++; + } + else + { + # push (@upload_errors, $row_sync); + push (@{$upload_errors{$error_code}}, $eprint_id); + $totals{$context}->{cnt_error}++; + $blocks[$block_nr]->{$context}->{cnt_error}++; + $metablocks[$metablock_nr]->{$context}->{cnt_error}++; + } } # MAIN PART: analyze othes policies collection @@ -4253,7 +4288,7 @@ sub policies_stats push (@columns, qw(urn doi)); } - my $cctab= new cctab(columns => \@columns); + my $cctab= new cctab(columns => \@columns, base_path => $base_path); my $max; #= 1000; my @contexts= qw(entw test prod); @@ -4271,6 +4306,14 @@ sub policies_stats map { $row_utp->{$_} } qw(eprint_id eprint_status date_sperre einverstaendnis lastmod full_text_status sperre cnt_errors cnt_warnings errors warnings docs ut_public urn doi ac_nummer); $eprint_id += 0; + my $block_nr= int($eprint_id/100); + my $metablock_nr= int($block_nr/100); + $totals{othes}->{cnt_total}++; + $blocks[$block_nr]->{othes}->{cnt_total}++; + $metablocks[$metablock_nr]->{othes}->{cnt_total}++; + + $eprint_ids[$eprint_id]->{othes}= 1; + my ($cnt_docs, $cnt_public, $cnt_restricted, $cnt_embargo)= map { $docs->{$_} } qw(cnt_docs cnt_public cnt_restricted cnt_embargo); @@ -4381,7 +4424,7 @@ sub policies_stats if (defined ($b1)) { $b1->{annotation}= { bgcolor => 'pink', note => 'restricted aber public doc' }; - print __LINE__, " special bucket found: ", Dumper($b1); + # print __LINE__, " special bucket found: ", Dumper($b1); } my @s2= qw(archive FALSE FALSE no public 1 1 0 0 no); @@ -4391,7 +4434,7 @@ sub policies_stats if (defined ($b2)) { $b2->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis FALSE aber public' }; - print __LINE__, " special bucket found: ", Dumper($b2); + # print __LINE__, " special bucket found: ", Dumper($b2); } my @s2b= qw(archive TRUE FALSE no public 1 1 0 0 no); @@ -4401,7 +4444,7 @@ sub policies_stats if (defined ($b2b)) { $b2b->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' }; - print __LINE__, " special bucket found: ", Dumper($b2b); + # print __LINE__, " special bucket found: ", Dumper($b2b); } my @s2c= qw(archive TRUE NULL no public 1 1 0 0 no); @@ -4411,7 +4454,7 @@ sub policies_stats if (defined ($b2c)) { $b2c->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' }; - print __LINE__, " special bucket found: ", Dumper($b2c); + # print __LINE__, " special bucket found: ", Dumper($b2c); } # ZZZZ @@ -4422,7 +4465,7 @@ sub policies_stats if (defined ($b3)) { $b3->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis TRUE aber restricted mit public document' }; - print __LINE__, " special bucket found: ", Dumper($b3); + # print __LINE__, " special bucket found: ", Dumper($b3); } my @s4a= qw(archive TRUE NULL no public 1 1 0 0 no); @@ -4432,7 +4475,7 @@ sub policies_stats if (defined ($b4a)) { $b4a->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' }; - print __LINE__, " special bucket found: ", Dumper($b4a); + # print __LINE__, " special bucket found: ", Dumper($b4a); } my @s4b= qw(archive TRUE NULL no public 1 1 0 0 yes); @@ -4442,7 +4485,7 @@ sub policies_stats if (defined ($b4b)) { $b4b->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' }; - print __LINE__, " special bucket found: ", Dumper($b4b); + # print __LINE__, " special bucket found: ", Dumper($b4b); } my @s4c= qw(archive TRUE NULL no public 1 1 0 0 yes); @@ -4452,7 +4495,7 @@ sub policies_stats if (defined ($b4c)) { $b4c->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' }; - print __LINE__, " special bucket found: ", Dumper($b4c); + # print __LINE__, " special bucket found: ", Dumper($b4c); } my @s5a= qw(archive TRUE FALSE no public 1 1 0 0 no); @@ -4462,7 +4505,7 @@ sub policies_stats if (defined ($b5a)) { $b5a->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones' }; - print __LINE__, " special bucket found: ", Dumper($b5a); + # print __LINE__, " special bucket found: ", Dumper($b5a); } my @s5b= qw(archive TRUE FALSE no public 1 1 0 0 no); @@ -4472,7 +4515,7 @@ sub policies_stats if (defined ($b5b)) { $b5b->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; DOI wird nachgereicht' }; - print __LINE__, " special bucket found: ", Dumper($b5b); + # print __LINE__, " special bucket found: ", Dumper($b5b); } my @s5c= qw(archive TRUE FALSE no public 1 1 0 0 no); @@ -4482,7 +4525,7 @@ sub policies_stats if (defined ($b5c)) { $b5c->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; URN fehlt noch, DOI wird nachgereicht' }; - print __LINE__, " special bucket found: ", Dumper($b5c); + # print __LINE__, " special bucket found: ", Dumper($b5c); } my @s6a= qw(archive FALSE FALSE no restricted 1 0 1 1 no); @@ -4492,7 +4535,7 @@ sub policies_stats if (defined ($b6a)) { $b6a->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext; NBN schon vergeben' }; - print __LINE__, " special bucket found: ", Dumper($b6a); + # print __LINE__, " special bucket found: ", Dumper($b6a); } my @s6a2= qw(archive FALSE FALSE no restricted 1 0 1 1 no); @@ -4502,7 +4545,7 @@ sub policies_stats if (defined ($b6a)) { $b6a2->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext; NBN noch nicht vergeben' }; - print __LINE__, " special bucket found: ", Dumper($b6a2); + # print __LINE__, " special bucket found: ", Dumper($b6a2); } my @s6b= qw(archive FALSE FALSE no restricted 1 0 1 1 no); @@ -4512,7 +4555,7 @@ sub policies_stats if (defined ($b6b)) { $b6b->{annotation}= { bgcolor => '#33a2ff', note => 'Volltext gesperrt; DOI wurde offenbar vorher vergeben' }; - print __LINE__, " special bucket found: ", Dumper($b6b); + # print __LINE__, " special bucket found: ", Dumper($b6b); } my @s6c= qw(archive FALSE FALSE no restricted 1 0 1 0 no); @@ -4521,22 +4564,218 @@ sub policies_stats my $b6c= $cctab->bucket(0, @s6c); if (defined ($b6c)) { - $b6c->{annotation}= { bgcolor => 'pink', note => 'Volltext restrected, kein Embargo' }; - print __LINE__, " special bucket found: ", Dumper($b6b); + $b6c->{annotation}= { bgcolor => 'pink', note => 'Volltext restricted, kein Embargo' }; + # print __LINE__, " special bucket found: ", Dumper($b6b); } # END annotations + my $bucket_cnt= $cctab->show_tsv(['othes', @contexts], 'counters.tsv'); + my $now= scalar localtime(time()); + my $msg_html; + if ($msg) + { + $msg_html= '<p><font color="red">' . $msg . '</font></p>'; + } + + my $idx_html= join('/', $base_path, 'index.html'); + open(IDX, '>:utf8', $idx_html); + print IDX <<"EOX"; +<html> +<head> +<meta charset="UTF-8" /> +<meta refresh="600" /> +<title>othes to utheses migration statistics</title> +<style> + td { text-align:right; } +</style> +</head> +<body> + +<p>last refreshed: $now</p> +$msg_html +<p><a href="buckets.html" target="buckets">$bucket_cnt buckets</a></p> +<h2>upload counters</h2> +<table border="1"> +<tr> + <th>metablock</th> + <th width="100px">othes</th> + <th colspan="3" width="200px">entw</th> + <th colspan="3" width="200px">test</th> + <th colspan="3" width="200px">prod</th> +</tr> +EOX + + for (my $metablock_nr= 0; $metablock_nr <= $#metablocks; $metablock_nr++) + { + my $mb_othes= $metablocks[$metablock_nr]->{othes}->{cnt_total}; + + my $mb_html= sprintf("metablock_%d.html", $metablock_nr); + my $mb_html_fp= join('/', $base_path, $mb_html); + + print IDX "<tr>\n <td><a href=\"$mb_html\" target=\"metablocks\">", $metablock_nr, "</a></td>\n <td>", $mb_othes, "</td>\n"; + + foreach my $context (qw(entw test prod)) + { + my $c= $metablocks[$metablock_nr]->{$context}->{cnt_ok}; + my $e= $metablocks[$metablock_nr]->{$context}->{cnt_error}; + + my $pct= $c*100.0/$mb_othes; + my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue'; + my $ck2= ($e == 0) ? 'lightgreen' : 'lightpink'; + + printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); + } + print IDX "</tr>\n"; + + open (MB, '>:utf8', $mb_html_fp); + print MB <<"EOX"; +<html> +<head> +<meta charset="UTF-8" /> +<title>othes to utheses migration metablock $metablock_nr</title> +<style> + td { text-align:right; } +</style> +</head> +<body> +<table border="1"> +<tr> + <th>block</th> + <th width="100px">othes</th> + <th colspan="3" width="200px">entw</th> + <th colspan="3" width="200px">test</th> + <th colspan="3" width="200px">prod</th> +</tr> +EOX + + my $block_start= $metablock_nr*100; + my $block_last= ($metablock_nr == 7) ? 750 : $block_start+99; + for (my $block_nr= $block_start; $block_nr <= $block_last; $block_nr++) + { + next unless (defined ($blocks[$block_nr])); + + my $block= $blocks[$block_nr]; + + my $b_html= sprintf("block_%d.html", $block_nr); + my $b_html_fp= join('/', $base_path, $b_html); + + my $b_othes= $block->{othes}->{cnt_total}; + print MB "<tr>\n <td><a href=\"$b_html\" target=\"blocks\">", $block_nr, "</a></td>\n <td>", $b_othes, "</td>\n"; + + foreach my $context (qw(entw test prod)) + { + my $c= $block->{$context}->{cnt_ok}; + my $e= $block->{$context}->{cnt_error}; + my $pct= $c*100.0/$b_othes; + my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue'; + my $ck2= ($e == 0) ? 'lightgreen' : 'pink'; + printf MB (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); + } + print MB "</tr>\n"; + + open (BLOCK, '>:utf8', $b_html_fp); + print BLOCK <<"EOX"; +<html> +<head> +<meta charset="UTF-8" /> +<title>othes to utheses migration block $block_nr</title> +<style> + td { text-align:right; } +</style> +</head> +<body> +<table border="1"> +<tr> + <th width="100px">eprint_id</th> + <th width="100px">entw</th> + <th width="100px">test</th> + <th width="100px">prod</th> +</tr> +EOX + + my $items= $block->{items}; + my $first= $block_nr*100; + my $last= $first+99; + for (my $eprint_id= $first; $eprint_id <= $last; $eprint_id++) + { + my $x= $eprint_ids[$eprint_id]; + next unless (defined ($x)); + + print BLOCK "<tr>\n <td><a href=\"https://othes.univie.ac.at/$eprint_id/\" target=\"othes\">$eprint_id</a></td>\n"; + + foreach my $context (qw(entw test prod)) + { + unless (exists ($x->{$context})) + { + print BLOCK " <td> </td>\n"; + next; + } + + my $c= $x->{$context}; + my ($error_code, $utheses_id, $errors)= map { $c->{$_} } qw(error_code utheses_id errors); + if ($error_code eq 'ok') + { + my $link; + + if ($context eq 'test') { $link= 'https://utheses-frontend.ctest.univie.ac.at/client/?#/view/document/utheses/' . $utheses_id; } + elsif ($context eq 'entw') { $link= 'https://utheses-frontend-entw-utheses.ctest.univie.ac.at/?#/view/document/utheses/' . $utheses_id; } + elsif ($context eq 'prod') { $link= 'unknown'; } + + print BLOCK " <td bgcolor=\"lightgreen\"><a href=\"$link\" target=\"$context\">$utheses_id</a></td>\n"; + } + else + { + print BLOCK " <td bgcolor=\"lightpink\">$error_code ", Dumper($errors), "</td>\n"; + } + } + print BLOCK "</tr>\n"; + } + + print BLOCK <<"EOX"; +</table> +</body> +</html> +EOX + + close (BLOCK); + } + + print MB <<"EOX"; +</table> +</body> +</html> +EOX + close (MB); + + } + + my $total_othes= $totals{othes}->{cnt_total}; + print IDX "<tr>\n <td>total</td>\n <td>", $total_othes, "</td>\n"; + foreach my $context (qw(entw test prod)) + { + my $c= $totals{$context}->{cnt_ok}; + my $e= $totals{$context}->{cnt_error}; + my $pct= $c*100.0/$total_othes; + my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue'; + my $ck2= ($e == 0) ? 'lightgreen' : 'lightpink'; + printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); + } + print IDX "</tr>\n"; + + print IDX "</table>\n"; # print __LINE__, " cctab: ", Dumper($cctab); - my $trailer= "<h2>errors</h2>\n". Dumper(\%cnt_errors); - $trailer .= "<h2>warnings</h2>\n". Dumper(\%cnt_warnings); - print __LINE__, " trailer=[",$trailer,"]\n"; + print IDX "<h2>errors</h2>\n". Dumper(\%cnt_errors); + print IDX "<h2>warnings</h2>\n". Dumper(\%cnt_warnings); - $trailer .= "<h2>nonpublic_doc_first</h2>\n". Dumper(\@lst_nonpublic_doc_first) if (@lst_nonpublic_doc_first); - $trailer .= "<h2>docs with notes</h2>\n". Dumper(\@lst_docs_with_notes) if (@lst_docs_with_notes); - $trailer .= "<h2>embargo dates</h2>\n". Dumper(\%doc_embargo_dates); - $trailer .= "<h2>errors</h2>\n". Dumper(\%upload_errors); + print IDX "<h2>nonpublic_doc_first</h2>\n". Dumper(\@lst_nonpublic_doc_first) if (@lst_nonpublic_doc_first); + print IDX "<h2>docs with notes</h2>\n". Dumper(\@lst_docs_with_notes) if (@lst_docs_with_notes); + print IDX "<h2>embargo dates</h2>\n". Dumper(\%doc_embargo_dates); + print IDX "<h2>errors</h2>\n". Dumper(\%upload_errors); - $cctab->show_tsv(['othes', @contexts], 'counters.tsv', $trailer); +print IDX <<"EOX"; +</body> +</html> +EOX # show objects which were uploaded to utheses but are no longer present in othes my @synced_not_found= sort { $a <=> $b } keys %synced; @@ -4843,7 +5082,7 @@ sub show_tsv # TODO: rename ... my $self= shift; my $counters= shift; my $fnm_counters= shift || 'counters.tsv'; - my $trailer= shift; + # my $trailer= shift; my @columns= @{$self->{columns}}; my $column_count= @columns; @@ -4856,19 +5095,23 @@ sub show_tsv # TODO: rename ... my @counters= @$counters; my @heading= ('bucket', @columns, @counters); - my $base_path= '/var/www/ot2ut'; # TODO(maybe): get this from the config... - my $idx_html= join('/', $base_path, 'index.html'); - open (IDX, '>:utf8', $idx_html) or die; - print IDX <<EOX; + my $idx_html= join('/', $self->{base_path}, 'buckets.html'); + open (BUCKETS, '>:utf8', $idx_html) or die; + print BUCKETS <<"EOX"; +<html> +<head> +<meta charset="UTF-8" /> <title>ot2ut bucket list</title> -<style type="text/css"> - +<style> + td { text-align:right; } </style> +</head> +<body> <table border="1"> <tr> EOX - foreach my $hdr (@heading) { print IDX "<th>$hdr</th>"; } - print IDX <<EOX; + foreach my $hdr (@heading) { print BUCKETS "<th>$hdr</th>"; } + print BUCKETS <<EOX; </tr> EOX @@ -4893,7 +5136,7 @@ EOX my $row_info; if (defined ($annotation)) { - print __LINE__, " bucket has annoation: ", main::Dumper($annotation); + # print __LINE__, " bucket has annotation: ", main::Dumper($annotation); $row_info= ' bgcolor="green"'; if (exists ($annotation->{bgcolor})) { @@ -4902,12 +5145,16 @@ EOX } my $fnm_lst= sprintf("bucket_%d.html", $bucket_nr); - my $path_lst= join ('/', $base_path, $fnm_lst); + my $path_lst= join ('/', $self->{base_path}, $fnm_lst); open (LST, '>:utf8', $path_lst) or die; print LST <<"EOX"; +<html> <head> -<meta charset="UTF-8"> +<meta charset="UTF-8" /> <title>bucket $bucket_nr</title> +<style> + td { text-align:right; } +</style> </head> <body> <table> @@ -4917,7 +5164,7 @@ EOX foreach my $col (@columns) { my $val= shift (@vals); - my $desc= $bucketlist_column_descriptions{$col} || + my $desc= $bucketlist_column_descriptions{$col} || ' '; print LST "<tr><th>$col</th><td>$val</td><td>$desc</td></tr>\n"; } print LST "</table>\n"; @@ -4928,7 +5175,7 @@ EOX } print LST <<"EOX"; <table border="1"> -<tr><th>ac_number</th><th>othes</th><th>entw</th><th>test</th><th>prod</th><th>errors</th><th>warnings</th></tr> +<tr><th width="150px">ac_number</th><th width="100px">othes</th><th width="100px">entw</th><th width="100px">test</th><th width="100px">prod</th><th>errors</th><th>warnings</th></tr> EOX my $ids= $bucket->{ids}; @@ -4985,41 +5232,36 @@ EOX print LST "</tr>\n"; } - print LST <<EOX; + print LST <<"EOX"; </table> +</body> +</html> EOX close (LST); - print IDX "<tr$row_info><td><a href=\"$fnm_lst\" target=\"bucket\">$bucket_nr</a></td>"; - foreach my $val (@$vals) { print IDX "<td>$val</td>"; } + print BUCKETS "<tr$row_info><td><a href=\"$fnm_lst\" target=\"bucket\">$bucket_nr</a></td>"; + foreach my $val (@$vals) { print BUCKETS "<td>$val</td>"; } ctr: foreach my $ctr (@counters) { my $cnt= $bucket->{$ctr}; - if (defined ($cnt)) { print IDX "<td>$cnt</td>"; } - else { print IDX "<td> </td>"; } + if (defined ($cnt)) { print BUCKETS "<td>$cnt</td>"; } + else { print BUCKETS "<td> </td>"; } } - print IDX "</tr>\n"; + print BUCKETS "</tr>\n"; } - print IDX <<EOX; + print BUCKETS <<EOX; </table> -EOX - - print IDX "<p>ac_numbers found: ", $cnt_ac_numbers, "</p>\n"; - - if (defined($trailer)) - { - print IDX $trailer; - # print __LINE__, " trailer=[",$trailer,"]\n"; - } - - print IDX <<EOX; </body> +</html> EOX - close (IDX); + + close (BUCKETS); close (TSV); + + $bucket_nr; } sub enumerate