Skip to content
Snippets Groups Projects
Commit d2a4f761 authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

progress information; save timing information in request record

parent 48c0e056
Branches
No related tags found
No related merge requests found
......@@ -206,6 +206,7 @@ my %bucketlist_column_descriptions=
my %ot2ut_sync_anyway= map { $_ => 1 } qw(33905); # these should be synced, even if they were already marked as ok
my %doc_embargo_dates;
my $base_path= '/var/www/ot2ut'; # TODO(maybe): get this from the config...
# END OT2UT: Othes to Utheses
# ======================================================================
......@@ -439,7 +440,7 @@ elsif ($op_mode eq 'update-policies')
}
elsif ($op_mode eq 'policies-stats')
{
policies_stats();
policies_stats(@PARS);
}
elsif ($op_mode eq 'reset') # reset error conditions for given ac_numbers
{
......@@ -2541,7 +2542,7 @@ sub oma
{ $ot2ut_eprint_status= 'buffer'; $ignore_errors= 1; }
else { $ot2ut_eprint_status= 'archive'; $ignore_errors= 0; }
$col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }});
$col_req->update( { _id => $row->{_id} }, { '$set' => { status => 'finish', ts_start => Util::ts::ts_ISO_gmt() }} );
my $msg= "send_batch: sending $bs objects in $ot2ut_eprint_status to $ot2ut_context";
activity({ activity => 'send_batch', msg => $msg});
......@@ -2558,20 +2559,22 @@ sub oma
$silent_upload_success= 1;
$ignore_errors= 0;
$col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }});
$col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress', ts_start => Util::ts::ts_ISO_gmt() }});
my $msg= "send_block: sending objects of block $block to $ot2ut_context";
activity({ activity => 'send_batch', msg => $msg});
send_message($msg);
my ($synced, $res)= ot2ut('block' => $block);
send_message("send_block: $res");
send_message("send_block: block $block, result: $res");
$new_status= 'done' if (@$synced);
policies_stats("processed block $block");
}
elsif ($row->{action} eq 'send_ids')
{
$ignore_errors= 1;
$col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }});
$ignore_errors= 0;
$col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress', ts_start => Util::ts::ts_ISO_gmt() }});
my @ids;
foreach my $id (@{$row->{ids}}) { push (@ids, $id) if ($id =~ m#^\d+$#); }
my $cnt= @ids;
......@@ -2586,7 +2589,7 @@ sub oma
$new_status= 'done' if (@$synced);
}
$col_req->update({ _id => $row->{_id}}, { '$set' => { status => $new_status }});
$col_req->update({ _id => $row->{_id}}, { '$set' => { status => $new_status, ts_finish => Util::ts::ts_ISO_gmt() }});
activity({ activity => 'listening'});
}
}
......@@ -2636,8 +2639,19 @@ sub ot2ut
# my $irma_na= get_irma_na_db($cnf);
my $epr= get_eprints_db($cnf);
my $upload_cnf= $cnf->{$ot2ut_context};
die "no valid ot2ut context" unless (defined ($upload_cnf));
my @extra_curl_parameters;
if (exists ($upload_cnf->{headers}))
{
@extra_curl_parameters= map { ('--header', $_) } @{$upload_cnf->{headers}};
# push (@upload_cmd, @extra_curl_parameters) if (@extra_curl_parameters);
}
$db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut));
my $col_sync= $db_ot2ut->get_collection('sync');
my $col_att= $db_ot2ut->get_collection('attachments');
my $col_policy_utheses= $db_ot2ut->get_collection('policy.utheses');
unless (defined ($utheses_faculty_map))
......@@ -2685,8 +2699,8 @@ sub ot2ut
}
my @synced= ();
my $cnt_synced= 0;
my $cnt_errors= 0;
my ($cnt_synced, $cnt_upload_ok, $cnt_errors_data, $cnt_errors_upload, $cnt_errors_ingest, $cnt_skipped)= (0, 0, 0, 0, 0, 0);
my ($cnt_att_synced, $cnt_att_ok, $cnt_att_errors_upload)= (0, 0, 0);
my $cnt_eprint_ids= @eprint_ids;
print __LINE__, " ot2ut: ot2ut_eprint_status=$ot2ut_eprint_status cnt_eprint_ids=$cnt_eprint_ids MAX_SYNC=$MAX_SYNC\n";
sleep(3);
......@@ -2733,6 +2747,7 @@ sub ot2ut
if ($sync_info->{lastmod} eq $lastmod)
{
print __LINE__, " eprint_id=[$eprint_id] already synced; skipping...\n";
$cnt_skipped++;
next;
}
else
......@@ -2781,7 +2796,7 @@ sub ot2ut
my $utheses_errors_json_path= 'othes/utheses_json/errors/' . $eprint_id . '.json';
Util::JSON::write_json_file($utheses_errors_json_path, $errors);
$cnt_errors++;
$cnt_errors_data++;
}
else
{ # go ahead ...
......@@ -2789,9 +2804,6 @@ sub ot2ut
print __LINE__, " files: ", Dumper($files);
print __LINE__, " utheses_json_path=[$utheses_json_path]\n";
my $upload_cnf= $cnf->{$ot2ut_context};
die "no valid ot2ut context" unless (defined ($upload_cnf));
my @docs= @{$docs->{documents}};
my $main_file= shift(@docs);
my ($local_filename, $lfnm)= map { $main_file->{$_} } qw(path_doc main);
......@@ -2803,13 +2815,7 @@ sub ot2ut
'-F', 'file=@"' . $local_filename . '";filename="' . $lfnm . '"', '-F', 'type=application/pdf',
$upload_cnf->{import_url}, '-o' . $utheses_upload_result_json_path);
if (exists ($upload_cnf->{headers}))
{
foreach my $header (@{$upload_cnf->{headers}})
{
push (@upload_cmd, '--header', $header );
}
}
push (@upload_cmd, @extra_curl_parameters) if (@extra_curl_parameters);
print __LINE__, " upload_cmd: [", join(' ', @upload_cmd), "]\n";
my $utheses_id;
......@@ -2842,42 +2848,13 @@ sub ot2ut
push (@synced, $el);
$col_sync->insert($el);
$cnt_errors_upload++;
sleep(2);
}
else
{
print __LINE__, " result_data: ", Dumper($result_data);
=begin comment
old format 2019-11..2020-01
my ($status, $result)= map { $result_data->{$_} } qw(status result);
my ($ac, $ad, $ia)= map { $result->{$_} } qw(add_container add_document import_activate);
my ($container_pid, $container_result, $utheses_id)= map { $ac->{result}->{$_} } qw(pid status uthesesId);
my ($document_pid, $document_result)= map { $ad->{result}->{$_} } qw(pid status);
my $out_row=
{
eprint_id => $eprint_id,
lastmod => $lastmod,
ts_upload => $ts_upload,
context => $ot2ut_context,
error_code => 'ok',
error_cnt => 0,
utheses_id => $utheses_id,
container_pid => $container_pid,
container_result => $container_result,
document_pid => $document_pid,
document_result => $document_result,
activate_result => $ia->{status},
import_code => $ia->{import_validate}->{import_code},
import_note => $ia->{import_validate}->{import_note},
};
=end comment
=cut
my ($status, $import_status, $utheses_id1, $response_msg, $alerts)= map { $result_data->{$_} } qw(status importStatus uthesesId responseMsg alerts);
print __LINE__, " status=[$status] response_msg=[$response_msg]\n";
......@@ -2898,6 +2875,7 @@ old format 2019-11..2020-01
uploaded_fnm => $lfnm,
upload_status => $status,
response_msg => $response_msg,
attachement_cnt => scalar @docs,
};
if (defined ($utheses_id1) && $status eq '200')
......@@ -2905,10 +2883,12 @@ old format 2019-11..2020-01
$utheses_id= $utheses_id1;
$out_row->{error_code}= $upload_success= 'ok';
$out_row->{error_cnt}= 0;
$cnt_upload_ok++;
}
else
{
$out_row->{error_code}= $upload_success= 'error';
$out_row->{error_code}= $upload_success= 'ingest_error';
$cnt_errors_ingest++;
}
push (@synced, $out_row);
......@@ -2923,6 +2903,8 @@ old format 2019-11..2020-01
{
# process remaining documents as attachments
my $attachment_number= 0;
my $attachment_pid;
my $curl_status;
while (my $attachment= shift(@docs))
{
$attachment_number++;
......@@ -2947,7 +2929,7 @@ old format 2019-11..2020-01
};
$ai->{embargo_until_date}= join ('-', map { $attachment->{$_} } qw(date_embargo_year date_embargo_month date_embargo_day)) if (exists ($attachment->{date_embargo}));
$ai->{description}= $attachment->{formatdesc} if (defined ($attachment->{formatdate}));
$ai->{description}= $attachment->{formatdesc} if (defined ($attachment->{formatdesc}));
Util::JSON::write_json_file($fnm_attachment_md, $attachment_md);
......@@ -2958,47 +2940,54 @@ old format 2019-11..2020-01
'-F', 'file=@"' . $attachment->{path_doc} . '"',
$url1, '-o' . $fnm_attachment_res);
if (exists ($upload_cnf->{headers}))
{
foreach my $header (@{$upload_cnf->{headers}})
{
push (@attachment_add_cmd, '--header', $header );
}
}
push (@attachment_add_cmd, @extra_curl_parameters) if (@extra_curl_parameters);
my $att_status;
print __LINE__, " attachment_add_cmd: [", join(' ', @attachment_add_cmd), "]\n";
if ($do_upload)
{
my $t_curl= time();
system(@attachment_add_cmd);
$cnt_att_synced++;
my $result_data1;
eval { $result_data1= Util::JSON::read_json_file($fnm_attachment_res); };
if ($@)
{
print __LINE__, " can't parse upload_result; error=[$@]\n";
$cnt_att_errors_upload++;
$att_status= $ai->{error_code}= 'upload_error';
$ai->{errors}= [ { error => 'upload_error', error_info => $@ } ];
}
else
{
print __LINE__, " upload attchment [$attachment_number] result: ", Dumper($result_data1);
my ($attachment_pid, $curl_status, $response_msg)= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg);
my ($attachment_pid1, $curl_status1, $response_msg)= map { $result_data1->{$_} } qw(attachmentPid status responseMsg);
$ai->{attachment_pid}= $attachment_pid= $attachment_pid1;
$ai->{upload_status}= $curl_status= $curl_status1;
$ai->{response_msg}= $response_msg;
send_message("upload attachment success: eprint_id=[$eprint_id] context=[$ot2ut_context] utheses_id=[$utheses_id] attachment_number=[$attachment_number] attachment_pid=[$attachment_pid] curl_status=[$curl_status]"); # unless ($silent_upload_success);
if ($curl_status1 eq '200')
{
$cnt_att_ok++;
$att_status= $ai->{error_code}= 'ok';
}
else
{
$cnt_att_errors_upload++;
$att_status= $ai->{error_code}= 'ingest_error';
}
$ai->{security}= $attachment->{security};
if ($attachment->{security} eq 'public')
{ # set attachment status to Active (in Phaidra) only when this attachment is public
# curl -X POST http://localhost:3000/attachment/changeStatus/#attachmentPid -F "status=A"
my $url2= join ('/', $upload_cnf->{api_url}, qw(attachment changeStatus), $attachment_pid);
my @attachment_chg_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F status=A),
$url2, '-o' . $fnm_attachment_chg);
if (exists ($upload_cnf->{headers}))
{
foreach my $header (@{$upload_cnf->{headers}})
{
push (@attachment_chg_cmd, '--header', $header );
}
}
push (@attachment_chg_cmd, @extra_curl_parameters) if (@extra_curl_parameters);
print __LINE__, " attachment_chg_cmd: [", join(' ', @attachment_chg_cmd), "]\n";
my $t_curl= time();
......@@ -3009,22 +2998,40 @@ old format 2019-11..2020-01
if ($@)
{
print __LINE__, " can't parse upload_result; error=[$@]\n";
$ai->{activate}= 'error_curl';
}
else
{
print __LINE__, " change attchment [$attachment_number] result: ", Dumper($result_data2);
# my $attachment_pid= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg);
print __LINE__, " change attachment [$attachment_number] result: ", Dumper($result_data2);
my ($attachment_pid2, $curl_status2, $response_msg2)= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg);
$ai->{activate}= ($curl_status2 eq '200') ? 'ok' : 'error_activate';
$ai->{activate_status}= $curl_status2;
$ai->{activate_response_msg}= $response_msg2;
}
} # if attachment is public
} # when main document was uploaded successfully
} # if ($do_upload)
else
{ # attachment was not uploaded
$att_status= 'no_upload';
}
# finish processing of this one attachment
send_message("upload attachment $att_status: eprint_id=[$eprint_id] context=[$ot2ut_context] utheses_id=[$utheses_id] attachment_number=[$attachment_number] attachment_pid=[$attachment_pid] curl_status=[$curl_status]"); # unless ($silent_upload_success);
$ai->{eprint_id}= $eprint_id;
$ai->{context}= $ot2ut_context;
$ai->{utheses_id}= $utheses_id;
$col_att->insert($ai); # NOTE/TODO: no effort is made to check for duplicate uploads of attachments;
} # end of processing for one attachment
}
else
{
# no utheses_id defined
# no utheses_id defined, so upload must have gone wrong somehow
}
}
}
......@@ -3035,7 +3042,7 @@ old format 2019-11..2020-01
my $res;
if ($cnt_synced)
{
$res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors";
$res= "synced $cnt_synced objects in context $ot2ut_context; data_errors: $cnt_errors_data; upload_errors: $cnt_errors_upload; ingest_errors: $cnt_errors_ingest";
my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO());
Util::Matrix::save_hash_as_csv(\@ot2ut_synced_columns, \@synced, $fnm, "\t", '', "\n", 1);
print __LINE__, " $res, see [$fnm]\n";
......@@ -3045,6 +3052,10 @@ old format 2019-11..2020-01
print __LINE__, " $res\n";
$res= "synced no objects in context $ot2ut_context";
}
if ($cnt_skipped)
{
$res .= "; skipped $cnt_skipped objects";
}
(\@synced, $res);
}
......@@ -3733,7 +3744,7 @@ sub get_names_for_role
$vn =~ s/\s*$//;
print __LINE__, " column_name=[$column_name] name=[$name] nn=[$nn] vn=[$vn]\n";
if ($vn eq '' || !($vn =~ m#^\U\E[\w\-\x{2010} ]+\.?$#) || !($nn =~ m#^\U\E[\w\-\x{2010} ]+$#))
if ($vn eq '' || !($vn =~ m#^\U\E[\w\-\x{2010}\. ]+?$#) || !($nn =~ m#^\U\E[\w\-\x{2010} ]+$#))
{ # TODO: add option to flag this as a warning instead of as an error
push (@errors, { error => 'bad_name', column_name => $column_name, name => $name } );
push (@result, { family_name => $name }); # fill everything in into family_name
......@@ -4098,13 +4109,14 @@ sub update_policies
# $epr_db->show_query(1);
my $search_term= "eprint_status IN ('archive', 'buffer')";
my $keys= $epr_db->get_all_x('eprint', [$search_term], join(',', @col_names));
my @eprint_ids_in_mysql= map { $_."" } sort { $a <=> $b } keys %$keys; # mongodb stores eprint_id as string
my $ts_start= Util::ts::ts_ISO_gmt(time());
my ($cnt_updated, $cnt_inserted, $cnt_unchanged)= (0, 0, 0, 0);
my (@lst_updated, @lst_inserted, @lst_unchanged);
foreach my $eprint_id (keys %$keys)
foreach my $eprint_id (@eprint_ids_in_mysql)
{
last unless ($running);
......@@ -4185,6 +4197,9 @@ sub update_policies
print __LINE__, ' ', '='x70, "\n";
}
my @removed_from_mysql= $col_utp->find( { eprint_id => { '$nin' => \@eprint_ids_in_mysql } }, { '_id' => 1, eprint_id => 1 })->all();
print __LINE__, " removed from mysql ", Dumper(\@removed_from_mysql);
my %stats=
(
agent => 'update_policies',
......@@ -4210,6 +4225,8 @@ sub update_policies
sub policies_stats
{
my $msg= shift;
$db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut));
# prepare: get info from sync database
......@@ -4218,25 +4235,43 @@ sub policies_stats
my %synced;
my (@upload_errors, %upload_errors);
my (%totals, @blocks, @metablocks, @eprint_ids);
while ($running)
{
my $row_sync= $cur_sync->next();
last unless (defined ($row_sync));
# print __LINE__, " row_sync: ", Dumper($row_sync); last;
my ($eprint_id, $lastmod, $context, $utheses_id)= map { $row_sync->{$_} } qw(eprint_id lastmod context utheses_id);
unless (defined ($utheses_id))
{
# push (@upload_errors, $row_sync);
push (@{$upload_errors{$row_sync->{error_code}}}, $eprint_id);
next;
}
my ($eprint_id, $lastmod, $context, $utheses_id, $error_code)= map { $row_sync->{$_} } qw(eprint_id lastmod context utheses_id error_code);
if ($context eq 'ot2ut-test') { $context= 'test'; }
elsif ($context eq 'ot2ut-entw') { $context= 'entw'; }
elsif ($context eq 'ot2ut-prod') { $context= 'prod'; }
# else ... this should not happen
my $block_nr= int($eprint_id/100);
my $metablock_nr= int($block_nr/100);
$totals{$context}->{cnt_total}++;
$blocks[$block_nr]->{$context}->{cnt_total}++;
$metablocks[$metablock_nr]->{$context}->{cnt_total}++;
$eprint_ids[$eprint_id]->{$context}= $row_sync;
if (defined ($utheses_id))
{
$synced{$eprint_id}->{$context}= [ $lastmod, $utheses_id ];
$totals{$context}->{cnt_ok}++;
$blocks[$block_nr]->{$context}->{cnt_ok}++;
$metablocks[$metablock_nr]->{$context}->{cnt_ok}++;
}
else
{
# push (@upload_errors, $row_sync);
push (@{$upload_errors{$error_code}}, $eprint_id);
$totals{$context}->{cnt_error}++;
$blocks[$block_nr]->{$context}->{cnt_error}++;
$metablocks[$metablock_nr]->{$context}->{cnt_error}++;
}
}
# MAIN PART: analyze othes policies collection
......@@ -4253,7 +4288,7 @@ sub policies_stats
push (@columns, qw(urn doi));
}
my $cctab= new cctab(columns => \@columns);
my $cctab= new cctab(columns => \@columns, base_path => $base_path);
my $max; #= 1000;
my @contexts= qw(entw test prod);
......@@ -4271,6 +4306,14 @@ sub policies_stats
map { $row_utp->{$_} } qw(eprint_id eprint_status date_sperre einverstaendnis lastmod full_text_status sperre cnt_errors cnt_warnings errors warnings docs ut_public urn doi ac_nummer);
$eprint_id += 0;
my $block_nr= int($eprint_id/100);
my $metablock_nr= int($block_nr/100);
$totals{othes}->{cnt_total}++;
$blocks[$block_nr]->{othes}->{cnt_total}++;
$metablocks[$metablock_nr]->{othes}->{cnt_total}++;
$eprint_ids[$eprint_id]->{othes}= 1;
my ($cnt_docs, $cnt_public, $cnt_restricted, $cnt_embargo)=
map { $docs->{$_} } qw(cnt_docs cnt_public cnt_restricted cnt_embargo);
......@@ -4381,7 +4424,7 @@ sub policies_stats
if (defined ($b1))
{
$b1->{annotation}= { bgcolor => 'pink', note => 'restricted aber public doc' };
print __LINE__, " special bucket found: ", Dumper($b1);
# print __LINE__, " special bucket found: ", Dumper($b1);
}
my @s2= qw(archive FALSE FALSE no public 1 1 0 0 no);
......@@ -4391,7 +4434,7 @@ sub policies_stats
if (defined ($b2))
{
$b2->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis FALSE aber public' };
print __LINE__, " special bucket found: ", Dumper($b2);
# print __LINE__, " special bucket found: ", Dumper($b2);
}
my @s2b= qw(archive TRUE FALSE no public 1 1 0 0 no);
......@@ -4401,7 +4444,7 @@ sub policies_stats
if (defined ($b2b))
{
$b2b->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' };
print __LINE__, " special bucket found: ", Dumper($b2b);
# print __LINE__, " special bucket found: ", Dumper($b2b);
}
my @s2c= qw(archive TRUE NULL no public 1 1 0 0 no);
......@@ -4411,7 +4454,7 @@ sub policies_stats
if (defined ($b2c))
{
$b2c->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' };
print __LINE__, " special bucket found: ", Dumper($b2c);
# print __LINE__, " special bucket found: ", Dumper($b2c);
}
# ZZZZ
......@@ -4422,7 +4465,7 @@ sub policies_stats
if (defined ($b3))
{
$b3->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis TRUE aber restricted mit public document' };
print __LINE__, " special bucket found: ", Dumper($b3);
# print __LINE__, " special bucket found: ", Dumper($b3);
}
my @s4a= qw(archive TRUE NULL no public 1 1 0 0 no);
......@@ -4432,7 +4475,7 @@ sub policies_stats
if (defined ($b4a))
{
$b4a->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' };
print __LINE__, " special bucket found: ", Dumper($b4a);
# print __LINE__, " special bucket found: ", Dumper($b4a);
}
my @s4b= qw(archive TRUE NULL no public 1 1 0 0 yes);
......@@ -4442,7 +4485,7 @@ sub policies_stats
if (defined ($b4b))
{
$b4b->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' };
print __LINE__, " special bucket found: ", Dumper($b4b);
# print __LINE__, " special bucket found: ", Dumper($b4b);
}
my @s4c= qw(archive TRUE NULL no public 1 1 0 0 yes);
......@@ -4452,7 +4495,7 @@ sub policies_stats
if (defined ($b4c))
{
$b4c->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' };
print __LINE__, " special bucket found: ", Dumper($b4c);
# print __LINE__, " special bucket found: ", Dumper($b4c);
}
my @s5a= qw(archive TRUE FALSE no public 1 1 0 0 no);
......@@ -4462,7 +4505,7 @@ sub policies_stats
if (defined ($b5a))
{
$b5a->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones' };
print __LINE__, " special bucket found: ", Dumper($b5a);
# print __LINE__, " special bucket found: ", Dumper($b5a);
}
my @s5b= qw(archive TRUE FALSE no public 1 1 0 0 no);
......@@ -4472,7 +4515,7 @@ sub policies_stats
if (defined ($b5b))
{
$b5b->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; DOI wird nachgereicht' };
print __LINE__, " special bucket found: ", Dumper($b5b);
# print __LINE__, " special bucket found: ", Dumper($b5b);
}
my @s5c= qw(archive TRUE FALSE no public 1 1 0 0 no);
......@@ -4482,7 +4525,7 @@ sub policies_stats
if (defined ($b5c))
{
$b5c->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; URN fehlt noch, DOI wird nachgereicht' };
print __LINE__, " special bucket found: ", Dumper($b5c);
# print __LINE__, " special bucket found: ", Dumper($b5c);
}
my @s6a= qw(archive FALSE FALSE no restricted 1 0 1 1 no);
......@@ -4492,7 +4535,7 @@ sub policies_stats
if (defined ($b6a))
{
$b6a->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext; NBN schon vergeben' };
print __LINE__, " special bucket found: ", Dumper($b6a);
# print __LINE__, " special bucket found: ", Dumper($b6a);
}
my @s6a2= qw(archive FALSE FALSE no restricted 1 0 1 1 no);
......@@ -4502,7 +4545,7 @@ sub policies_stats
if (defined ($b6a))
{
$b6a2->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext; NBN noch nicht vergeben' };
print __LINE__, " special bucket found: ", Dumper($b6a2);
# print __LINE__, " special bucket found: ", Dumper($b6a2);
}
my @s6b= qw(archive FALSE FALSE no restricted 1 0 1 1 no);
......@@ -4512,7 +4555,7 @@ sub policies_stats
if (defined ($b6b))
{
$b6b->{annotation}= { bgcolor => '#33a2ff', note => 'Volltext gesperrt; DOI wurde offenbar vorher vergeben' };
print __LINE__, " special bucket found: ", Dumper($b6b);
# print __LINE__, " special bucket found: ", Dumper($b6b);
}
my @s6c= qw(archive FALSE FALSE no restricted 1 0 1 0 no);
......@@ -4521,22 +4564,218 @@ sub policies_stats
my $b6c= $cctab->bucket(0, @s6c);
if (defined ($b6c))
{
$b6c->{annotation}= { bgcolor => 'pink', note => 'Volltext restrected, kein Embargo' };
print __LINE__, " special bucket found: ", Dumper($b6b);
$b6c->{annotation}= { bgcolor => 'pink', note => 'Volltext restricted, kein Embargo' };
# print __LINE__, " special bucket found: ", Dumper($b6b);
}
# END annotations
my $bucket_cnt= $cctab->show_tsv(['othes', @contexts], 'counters.tsv');
my $now= scalar localtime(time());
my $msg_html;
if ($msg)
{
$msg_html= '<p><font color="red">' . $msg . '</font></p>';
}
my $idx_html= join('/', $base_path, 'index.html');
open(IDX, '>:utf8', $idx_html);
print IDX <<"EOX";
<html>
<head>
<meta charset="UTF-8" />
<meta refresh="600" />
<title>othes to utheses migration statistics</title>
<style>
td { text-align:right; }
</style>
</head>
<body>
<p>last refreshed: $now</p>
$msg_html
<p><a href="buckets.html" target="buckets">$bucket_cnt buckets</a></p>
<h2>upload counters</h2>
<table border="1">
<tr>
<th>metablock</th>
<th width="100px">othes</th>
<th colspan="3" width="200px">entw</th>
<th colspan="3" width="200px">test</th>
<th colspan="3" width="200px">prod</th>
</tr>
EOX
for (my $metablock_nr= 0; $metablock_nr <= $#metablocks; $metablock_nr++)
{
my $mb_othes= $metablocks[$metablock_nr]->{othes}->{cnt_total};
my $mb_html= sprintf("metablock_%d.html", $metablock_nr);
my $mb_html_fp= join('/', $base_path, $mb_html);
print IDX "<tr>\n <td><a href=\"$mb_html\" target=\"metablocks\">", $metablock_nr, "</a></td>\n <td>", $mb_othes, "</td>\n";
foreach my $context (qw(entw test prod))
{
my $c= $metablocks[$metablock_nr]->{$context}->{cnt_ok};
my $e= $metablocks[$metablock_nr]->{$context}->{cnt_error};
my $pct= $c*100.0/$mb_othes;
my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue';
my $ck2= ($e == 0) ? 'lightgreen' : 'lightpink';
printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e);
}
print IDX "</tr>\n";
open (MB, '>:utf8', $mb_html_fp);
print MB <<"EOX";
<html>
<head>
<meta charset="UTF-8" />
<title>othes to utheses migration metablock $metablock_nr</title>
<style>
td { text-align:right; }
</style>
</head>
<body>
<table border="1">
<tr>
<th>block</th>
<th width="100px">othes</th>
<th colspan="3" width="200px">entw</th>
<th colspan="3" width="200px">test</th>
<th colspan="3" width="200px">prod</th>
</tr>
EOX
my $block_start= $metablock_nr*100;
my $block_last= ($metablock_nr == 7) ? 750 : $block_start+99;
for (my $block_nr= $block_start; $block_nr <= $block_last; $block_nr++)
{
next unless (defined ($blocks[$block_nr]));
my $block= $blocks[$block_nr];
my $b_html= sprintf("block_%d.html", $block_nr);
my $b_html_fp= join('/', $base_path, $b_html);
my $b_othes= $block->{othes}->{cnt_total};
print MB "<tr>\n <td><a href=\"$b_html\" target=\"blocks\">", $block_nr, "</a></td>\n <td>", $b_othes, "</td>\n";
foreach my $context (qw(entw test prod))
{
my $c= $block->{$context}->{cnt_ok};
my $e= $block->{$context}->{cnt_error};
my $pct= $c*100.0/$b_othes;
my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue';
my $ck2= ($e == 0) ? 'lightgreen' : 'pink';
printf MB (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e);
}
print MB "</tr>\n";
open (BLOCK, '>:utf8', $b_html_fp);
print BLOCK <<"EOX";
<html>
<head>
<meta charset="UTF-8" />
<title>othes to utheses migration block $block_nr</title>
<style>
td { text-align:right; }
</style>
</head>
<body>
<table border="1">
<tr>
<th width="100px">eprint_id</th>
<th width="100px">entw</th>
<th width="100px">test</th>
<th width="100px">prod</th>
</tr>
EOX
my $items= $block->{items};
my $first= $block_nr*100;
my $last= $first+99;
for (my $eprint_id= $first; $eprint_id <= $last; $eprint_id++)
{
my $x= $eprint_ids[$eprint_id];
next unless (defined ($x));
print BLOCK "<tr>\n <td><a href=\"https://othes.univie.ac.at/$eprint_id/\" target=\"othes\">$eprint_id</a></td>\n";
foreach my $context (qw(entw test prod))
{
unless (exists ($x->{$context}))
{
print BLOCK " <td>&nbsp;</td>\n";
next;
}
my $c= $x->{$context};
my ($error_code, $utheses_id, $errors)= map { $c->{$_} } qw(error_code utheses_id errors);
if ($error_code eq 'ok')
{
my $link;
if ($context eq 'test') { $link= 'https://utheses-frontend.ctest.univie.ac.at/client/?#/view/document/utheses/' . $utheses_id; }
elsif ($context eq 'entw') { $link= 'https://utheses-frontend-entw-utheses.ctest.univie.ac.at/?#/view/document/utheses/' . $utheses_id; }
elsif ($context eq 'prod') { $link= 'unknown'; }
print BLOCK " <td bgcolor=\"lightgreen\"><a href=\"$link\" target=\"$context\">$utheses_id</a></td>\n";
}
else
{
print BLOCK " <td bgcolor=\"lightpink\">$error_code ", Dumper($errors), "</td>\n";
}
}
print BLOCK "</tr>\n";
}
print BLOCK <<"EOX";
</table>
</body>
</html>
EOX
close (BLOCK);
}
print MB <<"EOX";
</table>
</body>
</html>
EOX
close (MB);
}
my $total_othes= $totals{othes}->{cnt_total};
print IDX "<tr>\n <td>total</td>\n <td>", $total_othes, "</td>\n";
foreach my $context (qw(entw test prod))
{
my $c= $totals{$context}->{cnt_ok};
my $e= $totals{$context}->{cnt_error};
my $pct= $c*100.0/$total_othes;
my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue';
my $ck2= ($e == 0) ? 'lightgreen' : 'lightpink';
printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e);
}
print IDX "</tr>\n";
print IDX "</table>\n";
# print __LINE__, " cctab: ", Dumper($cctab);
my $trailer= "<h2>errors</h2>\n". Dumper(\%cnt_errors);
$trailer .= "<h2>warnings</h2>\n". Dumper(\%cnt_warnings);
print __LINE__, " trailer=[",$trailer,"]\n";
print IDX "<h2>errors</h2>\n". Dumper(\%cnt_errors);
print IDX "<h2>warnings</h2>\n". Dumper(\%cnt_warnings);
$trailer .= "<h2>nonpublic_doc_first</h2>\n". Dumper(\@lst_nonpublic_doc_first) if (@lst_nonpublic_doc_first);
$trailer .= "<h2>docs with notes</h2>\n". Dumper(\@lst_docs_with_notes) if (@lst_docs_with_notes);
$trailer .= "<h2>embargo dates</h2>\n". Dumper(\%doc_embargo_dates);
$trailer .= "<h2>errors</h2>\n". Dumper(\%upload_errors);
print IDX "<h2>nonpublic_doc_first</h2>\n". Dumper(\@lst_nonpublic_doc_first) if (@lst_nonpublic_doc_first);
print IDX "<h2>docs with notes</h2>\n". Dumper(\@lst_docs_with_notes) if (@lst_docs_with_notes);
print IDX "<h2>embargo dates</h2>\n". Dumper(\%doc_embargo_dates);
print IDX "<h2>errors</h2>\n". Dumper(\%upload_errors);
$cctab->show_tsv(['othes', @contexts], 'counters.tsv', $trailer);
print IDX <<"EOX";
</body>
</html>
EOX
# show objects which were uploaded to utheses but are no longer present in othes
my @synced_not_found= sort { $a <=> $b } keys %synced;
......@@ -4843,7 +5082,7 @@ sub show_tsv # TODO: rename ...
my $self= shift;
my $counters= shift;
my $fnm_counters= shift || 'counters.tsv';
my $trailer= shift;
# my $trailer= shift;
my @columns= @{$self->{columns}};
my $column_count= @columns;
......@@ -4856,19 +5095,23 @@ sub show_tsv # TODO: rename ...
my @counters= @$counters;
my @heading= ('bucket', @columns, @counters);
my $base_path= '/var/www/ot2ut'; # TODO(maybe): get this from the config...
my $idx_html= join('/', $base_path, 'index.html');
open (IDX, '>:utf8', $idx_html) or die;
print IDX <<EOX;
my $idx_html= join('/', $self->{base_path}, 'buckets.html');
open (BUCKETS, '>:utf8', $idx_html) or die;
print BUCKETS <<"EOX";
<html>
<head>
<meta charset="UTF-8" />
<title>ot2ut bucket list</title>
<style type="text/css">
<style>
td { text-align:right; }
</style>
</head>
<body>
<table border="1">
<tr>
EOX
foreach my $hdr (@heading) { print IDX "<th>$hdr</th>"; }
print IDX <<EOX;
foreach my $hdr (@heading) { print BUCKETS "<th>$hdr</th>"; }
print BUCKETS <<EOX;
</tr>
EOX
......@@ -4893,7 +5136,7 @@ EOX
my $row_info;
if (defined ($annotation))
{
print __LINE__, " bucket has annoation: ", main::Dumper($annotation);
# print __LINE__, " bucket has annotation: ", main::Dumper($annotation);
$row_info= ' bgcolor="green"';
if (exists ($annotation->{bgcolor}))
{
......@@ -4902,12 +5145,16 @@ EOX
}
my $fnm_lst= sprintf("bucket_%d.html", $bucket_nr);
my $path_lst= join ('/', $base_path, $fnm_lst);
my $path_lst= join ('/', $self->{base_path}, $fnm_lst);
open (LST, '>:utf8', $path_lst) or die;
print LST <<"EOX";
<html>
<head>
<meta charset="UTF-8">
<meta charset="UTF-8" />
<title>bucket $bucket_nr</title>
<style>
td { text-align:right; }
</style>
</head>
<body>
<table>
......@@ -4917,7 +5164,7 @@ EOX
foreach my $col (@columns)
{
my $val= shift (@vals);
my $desc= $bucketlist_column_descriptions{$col} || &nbsp;
my $desc= $bucketlist_column_descriptions{$col} || '&nbsp;';
print LST "<tr><th>$col</th><td>$val</td><td>$desc</td></tr>\n";
}
print LST "</table>\n";
......@@ -4928,7 +5175,7 @@ EOX
}
print LST <<"EOX";
<table border="1">
<tr><th>ac_number</th><th>othes</th><th>entw</th><th>test</th><th>prod</th><th>errors</th><th>warnings</th></tr>
<tr><th width="150px">ac_number</th><th width="100px">othes</th><th width="100px">entw</th><th width="100px">test</th><th width="100px">prod</th><th>errors</th><th>warnings</th></tr>
EOX
my $ids= $bucket->{ids};
......@@ -4985,41 +5232,36 @@ EOX
print LST "</tr>\n";
}
print LST <<EOX;
print LST <<"EOX";
</table>
</body>
</html>
EOX
close (LST);
print IDX "<tr$row_info><td><a href=\"$fnm_lst\" target=\"bucket\">$bucket_nr</a></td>";
foreach my $val (@$vals) { print IDX "<td>$val</td>"; }
print BUCKETS "<tr$row_info><td><a href=\"$fnm_lst\" target=\"bucket\">$bucket_nr</a></td>";
foreach my $val (@$vals) { print BUCKETS "<td>$val</td>"; }
ctr: foreach my $ctr (@counters)
{
my $cnt= $bucket->{$ctr};
if (defined ($cnt)) { print IDX "<td>$cnt</td>"; }
else { print IDX "<td>&nbsp;</td>"; }
if (defined ($cnt)) { print BUCKETS "<td>$cnt</td>"; }
else { print BUCKETS "<td>&nbsp;</td>"; }
}
print IDX "</tr>\n";
print BUCKETS "</tr>\n";
}
print IDX <<EOX;
print BUCKETS <<EOX;
</table>
EOX
print IDX "<p>ac_numbers found: ", $cnt_ac_numbers, "</p>\n";
if (defined($trailer))
{
print IDX $trailer;
# print __LINE__, " trailer=[",$trailer,"]\n";
}
print IDX <<EOX;
</body>
</html>
EOX
close (IDX);
close (BUCKETS);
close (TSV);
$bucket_nr;
}
sub enumerate
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment