diff --git a/eprints1.pl b/eprints1.pl index e9d8635cf39792c3cb2e9e51c1fca8e25d5dde1d..4f96c50b2dcd0ee08d3beefb80552209b73a9756 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -112,8 +112,8 @@ my $op_mode= 'unknown'; # ====================================================================== # BEGIN OT2UT: Othes to Utheses -my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize -# my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize +# my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize +my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize my $oma_sleep_time= 10; my %map_ot2ut_roles= @@ -399,7 +399,7 @@ elsif ($op_mode eq 'debug_filenames') } elsif ($op_mode eq 'update-policies') { - update_policies(); + update_policies(@PARS); } elsif ($op_mode eq 'policies-stats') { @@ -3363,7 +3363,7 @@ sub get_thesis_data policies => { # lock_status => ($row->{sperre} eq 'FALSE') ? 0 : 1, # TRUE or NULL means the object is locked - lock_status => ($row->{sperre} eq 'TRUE') ? 1 : 0, # TRUE means the object is locked; FALSE and NULL means not locked + lock_status => ($row->{sperre} eq 'TRUE') ? 1 : 0, # TRUE means the object is locked; FALSE and NULL means not locked authorisation_to_use_by_author => ($row->{einverstaendnis} eq 'TRUE') ? 1 : 0, fulltext_locked => ($row->{full_text_status} eq 'public') ? 0 : 1, # possible values for fulltext_locked: NULL, none, resticted @@ -3848,6 +3848,10 @@ sub debug_stkz sub update_policies { + my @upd_eprint_ids= @_; + + my %upd_eprint_ids= map { $_ => 1 } @upd_eprint_ids; + my $epr= get_eprints_db($cnf); # print "epr: ", Dumper ($epr); @@ -3877,7 +3881,7 @@ sub update_policies my $utp_info= $col_utp->find_one({ eprint_id => $eprint_id }); - if (defined ($utp_info) && $utp_info->{lastmod} eq $x1_lastmod) + if (!exists ($upd_eprint_ids{$eprint_id}) && defined ($utp_info) && $utp_info->{lastmod} eq $x1_lastmod) { print __LINE__, " NOTE: already processed, no change: x1_lastmod=[$x1_lastmod] eprint_id=[$eprint_id]\n"; $cnt_unchanged++; @@ -3914,7 +3918,7 @@ sub update_policies utheses_upload_result_json_path => $utheses_upload_result_json_path, }; - map { $full_data->{$_}= $row->{$_} } qw(eprint_status einverstaendnis sperre full_text_status urn doi); + map { $full_data->{$_}= $row->{$_} } qw(eprint_status einverstaendnis sperre full_text_status urn doi ac_nummer); # $show++ if ($docs->{cnt_embargo} > 0); # $show++ if ($docs->{cnt_public} > 0 && $docs->{cnt_restricted} > 0); @@ -4010,15 +4014,17 @@ sub policies_stats my $max; #= 1000; my @contexts= qw(entw test prod); + my (%cnt_errors, %cnt_warnings, %all_errors); while ($running) { my $row_utp= $cur_utp->next(); last unless (defined ($row_utp)); # print __LINE__, " row_utp: ", Dumper($row_utp); last; - my ($eprint_id, $eprint_status, $date_sperre, $einverstaendnis, $lastmod, $full_text_status, $sperre, $cnt_errors, $cnt_warnings, $docs, $utp, $urn, $doi)= - map { $row_utp->{$_} } qw(eprint_id eprint_status date_sperre einverstaendnis lastmod full_text_status sperre cnt_errors cnt_warnings docs ut_public urn doi); + my ($eprint_id, $eprint_status, $date_sperre, $einverstaendnis, $lastmod, $full_text_status, $sperre, $cnt_errors, $cnt_warnings, $errors, $warnings, $docs, $utp, $urn, $doi, $ac_number)= + map { $row_utp->{$_} } qw(eprint_id eprint_status date_sperre einverstaendnis lastmod full_text_status sperre cnt_errors cnt_warnings errors warnings docs ut_public urn doi ac_nummer); + $eprint_id += 0; my ($cnt_docs, $cnt_public, $cnt_restricted)= map { $docs->{$_} } qw(cnt_docs cnt_public cnt_restricted); @@ -4038,11 +4044,12 @@ sub policies_stats { ($utptp, $urn, $doi)= map { $utpt->{$_} } qw(policies urn doi); } + if (defined ($utptp)) { ($lrq, $kwl, $absl, $ftl)= map { $utptp->{$_} } qw(lock_request keywords_locked abstract_locked fulltext_locked); - } + if ($flag_add_utheses_policies) { push (@bucket_selectors, $lrq, $kwl, $absl, $ftl); @@ -4056,20 +4063,42 @@ sub policies_stats push (@bucket_selectors, $urn, $doi); } - my $bucket= $cctab->bucket(@bucket_selectors); + my $bucket= $cctab->bucket(1, @bucket_selectors); + + my $info= { ac_number => $ac_number }; + if (defined ($errors) && @$errors) + { + $info->{errors}= $errors; + foreach my $error (@$errors) + { + my $error_name= $error->{error}; + $cnt_errors{$error_name}++; + + $error->{eprint_id}= $eprint_id; + push (@{$all_errors{$error_name}}, $error); + } + } + if (defined ($warnings) && @$warnings) + { + $info->{warnings}= $warnings; + foreach my $warning (@$warnings) { $cnt_warnings{$warning->{warning}}++; } + } $bucket->{othes}++; - push (@{$bucket->{othes_ids}} => $eprint_id); + $bucket->{ids}->{$eprint_id}= $info; if (exists ($synced{$eprint_id})) { + my $s= $synced{$eprint_id}; + # print __LINE__, " eprint_id=[$eprint_id]; synced: ", Dumper ($s); foreach my $context (@contexts) { - if (exists ($synced{$eprint_id}->{$context})) + if (exists ($s->{$context})) { - my $x= $synced{$eprint_id}->{$context}; + my $x= $s->{$context}; $bucket->{$context}++; - push (@{$bucket->{$context . '_ids'}} => [ $eprint_id, $x->[1] ]); + # push (@{$bucket->{$context . '_ids'}} => [ $eprint_id, $x->[1] ]); + $bucket->{ids}->{$eprint_id}->{$context}= $x->[1]; } } delete($synced{$eprint_id}); @@ -4078,8 +4107,144 @@ sub policies_stats last if (defined($max) && --$max <= 0); } +# print __LINE__, " working: cctab: ", Dumper($cctab); + # annotations + my @s1= qw(archive FALSE FALSE no restricted 1 1 0 no); + push (@s1, qw( 0 0 0 1)) if ($flag_add_utheses_policies); + push (@s1, qw(present NULL)) if ($flag_add_identifiers); + my $b1= $cctab->bucket(0, @s1); + if (defined ($b1)) + { + $b1->{annotation}= { bgcolor => 'pink', note => 'restricted aber public doc' }; + print __LINE__, " special bucket found: ", Dumper($b1); + } + + my @s2= qw(archive FALSE FALSE no public 1 1 0 no); + push (@s2, qw(0 0 0 0)) if ($flag_add_utheses_policies); + push (@s2, qw(present NULL)) if ($flag_add_identifiers); + my $b2= $cctab->bucket(0, @s2); + if (defined ($b2)) + { + $b2->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis FALSE aber public' }; + print __LINE__, " special bucket found: ", Dumper($b2); + } + + my @s2b= qw(archive TRUE FALSE no public 1 1 0 no); + push (@s2b, qw(0 1 1 0)) if ($flag_add_utheses_policies); + push (@s2b, qw(present NULL)) if ($flag_add_identifiers); + my $b2b= $cctab->bucket(0, @s2b); + if (defined ($b2b)) + { + $b2b->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' }; + print __LINE__, " special bucket found: ", Dumper($b2b); + } + + my @s2c= qw(archive TRUE NULL no public 1 1 0 no); + push (@s2c, qw(0 1 1 0)) if ($flag_add_utheses_policies); + push (@s2c, qw(present NULL)) if ($flag_add_identifiers); + my $b2c= $cctab->bucket(0, @s2c); + if (defined ($b2c)) + { + $b2c->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' }; + print __LINE__, " special bucket found: ", Dumper($b2c); + } + + my @s3= qw(archive TRUE FALSE no restricted 1 1 0 no); + push (@s3, qw(0 0 0 1)) if ($flag_add_utheses_policies); + push (@s3, qw(present NULL)) if ($flag_add_identifiers); + my $b3= $cctab->bucket(0, @s3); + if (defined ($b3)) + { + $b3->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis TRUE aber restricted mit public document' }; + print __LINE__, " special bucket found: ", Dumper($b3); + } + + my @s4a= qw(archive TRUE NULL no public 1 1 0 no); + push (@s4a, qw(0 0 0 0)) if ($flag_add_utheses_policies); + push (@s4a, qw(present present)) if ($flag_add_identifiers); + my $b4a= $cctab->bucket(0, @s4a); + if (defined ($b4a)) + { + $b4a->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' }; + print __LINE__, " special bucket found: ", Dumper($b4a); + } + + my @s4b= qw(archive TRUE NULL no public 1 1 0 yes); + push (@s4b, qw(0 0 0 0)) if ($flag_add_utheses_policies); + push (@s4b, qw(NULL NULL)) if ($flag_add_identifiers); + my $b4b= $cctab->bucket(0, @s4b); + if (defined ($b4b)) + { + $b4b->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' }; + print __LINE__, " special bucket found: ", Dumper($b4b); + } + + my @s4c= qw(archive TRUE NULL no public 1 1 0 yes); + push (@s4c, qw(0 0 0 0)) if ($flag_add_utheses_policies); + push (@s4c, qw(present present)) if ($flag_add_identifiers); + my $b4c= $cctab->bucket(0, @s4c); + if (defined ($b4c)) + { + $b4c->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' }; + print __LINE__, " special bucket found: ", Dumper($b4c); + } + + my @s5a= qw(archive TRUE FALSE no public 1 1 0 no); + push (@s5a, qw(0 0 0 0)) if ($flag_add_utheses_policies); + push (@s5a, qw(present present)) if ($flag_add_identifiers); + my $b5a= $cctab->bucket(0, @s5a); + if (defined ($b5a)) + { + $b5a->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones' }; + print __LINE__, " special bucket found: ", Dumper($b5a); + } + + my @s5b= qw(archive TRUE FALSE no public 1 1 0 no); + push (@s5b, qw(0 0 0 0)) if ($flag_add_utheses_policies); + push (@s5b, qw(present NULL)) if ($flag_add_identifiers); + my $b5b= $cctab->bucket(0, @s5b); + if (defined ($b5b)) + { + $b5b->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; DOI wird nachgereicht' }; + print __LINE__, " special bucket found: ", Dumper($b5b); + } + + my @s5c= qw(archive TRUE FALSE no public 1 1 0 no); + push (@s5c, qw(0 0 0 0)) if ($flag_add_utheses_policies); + push (@s5c, qw(NULL NULL)) if ($flag_add_identifiers); + my $b5c= $cctab->bucket(0, @s5c); + if (defined ($b5c)) + { + $b5c->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; URN fehlt noch, DOI wird nachgereicht' }; + print __LINE__, " special bucket found: ", Dumper($b5c); + } + + my @s6a= qw(archive FALSE FALSE no restricted 1 0 1 no); + push (@s6a, qw(0 0 0 1)) if ($flag_add_utheses_policies); + push (@s6a, qw(present NULL)) if ($flag_add_identifiers); + my $b6a= $cctab->bucket(0, @s6a); + if (defined ($b6a)) + { + $b6a->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext' }; + print __LINE__, " special bucket found: ", Dumper($b6a); + } + + my @s6b= qw(archive FALSE FALSE no restricted 1 0 1 no); + push (@s6b, qw(0 0 0 1)) if ($flag_add_utheses_policies); + push (@s6b, qw(present present)) if ($flag_add_identifiers); + my $b6b= $cctab->bucket(0, @s6b); + if (defined ($b6b)) + { + $b6b->{annotation}= { bgcolor => ' #33a2ff', note => 'kein Volltext; DOI wurde offenbar vorher vergeben' }; + print __LINE__, " special bucket found: ", Dumper($b6b); + } + # print __LINE__, " cctab: ", Dumper($cctab); - $cctab->show_tsv(['othes', @contexts]); + my $trailer= "<h2>errors</h2>\n". Dumper(\%cnt_errors); + $trailer .= "<h2>warnings</h2>\n". Dumper(\%cnt_warnings); + print __LINE__, " trailer=[",$trailer,"]\n"; + + $cctab->show_tsv(['othes', @contexts], 'counters.tsv', $trailer); # show objects which were uploaded to utheses but are no longer present in othes my @synced_not_found= sort { $a <=> $b } keys %synced; @@ -4087,6 +4252,8 @@ sub policies_stats { print __LINE__, " ATTN: ", scalar @synced_not_found, " objects synced to utheses but not present at othes:\n", Dumper (\%synced); } + + Util::JSON::write_json_file('all_errors.json', \%all_errors); } sub cleanup_keywords @@ -4348,6 +4515,7 @@ sub normalize sub bucket { my $self= shift; + my $create= shift; my @pars= @_; my $p= $self->{buckets}; @@ -4360,7 +4528,12 @@ sub bucket my $norm= normalize($par); push (@norm, $norm); # print __LINE__, " par=[$par] norm=[$norm]\n"; - $p->{$norm}= {} unless (exists($p->{$norm})); + + unless (exists($p->{$norm})) + { + return undef unless ($create); + $p->{$norm}= {}; + } $p= $p->{$norm}; $v->[$i]->{$norm}++; @@ -4371,26 +4544,45 @@ sub bucket $p; } -sub show_tsv +sub show_tsv # TODO: rename ... { my $self= shift; my $counters= shift; my $fnm_counters= shift || 'counters.tsv'; + my $trailer= shift; my @columns= @{$self->{columns}}; my $column_count= @columns; + my $b= $self->{buckets}; + my @rows= (); + + enumerate(\@rows, $b, $column_count, []); + my @counters= @$counters; my @heading= ('bucket', @columns, @counters); + my $base_path= '/var/www/ot2ut'; # TODO(maybe): get this from the config... + my $idx_html= join('/', $base_path, 'index.html'); + open (IDX, '>:utf8', $idx_html) or die; + print IDX <<EOX; +<title>ot2ut bucket list</title> +<style type="text/css"> + +</style> +<table border="1"> + <tr> +EOX + foreach my $hdr (@heading) { print IDX "<th>$hdr</th>"; } + print IDX <<EOX; + </tr> +EOX + open (TSV, '>:utf8', $fnm_counters) or die; print __LINE__, " saving bucket_counters to '$fnm_counters'\n"; print TSV join("\t", @heading), "\n"; - my $b= $self->{buckets}; - my @rows= (); - - enumerate(\@rows, $b, $column_count, []); + my $cnt_ac_numbers= 0; # print __LINE__, " rows: ", main::Dumper(\@rows); my $bucket_nr= 0; @@ -4398,10 +4590,134 @@ sub show_tsv { $bucket_nr++; my ($vals, $bucket)= @$row; - print TSV join("\t", $bucket_nr, @$vals, map { $bucket->{$_} } @counters), "\n"; + my @vals= @$vals; + print TSV join("\t", $bucket_nr, @vals, map { $bucket->{$_} } @counters), "\n"; Util::JSON::write_json_file("bucket_${bucket_nr}.json", $bucket); + + my $annotation= $bucket->{annotation}; + my $row_info; + if (defined ($annotation)) + { + print __LINE__, " bucket has annoation: ", main::Dumper($annotation); + $row_info= ' bgcolor="green"'; + if (exists ($annotation->{bgcolor})) + { + $row_info= ' bgcolor="'. $annotation->{bgcolor}. '"'; + } + } + + my $fnm_lst= sprintf("bucket_%d.html", $bucket_nr); + my $path_lst= join ('/', $base_path, $fnm_lst); + open (LST, '>:utf8', $path_lst) or die; + print LST <<"EOX"; +<title>bucket $bucket_nr</title> +<table> +EOX + + my @ctr= map { $bucket->{$_} } @counters; + foreach my $col (@columns) + { + my $val= shift (@vals); + print LST "<tr><th>$col</th><td>$val</td></tr>\n"; + } + print LST "</table>\n"; + + if (defined ($annotation)) + { + print LST "<p>Note: ", $annotation->{note}, "</p>\n"; + } + print LST <<"EOX"; +<table border="1"> +<tr><th>ac_number</th><th>othes</th><th>entw</th><th>test</th><th>prod</th><th>errors</th><th>warnings</th></tr> +EOX + + my $ids= $bucket->{ids}; + # print __LINE__, " bucket: ", main::Dumper($bucket); + my @ids= CORE::sort { $a <=> $b } map { $_+0 } keys %$ids; + # print __LINE__, " ids: ", join(' ', @ids), "\n"; + # my @ids2= sort { $a cmp $b } @ids; + # print __LINE__, " ids2: ", join(' ', @ids2), "\n"; + # print LST '<!-- ids: xx '. join(' ', @ids), " -->\n"; + foreach my $id (@ids) + { + my $p= $ids->{$id}; + print LST "<tr>"; + + if (exists($p->{ac_number}) && defined ($p->{ac_number})) + { + my $ac= $p->{ac_number}; + my $l= "https://ubdata.univie.ac.at/" . $ac; + print LST "<td><a href=\"$l\" target=\"alma\">$ac</a></td>"; + $cnt_ac_numbers++; + } + else + { + print LST "<td> </td>"; + } + + print LST "<td><a href=\"https://othes.univie.ac.at/$id/\" target=\"othes\">$id</a></td>"; + # print __LINE__, " p: ", main::Dumper($p); + + foreach my $context (qw(entw test prod)) + { + unless (exists ($p->{$context})) + { + print LST "<td> </td>"; + next; + } + + my $other= $p->{$context}; + my $link; + + if ($context eq 'test') { $link= 'https://utheses-frontend.ctest.univie.ac.at/client/?#/view/document/utheses/' . $other; } + elsif ($context eq 'entw') { $link= 'https://utheses-frontend-entw-utheses.ctest.univie.ac.at/?#/view/document/utheses/' . $other; } + elsif ($context eq 'prod') { $link= 'unknown'; } + + print LST "<td><a href=\"$link\" target=\"$context\">$other</a></td>"; + } + + if (exists ($p->{errors})) { print LST '<td>', main::Dumper($p->{errors}), "</td>\n"; } + else { print LST "<td> </td>\n"; } + + if (exists ($p->{warnings})) { print LST '<td>', main::Dumper($p->{warnings}), "</td>\n"; } + else { print LST "<td> </td>\n"; } + + print LST "</tr>\n"; + } + + print LST <<EOX; +</table> +EOX + close (LST); + + print IDX "<tr$row_info><td><a href=\"$fnm_lst\" target=\"bucket\">$bucket_nr</a></td>"; + foreach my $val (@$vals) { print IDX "<td>$val</td>"; } + + ctr: foreach my $ctr (@counters) + { + my $cnt= $bucket->{$ctr}; + if (defined ($cnt)) { print IDX "<td>$cnt</td>"; } + else { print IDX "<td> </td>"; } + } + print IDX "</tr>\n"; + } + + print IDX <<EOX; +</table> +EOX + + print IDX "<p>ac_numbers found: ", $cnt_ac_numbers, "</p>\n"; + + if (defined($trailer)) + { + print IDX $trailer; + print __LINE__, " trailer=[",$trailer,"]\n"; + } + + close (IDX); + close (TSV); } sub enumerate