Skip to content
Snippets Groups Projects
Commit 840675b3 authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

classification of various combinations of eprints database columns and...

classification of various combinations of eprints database columns and resulting flags stored in utheses policies datastructure
parent 9e901bb7
No related branches found
No related tags found
No related merge requests found
......@@ -112,8 +112,8 @@ my $op_mode= 'unknown';
# ======================================================================
# BEGIN OT2UT: Othes to Utheses
my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
# my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize
# my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize
my $oma_sleep_time= 10;
my %map_ot2ut_roles=
......@@ -399,7 +399,7 @@ elsif ($op_mode eq 'debug_filenames')
}
elsif ($op_mode eq 'update-policies')
{
update_policies();
update_policies(@PARS);
}
elsif ($op_mode eq 'policies-stats')
{
......@@ -3363,7 +3363,7 @@ sub get_thesis_data
policies => {
# lock_status => ($row->{sperre} eq 'FALSE') ? 0 : 1, # TRUE or NULL means the object is locked
lock_status => ($row->{sperre} eq 'TRUE') ? 1 : 0, # TRUE means the object is locked; FALSE and NULL means not locked
lock_status => ($row->{sperre} eq 'TRUE') ? 1 : 0, # TRUE means the object is locked; FALSE and NULL means not locked
authorisation_to_use_by_author => ($row->{einverstaendnis} eq 'TRUE') ? 1 : 0,
fulltext_locked => ($row->{full_text_status} eq 'public') ? 0 : 1, # possible values for fulltext_locked: NULL, none, resticted
......@@ -3848,6 +3848,10 @@ sub debug_stkz
sub update_policies
{
my @upd_eprint_ids= @_;
my %upd_eprint_ids= map { $_ => 1 } @upd_eprint_ids;
my $epr= get_eprints_db($cnf);
# print "epr: ", Dumper ($epr);
......@@ -3877,7 +3881,7 @@ sub update_policies
my $utp_info= $col_utp->find_one({ eprint_id => $eprint_id });
if (defined ($utp_info) && $utp_info->{lastmod} eq $x1_lastmod)
if (!exists ($upd_eprint_ids{$eprint_id}) && defined ($utp_info) && $utp_info->{lastmod} eq $x1_lastmod)
{
print __LINE__, " NOTE: already processed, no change: x1_lastmod=[$x1_lastmod] eprint_id=[$eprint_id]\n";
$cnt_unchanged++;
......@@ -3914,7 +3918,7 @@ sub update_policies
utheses_upload_result_json_path => $utheses_upload_result_json_path,
};
map { $full_data->{$_}= $row->{$_} } qw(eprint_status einverstaendnis sperre full_text_status urn doi);
map { $full_data->{$_}= $row->{$_} } qw(eprint_status einverstaendnis sperre full_text_status urn doi ac_nummer);
# $show++ if ($docs->{cnt_embargo} > 0);
# $show++ if ($docs->{cnt_public} > 0 && $docs->{cnt_restricted} > 0);
......@@ -4010,15 +4014,17 @@ sub policies_stats
my $max; #= 1000;
my @contexts= qw(entw test prod);
my (%cnt_errors, %cnt_warnings, %all_errors);
while ($running)
{
my $row_utp= $cur_utp->next();
last unless (defined ($row_utp));
# print __LINE__, " row_utp: ", Dumper($row_utp); last;
my ($eprint_id, $eprint_status, $date_sperre, $einverstaendnis, $lastmod, $full_text_status, $sperre, $cnt_errors, $cnt_warnings, $docs, $utp, $urn, $doi)=
map { $row_utp->{$_} } qw(eprint_id eprint_status date_sperre einverstaendnis lastmod full_text_status sperre cnt_errors cnt_warnings docs ut_public urn doi);
my ($eprint_id, $eprint_status, $date_sperre, $einverstaendnis, $lastmod, $full_text_status, $sperre, $cnt_errors, $cnt_warnings, $errors, $warnings, $docs, $utp, $urn, $doi, $ac_number)=
map { $row_utp->{$_} } qw(eprint_id eprint_status date_sperre einverstaendnis lastmod full_text_status sperre cnt_errors cnt_warnings errors warnings docs ut_public urn doi ac_nummer);
$eprint_id += 0;
my ($cnt_docs, $cnt_public, $cnt_restricted)=
map { $docs->{$_} } qw(cnt_docs cnt_public cnt_restricted);
......@@ -4038,11 +4044,12 @@ sub policies_stats
{
($utptp, $urn, $doi)= map { $utpt->{$_} } qw(policies urn doi);
}
if (defined ($utptp))
{
($lrq, $kwl, $absl, $ftl)= map { $utptp->{$_} } qw(lock_request keywords_locked abstract_locked fulltext_locked);
}
if ($flag_add_utheses_policies)
{
push (@bucket_selectors, $lrq, $kwl, $absl, $ftl);
......@@ -4056,20 +4063,42 @@ sub policies_stats
push (@bucket_selectors, $urn, $doi);
}
my $bucket= $cctab->bucket(@bucket_selectors);
my $bucket= $cctab->bucket(1, @bucket_selectors);
my $info= { ac_number => $ac_number };
if (defined ($errors) && @$errors)
{
$info->{errors}= $errors;
foreach my $error (@$errors)
{
my $error_name= $error->{error};
$cnt_errors{$error_name}++;
$error->{eprint_id}= $eprint_id;
push (@{$all_errors{$error_name}}, $error);
}
}
if (defined ($warnings) && @$warnings)
{
$info->{warnings}= $warnings;
foreach my $warning (@$warnings) { $cnt_warnings{$warning->{warning}}++; }
}
$bucket->{othes}++;
push (@{$bucket->{othes_ids}} => $eprint_id);
$bucket->{ids}->{$eprint_id}= $info;
if (exists ($synced{$eprint_id}))
{
my $s= $synced{$eprint_id};
# print __LINE__, " eprint_id=[$eprint_id]; synced: ", Dumper ($s);
foreach my $context (@contexts)
{
if (exists ($synced{$eprint_id}->{$context}))
if (exists ($s->{$context}))
{
my $x= $synced{$eprint_id}->{$context};
my $x= $s->{$context};
$bucket->{$context}++;
push (@{$bucket->{$context . '_ids'}} => [ $eprint_id, $x->[1] ]);
# push (@{$bucket->{$context . '_ids'}} => [ $eprint_id, $x->[1] ]);
$bucket->{ids}->{$eprint_id}->{$context}= $x->[1];
}
}
delete($synced{$eprint_id});
......@@ -4078,8 +4107,144 @@ sub policies_stats
last if (defined($max) && --$max <= 0);
}
# print __LINE__, " working: cctab: ", Dumper($cctab);
# annotations
my @s1= qw(archive FALSE FALSE no restricted 1 1 0 no);
push (@s1, qw( 0 0 0 1)) if ($flag_add_utheses_policies);
push (@s1, qw(present NULL)) if ($flag_add_identifiers);
my $b1= $cctab->bucket(0, @s1);
if (defined ($b1))
{
$b1->{annotation}= { bgcolor => 'pink', note => 'restricted aber public doc' };
print __LINE__, " special bucket found: ", Dumper($b1);
}
my @s2= qw(archive FALSE FALSE no public 1 1 0 no);
push (@s2, qw(0 0 0 0)) if ($flag_add_utheses_policies);
push (@s2, qw(present NULL)) if ($flag_add_identifiers);
my $b2= $cctab->bucket(0, @s2);
if (defined ($b2))
{
$b2->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis FALSE aber public' };
print __LINE__, " special bucket found: ", Dumper($b2);
}
my @s2b= qw(archive TRUE FALSE no public 1 1 0 no);
push (@s2b, qw(0 1 1 0)) if ($flag_add_utheses_policies);
push (@s2b, qw(present NULL)) if ($flag_add_identifiers);
my $b2b= $cctab->bucket(0, @s2b);
if (defined ($b2b))
{
$b2b->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' };
print __LINE__, " special bucket found: ", Dumper($b2b);
}
my @s2c= qw(archive TRUE NULL no public 1 1 0 no);
push (@s2c, qw(0 1 1 0)) if ($flag_add_utheses_policies);
push (@s2c, qw(present NULL)) if ($flag_add_identifiers);
my $b2c= $cctab->bucket(0, @s2c);
if (defined ($b2c))
{
$b2c->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' };
print __LINE__, " special bucket found: ", Dumper($b2c);
}
my @s3= qw(archive TRUE FALSE no restricted 1 1 0 no);
push (@s3, qw(0 0 0 1)) if ($flag_add_utheses_policies);
push (@s3, qw(present NULL)) if ($flag_add_identifiers);
my $b3= $cctab->bucket(0, @s3);
if (defined ($b3))
{
$b3->{annotation}= { bgcolor => 'pink', note => 'einverstaendnis TRUE aber restricted mit public document' };
print __LINE__, " special bucket found: ", Dumper($b3);
}
my @s4a= qw(archive TRUE NULL no public 1 1 0 no);
push (@s4a, qw(0 0 0 0)) if ($flag_add_utheses_policies);
push (@s4a, qw(present present)) if ($flag_add_identifiers);
my $b4a= $cctab->bucket(0, @s4a);
if (defined ($b4a))
{
$b4a->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' };
print __LINE__, " special bucket found: ", Dumper($b4a);
}
my @s4b= qw(archive TRUE NULL no public 1 1 0 yes);
push (@s4b, qw(0 0 0 0)) if ($flag_add_utheses_policies);
push (@s4b, qw(NULL NULL)) if ($flag_add_identifiers);
my $b4b= $cctab->bucket(0, @s4b);
if (defined ($b4b))
{
$b4b->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' };
print __LINE__, " special bucket found: ", Dumper($b4b);
}
my @s4c= qw(archive TRUE NULL no public 1 1 0 yes);
push (@s4c, qw(0 0 0 0)) if ($flag_add_utheses_policies);
push (@s4c, qw(present present)) if ($flag_add_identifiers);
my $b4c= $cctab->bucket(0, @s4c);
if (defined ($b4c))
{
$b4c->{annotation}= { bgcolor => 'yellow', note => 'sperre NULL? viele errors und warnings; wo kommen diese Daten her?' };
print __LINE__, " special bucket found: ", Dumper($b4c);
}
my @s5a= qw(archive TRUE FALSE no public 1 1 0 no);
push (@s5a, qw(0 0 0 0)) if ($flag_add_utheses_policies);
push (@s5a, qw(present present)) if ($flag_add_identifiers);
my $b5a= $cctab->bucket(0, @s5a);
if (defined ($b5a))
{
$b5a->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones' };
print __LINE__, " special bucket found: ", Dumper($b5a);
}
my @s5b= qw(archive TRUE FALSE no public 1 1 0 no);
push (@s5b, qw(0 0 0 0)) if ($flag_add_utheses_policies);
push (@s5b, qw(present NULL)) if ($flag_add_identifiers);
my $b5b= $cctab->bucket(0, @s5b);
if (defined ($b5b))
{
$b5b->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; DOI wird nachgereicht' };
print __LINE__, " special bucket found: ", Dumper($b5b);
}
my @s5c= qw(archive TRUE FALSE no public 1 1 0 no);
push (@s5c, qw(0 0 0 0)) if ($flag_add_utheses_policies);
push (@s5c, qw(NULL NULL)) if ($flag_add_identifiers);
my $b5c= $cctab->bucket(0, @s5c);
if (defined ($b5c))
{
$b5c->{annotation}= { bgcolor => 'lightgreen', note => 'the good ones; URN fehlt noch, DOI wird nachgereicht' };
print __LINE__, " special bucket found: ", Dumper($b5c);
}
my @s6a= qw(archive FALSE FALSE no restricted 1 0 1 no);
push (@s6a, qw(0 0 0 1)) if ($flag_add_utheses_policies);
push (@s6a, qw(present NULL)) if ($flag_add_identifiers);
my $b6a= $cctab->bucket(0, @s6a);
if (defined ($b6a))
{
$b6a->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext' };
print __LINE__, " special bucket found: ", Dumper($b6a);
}
my @s6b= qw(archive FALSE FALSE no restricted 1 0 1 no);
push (@s6b, qw(0 0 0 1)) if ($flag_add_utheses_policies);
push (@s6b, qw(present present)) if ($flag_add_identifiers);
my $b6b= $cctab->bucket(0, @s6b);
if (defined ($b6b))
{
$b6b->{annotation}= { bgcolor => ' #33a2ff', note => 'kein Volltext; DOI wurde offenbar vorher vergeben' };
print __LINE__, " special bucket found: ", Dumper($b6b);
}
# print __LINE__, " cctab: ", Dumper($cctab);
$cctab->show_tsv(['othes', @contexts]);
my $trailer= "<h2>errors</h2>\n". Dumper(\%cnt_errors);
$trailer .= "<h2>warnings</h2>\n". Dumper(\%cnt_warnings);
print __LINE__, " trailer=[",$trailer,"]\n";
$cctab->show_tsv(['othes', @contexts], 'counters.tsv', $trailer);
# show objects which were uploaded to utheses but are no longer present in othes
my @synced_not_found= sort { $a <=> $b } keys %synced;
......@@ -4087,6 +4252,8 @@ sub policies_stats
{
print __LINE__, " ATTN: ", scalar @synced_not_found, " objects synced to utheses but not present at othes:\n", Dumper (\%synced);
}
Util::JSON::write_json_file('all_errors.json', \%all_errors);
}
sub cleanup_keywords
......@@ -4348,6 +4515,7 @@ sub normalize
sub bucket
{
my $self= shift;
my $create= shift;
my @pars= @_;
my $p= $self->{buckets};
......@@ -4360,7 +4528,12 @@ sub bucket
my $norm= normalize($par);
push (@norm, $norm);
# print __LINE__, " par=[$par] norm=[$norm]\n";
$p->{$norm}= {} unless (exists($p->{$norm}));
unless (exists($p->{$norm}))
{
return undef unless ($create);
$p->{$norm}= {};
}
$p= $p->{$norm};
$v->[$i]->{$norm}++;
......@@ -4371,26 +4544,45 @@ sub bucket
$p;
}
sub show_tsv
sub show_tsv # TODO: rename ...
{
my $self= shift;
my $counters= shift;
my $fnm_counters= shift || 'counters.tsv';
my $trailer= shift;
my @columns= @{$self->{columns}};
my $column_count= @columns;
my $b= $self->{buckets};
my @rows= ();
enumerate(\@rows, $b, $column_count, []);
my @counters= @$counters;
my @heading= ('bucket', @columns, @counters);
my $base_path= '/var/www/ot2ut'; # TODO(maybe): get this from the config...
my $idx_html= join('/', $base_path, 'index.html');
open (IDX, '>:utf8', $idx_html) or die;
print IDX <<EOX;
<title>ot2ut bucket list</title>
<style type="text/css">
</style>
<table border="1">
<tr>
EOX
foreach my $hdr (@heading) { print IDX "<th>$hdr</th>"; }
print IDX <<EOX;
</tr>
EOX
open (TSV, '>:utf8', $fnm_counters) or die;
print __LINE__, " saving bucket_counters to '$fnm_counters'\n";
print TSV join("\t", @heading), "\n";
my $b= $self->{buckets};
my @rows= ();
enumerate(\@rows, $b, $column_count, []);
my $cnt_ac_numbers= 0;
# print __LINE__, " rows: ", main::Dumper(\@rows);
my $bucket_nr= 0;
......@@ -4398,10 +4590,134 @@ sub show_tsv
{
$bucket_nr++;
my ($vals, $bucket)= @$row;
print TSV join("\t", $bucket_nr, @$vals, map { $bucket->{$_} } @counters), "\n";
my @vals= @$vals;
print TSV join("\t", $bucket_nr, @vals, map { $bucket->{$_} } @counters), "\n";
Util::JSON::write_json_file("bucket_${bucket_nr}.json", $bucket);
my $annotation= $bucket->{annotation};
my $row_info;
if (defined ($annotation))
{
print __LINE__, " bucket has annoation: ", main::Dumper($annotation);
$row_info= ' bgcolor="green"';
if (exists ($annotation->{bgcolor}))
{
$row_info= ' bgcolor="'. $annotation->{bgcolor}. '"';
}
}
my $fnm_lst= sprintf("bucket_%d.html", $bucket_nr);
my $path_lst= join ('/', $base_path, $fnm_lst);
open (LST, '>:utf8', $path_lst) or die;
print LST <<"EOX";
<title>bucket $bucket_nr</title>
<table>
EOX
my @ctr= map { $bucket->{$_} } @counters;
foreach my $col (@columns)
{
my $val= shift (@vals);
print LST "<tr><th>$col</th><td>$val</td></tr>\n";
}
print LST "</table>\n";
if (defined ($annotation))
{
print LST "<p>Note: ", $annotation->{note}, "</p>\n";
}
print LST <<"EOX";
<table border="1">
<tr><th>ac_number</th><th>othes</th><th>entw</th><th>test</th><th>prod</th><th>errors</th><th>warnings</th></tr>
EOX
my $ids= $bucket->{ids};
# print __LINE__, " bucket: ", main::Dumper($bucket);
my @ids= CORE::sort { $a <=> $b } map { $_+0 } keys %$ids;
# print __LINE__, " ids: ", join(' ', @ids), "\n";
# my @ids2= sort { $a cmp $b } @ids;
# print __LINE__, " ids2: ", join(' ', @ids2), "\n";
# print LST '<!-- ids: xx '. join(' ', @ids), " -->\n";
foreach my $id (@ids)
{
my $p= $ids->{$id};
print LST "<tr>";
if (exists($p->{ac_number}) && defined ($p->{ac_number}))
{
my $ac= $p->{ac_number};
my $l= "https://ubdata.univie.ac.at/" . $ac;
print LST "<td><a href=\"$l\" target=\"alma\">$ac</a></td>";
$cnt_ac_numbers++;
}
else
{
print LST "<td>&nbsp;</td>";
}
print LST "<td><a href=\"https://othes.univie.ac.at/$id/\" target=\"othes\">$id</a></td>";
# print __LINE__, " p: ", main::Dumper($p);
foreach my $context (qw(entw test prod))
{
unless (exists ($p->{$context}))
{
print LST "<td>&nbsp;</td>";
next;
}
my $other= $p->{$context};
my $link;
if ($context eq 'test') { $link= 'https://utheses-frontend.ctest.univie.ac.at/client/?#/view/document/utheses/' . $other; }
elsif ($context eq 'entw') { $link= 'https://utheses-frontend-entw-utheses.ctest.univie.ac.at/?#/view/document/utheses/' . $other; }
elsif ($context eq 'prod') { $link= 'unknown'; }
print LST "<td><a href=\"$link\" target=\"$context\">$other</a></td>";
}
if (exists ($p->{errors})) { print LST '<td>', main::Dumper($p->{errors}), "</td>\n"; }
else { print LST "<td>&nbsp;</td>\n"; }
if (exists ($p->{warnings})) { print LST '<td>', main::Dumper($p->{warnings}), "</td>\n"; }
else { print LST "<td>&nbsp;</td>\n"; }
print LST "</tr>\n";
}
print LST <<EOX;
</table>
EOX
close (LST);
print IDX "<tr$row_info><td><a href=\"$fnm_lst\" target=\"bucket\">$bucket_nr</a></td>";
foreach my $val (@$vals) { print IDX "<td>$val</td>"; }
ctr: foreach my $ctr (@counters)
{
my $cnt= $bucket->{$ctr};
if (defined ($cnt)) { print IDX "<td>$cnt</td>"; }
else { print IDX "<td>&nbsp;</td>"; }
}
print IDX "</tr>\n";
}
print IDX <<EOX;
</table>
EOX
print IDX "<p>ac_numbers found: ", $cnt_ac_numbers, "</p>\n";
if (defined($trailer))
{
print IDX $trailer;
print __LINE__, " trailer=[",$trailer,"]\n";
}
close (IDX);
close (TSV);
}
sub enumerate
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment