diff --git a/eprints1.pl b/eprints1.pl index f888cc56541004185e87d6b74b315fc282d62dbf..e28851a26871b37e8f653a9802b9dd472b62f597 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -114,8 +114,8 @@ my $op_mode= 'unknown'; # ====================================================================== # BEGIN OT2UT: Othes to Utheses # my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize -my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize -# my $ot2ut_context= 'ot2ut-prod'; # TODO: parametrize +# my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize +my $ot2ut_context= 'ot2ut-prod'; # TODO: parametrize my $oma_sleep_time= 10; my %map_ot2ut_roles= @@ -141,8 +141,11 @@ my %map_ot2ut_json_columns= ubw_intern_2 => 'utheses_note_2', ); -my $fnm_utheses_faculty_map= '/home/gg/work/gitlab.uv/zid-services/utheses-admin-tools/faculty/univie.json'; +# my $fnm_utheses_faculty_map_old= '/home/gg/work/gitlab.uv/zid-services/utheses-admin-tools/faculty/univie.json'; +my $fnm_utheses_departments_map_new= '/home/gg/work/gitlab.uv/zid-services/utheses/uthesesAPI/UthesesAPI/Resources/Departments.json'; + my $utheses_faculty_map; +my $utheses_faculty_map_loaded= 0; # NOTE: alternative module (currently without persistent department identifier) # https://gitlab.uv.univie.ac.at/zid-services/hopla/blob/dev-utheses/lib/utheses_data.pm @@ -2805,17 +2808,6 @@ sub ot2ut my $col_att= $db_ot2ut->get_collection('attachments'); my $col_policy_utheses= $db_ot2ut->get_collection('policy.utheses'); - unless (defined ($utheses_faculty_map)) - { - my $utheses_faculty_list= Util::JSON::read_json_file($fnm_utheses_faculty_map); - # print __LINE__, " utheses_faculty_list: ", Dumper($utheses_faculty_list); - my %utheses_faculty_map= map { $_->{oracle_id} => $_ } @$utheses_faculty_list; - $utheses_faculty_map= \%utheses_faculty_map; - - # Util::JSON::write_json_file('@facultymap.json', $utheses_faculty_map); - # print __LINE__, " utheses_faculty_map: ", Dumper($utheses_faculty_map); exit; - } - # find items to upload if (!@eprint_ids) { @@ -3255,8 +3247,13 @@ sub generate_utheses_metadata my ($eprint_status, $fileinfo, $dir)= map { $row->{$_} } qw(eprint_status fileinfo dir); - my $docs= get_documents($epr_db, $eprintid, $dir); + my ($docs, $doc_errors)= get_documents($epr_db, $eprintid, $dir); print __LINE__, " docs: ", Dumper($docs); + if (@$doc_errors) + { + print __LINE__, " doc_errors: ", Dumper($doc_errors); + push (@errors, @$doc_errors); + } my ($lang_pdf, $files)= analyze_files($eprintid, $fileinfo, $dir); print __LINE__, " lang_pdf=[$lang_pdf] files: ", Dumper($files); @@ -3330,28 +3327,56 @@ sub generate_utheses_metadata $utp->{studies}->{program_designation}= $s; } + unless ($utheses_faculty_map_loaded) + { + +=begin comment + +the structure of json file has slightly changed + my $utheses_faculty_list= Util::JSON::read_json_file($fnm_utheses_faculty_map_old); + # print __LINE__, " utheses_faculty_list: ", Dumper($utheses_faculty_list); + my %utheses_faculty_map= map { $_->{oracle_id} => $_ } @$utheses_faculty_list; + +=end comment +=cut + + # 2020-09-21: new structure; this includes historic faculty codes + my $utheses_departments_list= Util::JSON::read_json_file($fnm_utheses_departments_map_new); + # print __LINE__, " utheses_faculty_list: ", Dumper($utheses_faculty_list); + my %utheses_faculty_map= map { $_->{oracle_id} => $_ } @{$utheses_departments_list->{departments}}; + + $utheses_faculty_map= \%utheses_faculty_map; + Util::JSON::write_json_file('@facultymap.json', $utheses_faculty_map); + # print __LINE__, " utheses_faculty_map: ", Dumper($utheses_faculty_map); exit; + $utheses_faculty_map_loaded= 1; + } + { my $faculty_code_str= $row->{fakultaet}; # like A32 etc..., but also NULL and '' + $faculty_code_str= 'A108' if ($faculty_code_str eq 'PGC'); + + my $faculty_info; if ($faculty_code_str =~ m#^A(\d+)$#) { my $faculty_code_num= $1; - my $faculty_info= $utheses_faculty_map->{$faculty_code_num}; - if (defined ($faculty_info)) - { - $utp->{studies}->{department}= - { - name => $faculty_info->{de}, - id => $faculty_info->{id}, - }; - } - else + $faculty_info= $utheses_faculty_map->{$faculty_code_num}; + } + elsif ($faculty_code_str =~ m#^[B-H]\d*$#) # e.g.: B, D2, F, F2, G, G2, H, H2 + { # historic codes, prepend 'hist-' to the string; this is how the reference file stores it. + $faculty_info= $utheses_faculty_map->{'hist-' . $faculty_code_str}; + } + + if (defined ($faculty_info)) + { + $utp->{studies}->{department}= { - push (@mapping_errors, { error => 'faculty_no_match', fakultaet => $faculty_code_str }); - } + name => $faculty_info->{de}, + id => $faculty_info->{id}, + }; } else { - push (@mapping_errors, { error => 'faculty_unknown', fakultaet => $faculty_code_str }); + push (@mapping_errors, { error => 'faculty_no_match', fakultaet => $faculty_code_str }); } } @@ -3432,6 +3457,8 @@ sub generate_utheses_metadata if (@mapping_errors) { $utp->{import}->{info}->{mapping_errors}= \@mapping_errors; + push (@errors, @mapping_errors); + # push (@warnings, @mapping_errors); } my ($creators_xml, $creators_json)= $epr->get_creators($eprintid); @@ -3486,6 +3513,8 @@ sub get_documents my $eprint_id= shift; my $dir= shift; + my @errors= (); + my $document_rows= $epr_db->get_all_x('document', ['eprintid=?', $eprint_id]); # print __LINE__, " document_rows: ", Dumper($document_rows); @@ -3527,6 +3556,12 @@ sub get_documents my (@stat_doc)= stat($path_doc); $row->{path_doc}= $path_doc; $row->{stat_doc}= \@stat_doc; + unless (@stat_doc) + { + my $ec= { error => 'doc_missing', path_doc => $path_doc, rev_dir => $rev_dir, dir => $dir }; + push (@errors, $ec); + $row->{error}= $ec; + } if (defined ($row->{placement}) && $row->{pos} != $row->{placement}) { @@ -3588,7 +3623,7 @@ sub get_documents $res->{documents}= \@docs2, - $res; + return ($res, \@errors); } sub get_history @@ -4114,7 +4149,7 @@ sub analyze_files $format= 'pdf' if ($icon eq '/style/images/fileicons/pdf.png' || $icon eq '/style/images/fileicons/application_pdf.png'); $filepath =~ s#%([\dA-Fa-f]{2})#chr(hex($1))#ge; # filenames are URL encoded, see 19072 for an example - print __LINE__, " vor utf8::decode filepath=[$filepath]\n"; + print __LINE__, " vor utf8::decode filepath=[$filepath]\n"; utf8::decode($filepath); print __LINE__, " nach utf8::decode filepath=[$filepath]\n"; @@ -4500,7 +4535,7 @@ sub update_policies $db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut)); my $col_utp= $db_ot2ut->get_collection('utheses.policies'); - if (defined ($refresh_oldest_policies) && $refresh_oldest_policies > 0 && $refresh_oldest_policies <= 1000) + if (defined ($refresh_oldest_policies) && $refresh_oldest_policies > 0 && $refresh_oldest_policies <= 3000) { my $c1= $col_utp->find( {}, { '_id' => 1, eprint_id => 1, generated => 1 }); $c1->sort( { generated => 1 } ); @@ -4601,8 +4636,9 @@ sub update_policies print __LINE__, ' ', '='x70, "\n"; } - my @removed_from_mysql= $col_utp->find( { eprint_id => { '$nin' => \@eprint_ids_in_mysql } }, { '_id' => 1, eprint_id => 1 })->all(); - print __LINE__, " removed from mysql ", Dumper(\@removed_from_mysql); + my @removed_from_othes= $col_utp->find( { eprint_id => { '$nin' => \@eprint_ids_in_mysql } }, { '_id' => 1, eprint_id => 1 })->all(); + print __LINE__, " removed from othes ", Dumper(\@removed_from_othes); + my @eprint_id_removed_from_othes= map { $_->{eprint_id} } @removed_from_othes; my %stats= ( @@ -4616,6 +4652,7 @@ sub update_policies lst_inserted => \@lst_inserted, lst_updated => \@lst_updated, lst_unchanged => \@lst_unchanged, + removed_from_othes => \@eprint_id_removed_from_othes, ); my $col_stats= $db_ot2ut->get_collection('statistics'); @@ -4849,7 +4886,7 @@ sub policies_stats my $b2b= $cctab->bucket(0, @s2b); if (defined ($b2b)) { - $b2b->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' }; + $b2b->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public; bekommt keine DOI' }; # print __LINE__, " special bucket found: ", Dumper($b2b); } @@ -4859,7 +4896,7 @@ sub policies_stats my $b2c= $cctab->bucket(0, @s2c); if (defined ($b2c)) { - $b2c->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' }; + $b2c->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public; bekommt keine DOI' }; # print __LINE__, " special bucket found: ", Dumper($b2c); } # ZZZZ @@ -5029,7 +5066,7 @@ EOX my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue'; my $ck2= ($e == 0) ? 'lightgreen' : 'lightpink'; - printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); + printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%7.4f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); } print IDX "</tr>\n"; @@ -5076,7 +5113,7 @@ EOX my $pct= $c*100.0/$b_othes; my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue'; my $ck2= ($e == 0) ? 'lightgreen' : 'pink'; - printf MB (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); + printf MB (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%7.4f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); } print MB "</tr>\n"; @@ -5125,9 +5162,10 @@ EOX { my $link; - if ($context eq 'test') { $link= 'https://utheses-frontend.ctest.univie.ac.at/client/?#/view/document/utheses/' . $utheses_id; } - elsif ($context eq 'entw') { $link= 'https://utheses-frontend-entw-utheses.ctest.univie.ac.at/?#/view/document/utheses/' . $utheses_id; } - elsif ($context eq 'prod') { $link= 'unknown'; } + if ($context eq 'test') { $link= 'https://utheses-frontend.ctest.univie.ac.at/client/#/detail/' . $utheses_id . '/'; } + elsif ($context eq 'entw') { $link= 'https://utheses-frontend-entw-utheses.ctest.univie.ac.at/#/detail/' . $utheses_id . '/'; } + # elsif ($context eq 'prod') { $link= 'https://utheses-admin-ui-utheses-prod.cprod.univie.ac.at/client#/view/document/utheses/' . $utheses_id . '/'; } + elsif ($context eq 'prod') { $link= 'https://utheses-admin-ui-utheses-prod.cprod.univie.ac.at/client#/detail/' . $utheses_id . '/'; } print BLOCK " <td bgcolor=\"lightgreen\"><a href=\"$link\" target=\"$context\">$utheses_id</a></td>\n"; } @@ -5166,7 +5204,7 @@ EOX my $pct= ($total_othes == 0) ? 0 : $c*100.0/$total_othes; my $ck1= ($pct == 100.0) ? 'lightgreen' : 'lightblue'; my $ck2= ($e == 0) ? 'lightgreen' : 'lightpink'; - printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%5.2f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); + printf IDX (" <td bgcolor=\"$ck1\">%d</td><td bgcolor=\"$ck1\">%7.4f %%</td><td bgcolor=\"$ck2\">%d</td>\n", $c, $pct, $e); } print IDX "</tr>\n";