diff --git a/eprints1.pl b/eprints1.pl index 6043471a15180f1ff4055a4d271b4b9982661673..3bb413fc39fa28d192e592e6fde0f0625ba20d54 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -108,6 +108,7 @@ my $delete_bad_ac_entries= 0; # TODO: should be an option? my $show_TODOs= 0; my $die_nbn_already_defined= 0; +my $op_mode= 'unknown'; # ====================================================================== # BEGIN OT2UT: Othes to Utheses @@ -155,22 +156,28 @@ my %map_ot2ut_thesis_type= phd => 'https://pid.phaidra.org/vocabulary/1PHE-7VMS', # 'Dissertation' ); +my @ot2ut_synced_columns= qw( eprint_id eprint_status lastmod context ts_upload td_total error_code error_cnt utheses_id uploaded_fnm ); +# Fields currently not available: +# container_pid container_result document_pid document_result +# activate_result import_code response_msg import_note + +my $ot2ut_eprint_status= 'archive'; +my $silent_upload_success= 0; +my $do_upload= 0; +my $no_doi= 0; +my $ignore_errors= 0; + +if ($0 eq './ot2ut.pl') { $op_mode= 'ot2ut'; $do_upload= 1; $MAX_SYNC= 1; } +if ($0 eq './oma.pl') { $op_mode= 'oma'; $do_upload= 1; } + # END OT2UT: Othesis to Utheses # ====================================================================== -my $op_mode= 'unknown'; my @db_tables= (); my @PARS; my $debug_level= 0; my $force= 0; -my $do_upload= 0; my $db_name; -my $no_doi= 0; -my $ignore_errors= 0; -my $ot2ut_eprint_status= 'archive'; -my $silent_upload_success= 0; - -if ($0 eq './ot2ut.pl') { $op_mode= 'ot2ut'; $MAX_SYNC= 1; $do_upload= 1; } my $arg; while (defined ($arg= shift (@ARGV))) @@ -2320,16 +2327,6 @@ sub get_othes_timestamp my $row= shift; my $name= shift; -=begin comment - -old format... - my $ts= sprintf("%4d-%02d-%02d", map { $row->{$name . '_' . $_} } qw(year month day)); - - $ts .= sprintf("T%02d%02d%02d", map { $row->{$name . '_' . $_} } qw(hour minute second)) if (exists ($row->{$name . 
'_hour'})); - -=end comment -=cut - my @ts; foreach my $el (qw(year month day)) { @@ -2361,8 +2358,8 @@ sub doigen my $canonical_url= sprintf ("https://othes.univie.ac.at/%s/", $eprintid); my $datacite_xml_path= 'othes/DataCite_XML/' . $doi . '.xml'; - my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)); - # my $lastmod= get_othes_timestamp($row, 'lastmod'); + # my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)); + my $lastmod= get_othes_timestamp($row, 'lastmod'); # TODO: utheses and datacite metadata should be considered independently if (-f $datacite_xml_path) @@ -2591,6 +2588,7 @@ sub ot2ut { my @fetch_pars; push (@fetch_pars, { doi => 1 }) unless ($no_doi); + print __LINE__, " no_doi=[$no_doi] fetch_pars: ", Dumper(\@fetch_pars); sleep(3); $res1= $epr->fetch_data('archive', @fetch_pars); } elsif ($ot2ut_eprint_status eq 'buffer') @@ -2604,12 +2602,9 @@ sub ot2ut } else { - die "no eprints objects found"; + print __LINE__, " ATTN: no eprints objects found"; + return undef; } - - # TODO, future ... 
- # my $res2= $epr->fetch_data('buffer'); - # push (@eprint_ids, keys %$res2); } my @synced= (); @@ -2620,7 +2615,7 @@ sub ot2ut sleep(3); foreach my $eprint_id (@eprint_ids) { - last if (!$running); + last unless ($running); last if (defined ($MAX_SYNC) && $cnt_synced >= $MAX_SYNC); activity({ activity => 'ot2ut'}) if ($last_activity + $activity_period <= time()); @@ -2824,19 +2819,22 @@ old format 2019-11..2020-01 } $cnt_synced++; - - last unless ($running); } - my @columns= qw( eprint_id eprint_status lastmod context ts_upload td_total error_code error_cnt utheses_id uploaded_fnm ); - -# Fields currently not available: container_pid container_result document_pid document_result activate_result import_code response_msg import_note - - my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO()); - Util::Matrix::save_hash_as_csv(\@columns, \@synced, $fnm, "\t", '', "\n", 1); + my $res; + if ($cnt_synced) + { + $res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors"; + my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO()); + Util::Matrix::save_hash_as_csv(\@ot2ut_synced_columns, \@synced, $fnm, "\t", '', "\n", 1); + print __LINE__, " $res, see [$fnm]\n"; + } + else + { + print __LINE__, " $res\n"; + $res= "synced no objects in context $ot2ut_context"; + } - my $res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors"; - print __LINE__, " $res, see [$fnm]\n"; (\@synced, $res); } @@ -2883,13 +2881,14 @@ sub generate_utheses_metadata push (@errors, { error => 'no_file' }); } - my $doi= $row->{doi}; + my ($doi, $eprint_status)= map { $row->{$_} } qw(doi eprint_status); push (@errors, { error => 'no_doi', note => 'for now...' }) unless (defined($doi) || $no_doi); my $utheses_json_path= 'othes/utheses_json/' . $eprintid . '.json'; my $utheses_upload_result_json_path= 'othes/utheses_json/' . $eprintid . 
'_upload_result.json'; my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)); + # my $lastmod= get_othes_timestamp($row, 'lastmod'); that's a different format: yyyy-mm-ddTHH:MM:SSZ if (-f $utheses_json_path) { @@ -2981,7 +2980,7 @@ sub generate_utheses_metadata } # $utp->{utheses_id}= wird erzeugt beim Import, kennma ned wissn - $utp->{utheses_status}= ($row->{eprint_status} eq 'archive') + $utp->{utheses_status}= ($eprint_status eq 'archive') ? 'published' : 'thesis_doc_added'; # objects in eprint_status "buffer" are 'thesis_doc_added', formerly 'work_in_progress'; @@ -3012,7 +3011,7 @@ sub generate_utheses_metadata # TODO: add information about the uploaded files }, - fields_processed => + fields_processed => # TODO: may be obsolete { (map { $_ => $row->{$_} } qw( thesis_type )), # thesis_type: various strings; see %map_ot2ut_thesis_type @@ -3027,7 +3026,6 @@ sub generate_utheses_metadata # always NULL: department # various timestamps - lastmod => $lastmod, (map { $_ => get_othes_timestamp($row, $_) } qw( datestamp )), }, },