Skip to content
Snippets Groups Projects
Commit 141adce9 authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

ot2ut optimizations

parent ce7906e6
No related branches found
No related tags found
No related merge requests found
...@@ -108,6 +108,7 @@ my $delete_bad_ac_entries= 0; # TODO: should be an option? ...@@ -108,6 +108,7 @@ my $delete_bad_ac_entries= 0; # TODO: should be an option?
my $show_TODOs= 0; my $show_TODOs= 0;
my $die_nbn_already_defined= 0; my $die_nbn_already_defined= 0;
my $op_mode= 'unknown';
# ====================================================================== # ======================================================================
# BEGIN OT2UT: Othes to Utheses # BEGIN OT2UT: Othes to Utheses
...@@ -155,22 +156,28 @@ my %map_ot2ut_thesis_type= ...@@ -155,22 +156,28 @@ my %map_ot2ut_thesis_type=
phd => 'https://pid.phaidra.org/vocabulary/1PHE-7VMS', # 'Dissertation' phd => 'https://pid.phaidra.org/vocabulary/1PHE-7VMS', # 'Dissertation'
); );
my @ot2ut_synced_columns= qw( eprint_id eprint_status lastmod context ts_upload td_total error_code error_cnt utheses_id uploaded_fnm );
# Fields currently not available:
# container_pid container_result document_pid document_result
# activate_result import_code response_msg import_note
my $ot2ut_eprint_status= 'archive';
my $silent_upload_success= 0;
my $do_upload= 0;
my $no_doi= 0;
my $ignore_errors= 0;
if ($0 eq './ot2ut.pl') { $op_mode= 'ot2ut'; $do_upload= 1; $MAX_SYNC= 1; }
if ($0 eq './oma.pl') { $op_mode= 'oma'; $do_upload= 1; }
# END OT2UT: Othes to Utheses # END OT2UT: Othes to Utheses
# ====================================================================== # ======================================================================
my $op_mode= 'unknown';
my @db_tables= (); my @db_tables= ();
my @PARS; my @PARS;
my $debug_level= 0; my $debug_level= 0;
my $force= 0; my $force= 0;
my $do_upload= 0;
my $db_name; my $db_name;
my $no_doi= 0;
my $ignore_errors= 0;
my $ot2ut_eprint_status= 'archive';
my $silent_upload_success= 0;
if ($0 eq './ot2ut.pl') { $op_mode= 'ot2ut'; $MAX_SYNC= 1; $do_upload= 1; }
my $arg; my $arg;
while (defined ($arg= shift (@ARGV))) while (defined ($arg= shift (@ARGV)))
...@@ -2320,16 +2327,6 @@ sub get_othes_timestamp ...@@ -2320,16 +2327,6 @@ sub get_othes_timestamp
my $row= shift; my $row= shift;
my $name= shift; my $name= shift;
=begin comment
old format...
my $ts= sprintf("%4d-%02d-%02d", map { $row->{$name . '_' . $_} } qw(year month day));
$ts .= sprintf("T%02d%02d%02d", map { $row->{$name . '_' . $_} } qw(hour minute second)) if (exists ($row->{$name . '_hour'}));
=end comment
=cut
my @ts; my @ts;
foreach my $el (qw(year month day)) foreach my $el (qw(year month day))
{ {
...@@ -2361,8 +2358,8 @@ sub doigen ...@@ -2361,8 +2358,8 @@ sub doigen
my $canonical_url= sprintf ("https://othes.univie.ac.at/%s/", $eprintid); my $canonical_url= sprintf ("https://othes.univie.ac.at/%s/", $eprintid);
my $datacite_xml_path= 'othes/DataCite_XML/' . $doi . '.xml'; my $datacite_xml_path= 'othes/DataCite_XML/' . $doi . '.xml';
my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)); # my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second));
# my $lastmod= get_othes_timestamp($row, 'lastmod'); my $lastmod= get_othes_timestamp($row, 'lastmod');
# TODO: utheses and datacite metadata should be considered independently # TODO: utheses and datacite metadata should be considered independently
if (-f $datacite_xml_path) if (-f $datacite_xml_path)
...@@ -2591,6 +2588,7 @@ sub ot2ut ...@@ -2591,6 +2588,7 @@ sub ot2ut
{ {
my @fetch_pars; my @fetch_pars;
push (@fetch_pars, { doi => 1 }) unless ($no_doi); push (@fetch_pars, { doi => 1 }) unless ($no_doi);
print __LINE__, " no_doi=[$no_doi] fetch_pars: ", Dumper(\@fetch_pars); sleep(3);
$res1= $epr->fetch_data('archive', @fetch_pars); $res1= $epr->fetch_data('archive', @fetch_pars);
} }
elsif ($ot2ut_eprint_status eq 'buffer') elsif ($ot2ut_eprint_status eq 'buffer')
...@@ -2604,12 +2602,9 @@ sub ot2ut ...@@ -2604,12 +2602,9 @@ sub ot2ut
} }
else else
{ {
die "no eprints objects found"; print __LINE__, " ATTN: no eprints objects found";
return undef;
} }
# TODO, future ...
# my $res2= $epr->fetch_data('buffer');
# push (@eprint_ids, keys %$res2);
} }
my @synced= (); my @synced= ();
...@@ -2620,7 +2615,7 @@ sub ot2ut ...@@ -2620,7 +2615,7 @@ sub ot2ut
sleep(3); sleep(3);
foreach my $eprint_id (@eprint_ids) foreach my $eprint_id (@eprint_ids)
{ {
last if (!$running); last unless ($running);
last if (defined ($MAX_SYNC) && $cnt_synced >= $MAX_SYNC); last if (defined ($MAX_SYNC) && $cnt_synced >= $MAX_SYNC);
activity({ activity => 'ot2ut'}) if ($last_activity + $activity_period <= time()); activity({ activity => 'ot2ut'}) if ($last_activity + $activity_period <= time());
...@@ -2824,19 +2819,22 @@ old format 2019-11..2020-01 ...@@ -2824,19 +2819,22 @@ old format 2019-11..2020-01
} }
$cnt_synced++; $cnt_synced++;
last unless ($running);
} }
my @columns= qw( eprint_id eprint_status lastmod context ts_upload td_total error_code error_cnt utheses_id uploaded_fnm ); my $res;
if ($cnt_synced)
# Fields currently not available: container_pid container_result document_pid document_result activate_result import_code response_msg import_note {
$res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors";
my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO()); my $fnm= sprintf('ot2ut_%s.tsv', ts_ISO());
Util::Matrix::save_hash_as_csv(\@columns, \@synced, $fnm, "\t", '', "\n", 1); Util::Matrix::save_hash_as_csv(\@ot2ut_synced_columns, \@synced, $fnm, "\t", '', "\n", 1);
my $res= "synced $cnt_synced objects in context $ot2ut_context; $cnt_errors objects with errors";
print __LINE__, " $res, see [$fnm]\n"; print __LINE__, " $res, see [$fnm]\n";
}
else
{
print __LINE__, " $res\n";
$res= "synced no objects in context $ot2ut_context";
}
(\@synced, $res); (\@synced, $res);
} }
...@@ -2883,13 +2881,14 @@ sub generate_utheses_metadata ...@@ -2883,13 +2881,14 @@ sub generate_utheses_metadata
push (@errors, { error => 'no_file' }); push (@errors, { error => 'no_file' });
} }
my $doi= $row->{doi}; my ($doi, $eprint_status)= map { $row->{$_} } qw(doi eprint_status);
push (@errors, { error => 'no_doi', note => 'for now...' }) unless (defined($doi) || $no_doi); push (@errors, { error => 'no_doi', note => 'for now...' }) unless (defined($doi) || $no_doi);
my $utheses_json_path= 'othes/utheses_json/' . $eprintid . '.json'; my $utheses_json_path= 'othes/utheses_json/' . $eprintid . '.json';
my $utheses_upload_result_json_path= 'othes/utheses_json/' . $eprintid . '_upload_result.json'; my $utheses_upload_result_json_path= 'othes/utheses_json/' . $eprintid . '_upload_result.json';
my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)); my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second));
# my $lastmod= get_othes_timestamp($row, 'lastmod'); that's a different format: yyyy-mm-ddTHH:MM:SSZ
if (-f $utheses_json_path) if (-f $utheses_json_path)
{ {
...@@ -2981,7 +2980,7 @@ sub generate_utheses_metadata ...@@ -2981,7 +2980,7 @@ sub generate_utheses_metadata
} }
# $utp->{utheses_id}= is generated during the import, so it cannot be known here # $utp->{utheses_id}= is generated during the import, so it cannot be known here
$utp->{utheses_status}= ($row->{eprint_status} eq 'archive') $utp->{utheses_status}= ($eprint_status eq 'archive')
? 'published' ? 'published'
: 'thesis_doc_added'; # objects in eprint_status "buffer" are 'thesis_doc_added', formerly 'work_in_progress'; : 'thesis_doc_added'; # objects in eprint_status "buffer" are 'thesis_doc_added', formerly 'work_in_progress';
...@@ -3012,7 +3011,7 @@ sub generate_utheses_metadata ...@@ -3012,7 +3011,7 @@ sub generate_utheses_metadata
# TODO: add information about the uploaded files # TODO: add information about the uploaded files
}, },
fields_processed => fields_processed => # TODO: may be obsolete
{ {
(map { $_ => $row->{$_} } qw( thesis_type )), (map { $_ => $row->{$_} } qw( thesis_type )),
# thesis_type: various strings; see %map_ot2ut_thesis_type # thesis_type: various strings; see %map_ot2ut_thesis_type
...@@ -3027,7 +3026,6 @@ sub generate_utheses_metadata ...@@ -3027,7 +3026,6 @@ sub generate_utheses_metadata
# always NULL: department # always NULL: department
# various timestamps # various timestamps
lastmod => $lastmod,
(map { $_ => get_othes_timestamp($row, $_) } qw( datestamp )), (map { $_ => get_othes_timestamp($row, $_) } qw( datestamp )),
}, },
}, },
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment