diff --git a/eprints1.pl b/eprints1.pl index cb21900ffbcb172b6e1708ef96df6220fd9a9cad..2a94e197d700b8b9f4a19810dbf5d8683710f3e2 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -113,8 +113,8 @@ my $op_mode= 'unknown'; # ====================================================================== # BEGIN OT2UT: Othes to Utheses -my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize -# my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize +# my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize +my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize my $oma_sleep_time= 10; my %map_ot2ut_roles= @@ -203,6 +203,8 @@ my %bucketlist_column_descriptions= prod => 'Zahl der Objekte in utheses Prod', ); +my %ot2ut_sync_anyway= map { $_ => 1 } qw(33905); # these should be synced, even if they were already marked as ok + my %doc_embargo_dates; # END OT2UT: Othes to Utheses # ====================================================================== @@ -289,6 +291,9 @@ my $sleep_urn_request= 3; my $running= 1; $SIG{INT}= sub { $running= 0; }; +my $serving_requests= 1; +$SIG{USR1}= sub { $serving_requests= 0; }; + # Agent mode my $db_ot2ut; my $col_msg; @@ -2509,7 +2514,7 @@ sub oma activity({ activity => 'listening' }); # send_message("oma is listening..."); - REQ: while($running) + REQ: while($running && $serving_requests) { my $row= $col_req->find_one({ agent => 'oma', status => 'new' }); unless (defined ($row)) @@ -2547,6 +2552,22 @@ sub oma $new_status= 'done' if (@$synced); } + elsif ($row->{action} eq 'send_block') + { + my $block= $row->{block}; + $silent_upload_success= 1; + $ignore_errors= 0; + + $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }}); + my $msg= "send_block: sending objects of block $block to $ot2ut_context"; + activity({ activity => 'send_batch', msg => $msg}); + send_message($msg); + + my ($synced, $res)= ot2ut('block' => $block); + send_message("send_block: $res"); + + $new_status= 'done' if (@$synced); + } elsif ($row->{action} eq 
'send_ids') { $ignore_errors= 1; @@ -2631,7 +2652,7 @@ sub ot2ut } # find items to upload - unless (@eprint_ids) + if (!@eprint_ids) { print __LINE__, " fetching data\n"; my $res1; @@ -2655,6 +2676,13 @@ sub ot2ut return undef; } } + elsif ($eprint_ids[0] eq 'block') + { + my $block_num= $eprint_ids[1]; + @eprint_ids= $epr->get_eprint_ids_for_block($block_num); + print __LINE__, " block_num=[$block_num], eprint_ids: ", join(' ', @eprint_ids), "\n"; + # return ([], 'dummy action, nothing happened...'); + } my @synced= (); my $cnt_synced= 0; @@ -2662,7 +2690,7 @@ sub ot2ut my $cnt_eprint_ids= @eprint_ids; print __LINE__, " ot2ut: ot2ut_eprint_status=$ot2ut_eprint_status cnt_eprint_ids=$cnt_eprint_ids MAX_SYNC=$MAX_SYNC\n"; sleep(3); - foreach my $eprint_id (@eprint_ids) + EPR: foreach my $eprint_id (@eprint_ids) { last unless ($running); last if (defined ($MAX_SYNC) && $cnt_synced >= $MAX_SYNC); @@ -2675,7 +2703,7 @@ sub ot2ut if (defined ($sync_info)) { - if ($sync_info->{error_code} ne 'ok') + if ($sync_info->{error_code} ne 'ok' || $ot2ut_sync_anyway{$eprint_id}) { print __LINE__, " earlier sync attempt had errors, retrying...\n"; print __LINE__, " sync_info: ", Dumper($sync_info); @@ -2684,8 +2712,14 @@ sub ot2ut } } - my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $docs, $utheses_upload_result_json_path)= - generate_utheses_metadata($epr, $eprint_id); + my @res= generate_utheses_metadata($epr, $eprint_id); + my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $docs, $utheses_upload_result_json_path)= @res; + if (@res < 9) + { + print __LINE__, " something went wrong, not enough results: ", Dumper(\@res); + next EPR; + } + my ($eprint_status)= map { $row->{$_} } qw(eprint_status); print __LINE__, " sync_info=[$sync_info]\n"; @@ -2715,7 +2749,7 @@ sub ot2ut if (!defined ($files) || ref($files) ne 'ARRAY' || @$files < 1 # || @$files != 1 # no attachments allowed yet - || @$files > 10 # testing ... 
+ # || @$files > 10 # testing ... ) { push (@$errors, { error => 'num_files', note => 'currently limited to objects with exactly one file' } ); @@ -2761,11 +2795,13 @@ sub ot2ut my @docs= @{$docs->{documents}}; my $main_file= shift(@docs); my ($local_filename, $lfnm)= map { $main_file->{$_} } qw(path_doc main); + # $lfnm=~ s#\x{e4}#ae#g; # TODO: use curl for now - my @upload_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F), 'metadata=@' . $utheses_json_path, - qw(-F type=application/json -F), 'file=@' . $local_filename . ';filename=' . $lfnm, - qw(-F type=application/pdf), $upload_cnf->{import_url}, '-o' . $utheses_upload_result_json_path); + my @upload_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data), + '-F', 'metadata=@' . $utheses_json_path, '-F', 'type=application/json;charset="UTF-8"', + '-F', 'file=@"' . $local_filename . '";filename="' . $lfnm . '"', '-F', 'type=application/pdf', + $upload_cnf->{import_url}, '-o' . $utheses_upload_result_json_path); if (exists ($upload_cnf->{headers})) { @@ -2917,8 +2953,10 @@ old format 2019-11..2020-01 # curl -X POST http://localhost:3000/attachment/add/#uthesesId/fromOthes -F "metadata=@uthesesDM.json" -F "file=@attachment.jpg" my $url1= join ('/', $upload_cnf->{api_url}, qw(attachment add), $utheses_id, 'fromOthes'); - my @attachment_add_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F), - 'metadata=@' . $fnm_attachment_md, '-F', 'file=@' . $attachment->{path_doc}, $url1, '-o' . $fnm_attachment_res); + my @attachment_add_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data), + '-F', 'metadata=@"' . $fnm_attachment_md . '"', + '-F', 'file=@"' . $attachment->{path_doc} . '"', + $url1, '-o' . 
$fnm_attachment_res); if (exists ($upload_cnf->{headers})) { @@ -2947,38 +2985,41 @@ old format 2019-11..2020-01 send_message("upload attachment success: eprint_id=[$eprint_id] context=[$ot2ut_context] utheses_id=[$utheses_id] attachment_number=[$attachment_number] attachment_pid=[$attachment_pid] curl_status=[$curl_status]"); # unless ($silent_upload_success); + if ($attachment->{security} eq 'public') + { # set attachment status to Active (in Phaidra) only when this attachment is public # curl -X POST http://localhost:3000/attachment/changeStatus/#attachmentPid -F "status=A" - my $url2= join ('/', $upload_cnf->{api_url}, qw(attachment changeStatus), $attachment_pid); - my @attachment_chg_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F status=A), - $url2, '-o' . $fnm_attachment_chg); + my $url2= join ('/', $upload_cnf->{api_url}, qw(attachment changeStatus), $attachment_pid); + my @attachment_chg_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F status=A), + $url2, '-o' . 
$fnm_attachment_chg); - if (exists ($upload_cnf->{headers})) - { - foreach my $header (@{$upload_cnf->{headers}}) + if (exists ($upload_cnf->{headers})) { - push (@attachment_chg_cmd, '--header', $header ); + foreach my $header (@{$upload_cnf->{headers}}) + { + push (@attachment_chg_cmd, '--header', $header ); + } } - } - - print __LINE__, " attachment_chg_cmd: [", join(' ', @attachment_chg_cmd), "]\n"; - my $t_curl= time(); - system(@attachment_chg_cmd); - - my $result_data2; - eval { $result_data2= Util::JSON::read_json_file($fnm_attachment_res); }; - if ($@) - { - print __LINE__, " can't parse upload_result; error=[$@]\n"; - } - else - { - print __LINE__, " change attchment [$attachment_number] result: ", Dumper($result_data2); - # my $attachment_pid= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg); - } - } + print __LINE__, " attachment_chg_cmd: [", join(' ', @attachment_chg_cmd), "]\n"; + my $t_curl= time(); + system(@attachment_chg_cmd); - } + my $result_data2; + eval { $result_data2= Util::JSON::read_json_file($fnm_attachment_res); }; + if ($@) + { + print __LINE__, " can't parse upload_result; error=[$@]\n"; + } + else + { + print __LINE__, " change attchment [$attachment_number] result: ", Dumper($result_data2); + # my $attachment_pid= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg); + } + } # if attachment is public + + } # when main document was uploaded successfully + + } # if ($do_upload) } } else @@ -3249,6 +3290,9 @@ sub generate_utheses_metadata $thesis->{original_filename}= $main_file->{fileinfo}->{orig_fnm}; $thesis->{upload_filename}= $main_file->{fileinfo}->{upl_fnm}; + # 2020-08-05: + $thesis->{policies}->{fulltext_locked}= 0 if ($eprint_status eq 'archive' && $main_file->{security} eq 'public'); + $ut->public('thesis', $thesis); # Mon May 11 22:12:38 CEST 2020 asked nd about this, especially thesis_doc_added_date: @@ -4080,8 +4124,13 @@ sub update_policies print __LINE__, " eprint_id=[$eprint_id] 
x1_lastmod=[$x1_lastmod]\n"; print __LINE__, ' ', '='x70, "\n"; - my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $docs, $utheses_upload_result_json_path)= - generate_utheses_metadata($epr, $eprint_id); + my @res= generate_utheses_metadata($epr, $eprint_id); + if (@res < 9) + { + print __LINE__, " something went wrong, not enough results: ", Dumper(\@res); + return undef; + } + my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $docs, $utheses_upload_result_json_path)= @res; my $show= $docs->{show}; @@ -4485,7 +4534,7 @@ sub policies_stats $trailer .= "<h2>nonpublic_doc_first</h2>\n". Dumper(\@lst_nonpublic_doc_first) if (@lst_nonpublic_doc_first); $trailer .= "<h2>docs with notes</h2>\n". Dumper(\@lst_docs_with_notes) if (@lst_docs_with_notes); $trailer .= "<h2>embargo dates</h2>\n". Dumper(\%doc_embargo_dates); - $trailer .= "<h2>upload_errors</h2>\n". Dumper(\%upload_errors); + $trailer .= "<h2>errors</h2>\n". Dumper(\%upload_errors); $cctab->show_tsv(['othes', @contexts], 'counters.tsv', $trailer); diff --git a/lib/IRMA/eprints.pm b/lib/IRMA/eprints.pm index e60c514327581c46dcddc68f36560bde8af94c6e..2044ea876477848696a5079e6d64c1f6b62ca1f6 100644 --- a/lib/IRMA/eprints.pm +++ b/lib/IRMA/eprints.pm @@ -52,6 +52,27 @@ sub fetch_data $res; }
+# Return the ids (as strings, ascending) of all archive/buffer eprints in a block:
+# block N selects $block_size*N <= eprintid < $block_size*(N+1); size defaults to 100.
+# An undefined or non-numeric block yields an empty result instead of acting as block 0.
+# List context returns the ids as a list, scalar context returns an array reference.
+sub get_eprint_ids_for_block
+{
+  my ($self, $block, $block_size)= @_;
+
+  $block_size= 100 unless (defined ($block_size) && $block_size =~ m#\A\d+\z# && $block_size > 0);
+  my @eprint_ids;
+  if (defined ($block) && $block =~ m#\A\d+\z#)
+  {
+    my $begin= $block*$block_size;
+    my $conditions= 'eprint_status in ("archive", "buffer") AND eprintid >= ? AND eprintid < ?';
+    my $res= $self->connect()->get_all_x ('eprint', [$conditions, $begin, $begin+$block_size], 'eprintid');
+    @eprint_ids= map { $_.'' } sort { $a <=> $b } keys %$res; # result is keyed by eprintid
+  }
+
+  return (wantarray) ? @eprint_ids : \@eprint_ids;
+}
+
 sub fetch_metadata { my $self= shift; @@ -162,7 +183,7 @@ sub fetch_eprint_app_data next EPR_ITEM; } - unless ($matr =~ m#^\d{8}$#) + unless ($matr =~ m#^\d{8}$# || $matr eq '') # allow empty matr as well { push (@{$errors{$epr_id}}, "malformed matr=[$matr]"); next EPR_ITEM;