diff --git a/eprints1.pl b/eprints1.pl
index 626b807db2a14d92ab5c0a931893ba8e52d0b15c..f888cc56541004185e87d6b74b315fc282d62dbf 100755
--- a/eprints1.pl
+++ b/eprints1.pl
@@ -115,6 +115,7 @@ my $op_mode= 'unknown';
 # BEGIN OT2UT: Othes to Utheses
 # my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
 my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize
+# my $ot2ut_context= 'ot2ut-prod'; # TODO: parametrize
 my $oma_sleep_time= 10;
 
 my %map_ot2ut_roles=
@@ -390,6 +391,7 @@ while (defined ($arg= shift (@ARGV)))
     elsif ($opt eq 'urn4xx') { $op_mode= $opt; $MAX_SYNC= $val if (defined ($val)); } # request urns for ???
     elsif ($opt eq 'doi4ep') { $op_mode= $opt; $MAX_SYNC= $val if (defined ($val)); } # new 2019-06-18
     elsif ($opt eq 'ot2ut') { $op_mode= $opt; $MAX_SYNC= $val if (defined ($val)); } # new 2019-11-28
+    elsif ($opt eq 'ot2ut-context') { $ot2ut_context= $val || shift(@ARGV); }
     elsif ($opt eq 'rqm') { $op_mode= $opt; } # Request Monitor
     elsif ($opt eq 'auto') { $op_mode= $opt; } # perform a complete run including resetting all errors
     elsif ($opt eq 'debug') { $debug_level= $val || 1; }
@@ -400,9 +402,9 @@ while (defined ($arg= shift (@ARGV)))
     elsif ($opt eq 'debug_stkz') { $op_mode= 'debug_stkz'; }
     elsif ($opt eq 'debug_stbez') { $op_mode= 'debug_stbez'; }
     elsif ($opt eq 'debug_filenames') { $op_mode= 'debug_filenames'; }
-    elsif ($opt eq 'update-policies') { $op_mode= 'update-policies'; }
+    elsif ($opt eq 'update-policies') { $op_mode= 'update-policies'; $refresh_oldest_policies= $val if ($val); }
     elsif ($opt eq 'policies-stats') { $op_mode= 'policies-stats'; }
-    elsif ($opt eq 'refresh-policies') { $refresh_oldest_policies= $val || shift (@ARGV); }
+    elsif ($opt eq 'export-redirect' || $opt eq 'export-redis') { $op_mode= 'export-redirect'; }
     elsif ($opt eq 'max') { $MAX_SYNC= $val || shift (@ARGV); }
     elsif ($opt eq 'mab-age') { $MAX_MAB_AGE= $val || shift (@ARGV); } # in seconds
     elsif ($opt eq 'marc-age'){ $MAX_MARC_AGE= $val || shift (@ARGV); } # in seconds
@@ -587,6 +589,10 @@ elsif ($op_mode eq 'policies-stats')
 {
   policies_stats(@PARS);
 }
+elsif ($op_mode eq 'export-redirect')
+{
+  export_redirect();
+}
 elsif ($op_mode eq 'reset') # reset error conditions for given ac_numbers
 {
   reset_errors(@PARS);
@@ -3639,7 +3645,7 @@ sub get_study_id
   if (defined ($stkz) && $stkz ne '')
   {
     my ($unikz, $digits, $coop_kz);
-    if ($stkz =~ m#^\s*U?([A-Z])\s*([\d ]+)([A-Z])?$#) { ($unikz, $digits, $coop_kz)= ('U'.$1, $2, $3); }
+    if ($stkz =~ m#^\s*U?([A-Z])\s*([\d ]+)U?([A-Z])?$#) { ($unikz, $digits, $coop_kz)= ('U'.$1, $2, $3); }
     elsif ($stkz =~ m#^\s*([\d ]+)$#) { ($unikz, $digits)= ('UA', $1); }
     else { push (@errors, { error => 'stkz_format_unknown', stkz => $stkz }); }
 
@@ -4417,6 +4423,86 @@ sub debug_stkz
   }
 }
 
+# Export a Redis command file mapping each eprint id to its redirect URL.
+#
+# Eprints in status 'archive' or 'buffer' are mapped to the eprint redirect
+# URL first; rows of the ot2ut sync collection for $ot2ut_context then
+# override that mapping with the public utheses URL.
+#
+# Uses the globals $cnf, $ot2ut_context, $db_ot2ut and $running.
+# Returns the list ($cnt, $max_eprint_id, $fnm_redis).
+sub export_redirect
+{
+  # $cnf is global
+  my $ctx_cnf= $cnf->{$ot2ut_context};
+  die "no configuration for context [$ot2ut_context]" unless (defined ($ctx_cnf));
+
+  # fail early: without both base urls every exported redirect would be broken
+  my ($eprint_redirect_base_url, $utheses_public_base_url)= map { $ctx_cnf->{$_} } qw(eprint_redirect_base_url utheses_public_base_url);
+  die "missing eprint_redirect_base_url or utheses_public_base_url for context [$ot2ut_context]"
+    unless (defined ($eprint_redirect_base_url) && defined ($utheses_public_base_url));
+
+  my $epr= get_eprints_db($cnf);
+
+  my $epr_db= $epr->connect();
+  my @col_names= qw( eprintid lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second );
+  my $search_term= "eprint_status IN ('archive', 'buffer')";
+  my $keys= $epr_db->get_all_x('eprint', [$search_term], join(',', @col_names));
+  my @eprint_ids_in_mysql= keys %$keys; # mongodb stores eprint_id as string
+
+  $db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut));
+  my $col_sync= $db_ot2ut->get_collection('sync');
+  my $cur_sync= $col_sync->find({ context => $ot2ut_context });
+
+  # sparse array: index is the eprint id, value is the redirect target
+  my @eprint_ids;
+  my $max_eprint_id= 0;
+  foreach my $eprint_id (@eprint_ids_in_mysql)
+  {
+    $eprint_ids[$eprint_id]= $eprint_redirect_base_url . $eprint_id . '/';
+    $max_eprint_id= $eprint_id if ($eprint_id > $max_eprint_id);
+  }
+
+  my $cnt_skipped= 0;
+  while ($running)
+  {
+    my $row_sync= $cur_sync->next();
+    last unless (defined ($row_sync));
+    # print __LINE__, " row_sync: ", Dumper($row_sync); last;
+
+    my ($eprint_id, $utheses_id)= map { $row_sync->{$_} } qw(eprint_id utheses_id);
+
+    # a row without a numeric eprint_id or without utheses_id must not
+    # overwrite the eprint redirect with a bare base url
+    unless (defined ($eprint_id) && $eprint_id =~ m#\A\d+\z# && defined ($utheses_id) && $utheses_id ne '')
+    {
+      $cnt_skipped++;
+      next;
+    }
+
+    $eprint_ids[$eprint_id]= $utheses_public_base_url . $utheses_id;
+    $max_eprint_id= $eprint_id if ($eprint_id > $max_eprint_id);
+  }
+
+  my $fnm_redis= 'utheses_redirect_' . $ot2ut_context .'.redis';
+  open (my $fh_redis, '>:utf8', $fnm_redis) or die "can't write to [$fnm_redis]: $!";
+  my $cnt= 0;
+  foreach my $i (0 .. $max_eprint_id)
+  {
+    my $ut_url= $eprint_ids[$i];
+    next unless ($ut_url);
+
+    printf {$fh_redis} ("set %d %s\n", $i, $ut_url);
+    $cnt++;
+  }
+  # buffered write errors only show up at close
+  close ($fh_redis) or die "can't close [$fnm_redis]: $!";
+
+  print __LINE__, " export_redirect: skipped $cnt_skipped sync rows without eprint_id or utheses_id\n" if ($cnt_skipped);
+  print __LINE__, " export_redirect: exported $cnt redirects, max_eprint_id= $max_eprint_id to $fnm_redis\n";
+  return ($cnt, $max_eprint_id, $fnm_redis);
+}
+
 sub update_policies
 {
   my @upd_eprint_ids= @_;
@@ -4545,7 +4631,7 @@ sub update_policies
       ts_finish => Util::ts::ts_ISO_gmt(time()),
 
       cnt_inserted => $cnt_inserted,
-      cnt_updates => $cnt_updated,
+      cnt_updated => $cnt_updated,
       cnt_unchanged => $cnt_unchanged,
       lst_inserted => \@lst_inserted,
       lst_updated => \@lst_updated,
@@ -4874,7 +4960,7 @@ sub policies_stats
   my $b6a= $cctab->bucket(0, @s6a);
   if (defined ($b6a))
   {
-    $b6a->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext; NBN schon vergeben' };
+    $b6a->{annotation}= { bgcolor => 'lightblue', note => 'keine Volltextfreigabe; NBN schon vergeben' };
     # print __LINE__, " special bucket found: ", Dumper($b6a);
   }
 