Skip to content
Snippets Groups Projects
Commit 37d62ee1 authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

additional functionality for ut1.pl

parent 4a0a8096
No related branches found
No related tags found
No related merge requests found
......@@ -52,7 +52,7 @@ sub register_doi
unless ($code1 =~ m#^20[01]#)
{
print STDERR "ATTN: register_doi POST metadata returned code1=[$code1] res1=[$res1]\n";
return undef;
return (0, $res1, undef);
}
my $doi_reg= <<"EOX";
......@@ -66,10 +66,10 @@ EOX
unless ($code2 =~ m#^20[01]#)
{
print STDERR "ATTN: register_doi POST doi returned code2=[$code2] res2=[$res2]\n";
return undef;
return (0, $res1, $res2);
}
1;
(1, $res1, $res2);
}
sub datacite_request
......
......@@ -20,24 +20,22 @@ sub getContainerPublicMetadata
my $self= shift;
my $utheses_id= shift;
my ($code1, $res1)= $self->utheses_request('GET', 'container/get/public', $utheses_id);
print __LINE__, " code1=[$code1] res1=[$res1]\n";
my ($code1, $res1, $info)= $self->utheses_request('GET', 'container/get/public', $utheses_id);
print __LINE__, " code1=[$code1] res1=[$res1] info=[$info]\n";
my $info;
if ($code1 =~ m#^2#)
{
eval
{
$info= from_json($res1);
};
if ($@)
{
die $@;
}
# TODO: Error handling!
$info;
}
sub getPendingDoisCreateRequest
{
my $self= shift;
my ($code1, $res1, $info)= $self->utheses_request('GET', 'doi/get/createRequest', undef, );
print __LINE__, " code1=[$code1] res1=[$res1] info=[$info]\n";
# TODO: Error handling!
$info;
(wantarray) ? ($code1, $res1, $info) : $info;
}
sub utheses_request
......@@ -45,20 +43,42 @@ sub utheses_request
my $self= shift;
my $method= shift;
my $what= shift;
my $id= shift;
my $par= shift;
my ($api_url)= map { $self->{config}->{$_} } qw(api_url);
my ($api_url, $headers)= map { $self->{config}->{$_} } qw(api_url headers);
my $req_url= join ('/', $api_url, $what, $id);
my $req_url= join ('/', $api_url, $what, $par);
print __LINE__, " req_url=[$req_url]\n";
my $req = HTTP::Request->new( $method => $req_url );
if (defined ($headers))
{
foreach my $h (@$headers)
{
$req->header(@$h);
}
}
my $ua= LWP::UserAgent->new;
my $res= $ua->request($req);
# print __LINE__, " res: ", main::Dumper($res);
my $txt= decode("utf8", $res->content());
return ($res->code(), $txt);
my $code= $res->code();
my $info;
if ($code =~ m#^2#)
{
eval
{
$info= from_json($txt);
};
if ($@)
{
die $@; # TODO: this should be handled mor gracefully!
}
}
return ($code, $txt, $info);
}
1;
......
......@@ -3,6 +3,7 @@
use strict;
use FileHandle;
use utf8;
binmode( STDOUT, ':utf8' ); autoflush STDOUT 1;
binmode( STDERR, ':utf8' ); autoflush STDERR 1;
......@@ -11,21 +12,36 @@ binmode( STDIN, ':utf8' );
use Data::Dumper;
$Data::Dumper::Indent= 1;
use Digest::MD5::File qw(file_md5_hex);
use Util::ts;
use Util::JSON;
use Redmine::DB::MySQL;
use Phaidra::Utils::iso639;
use lib 'lib';
use Univie::Utheses::API;
use Phaidra::Utils::iso639;
use Util::JSON;
use DataCite::API;
use IRMA::NA;
use Alma::MARC_Extractor;
my @TSV_COLUMNS= qw( utheses_id fulltext_locked suffix doi nbn ac_number langs language persistent_link xml_fnm errors );
# my $op_mode= 'fetch_metadata_bulk';
my $op_mode= 'analyze';
my $op_mode= 'process';
my $fnm_tsv= 'utheses/utheses_info.tsv'; # TODO: timestamp!
my $config_file= '/etc/irma/DataCite.json';
# TODO: make these configurable too!
my $agent_config_file= '/etc/irma/pidagent.json';
my $MAX_MARC_AGE= 86400*60;
my $MAX_MARC_REQUESTS= 10_000;
my $do_register_doi= 0;
my $agent_name= 'pidagent';
my $agent_id= $$;
my $fix_problems= 0;
my @pars= ();
while (my $arg= shift (@ARGV))
......@@ -36,6 +52,7 @@ while (my $arg= shift (@ARGV))
my ($opt, $val)= split ('=', $1, 2);
if ($opt eq 'help') { usage(); }
elsif ($opt eq 'register-doi') { $do_register_doi= 1; }
elsif ($opt eq 'fix') { $fix_problems= 1; }
else { usage(); }
}
elsif ($arg =~ /^-(.+)/)
......@@ -53,25 +70,84 @@ while (my $arg= shift (@ARGV))
}
}
my $cnf= Util::JSON::read_json_file ($config_file);
# print __LINE__, " cnf: ", main::Dumper ($cnf); exit(0);
my $ut_cnf= $cnf->{repositories}->{'utheses.univie.ac.at'};
# load configuration
my $agent_cnf= Util::JSON::read_json_file ($agent_config_file);
# print __LINE__, " agent_cnf: ", main::Dumper ($agent_cnf); exit(0);
my $dc_cnf= Util::JSON::read_json_file ($agent_cnf->{DataCite_config});
# print __LINE__, " dc_cnf: ", main::Dumper ($dc_cnf); exit(0);
my $ut_cnf= $dc_cnf->{repositories}->{'utheses.univie.ac.at'};
# print __LINE__, " ut_cnf: ", main::Dumper ($ut_cnf); exit(0);
my $reg_cnf= $cnf->{doi_registries}->{$ut_cnf->{registry}};
# print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf); exit(0);
my $dc_reg_cnf= $dc_cnf->{doi_registries}->{my $dc_cnf_name= $ut_cnf->{registry}};
# print __LINE__, " dc_reg_cnf: ", main::Dumper ($dc_reg_cnf); exit(0);
my $utapi= new Univie::Utheses::API( config => { api_url => $ut_cnf->{api_url} } );
die "no utapi" unless (defined ($utapi));
# prepare APIs
my $ut_api= new Univie::Utheses::API( config => $ut_cnf->{api_config} );
die "no ut_api" unless (defined ($ut_api));
my $reg_obj= new DataCite::API (config => $reg_cnf, xmode => 'test');
my $reg_obj= new DataCite::API (config => $dc_reg_cnf, xmode => 'test');
die "no reg_obj" unless (defined ($reg_obj));
if ($op_mode eq 'analyze')
# get handles for various databases
my $marc_db= IRMA::db::get_any_db($agent_cnf, 'marc_database');
my $agent_db= IRMA::db::get_any_db($agent_cnf, 'pidagent_database');
print __LINE__, " agent_db=[$agent_db]\n";
my @marc_fields= qw(ac_number mms_id fetched lib_code);
my $mex= Alma::MARC_Extractor->new(\@marc_fields);
my $running= 0;
if ($op_mode eq 'process')
{
do
{
while (my $par= shift (@pars))
{
if ($par =~ m#^(\d+)$#
|| $par =~ m#https?://utheses.univie.ac.at/detail/(\d+)[\#/]?#
)
{
my $utheses_id= $1;
my @actions= process_utheses_item($utheses_id);
print __LINE__, " process_utheses_item($utheses_id) ==> [", join(', ', @actions), "]\n";
}
elsif ($par =~ m#^(\d+)\.\.(\d+)$# || $par =~ m#^(\d+)\-(\d+)$# || $par =~ m#(blk)(\d+)$#)
{
my ($start, $end)= ($1, $2);
($start, $end)= ($end*100, $end*100+99) if ($start eq 'blk');
my $item_count= $end-$start;
if ($start <= $end && (($item_count <= 2000 && $do_register_doi) || ($item_count <= 5000 && !$do_register_doi)))
{
foreach (my $utheses_id= $start; $utheses_id <= $end; $utheses_id++)
{
push (@pars, "$utheses_id"); # cast to string!
}
}
}
elsif ($par =~ m#^AC\d{8}$#)
{
foreach my $par (@pars)
my @actions= process_ac_number($par);
print __LINE__, " process_ac_number($par) ==> [", join(', ', @actions), "]\n";
}
elsif ($par eq 'queue')
{
my @utheses_ids= get_job_from_queue();
print __LINE__, ' queued utheses_ids: ', join(', ', @utheses_ids), "\n";
sleep(10);
push (@pars, @utheses_ids, 'cleanup');
}
elsif ($par eq 'cleanup')
{
if ($par =~ m#^\d+$#) { analyze_utheses_item($par); }
cleanup_queue();
}
elsif ($par eq 'gpdcr')
{
my @utheses_ids= gpdcr();
# push (@pars, @utheses_ids);
}
sleep(2) if (@pars);
}
} while($running);
}
elsif ($op_mode eq 'fetch_metadata_bulk')
{
......@@ -79,55 +155,372 @@ elsif ($op_mode eq 'fetch_metadata_bulk')
}
exit(0);
sub analyze_utheses_item
sub gpdcr
{
my ($status, $txt, $info)= $ut_api->getPendingDoisCreateRequest();
print __LINE__, " gpdcr: info: ", Dumper($info);
my @utheses_ids= ();
if ($status eq '200')
{
my $p= $info->{pendingDois};
if (defined ($p) && ref($p) eq 'HASH')
{
push (@utheses_ids, map { $p->{$_}->{utheses_id} } keys %$p);
}
else
{
die "unexpected type of pendingDois";
}
}
@utheses_ids;
}
sub cleanup_queue
{
my $q_col= $agent_db->get_collection('queue');
my $j= $q_col->find_one({status => "in_progress", agent_name => $agent_name, agent_id => $agent_id});
if (defined ($j))
{
print __LINE__, " finishing old job ", Dumper($j);
$q_col->update({_id => $j->{_id}}, { '$set' => {status => "done", agent_name => $agent_name, agent_id => $agent_id }});
sleep(10);
}
}
sub get_job_from_queue
{
my $q_col= $agent_db->get_collection('queue');
my $j= $q_col->find_one({status => "in_progress", agent_name => $agent_name});
if (defined ($j))
{
print __LINE__, " resuming old job ", Dumper($j);
}
else
{
$j= $q_col->find_one({status => "new"});
}
return () unless (defined ($j));
print __LINE__, " found new job ", Dumper($j);
$q_col->update({_id => $j->{_id}}, { '$set' => {status => "in_progress", agent_name => $agent_name, agent_id => $agent_id }});
my @ut_ids;
if (exists($j->{utheses_ids})) { push (@ut_ids, @{$j->{utheses_ids}}); }
if (exists($j->{utheses_id})) { push (@ut_ids, $j->{utheses_id}); }
return @ut_ids;
}
sub process_utheses_item
{
my $utheses_id= shift;
my ($row, $xml)= get_utheses_metadata($utheses_id);
print __LINE__, " process_utheses_item: utheses_id=[$utheses_id] ", '='x50, "\n";
my $register_doi_ok= 1;
my $row= {};
my ($error_code, $status, $xml)= get_utheses_metadata($row, $utheses_id);
print __LINE__, " utheses_id=[$utheses_id] ", '='x50, "\n";
print __LINE__, " error_code=[$error_code] status=[$status]\n";
print __LINE__, " xml=[$xml]\n";
print __LINE__, " row: ", Dumper($row);
if ($do_register_doi)
my @actions= ('fetched_utheses');
unless ($status eq '200')
{
push (@actions, 'no_utheses_record');
return @actions;
}
unless ($error_code eq 'ok')
{
push (@actions, "error_code=[$error_code]");
report_problem( { area => 'utheses', problem => 'error_code', utheses_id => $utheses_id, error_code => $error_code } );
$register_doi_ok= 0;
}
my @x= analyze_marc_record($row);
push (@actions, @x);
unless (defined ($agent_db))
{
return @actions;
}
my $ut_col= $agent_db->get_collection('utheses');
if ($fix_problems) { remove_problem('utheses', $row->{utheses_id}); }
# first: compare existing utheses record (if it exists) with data from utheses
my $ut_data= $ut_col->find_one( { utheses_id => $row->{utheses_id} } );
if (defined ($ut_data))
{
my @cmp_fields= qw(doi urn nbn ac_number);
my @problems= ();
foreach my $f (@cmp_fields)
{
if (exists($ut_data->{$f}) && defined($ut_data->{$f}) && $row->{$f} ne $ut_data->{$f})
{
push (@problems, { problem => 'missmatch', field => $f, recorded => $ut_data->{$f}, found => $row->{$f} });
}
}
if (@problems)
{
report_problem( { area => 'utheses', problem => 'utheses_data_missmatch', utheses_id => $utheses_id, problems => \@problems } );
push(@actions, 'problem report utheses');
return (@actions);
}
}
# second: update utheses record in database
my $res_upd= $ut_col->update( { utheses_id => $row->{utheses_id} }, $row, { upsert => 1 } );
print __LINE__, " res_upd: ", Dumper($res_upd);
my ($ok, $verdict, $msg1, $msg2);
if ($do_register_doi && $register_doi_ok)
{
if ($fix_problems)
{
remove_problem('datacite', $row->{utheses_id}, 'utheses_id');
remove_problem('datacite', $row->{doi});
}
my $dc_col= $agent_db->get_collection('datacite');
my ($utheses_id, $doi, $xml_md5, $url)= map { $row->{$_} } qw(utheses_id doi xml_md5 persistent_link);
my $dc_record= $dc_col->find_one( { doi => $doi } );
# prepare registration data
my %reg_info=
(
doi => $row->{doi},
url => $url,
context => 'utheses',
registry => $dc_cnf_name,
utheses_id => $utheses_id,
ac_number => $row->{ac_number},
xml_md5 => $xml_md5,
ts_epoch => time(),
ts_iso_gmt => Util::ts::ts_ISO_gmt(),
);
# check existing datacite registration record, if it exists
if (defined ($dc_record))
{
print __LINE__, " datacite record found: ", Dumper($dc_record);
# check if record matches with new data
my @cmp_fields= qw(registry context url utheses_id ac_number);
my @problems= ();
foreach my $f (@cmp_fields)
{
if (exists($dc_record->{$f}) && defined($dc_record->{$f}))
{
if ($reg_info{$f} ne $dc_record->{$f})
{
push (@problems, { problem => 'missmatch', field => $f, recorded => $dc_record->{$f}, found => $reg_info{$f} });
}
# else: record matches, that's ok
}
else
{
push (@problems, { problem => 'missing', field => $f });
}
}
if (@problems)
{
report_problem( { area => 'datacite', problem => 'datacite_data_missmatch', utheses_id => $utheses_id, doi => $doi, problems => \@problems } );
push(@actions, 'problem report datacite');
return (@actions);
}
if ($dc_record->{xml_md5} eq $xml_md5 && $dc_record->{reg_status} == 1)
{
print __LINE__, " datacite metadata unchanged; not updating!\n";
push (@actions, 'datacite doi metadata unchanged');
return @actions;
}
}
($ok, $verdict, $msg1, $msg2)= register_doi_with_DataCite_for_utheses($row, $xml);
print __LINE__, " ok=[$ok] msg1=[$msg1] msg2=[$msg2] verdict=[$verdict]\n";
push (@actions, (($ok) ? 'datacite doi registration ok' : 'datacite doi registration failed'), $verdict);
$reg_info{reg_status}= $ok;
$reg_info{reg_verdict}= $verdict,
$reg_info{reg_message1}= $msg1;
$reg_info{reg_message2}= $msg2;
print __LINE__, " insert into datacite; reg_info: ", Dumper(\%reg_info);
my $res= $dc_col->update( { doi => $doi }, \%reg_info, { upsert => 1 } );
print __LINE__, " insert res: ", Dumper($res);
unless ($ok)
{
report_problem( { area => 'datacite', problem => 'registration failure', utheses_id => $utheses_id, doi => $doi, reg_info => \%reg_info } );
}
}
@actions;
}
sub process_ac_number
{
my $ac_number= shift;
my $row= {};
my @actions= analyze_marc_record($row, $ac_number);
if (exists($row->{val_utheses}))
{
my $ut_link= $row->{val_utheses};
if ($ut_link =~ m#https?://utheses.univie.ac.at/detail/(\d+)[\#/]?#)
{
my $utheses_id= $1;
my ($error_code, $status, $xml)= get_utheses_metadata($row, $utheses_id);
}
else
{
# TODO: report bad link in Alma
}
}
print __LINE__, " process_ac_number: ac_number=[$ac_number] row: ", Dumper($row);
}
sub analyze_marc_record
{
my $row= shift;
my $ac_number= shift || $row->{ac_number};
return ($row->{marc_record}= 'no_marc_db') unless (defined ($marc_db));
return ($row->{marc_record}= 'invalid_ac_number') unless ($ac_number =~ m#^AC\d{8}$#);
my @actions= ();
my $marc_col= $marc_db->get_collection('alma.marc');
my $marc_rec= $marc_col->find_one({ ac_number => $ac_number });
print __LINE__, " marc_rec: ", Dumper($marc_rec);
my $request_marc_rec= 0;
if (defined ($marc_rec))
{
my $marc_fetched= $marc_rec->{fetched};
my $best_before= $marc_fetched + $MAX_MARC_AGE;
my $now= time();
print __LINE__, " marc_fetched=[$marc_fetched] best_before=[$best_before] now=[$now]\n";
if ($best_before > $now)
{
$row->{marc_record}= 'ok';
}
else
{
print __LINE__, " marc_record too old\n";
$row->{marc_record}= 'too_old';
$request_marc_rec= 1;
}
# check marc record, even when it is too old
$row->{marc}= my $x= {};
$mex->extract_identifiers($marc_rec, $x);
push (@actions, 'marc_rec_checked');
}
else
{
$row->{marc_record}= 'not_found';
$request_marc_rec= 1;
}
if ($request_marc_rec)
{
my $req_col= $marc_db->get_collection('requests');
my $req=
{
agent => 'alma_cat',
status => 'new',
action => 'update_alma_2xml',
ac_number => $ac_number,
requested_by => $agent_name,
};
$req_col->insert ($req);
push (@actions, 'marc_rec_requested');
print __LINE__, " analyze_marc_record: insert requests: ", Dumper($req);
}
@actions;
}
sub register_doi_with_DataCite_for_utheses
{
my $row= shift;
my $xml= shift;
my ($rejected, $accepted);
my $ok= 0;
unless (defined ($row))
{
print "ATTN: can't register DOI: no utheses data found\n";
$rejected= "can't register DOI: no utheses data found";
goto END;
}
unless (defined ($xml))
{
print "ATTN: can't register DOI: no xml data generated\n";
$rejected= "can't register DOI: no xml data generated";
goto END;
}
my ($doi, $url, $errors, $pol)= map { $row->{$_} } qw(doi persistent_link datacite_conversion_errors policies);
my ($ftl, $lks)= map { $pol->{$_} } qw(fulltext_locked lock_status);
if (!defined ($ftl) || $ftl)
{
$rejected= "can't register DOI: fulltext locked ($ftl)";
goto END;
}
my ($doi, $url, $errors, $ftl)= map { $row->{$_} } qw(doi persistent_link datacite_conversion_errors fulltext_locked);
if ($ftl)
if (!defined ($lks) || $lks)
{
print "ATTN: can't register DOI: fulltext locked\n";
$rejected= "can't register DOI: lock_status=[$lks]";
goto END;
}
if (@$errors)
{
print "ATTN: can't register DOI $doi due to errors: ", join(', ', @$errors), "\n";
$rejected= "can't register DOI $doi due to errors: ", join(', ', @$errors);
goto END;
}
if ($reg_obj->register_doi ($doi, $xml, $url))
my ($reg_res, $reg_msg1, $reg_msg2)= $reg_obj->register_doi ($doi, $xml, $url);
if ($reg_res)
{
print "NOTE: register_doi doi=[$doi] url=[$url] OK\n";
$accepted= "register_doi doi=[$doi] url=[$url] OK";
$ok= 1;
}
else
{
print "ATTN: register_doi doi=[$doi] url=[$url] was not ok\n";
}
$rejected= "register_doi doi=[$doi] url=[$url] was not ok";
}
END:
return;
if ($rejected)
{
print "REJECTED: ", $rejected, "\n";
}
else
{
print "ACCEPTED: ", $accepted, "\n";
}
return ($ok, ($rejected || $accepted), $reg_msg1, $reg_msg2);
}
sub fetch_metadata_bulk
......@@ -142,54 +535,90 @@ sub fetch_metadata_bulk
{
next if ($utheses_id eq 'utheses_id'); # CSV column name...
my ($row, $xml)= get_utheses_metadata($utheses_id);
my $row;
my ($error_code, $status, $xml)= get_utheses_metadata($row, $utheses_id);
if (defined ($row))
{
print TSV join("\t", map { $row->{$_} } @TSV_COLUMNS), "\n";
}
}
}
=head2 my ($error_code, $status, $xml)= get_utheses_metadata($row, $utheses_id)
return DataCite_XML document for a given utheses_id and fill in $row
with important information
=cut
sub get_utheses_metadata
{
my $row= shift;
my $utheses_id= shift;
print __LINE__, " utheses_id=[$utheses_id]\n";
my $info= $utapi->getContainerPublicMetadata($utheses_id);
my $info= $ut_api->getContainerPublicMetadata($utheses_id);
print __LINE__, " info: ", Dumper($info);
my ($row, $xml);
if ($info->{status} eq '200')
my $xml;
my $status= $info->{status};
my $error_code= 'unknown';
if ($status eq '200')
{
($row, $xml)= utheses2datacite($info, $utheses_id);
$xml= utheses2datacite($row, $info, $utheses_id);
# print __LINE__, " row: ", Dumper($row);
if (defined ($row->{doi}))
{
if (defined ($xml) && defined (my $xml_fnm= $row->{xml_fnm}))
{
$row->{xml_fnm}= my $xml_fnm= 'utheses/DataCite_XML/'. $row->{doi}. '.xml';
# print __LINE__, " DataCite_XML=[$xml_fnm] xml=[$xml]\n";
if (defined ($xml) && defined ($row->{xml_fnm}) && !@{$row->{datacite_conversion_errors}})
{
open (XML, '>:utf8', $row->{xml_fnm}) or die; # TODO: or do something else...
open (XML, '>:utf8', $xml_fnm) or die "can't write to $xml_fnm"; # TODO: or do something else...
print XML $xml;
close (XML);
my $md5= file_md5_hex($xml_fnm);
print __LINE__, " xml_fnm=[$xml_fnm] md5=[$md5]\n";
$row->{xml_md5}= $md5;
}
if ($row->{datacite_conversion_error_count} == 0)
{
$error_code= 'ok';
}
else
{
printf ERRORS ("utheses_id=%d datacite conversion errors\n", $utheses_id);
$error_code= 'datacite_conversion_errors';
}
}
else
{
printf ERRORS ("utheses_id=%d status=%s\n", $utheses_id, $info->{status});
printf ERRORS ("utheses_id=%d no doi defined\n", $utheses_id);
$error_code= 'no_doi_defined';
}
}
else
{
printf ERRORS ("utheses_id=%d bad status=%s\n", $utheses_id, $status);
$error_code= 'bad_status';
}
return ($row, $xml);
$row->{error_code}= $error_code;
return ($error_code, $status, $xml);
}
=head2 my $xml= utheses2datacite ($row, $info, $utheses_id)
Transcribe utheses $info for a given utheses_id into xml and return that.
$row is filled with information extracted from $info.
=cut
sub utheses2datacite
{
my $row= shift;
my $info= shift;
my $utheses_id= shift;
......@@ -204,6 +633,9 @@ sub utheses2datacite
if ($doi =~ m#^10.25365/(thesis\.\d+)$#)
{
$suffix= $1;
# rewrite DOI, if necessary, e.g. for the test environment
$doi= join ('/', $ut_cnf->{use_prefix}, $suffix) if (exists($ut_cnf->{use_prefix}));
# $doi= join ('/', '10.493943', $suffix); # TEST: fake prefix
}
else
{
......@@ -215,7 +647,6 @@ sub utheses2datacite
push (@datacite_conversion_errors, "bad_url=[$persistent_link]") unless ($persistent_link =~ m#^https://utheses.univie.ac.at/detail/\d+/?$#);
my ($titles, $abstracts, $publication_date, $langs, $policies)= map { $th->{$_} } qw(titles abstracts publication_date languages policies);
my $fulltext_locked= $policies->{fulltext_locked};
my $publication_year;
if ($publication_date =~ m#^(\d{4})-?#)
......@@ -249,6 +680,8 @@ sub utheses2datacite
<creators>
EOX
# $xml .= " <urxn>bla</urxn>\n"; # TEST: check what happens with invalid XML
foreach my $author (@$authors)
{
$xml .= << "EOX";
......@@ -317,23 +750,24 @@ EOX
</resource>
EOX
my %row=
(
utheses_id => $utheses_id,
suffix => $suffix,
doi => $doi,
nbn => $nbn,
ac_number => $ac_number,
langs => join(',', @$langs),
language => $main_language,
persistent_link => $persistent_link,
datacite_conversion_errors => \@datacite_conversion_errors,
fulltext_locked => $fulltext_locked,
phaidra => $phaidra,
policies => $policies,
);
$row->{utheses_fetched}= time();
$row->{utheses_id}= $utheses_id;
$row->{suffix}= $suffix;
$row->{doi}= $doi;
$row->{nbn}= $nbn;
$row->{ac_number}= $ac_number;
$row->{langs}= join(',', @$langs);
$row->{language}= $main_language;
$row->{persistent_link }=$persistent_link;
$row->{datacite_conversion_errors}= \@datacite_conversion_errors;
$row->{datacite_conversion_error_count}= scalar @datacite_conversion_errors;
$row->{phaidra}= $phaidra; # ... mapped, see below
$row->{policies}= $policies; # ... mapped, see below
# foreach my $an (qw(fulltext_locked lock_status lock_until_date)) { $row->{$an}= $policies->{$an} }
# foreach my $an (qw(container_pid container_status thesis_doc_pid thesis_doc_status)) { $row->{$an}= $phaidra->{$an} }
(\%row, $xml);
$xml;
}
sub xml_escape
......@@ -347,3 +781,47 @@ sub xml_escape
$s;
}
sub report_problem
{
my $problem= shift;
my $ut_col= $agent_db->get_collection('problems');
$problem->{ts_iso_gmt}= Util::ts::ts_ISO_gmt();
my $area= $problem->{area};
my $check_id;
if ($area eq 'utheses') { $check_id= 'utheses_id' }
elsif ($area eq 'marc') { $check_id= 'ac_number' }
elsif ($area eq 'datacite') { $check_id= 'doi' }
$ut_col->update( { area => $area, $check_id => my $id= $problem->{$check_id} }, $problem, { upsert => 1 } ); # replace problem report, if one already exists
print __LINE__, " ATTN: problem reported for $check_id=$id: ", Dumper($problem);
}
sub remove_problem
{
my $area= shift;
my $id= shift;
my $check_id= shift;
my $ut_col= $agent_db->get_collection('problems');
unless (defined ($check_id))
{
if ($area eq 'utheses') { $check_id= 'utheses_id' }
elsif ($area eq 'marc') { $check_id= 'ac_number' }
elsif ($area eq 'datacite') { $check_id= 'doi' }
}
$ut_col->remove({ area => $area, $check_id => $id });
}
__END__
=head1 TODO
* handle signals like SIGINT
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment