Skip to content
Snippets Groups Projects
Commit b7acf18c authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

DataCite DOI registration and reservation

parent edcb234c
Branches
Tags
No related merge requests found
......@@ -12,6 +12,7 @@ loader-*.txt
typescript
table_eprint
RMS
na/
/blib/
/.build/
_build/
......
......@@ -5,6 +5,14 @@
DataCite DOI
=head1 USAGE
dcd.pl order 5 ... reserve 5 new DOIs
=head1 EXAMPLES
dcd.pl --ticket 14012 order 1
=cut
use strict;
......@@ -18,6 +26,7 @@ $Data::Dumper::Indent= 1;
# use Module;
use JSON;
use Util::JSON;
use Util::Simple_CSV;
binmode( STDOUT, ':utf8' ); autoflush STDOUT 1;
......@@ -29,6 +38,15 @@ use DataCite::API;
my $config_file= '/etc/irma/DataCite.json';
# identifiers table
my $na_path= 'na/na-dcd-10-25365';
my $identifiers_file= join ('/', $na_path, 'identifiers.tsv');
my $ifx= '10.25365/phaidra';
my $na_id= 1;
my $context_id= 1;
my $ticket;
my $tsv;
my $x_flag= 0;
my @PARS;
......@@ -39,7 +57,9 @@ while (defined ($arg= shift (@ARGV)))
elsif ($arg =~ /^--(.+)/)
{
my ($opt, $val)= split ('=', $1, 2);
if ($opt eq 'help') { usage(); }
elsif ($opt eq 'ticket') { $ticket= $val || shift (@ARGV); }
else { usage(); }
}
elsif ($arg =~ /^-(.+)/)
......@@ -72,6 +92,11 @@ if ($op_code eq 'register')
my $repo_url= shift (@PARS);
register_url ($cnf, $repo_url);
}
elsif ($op_code eq 'order')
{
my $order_count= shift (@PARS) || 1;
order_identifiers ($order_count);
}
exit (0);
......@@ -100,17 +125,23 @@ sub register_url
my $repo_url= shift;
print __LINE__, " repo_url=[$repo_url]\n";
if ($repo_url =~ m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$#)
if ($repo_url =~ m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$#
# || $repo_url =~ m# ... uscholar ... #)
)
{
my ($repo, $s1, $s2, $s3, $pid)= ($1, $2, $3, $4, $5);
usage ("unknown repo=[$repo]") unless (exists ($cnf->{repositories}->{$repo}));
print __LINE__, " repo=[$repo]\n";
my $repo_cnf= $cnf->{repositories}->{$repo};
my $t_reg_cnf= $cnf->{doi_registries}->{$repo_cnf->{t_registry}};
my $reg_cnf= $cnf->{doi_registries}->{$repo_cnf->{registry}};
# print __LINE__, " repo_cnf: ", main::Dumper ($repo_cnf);
# print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf);
print __LINE__, " t_reg_cnf: ", main::Dumper ($t_reg_cnf);
print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf);
my $repo_obj= new Phaidra::DataCite (config => $repo_cnf);
# print __LINE__, " repo_obj: ", main::Dumper ($repo_obj);
......@@ -127,18 +158,14 @@ sub register_url
my $dc_res= $api_res->{datacite};
print __LINE__, " dc_res: ", Dumper ($dc_res);
if ($dc_res->{status} ne 'OK')
{
print "Metadata not ok; status=[$dc_res->{status}] errors: ", Dumper ($dc_res->{errors});
return undef;
}
# TODO:
# * mint new DOI
# * insert DOI in metadata
my $reg_obj= new DataCite::API (config => $reg_cnf, 'xmode' => 'test');
my $doi_string= $reg_obj->mint_doi();
# NOTE: if ($dc_res->{status} ne 'OK') request a dummy doi_string
my $t_reg_obj= new DataCite::API (config => $t_reg_cnf, 'xmode' => 'test');
my $doi_string= $t_reg_obj->mint_doi();
my $doi_element=
{
......@@ -160,10 +187,38 @@ sub register_url
my $xml_new= $repo_obj->json_2_xml ($md);
print __LINE__, " xml_new=[$xml_new]\n";
if ($dc_res->{status} ne 'OK')
{
print "Metadata not ok; status=[$dc_res->{status}] errors: ", Dumper ($dc_res->{errors});
return undef;
}
# TODO: interact with the IRMA database to find out if the DOI string is really unique and register in there
# TODO: interact with DataCite API to register the DOI with the metadata
$reg_obj->register_doi ($doi_string, $xml_new, $repo_url);
unless ($t_reg_obj->register_doi ($doi_string, $xml_new, $repo_url))
{
print "ATTN: register_doi with Test-DOI was not ok\n";
return undef;
}
my $prod_doi_string= find_doi_string ($repo_url);
unless (defined ($prod_doi_string))
{
print "NOTE: no identifier registered sofar\n";
return undef;
}
my $reg_obj= new DataCite::API (config => $reg_cnf, 'xmode' => 'test');
$doi_element->{value}= $prod_doi_string;
my $prod_xml_new= $repo_obj->json_2_xml ($md);
print __LINE__, " prod_xml_new=[$prod_xml_new]\n";
unless ($reg_obj->register_doi ($prod_doi_string, $prod_xml_new, $repo_url))
{
print "ATTN: register_doi doi=[$prod_doi_string] was not ok\n";
return undef;
}
}
else
{
......@@ -172,80 +227,84 @@ sub register_url
}
# ----------------------------------------------------------------------------
sub main_function
sub setup_identifiers
{
my $fnm= shift;
print "main_function: $fnm\n";
hex_dump_file ($fnm);
$tsv= new Util::Simple_CSV (load => $identifiers_file, separator => "\t", no_array => 1);
}
# ----------------------------------------------------------------------------
sub hex_dump_file
sub find_doi_string
{
my $fnm= shift;
my $buffer_size= shift || 4 * 1048576;
# my $buffer_size= shift || 32*1024;
open (FI, '<:raw', $fnm) or die "can not read [$fnm]\n";
my $buffer;
my $segment= 0;
while (1)
my $url= shift;
setup_identifiers() unless (defined ($tsv));
my $data= $tsv->{data};
foreach my $row (@$data)
{
my $rc1= sysread (FI, $buffer, $buffer_size);
printf ("segment=[%s] fnm=[%d] rc1=[0x%08lx] (bs=0x%08lx)\n", $segment++, $fnm, $rc1, $buffer_size);
hex_dump ($buffer);
last if ($rc1 < $buffer_size);
my ($identifier, $canonical_url)= map { $row->{$_} } qw(identifier canonical_url);
next unless ($identifier && $canonical_url);
return $identifier if ($url eq $canonical_url);
}
close (FI);
undef;
}
# ----------------------------------------------------------------------------
sub hex_dump
sub order_identifiers
{
my $data= shift;
local *FX= shift || *STDOUT;
my $order_count= shift;
my $off= 0;
my ($i, $c, $v);
setup_identifiers() unless (defined ($tsv));
my $run= 1;
DATA: while ($run)
{
my $char= '';
my $hex= '';
my $offx= sprintf ('%08X', $off);
my $counter= get_last_id($tsv);
for ($i= 0; $i < 16; $i++)
print __LINE__, " identifier: ", main::Dumper ($counter);
for (my $i= 0; $i < $order_count; $i++)
{
$c= substr ($data, $off+$i, 1);
my $next_identifier= join ('.', $ifx, ++$counter->{$ifx});
if ($i == 8)
{
$hex .= ' ';
my $data= { na_id => $na_id, context_id => $context_id, ticket => $ticket, identifier => $next_identifier };
push (@{$tsv->{data}} => $data);
}
$tsv->save_csv_file ();
}
if ($c ne '')
sub get_last_id
{
my $tsv= shift;
my $data= $tsv->{data};
my %counter;
foreach my $row (@$data)
{
# $data= substr ($data, 1);
$v= unpack ('C', $c);
$c= '.' if ($v < 0x20 || $v >= 0x7F);
my $identifier= $row->{identifier};
next unless ($identifier); # TODO: maybe write warning
$char .= $c;
$hex .= sprintf (' %02X', $v);
# my ($pfx, $sfx)= split ('/', $identifier, 2);
my @sfx= split (/\./, $identifier);
my $cnt= pop (@sfx);
my $ifx= join ('.', @sfx);
# print __LINE__, " pfx=[$pfx] sfx=[$sfx] ifx=[$ifx] cnt=[$cnt]\n";
print __LINE__, " ifx=[$ifx] cnt=[$cnt]\n";
if (!exists ($counter{$ifx}) || $counter{$ifx} < $cnt)
{
$counter{$ifx}= $cnt;
}
else
elsif ($counter{$ifx} == $cnt)
{
$char .= ' ';
$hex .= ' ';
$run= 0;
print "ATTN: duplicate identifier=[$identifier]\n";
}
elsif ($counter{$ifx} > $cnt)
{ # higher counter known, nothing to do
}
print FX "$offx $hex |$char|\n";
$off += 0x10;
}
(wantarray) ? %counter : \%counter;
}
__END__
......@@ -264,3 +323,12 @@ instead of
"registry" : "DataCite_Test_Prod",
=head1 NOTES
=head2 uscholar
* Telefonat mit Guido Blechl 2017-12-04T1320
* Fuer uscholar Objekte soll der Permalink von Phaidra eingetragen werden
......@@ -68,6 +68,8 @@ EOX
print STDERR "ATTN: register_doi POST doi returned code2=[$code2] res2=[$res2]\n";
return undef;
}
1;
}
sub datacite_request
......
......@@ -39,7 +39,7 @@ sub get_metadata
if ($fmt eq 'xml')
{
$url_md .= '?format=xml';
$accept= 'application/json';
$accept= 'application/json'; # TODO ???
}
print __LINE__, " fmt=[$fmt] url_md=[$url_md]\n";
......@@ -60,7 +60,7 @@ sub get_metadata
my $code= $res->code();
my $content= $res->content();
# print __LINE__, " code=[$code] content=[$content]\n";
print __LINE__, " code=[$code] content=[$content]\n";
# main::hex_dump ($content);
# TODO/HACK: fix double UTF8 encoding handed down from PhaidraAPI
......@@ -98,6 +98,7 @@ EOX
);
$writer->startTag('resource', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', 'xmlns' => 'http://datacite.org/schema/kernel-4', 'xsi:schemaLocation' => $datacite_schemaLocation);
print __LINE__, " json: ",main::Dumper($json);
json_2_xml_rec(undef, $json, $writer);
$writer->endTag('resource');
......@@ -118,36 +119,46 @@ sub json_2_xml_rec
my $children = shift;
my $writer = shift;
foreach my $child (@{$children}){
foreach my $child (@{$children})
{
my $children_size = defined($child->{children}) ? scalar (@{$child->{children}}) : 0;
my $attributes_size = defined($child->{attributes}) ? scalar (@{$child->{attributes}}) : 0;
if((!defined($child->{value}) || ($child->{value} eq '')) && $children_size == 0 && $attributes_size == 0){
if((!defined($child->{value}) || ($child->{value} eq '')) && $children_size == 0 && $attributes_size == 0)
{
next;
}
if (defined($child->{attributes}) && (scalar @{$child->{attributes}} > 0)){
if (defined($child->{attributes}) && (scalar @{$child->{attributes}} > 0))
{
my @attrs;
foreach my $a (@{$child->{attributes}}){
if (defined($a->{value}) && $a->{value} ne '')
{
if($a->{xmlname} eq 'DOESNOTWORK_lang'){
if ($a->{xmlname} eq 'DOESNOTWORK_lang')
{
push @attrs, ['http://www.w3.org/XML/1998/namespace', 'lang'] => $a->{value};
}else{
}
else
{
push @attrs, $a->{xmlname} => $a->{value};
}
}
}
$writer->startTag($child->{xmlname}, @attrs);
}else{
}
else
{
$writer->startTag($child->{xmlname});
}
if($children_size > 0){
if ($children_size > 0)
{
json_2_xml_rec($child, $child->{children}, $writer);
}else{
}
else
{
$writer->characters($child->{value});
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment