diff --git a/.gitignore b/.gitignore index 63dcd8d0c54163a0fa05b938ffa8b87c976f99a9..a44fd12f13c8a1f80f4276b9fd4095b89215c5f3 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ loader-*.txt typescript table_eprint RMS +na/ /blib/ /.build/ _build/ diff --git a/dcd.pl b/dcd.pl index c39ef14fa542952d3b723d727f54df488edd5eae..0d88a355f21b8265d0f1fe1556ef06b0e13f49af 100755 --- a/dcd.pl +++ b/dcd.pl @@ -5,6 +5,14 @@ DataCite DOI +=head1 USAGE + + dcd.pl order 5 ... reserve 5 new DOIs + +=head1 EXAMPLES + + dcd.pl --ticket 14012 order 1 + =cut use strict; @@ -18,6 +26,7 @@ $Data::Dumper::Indent= 1; # use Module; use JSON; use Util::JSON; +use Util::Simple_CSV; binmode( STDOUT, ':utf8' ); autoflush STDOUT 1; @@ -29,6 +38,15 @@ use DataCite::API; my $config_file= '/etc/irma/DataCite.json'; +# identifiers table +my $na_path= 'na/na-dcd-10-25365'; +my $identifiers_file= join ('/', $na_path, 'identifiers.tsv'); +my $ifx= '10.25365/phaidra'; +my $na_id= 1; +my $context_id= 1; +my $ticket; +my $tsv; + my $x_flag= 0; my @PARS; @@ -39,8 +57,10 @@ while (defined ($arg= shift (@ARGV))) elsif ($arg =~ /^--(.+)/) { my ($opt, $val)= split ('=', $1, 2); - if ($opt eq 'help') { usage(); } - else { usage(); } + + if ($opt eq 'help') { usage(); } + elsif ($opt eq 'ticket') { $ticket= $val || shift (@ARGV); } + else { usage(); } } elsif ($arg =~ /^-(.+)/) { @@ -72,6 +92,11 @@ if ($op_code eq 'register') my $repo_url= shift (@PARS); register_url ($cnf, $repo_url); } +elsif ($op_code eq 'order') +{ + my $order_count= shift (@PARS) || 1; + order_identifiers ($order_count); +} exit (0); @@ -100,17 +125,23 @@ sub register_url my $repo_url= shift; print __LINE__, " repo_url=[$repo_url]\n"; - if ($repo_url =~ m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$#) + if ($repo_url =~ m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$# + # || $repo_url =~ m# ... uscholar ... #) + ) { my ($repo, $s1, $s2, $s3, $pid)= ($1, $2, $3, $4, $5); + usage ("unknown repo=[$repo]") unless (exists ($cnf->{repositories}->{$repo})); print __LINE__, " repo=[$repo]\n"; my $repo_cnf= $cnf->{repositories}->{$repo}; + + my $t_reg_cnf= $cnf->{doi_registries}->{$repo_cnf->{t_registry}}; my $reg_cnf= $cnf->{doi_registries}->{$repo_cnf->{registry}}; # print __LINE__, " repo_cnf: ", main::Dumper ($repo_cnf); - # print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf); + print __LINE__, " t_reg_cnf: ", main::Dumper ($t_reg_cnf); + print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf); my $repo_obj= new Phaidra::DataCite (config => $repo_cnf); # print __LINE__, " repo_obj: ", main::Dumper ($repo_obj); @@ -127,18 +158,14 @@ sub register_url my $dc_res= $api_res->{datacite}; print __LINE__, " dc_res: ", Dumper ($dc_res); - if ($dc_res->{status} ne 'OK') - { - print "Metadata not ok; status=[$dc_res->{status}] errors: ", Dumper ($dc_res->{errors}); - return undef; - } - # TODO: # * mint new DOI # * insert DOI in metadata - my $reg_obj= new DataCite::API (config => $reg_cnf, 'xmode' => 'test'); - my $doi_string= $reg_obj->mint_doi(); + # NOTE: if ($dc_res->{status} ne 'OK') request a dummy doi_string + + my $t_reg_obj= new DataCite::API (config => $t_reg_cnf, 'xmode' => 'test'); + my $doi_string= $t_reg_obj->mint_doi(); my $doi_element= { @@ -160,10 +187,38 @@ sub register_url my $xml_new= $repo_obj->json_2_xml ($md); print __LINE__, " xml_new=[$xml_new]\n"; + if ($dc_res->{status} ne 'OK') + { + print "Metadata not ok; status=[$dc_res->{status}] errors: ", Dumper ($dc_res->{errors}); + return undef; + } + # TODO: interact with the IRMA database to find out if the DOI string is really unique and register in there # TODO: interact with DataCite API to register the DOI with the metadata - $reg_obj->register_doi ($doi_string, $xml_new, $repo_url); + unless ($t_reg_obj->register_doi ($doi_string, $xml_new, $repo_url)) + { + print "ATTN: register_doi with Test-DOI was not ok\n"; + return undef; + } + + my $prod_doi_string= find_doi_string ($repo_url); + unless (defined ($prod_doi_string)) + { + print "NOTE: no identifier registered sofar\n"; + return undef; + } + + my $reg_obj= new DataCite::API (config => $reg_cnf, 'xmode' => 'test'); + $doi_element->{value}= $prod_doi_string; + my $prod_xml_new= $repo_obj->json_2_xml ($md); + print __LINE__, " prod_xml_new=[$prod_xml_new]\n"; + + unless ($reg_obj->register_doi ($prod_doi_string, $prod_xml_new, $repo_url)) + { + print "ATTN: register_doi doi=[$prod_doi_string] was not ok\n"; + return undef; + } } else { @@ -172,80 +227,84 @@ sub register_url } -# ---------------------------------------------------------------------------- -sub main_function +sub setup_identifiers { - my $fnm= shift; - print "main_function: $fnm\n"; - hex_dump_file ($fnm); + $tsv= new Util::Simple_CSV (load => $identifiers_file, separator => "\t", no_array => 1); } -# ---------------------------------------------------------------------------- -sub hex_dump_file +sub find_doi_string { - my $fnm= shift; - my $buffer_size= shift || 4 * 1048576; - # my $buffer_size= shift || 32*1024; - - open (FI, '<:raw', $fnm) or die "can not read [$fnm]\n"; - my $buffer; - my $segment= 0; - while (1) + my $url= shift; + + setup_identifiers() unless (defined ($tsv)); + + my $data= $tsv->{data}; + foreach my $row (@$data) { - my $rc1= sysread (FI, $buffer, $buffer_size); - printf ("segment=[%s] fnm=[%d] rc1=[0x%08lx] (bs=0x%08lx)\n", $segment++, $fnm, $rc1, $buffer_size); - hex_dump ($buffer); - last if ($rc1 < $buffer_size); + my ($identifier, $canonical_url)= map { $row->{$_} } qw(identifier canonical_url); + + next unless ($identifier && $canonical_url); + + return $identifier if ($url eq $canonical_url); } - close (FI); + + undef; } -# ---------------------------------------------------------------------------- -sub hex_dump +sub order_identifiers { - my $data= shift; - local *FX= shift || *STDOUT; + my $order_count= shift; - my $off= 0; - my ($i, $c, $v); + setup_identifiers() unless (defined ($tsv)); - my $run= 1; - DATA: while ($run) + my $counter= get_last_id($tsv); + +print __LINE__, " identifier: ", main::Dumper ($counter); + for (my $i= 0; $i < $order_count; $i++) { - my $char= ''; - my $hex= ''; - my $offx= sprintf ('%08X', $off); + my $next_identifier= join ('.', $ifx, ++$counter->{$ifx}); - for ($i= 0; $i < 16; $i++) - { - $c= substr ($data, $off+$i, 1); - - if ($i == 8) - { - $hex .= ' '; - } - - if ($c ne '') - { - # $data= substr ($data, 1); - $v= unpack ('C', $c); - $c= '.' if ($v < 0x20 || $v >= 0x7F); - - $char .= $c; - $hex .= sprintf (' %02X', $v); - } - else - { - $char .= ' '; - $hex .= ' '; - $run= 0; - } - } + my $data= { na_id => $na_id, context_id => $context_id, ticket => $ticket, identifier => $next_identifier }; + + push (@{$tsv->{data}} => $data); + } - print FX "$offx $hex |$char|\n"; + $tsv->save_csv_file (); +} + +sub get_last_id +{ + my $tsv= shift; - $off += 0x10; + my $data= $tsv->{data}; + my %counter; + foreach my $row (@$data) + { + my $identifier= $row->{identifier}; + + next unless ($identifier); # TODO: maybe write warning + + # my ($pfx, $sfx)= split ('/', $identifier, 2); + my @sfx= split (/\./, $identifier); + my $cnt= pop (@sfx); + my $ifx= join ('.', @sfx); + + # print __LINE__, " pfx=[$pfx] sfx=[$sfx] ifx=[$ifx] cnt=[$cnt]\n"; + print __LINE__, " ifx=[$ifx] cnt=[$cnt]\n"; + if (!exists ($counter{$ifx}) || $counter{$ifx} < $cnt) + { + $counter{$ifx}= $cnt; + } + elsif ($counter{$ifx} == $cnt) + { + print "ATTN: duplicate identifier=[$identifier]\n"; + } + elsif ($counter{$ifx} > $cnt) + { # higher counter known, nothing to do + } } + + (wantarray) ? %counter : \%counter; } __END__ @@ -264,3 +323,12 @@ instead of "registry" : "DataCite_Test_Prod", +=head1 NOTES + +=head2 uscholar + + * Telefonat mit Guido Blechl 2017-12-04T1320 + * Fuer uscholar Objekte soll der Permalink von Phaidra eingetragen werden + + + diff --git a/lib/DataCite/API.pm b/lib/DataCite/API.pm index 6e65ae886763f9a76fd2f3646adf88888482839e..fdd03936b0336ed33ef427dfd68d0a3240f2531d 100644 --- a/lib/DataCite/API.pm +++ b/lib/DataCite/API.pm @@ -68,6 +68,8 @@ EOX print STDERR "ATTN: register_doi POST doi returned code2=[$code2] res2=[$res2]\n"; return undef; } + + 1; } sub datacite_request diff --git a/lib/Phaidra/DataCite.pm b/lib/Phaidra/DataCite.pm index 0ae63f4624efd763e14454ee5c00097355bf7af6..218fc05957fb9efdd45ec8c4ced82a8d79bdff3a 100644 --- a/lib/Phaidra/DataCite.pm +++ b/lib/Phaidra/DataCite.pm @@ -39,18 +39,18 @@ sub get_metadata if ($fmt eq 'xml') { $url_md .= '?format=xml'; - $accept= 'application/json'; + $accept= 'application/json'; # TODO ??? } print __LINE__, " fmt=[$fmt] url_md=[$url_md]\n"; # build request - my $headers = HTTP::Headers->new( + my $headers= HTTP::Headers->new( 'Accept' => $accept, # 'Content-Type' => $content_type ); - my $req = HTTP::Request->new('GET' => $url_md, $headers); + my $req= HTTP::Request->new('GET' => $url_md, $headers); # not needed here $req->authorization_basic($user_name, $user_pw); # pass request to the user agent and get a response back @@ -60,7 +60,7 @@ sub get_metadata my $code= $res->code(); my $content= $res->content(); - # print __LINE__, " code=[$code] content=[$content]\n"; + print __LINE__, " code=[$code] content=[$content]\n"; # main::hex_dump ($content); # TODO/HACK: fix double UTF8 encoding handed down from PhaidraAPI @@ -98,6 +98,7 @@ EOX ); $writer->startTag('resource', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', 'xmlns' => 'http://datacite.org/schema/kernel-4', 'xsi:schemaLocation' => $datacite_schemaLocation); + print __LINE__, " json: ",main::Dumper($json); json_2_xml_rec(undef, $json, $writer); $writer->endTag('resource'); @@ -118,36 +119,46 @@ sub json_2_xml_rec my $children = shift; my $writer = shift; - foreach my $child (@{$children}){ - + foreach my $child (@{$children}) + { my $children_size = defined($child->{children}) ? scalar (@{$child->{children}}) : 0; my $attributes_size = defined($child->{attributes}) ? scalar (@{$child->{attributes}}) : 0; - if((!defined($child->{value}) || ($child->{value} eq '')) && $children_size == 0 && $attributes_size == 0){ + if((!defined($child->{value}) || ($child->{value} eq '')) && $children_size == 0 && $attributes_size == 0) + { next; } - if (defined($child->{attributes}) && (scalar @{$child->{attributes}} > 0)){ + if (defined($child->{attributes}) && (scalar @{$child->{attributes}} > 0)) + { my @attrs; foreach my $a (@{$child->{attributes}}){ - if(defined($a->{value}) && $a->{value} ne '') + if (defined($a->{value}) && $a->{value} ne '') { - if($a->{xmlname} eq 'DOESNOTWORK_lang'){ + if ($a->{xmlname} eq 'DOESNOTWORK_lang') + { push @attrs, ['http://www.w3.org/XML/1998/namespace', 'lang'] => $a->{value}; - }else{ + } + else + { push @attrs, $a->{xmlname} => $a->{value}; } } } $writer->startTag($child->{xmlname}, @attrs); - }else{ + } + else + { $writer->startTag($child->{xmlname}); } - if($children_size > 0){ + if ($children_size > 0) + { json_2_xml_rec($child, $child->{children}, $writer); - }else{ + } + else + { $writer->characters($child->{value}); }