DataCite DOI registration and reservation

b7acf18c · Gerhard Gonter · edcb234c · b7acf18c · b7acf18c · b7acf18c
Commit b7acf18c authored 7 years ago by Gerhard Gonter
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ loader-*.txt
 typescript
 table_eprint
 RMS
+na/
 /blib/
 /.build/
 _build/

--- a/dcd.pl
+++ b/dcd.pl
@@ -5,6 +5,14 @@

 DataCite DOI

+=head1 USAGE
+
+  dcd.pl order 5   ... reserve 5 new DOIs
+
+=head1 EXAMPLES
+
+  dcd.pl --ticket 14012 order 1
+
 =cut

 use strict;
@@ -18,6 +26,7 @@ $Data::Dumper::Indent= 1;
 # use Module;
 use JSON;
 use Util::JSON;
+use Util::Simple_CSV;

 binmode( STDOUT, ':utf8' ); autoflush STDOUT 1;

@@ -29,6 +38,15 @@ use DataCite::API;

 my $config_file= '/etc/irma/DataCite.json';

+# identifiers table
+my $na_path= 'na/na-dcd-10-25365';
+my $identifiers_file= join ('/', $na_path, 'identifiers.tsv');
+my $ifx= '10.25365/phaidra';
+my $na_id= 1;
+my $context_id= 1;
+my $ticket;
+my $tsv;
+
 my $x_flag= 0;

 my @PARS;
@@ -39,7 +57,9 @@ while (defined ($arg= shift (@ARGV)))
  elsif ($arg =~ /^--(.+)/)
  {
    my ($opt, $val)= split ('=', $1, 2);
+
       if ($opt eq 'help') { usage(); }
+    elsif ($opt eq 'ticket') { $ticket= $val || shift (@ARGV);  }
     else { usage(); }
  }
  elsif ($arg =~ /^-(.+)/)
@@ -72,6 +92,11 @@ if ($op_code eq 'register')
  my $repo_url= shift (@PARS);
  register_url ($cnf, $repo_url);
 }
+elsif ($op_code eq 'order')
+{
+  my $order_count= shift (@PARS) || 1;
+  order_identifiers ($order_count);
+}

 exit (0);

@@ -100,17 +125,23 @@ sub register_url
  my $repo_url= shift;

  print __LINE__, " repo_url=[$repo_url]\n";
-  if ($repo_url =~ m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$#)
+  if ($repo_url =~ m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$#
+      # || $repo_url =~ m# ... uscholar ... #)
+     )
  {
    my ($repo, $s1, $s2, $s3, $pid)= ($1, $2, $3, $4, $5);
+
    usage ("unknown repo=[$repo]") unless (exists ($cnf->{repositories}->{$repo}));
    print __LINE__, " repo=[$repo]\n";

    my $repo_cnf= $cnf->{repositories}->{$repo};
+
+    my $t_reg_cnf=  $cnf->{doi_registries}->{$repo_cnf->{t_registry}};
    my $reg_cnf=  $cnf->{doi_registries}->{$repo_cnf->{registry}};

    # print __LINE__, " repo_cnf: ", main::Dumper ($repo_cnf);
-    # print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf);
+    print __LINE__, " t_reg_cnf: ", main::Dumper ($t_reg_cnf);
+    print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf);

    my $repo_obj= new Phaidra::DataCite (config => $repo_cnf);
    # print __LINE__, " repo_obj: ", main::Dumper ($repo_obj);
@@ -127,18 +158,14 @@ sub register_url
    my $dc_res= $api_res->{datacite};
    print __LINE__, " dc_res: ", Dumper ($dc_res);

-    if ($dc_res->{status} ne 'OK')
-    {
-      print "Metadata not ok; status=[$dc_res->{status}] errors: ", Dumper ($dc_res->{errors});
-      return undef;
-    }
-
    # TODO:
    # * mint new DOI
    # * insert DOI in metadata

-    my $reg_obj= new DataCite::API (config => $reg_cnf, 'xmode' => 'test');
-    my $doi_string= $reg_obj->mint_doi();
+    # NOTE: if ($dc_res->{status} ne 'OK') request a dummy doi_string
+
+    my $t_reg_obj= new DataCite::API (config => $t_reg_cnf, 'xmode' => 'test');
+    my $doi_string= $t_reg_obj->mint_doi();
   
    my $doi_element=
    {
@@ -160,10 +187,38 @@ sub register_url
    my $xml_new= $repo_obj->json_2_xml ($md);
    print __LINE__, " xml_new=[$xml_new]\n";

+    if ($dc_res->{status} ne 'OK')
+    {
+      print "Metadata not ok; status=[$dc_res->{status}] errors: ", Dumper ($dc_res->{errors});
+      return undef;
+    }
+
    # TODO: interact with the IRMA database to find out if the DOI string is really unique and register in there

    # TODO: interact with DataCite API to register the DOI with the metadata
-    $reg_obj->register_doi ($doi_string, $xml_new, $repo_url);
+    unless ($t_reg_obj->register_doi ($doi_string, $xml_new, $repo_url))
+    {
+      print "ATTN: register_doi with Test-DOI was not ok\n";
+      return undef;
+    }
+
+    my $prod_doi_string= find_doi_string ($repo_url);
+    unless (defined ($prod_doi_string))
+    {
+      print "NOTE: no identifier registered sofar\n";
+      return undef;
+    }
+
+    my $reg_obj= new DataCite::API (config => $reg_cnf, 'xmode' => 'test');
+    $doi_element->{value}= $prod_doi_string;
+    my $prod_xml_new= $repo_obj->json_2_xml ($md);
+    print __LINE__, " prod_xml_new=[$prod_xml_new]\n";
+
+    unless ($reg_obj->register_doi ($prod_doi_string, $prod_xml_new, $repo_url))
+    {
+      print "ATTN: register_doi doi=[$prod_doi_string] was not ok\n";
+      return undef;
+    }
  }
  else
  {
@@ -172,80 +227,84 @@ sub register_url

 }

-# ----------------------------------------------------------------------------
-sub main_function
+sub setup_identifiers
 {
-  my $fnm= shift;
-  print "main_function: $fnm\n";
-  hex_dump_file ($fnm);
+  $tsv= new Util::Simple_CSV (load => $identifiers_file, separator => "\t", no_array => 1);
 }

-# ----------------------------------------------------------------------------
-sub hex_dump_file
+sub find_doi_string
 {
-  my $fnm= shift;
-  my $buffer_size= shift || 4 * 1048576;
-  # my $buffer_size= shift || 32*1024;
-
-  open (FI, '<:raw', $fnm) or die "can not read [$fnm]\n";
-  my $buffer;
-  my $segment= 0;
-  while (1)
+  my $url= shift;
+
+  setup_identifiers() unless (defined ($tsv));
+
+  my $data= $tsv->{data};
+  foreach my $row (@$data)
  {
-    my $rc1= sysread (FI, $buffer, $buffer_size);
-    printf ("segment=[%s] fnm=[%d] rc1=[0x%08lx] (bs=0x%08lx)\n", $segment++, $fnm, $rc1, $buffer_size);
-    hex_dump ($buffer);
-    last if ($rc1 < $buffer_size);
+    my ($identifier, $canonical_url)= map { $row->{$_} } qw(identifier canonical_url);
+    
+    next unless ($identifier && $canonical_url);
+
+    return $identifier if ($url eq $canonical_url);
  }
-  close (FI);
+
+  undef;
 }

-# ----------------------------------------------------------------------------
-sub hex_dump
+sub order_identifiers
 {
-  my $data= shift;
-  local *FX= shift || *STDOUT;
+  my $order_count= shift;

-  my $off= 0;
-  my ($i, $c, $v);
+  setup_identifiers() unless (defined ($tsv));

-  my $run= 1;
-  DATA: while ($run)
-  {
-    my $char= '';
-    my $hex= '';
-    my $offx= sprintf ('%08X', $off);
+  my $counter= get_last_id($tsv);

-    for ($i= 0; $i < 16; $i++)
+print __LINE__, " identifier: ", main::Dumper ($counter);
+  for (my $i= 0; $i < $order_count; $i++)
  {
-      $c= substr ($data, $off+$i, 1);
+    my $next_identifier= join ('.', $ifx, ++$counter->{$ifx});

-      if ($i == 8)
-      {
-        $hex  .= ' ';
+    my $data= { na_id => $na_id, context_id => $context_id, ticket => $ticket, identifier => $next_identifier };
+
+    push (@{$tsv->{data}} => $data);
+  }
+
+  $tsv->save_csv_file ();
 }

-      if ($c ne '')
+sub get_last_id
+{
+  my $tsv= shift;
+
+  my $data= $tsv->{data};
+  my %counter;
+  foreach my $row (@$data)
  {
-        # $data= substr ($data, 1);
-        $v= unpack ('C', $c);
-        $c= '.' if ($v < 0x20 || $v >= 0x7F);
+    my $identifier= $row->{identifier};
+    
+    next unless ($identifier); # TODO: maybe write warning

-        $char .= $c;
-        $hex .= sprintf (' %02X', $v);
+    # my ($pfx, $sfx)= split ('/', $identifier, 2);
+    my @sfx= split (/\./, $identifier);
+    my $cnt= pop (@sfx);
+    my $ifx= join ('.', @sfx);
+
+    # print __LINE__, " pfx=[$pfx] sfx=[$sfx] ifx=[$ifx] cnt=[$cnt]\n";
+    print __LINE__, " ifx=[$ifx] cnt=[$cnt]\n";
+    if (!exists ($counter{$ifx}) || $counter{$ifx} < $cnt)
+    {
+      $counter{$ifx}= $cnt;
    }
-      else
+    elsif ($counter{$ifx} == $cnt)
    {
-        $char .= ' ';
-        $hex  .= '   ';
-        $run= 0;
+      print "ATTN: duplicate identifier=[$identifier]\n";
    }
+    elsif ($counter{$ifx} > $cnt)
+    { # higher counter known, nothing to do
    }
-
-    print FX "$offx $hex  |$char|\n";
-
-    $off += 0x10;
  }
+
+  (wantarray) ? %counter : \%counter;
 }

 __END__
@@ -264,3 +323,12 @@ instead of

      "registry" : "DataCite_Test_Prod",

+=head1 NOTES
+
+=head2 uscholar
+
+ * Telefonat mit Guido Blechl 2017-12-04T1320
+   * Fuer uscholar Objekte soll der Permalink von Phaidra eingetragen werden
+
+
+
--- a/lib/DataCite/API.pm
+++ b/lib/DataCite/API.pm
@@ -68,6 +68,8 @@ EOX
    print STDERR "ATTN: register_doi POST doi returned code2=[$code2] res2=[$res2]\n";
    return undef;
  }
+
+  1;
 }

 sub datacite_request

--- a/lib/Phaidra/DataCite.pm
+++ b/lib/Phaidra/DataCite.pm
@@ -39,7 +39,7 @@ sub get_metadata
  if ($fmt eq 'xml')
  {
    $url_md .= '?format=xml';
-    $accept= 'application/json';
+    $accept= 'application/json'; # TODO ???
  }

  print __LINE__, " fmt=[$fmt] url_md=[$url_md]\n";
@@ -60,7 +60,7 @@ sub get_metadata

  my $code= $res->code();
  my $content= $res->content();
-  # print __LINE__, " code=[$code] content=[$content]\n";
+  print __LINE__, " code=[$code] content=[$content]\n";
  # main::hex_dump ($content);

  # TODO/HACK: fix double UTF8 encoding handed down from PhaidraAPI
@@ -98,6 +98,7 @@ EOX
    );

    $writer->startTag('resource', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', 'xmlns' => 'http://datacite.org/schema/kernel-4', 'xsi:schemaLocation' => $datacite_schemaLocation);
+    print __LINE__, " json: ",main::Dumper($json);
    json_2_xml_rec(undef, $json, $writer);
    $writer->endTag('resource');

@@ -118,36 +119,46 @@ sub json_2_xml_rec
  my $children = shift;
  my $writer = shift;

-  foreach my $child (@{$children}){
-
+  foreach my $child (@{$children})
+  {
    my $children_size = defined($child->{children}) ? scalar (@{$child->{children}}) : 0;
    my $attributes_size = defined($child->{attributes}) ? scalar (@{$child->{attributes}}) : 0;

-    if((!defined($child->{value}) || ($child->{value} eq '')) && $children_size == 0 && $attributes_size == 0){
+    if((!defined($child->{value}) || ($child->{value} eq '')) && $children_size == 0 && $attributes_size == 0)
+    {
      next;
    }

-    if (defined($child->{attributes}) && (scalar @{$child->{attributes}} > 0)){
+    if (defined($child->{attributes}) && (scalar @{$child->{attributes}} > 0))
+    {
      my @attrs;
      foreach my $a (@{$child->{attributes}}){
        if (defined($a->{value}) && $a->{value} ne '')
        {
-          if($a->{xmlname} eq 'DOESNOTWORK_lang'){
+          if ($a->{xmlname} eq 'DOESNOTWORK_lang')
+          {
            push @attrs, ['http://www.w3.org/XML/1998/namespace', 'lang'] => $a->{value};
-          }else{
+          }
+          else
+          {
            push @attrs, $a->{xmlname} => $a->{value};
          }
        }
      }

      $writer->startTag($child->{xmlname}, @attrs);
-    }else{
+    }
+    else
+    {
      $writer->startTag($child->{xmlname});
    }

-    if($children_size > 0){
+    if ($children_size > 0)
+    {
      json_2_xml_rec($child, $child->{children}, $writer);
-    }else{
+    }
+    else
+    {
      $writer->characters($child->{value});
    }