From 2d639dd1b58ba4ad96593be6bb8914ff0fd8c0b8 Mon Sep 17 00:00:00 2001
From: Gerhard Gonter <ggonter@gmail.com>
Date: Fri, 13 Oct 2023 15:54:17 +0200
Subject: [PATCH] added code to inject metadata fields "publisher" and "publicationYear", if missing

---
 dcd.pl | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/dcd.pl b/dcd.pl
index 8458566..30b4de2 100755
--- a/dcd.pl
+++ b/dcd.pl
@@ -291,6 +291,8 @@ print __LINE__, " prod_doi_string=[$prod_doi_string] pfx=[$pfx] ns=[$ns] nr=[$nr
   unshift (@$md, $doi_element);
   print __LINE__, " md: ", main::Dumper ($md);
 
+  fixup_phaidra_metadata($md);
+
   my $xml_new= $repo_obj->json_2_xml ($md);
   print __LINE__, " xml_new=[$xml_new]\n";
 
@@ -311,8 +313,10 @@ print __LINE__, " prod_doi_string=[$prod_doi_string] pfx=[$pfx] ns=[$ns] nr=[$nr
   syswrite (DC_XML, $prod_xml_new);
   close (DC_XML);
 
-  print "metatada fetched: ", get_ts(), "\n";
-  if ($datacite_res->{status} ne 'OK')
+  print __LINE__, " metatada fetched: ", get_ts(), "\n";
+  if ($datacite_res->{status} ne 'OK'
+      && $datacite_res->{status} ne 'INCOMPLETE' # maybe we should check, what was missing
+     )
   {
     print "Metadata not ok; status=[$datacite_res->{status}] errors: ", Dumper ($datacite_res->{errors});
     return undef;
@@ -352,6 +356,51 @@ print __LINE__, " prod_doi_string=[$prod_doi_string] pfx=[$pfx] ns=[$ns] nr=[$nr
 }
 
 
+# Inject the metadata fields "publisher" and "publicationYear" into $md
+# if they are missing.
+#
+# $md must be an array reference, otherwise this function dies; its
+# elements are expected to be hash references with an {xmlname} key.
+# The array is modified in place: a missing publisher (value ':none') is
+# inserted at index 3 and a missing publicationYear (the current year)
+# at index 4, or appended when the array is shorter than that.
+#
+# TODO: schema checking could work differently
+sub fixup_phaidra_metadata
+{
+  my $md= shift; # should be an array
+
+  # print __LINE__, " fixup_phaidra_metadata: md= ", Dumper($md);
+  die 'not an array reference' unless (ref($md) eq 'ARRAY');
+
+  my $cnt= @$md;
+  print __LINE__, " md element count=[$cnt]\n";
+  my %pos;
+  for (my $i= 0; $i < $cnt; $i++)
+  {
+    my $e= $md->[$i];
+    print __LINE__, " fpm: i=[$i] e= ", Dumper($e);
+    $pos{$e->{xmlname}}= $i;
+  }
+
+  print __LINE__, " pos: ", Dumper(\%pos);
+
+  unless (exists ($pos{publisher}))
+  {
+    # clamp the offset: splice() warns if it is past the end of the array
+    my $at= (@$md < 3) ? scalar (@$md) : 3;
+    splice(@$md, $at, 0, { xmlname => 'publisher', 'value' => ':none' });
+  }
+
+  unless (exists ($pos{publicationYear}))
+  {
+    # use the current year instead of a hard-coded one which goes stale
+    my $year= (localtime (time ()))[5] + 1900;
+    my $at= (@$md < 4) ? scalar (@$md) : 4;
+    splice(@$md, $at, 0, { xmlname => 'publicationYear', 'value' => sprintf ('%d', $year) });
+  }
+}
+
 sub get_ts
 {
   my $time= shift || time ();
-- 
GitLab