Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
irma2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Gerhard Gonter
irma2
Commits
b7acf18c
Commit
b7acf18c
authored
7 years ago
by
Gerhard Gonter
Browse files
Options
Downloads
Patches
Plain Diff
DataCite DOI registration and reservation
parent
edcb234c
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
dcd.pl
+139
-71
139 additions, 71 deletions
dcd.pl
lib/DataCite/API.pm
+2
-0
2 additions, 0 deletions
lib/DataCite/API.pm
lib/Phaidra/DataCite.pm
+25
-14
25 additions, 14 deletions
lib/Phaidra/DataCite.pm
with
167 additions
and
85 deletions
.gitignore
+
1
−
0
View file @
b7acf18c
...
...
@@ -12,6 +12,7 @@ loader-*.txt
typescript
table_eprint
RMS
na/
/blib/
/.build/
_build/
...
...
This diff is collapsed.
Click to expand it.
dcd.pl
+
139
−
71
View file @
b7acf18c
...
...
@@ -5,6 +5,14 @@
DataCite DOI
=head1 USAGE
dcd.pl order 5 ... reserve 5 new DOIs
=head1 EXAMPLES
dcd.pl --ticket 14012 order 1
=cut
use
strict
;
...
...
@@ -18,6 +26,7 @@ $Data::Dumper::Indent= 1;
# use Module;
use
JSON
;
use
Util::
JSON
;
use
Util::
Simple_CSV
;
binmode
(
STDOUT
,
'
:utf8
'
);
autoflush
STDOUT
1
;
...
...
@@ -29,6 +38,15 @@ use DataCite::API;
my
$config_file
=
'
/etc/irma/DataCite.json
';
# identifiers table
my
$na_path
=
'
na/na-dcd-10-25365
';
my
$identifiers_file
=
join
('
/
',
$na_path
,
'
identifiers.tsv
');
my
$ifx
=
'
10.25365/phaidra
';
my
$na_id
=
1
;
my
$context_id
=
1
;
my
$ticket
;
my
$tsv
;
my
$x_flag
=
0
;
my
@PARS
;
...
...
@@ -39,7 +57,9 @@ while (defined ($arg= shift (@ARGV)))
elsif
(
$arg
=~
/^--(.+)/
)
{
my
(
$opt
,
$val
)
=
split
('
=
',
$
1
,
2
);
if
(
$opt
eq
'
help
')
{
usage
();
}
elsif
(
$opt
eq
'
ticket
')
{
$ticket
=
$val
||
shift
(
@ARGV
);
}
else
{
usage
();
}
}
elsif
(
$arg
=~
/^-(.+)/
)
...
...
@@ -72,6 +92,11 @@ if ($op_code eq 'register')
my
$repo_url
=
shift
(
@PARS
);
register_url
(
$cnf
,
$repo_url
);
}
elsif
(
$op_code
eq
'
order
')
{
my
$order_count
=
shift
(
@PARS
)
||
1
;
order_identifiers
(
$order_count
);
}
exit
(
0
);
...
...
@@ -100,17 +125,23 @@ sub register_url
my
$repo_url
=
shift
;
print
__LINE__
,
"
repo_url=[
$repo_url
]
\n
";
if
(
$repo_url
=~
m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$#
)
if
(
$repo_url
=~
m#^https?://(phaidra(-(sandbox|temp))?\.univie\.ac\.at)/(detail_object/)?(o:\d+)$#
# || $repo_url =~ m# ... uscholar ... #)
)
{
my
(
$repo
,
$s1
,
$s2
,
$s3
,
$pid
)
=
(
$
1
,
$
2
,
$
3
,
$
4
,
$
5
);
usage
("
unknown repo=[
$repo
]
")
unless
(
exists
(
$cnf
->
{
repositories
}
->
{
$repo
}));
print
__LINE__
,
"
repo=[
$repo
]
\n
";
my
$repo_cnf
=
$cnf
->
{
repositories
}
->
{
$repo
};
my
$t_reg_cnf
=
$cnf
->
{
doi_registries
}
->
{
$repo_cnf
->
{
t_registry
}};
my
$reg_cnf
=
$cnf
->
{
doi_registries
}
->
{
$repo_cnf
->
{
registry
}};
# print __LINE__, " repo_cnf: ", main::Dumper ($repo_cnf);
# print __LINE__, " reg_cnf: ", main::Dumper ($reg_cnf);
print
__LINE__
,
"
t_reg_cnf:
",
main::
Dumper
(
$t_reg_cnf
);
print
__LINE__
,
"
reg_cnf:
",
main::
Dumper
(
$reg_cnf
);
my
$repo_obj
=
new
Phaidra::
DataCite
(
config
=>
$repo_cnf
);
# print __LINE__, " repo_obj: ", main::Dumper ($repo_obj);
...
...
@@ -127,18 +158,14 @@ sub register_url
my
$dc_res
=
$api_res
->
{
datacite
};
print
__LINE__
,
"
dc_res:
",
Dumper
(
$dc_res
);
if
(
$dc_res
->
{
status
}
ne
'
OK
')
{
print
"
Metadata not ok; status=[
$dc_res
->{status}] errors:
",
Dumper
(
$dc_res
->
{
errors
});
return
undef
;
}
# TODO:
# * mint new DOI
# * insert DOI in metadata
my
$reg_obj
=
new
DataCite::
API
(
config
=>
$reg_cnf
,
'
xmode
'
=>
'
test
');
my
$doi_string
=
$reg_obj
->
mint_doi
();
# NOTE: if ($dc_res->{status} ne 'OK') request a dummy doi_string
my
$t_reg_obj
=
new
DataCite::
API
(
config
=>
$t_reg_cnf
,
'
xmode
'
=>
'
test
');
my
$doi_string
=
$t_reg_obj
->
mint_doi
();
my
$doi_element
=
{
...
...
@@ -160,10 +187,38 @@ sub register_url
my
$xml_new
=
$repo_obj
->
json_2_xml
(
$md
);
print
__LINE__
,
"
xml_new=[
$xml_new
]
\n
";
if
(
$dc_res
->
{
status
}
ne
'
OK
')
{
print
"
Metadata not ok; status=[
$dc_res
->{status}] errors:
",
Dumper
(
$dc_res
->
{
errors
});
return
undef
;
}
# TODO: interact with the IRMA database to find out if the DOI string is really unique and register in there
# TODO: interact with DataCite API to register the DOI with the metadata
$reg_obj
->
register_doi
(
$doi_string
,
$xml_new
,
$repo_url
);
unless
(
$t_reg_obj
->
register_doi
(
$doi_string
,
$xml_new
,
$repo_url
))
{
print
"
ATTN: register_doi with Test-DOI was not ok
\n
";
return
undef
;
}
my
$prod_doi_string
=
find_doi_string
(
$repo_url
);
unless
(
defined
(
$prod_doi_string
))
{
print
"
NOTE: no identifier registered sofar
\n
";
return
undef
;
}
my
$reg_obj
=
new
DataCite::
API
(
config
=>
$reg_cnf
,
'
xmode
'
=>
'
test
');
$doi_element
->
{
value
}
=
$prod_doi_string
;
my
$prod_xml_new
=
$repo_obj
->
json_2_xml
(
$md
);
print
__LINE__
,
"
prod_xml_new=[
$prod_xml_new
]
\n
";
unless
(
$reg_obj
->
register_doi
(
$prod_doi_string
,
$prod_xml_new
,
$repo_url
))
{
print
"
ATTN: register_doi doi=[
$prod_doi_string
] was not ok
\n
";
return
undef
;
}
}
else
{
...
...
@@ -172,80 +227,84 @@ sub register_url
}
# ----------------------------------------------------------------------------
sub
main_function
sub
setup_identifiers
{
my
$fnm
=
shift
;
print
"
main_function:
$fnm
\n
";
hex_dump_file
(
$fnm
);
$tsv
=
new
Util::
Simple_CSV
(
load
=>
$identifiers_file
,
separator
=>
"
\t
",
no_array
=>
1
);
}
# ----------------------------------------------------------------------------
sub
hex_dump_file
sub
find_doi_string
{
my
$fnm
=
shift
;
my
$buffer_size
=
shift
||
4
*
1048576
;
# my $buffer_size= shift || 32*1024;
open
(
FI
,
'
<:raw
',
$fnm
)
or
die
"
can not read [
$fnm
]
\n
";
my
$buffer
;
my
$segment
=
0
;
while
(
1
)
my
$url
=
shift
;
setup_identifiers
()
unless
(
defined
(
$tsv
));
my
$data
=
$tsv
->
{
data
};
foreach
my
$row
(
@$data
)
{
my
$rc1
=
sysread
(
FI
,
$buffer
,
$buffer_size
);
printf
("
segment=[%s] fnm=[%d] rc1=[0x%08lx] (bs=0x%08lx)
\n
",
$segment
++
,
$fnm
,
$rc1
,
$buffer_size
);
hex_dump
(
$buffer
);
last
if
(
$rc1
<
$buffer_size
);
my
(
$identifier
,
$canonical_url
)
=
map
{
$row
->
{
$_
}
}
qw(identifier canonical_url)
;
next
unless
(
$identifier
&&
$canonical_url
);
return
$identifier
if
(
$url
eq
$canonical_url
);
}
close
(
FI
);
undef
;
}
# ----------------------------------------------------------------------------
sub
hex_dump
sub
order_identifiers
{
my
$data
=
shift
;
local
*FX
=
shift
||
*STDOUT
;
my
$order_count
=
shift
;
my
$off
=
0
;
my
(
$i
,
$c
,
$v
);
setup_identifiers
()
unless
(
defined
(
$tsv
));
my
$run
=
1
;
DATA
:
while
(
$run
)
{
my
$char
=
'';
my
$hex
=
'';
my
$offx
=
sprintf
('
%08X
',
$off
);
my
$counter
=
get_last_id
(
$tsv
);
for
(
$i
=
0
;
$i
<
16
;
$i
++
)
print
__LINE__
,
"
identifier:
",
main::
Dumper
(
$counter
);
for
(
my
$i
=
0
;
$i
<
$order_count
;
$i
++
)
{
$c
=
substr
(
$data
,
$off
+
$i
,
1
);
my
$next_identifier
=
join
('
.
',
$ifx
,
++
$counter
->
{
$ifx
}
);
if
(
$i
==
8
)
{
$hex
.=
'
';
my
$data
=
{
na_id
=>
$na_id
,
context_id
=>
$context_id
,
ticket
=>
$ticket
,
identifier
=>
$next_identifier
};
push
(
@
{
$tsv
->
{
data
}}
=>
$data
);
}
$tsv
->
save_csv_file
();
}
if
(
$c
ne
'')
sub
get_last_id
{
my
$tsv
=
shift
;
my
$data
=
$tsv
->
{
data
};
my
%counter
;
foreach
my
$row
(
@$data
)
{
# $data= substr ($data, 1)
;
$v
=
unpack
('
C
',
$c
);
$c
=
'
.
'
if
(
$v
<
0x20
||
$v
>=
0x7F
);
my
$identifier
=
$row
->
{
identifier
}
;
next
unless
(
$identifier
);
# TODO: maybe write warning
$char
.=
$c
;
$hex
.=
sprintf
('
%02X
',
$v
);
# my ($pfx, $sfx)= split ('/', $identifier, 2);
my
@sfx
=
split
(
/\./
,
$identifier
);
my
$cnt
=
pop
(
@sfx
);
my
$ifx
=
join
('
.
',
@sfx
);
# print __LINE__, " pfx=[$pfx] sfx=[$sfx] ifx=[$ifx] cnt=[$cnt]\n";
print
__LINE__
,
"
ifx=[
$ifx
] cnt=[
$cnt
]
\n
";
if
(
!
exists
(
$counter
{
$ifx
})
||
$counter
{
$ifx
}
<
$cnt
)
{
$counter
{
$ifx
}
=
$cnt
;
}
els
e
els
if
(
$counter
{
$ifx
}
==
$cnt
)
{
$char
.=
'
';
$hex
.=
'
';
$run
=
0
;
print
"
ATTN: duplicate identifier=[
$identifier
]
\n
";
}
elsif
(
$counter
{
$ifx
}
>
$cnt
)
{
# higher counter known, nothing to do
}
print
FX
"
$offx
$hex
|
$char
|
\n
";
$off
+=
0x10
;
}
(
wantarray
)
?
%counter
:
\
%counter
;
}
__END__
...
...
@@ -264,3 +323,12 @@ instead of
"registry" : "DataCite_Test_Prod",
=head1 NOTES
=head2 uscholar
* Telefonat mit Guido Blechl 2017-12-04T1320
* Fuer uscholar Objekte soll der Permalink von Phaidra eingetragen werden
This diff is collapsed.
Click to expand it.
lib/DataCite/API.pm
+
2
−
0
View file @
b7acf18c
...
...
@@ -68,6 +68,8 @@ EOX
print
STDERR
"
ATTN: register_doi POST doi returned code2=[
$code2
] res2=[
$res2
]
\n
";
return
undef
;
}
1
;
}
sub
datacite_request
...
...
This diff is collapsed.
Click to expand it.
lib/Phaidra/DataCite.pm
+
25
−
14
View file @
b7acf18c
...
...
@@ -39,7 +39,7 @@ sub get_metadata
if
(
$fmt
eq
'
xml
')
{
$url_md
.=
'
?format=xml
';
$accept
=
'
application/json
';
$accept
=
'
application/json
';
# TODO ???
}
print
__LINE__
,
"
fmt=[
$fmt
] url_md=[
$url_md
]
\n
";
...
...
@@ -60,7 +60,7 @@ sub get_metadata
my
$code
=
$res
->
code
();
my
$content
=
$res
->
content
();
#
print __LINE__, " code=[$code] content=[$content]\n";
print
__LINE__
,
"
code=[
$code
] content=[
$content
]
\n
";
# main::hex_dump ($content);
# TODO/HACK: fix double UTF8 encoding handed down from PhaidraAPI
...
...
@@ -98,6 +98,7 @@ EOX
);
$writer
->
startTag
('
resource
',
'
xmlns:xsi
'
=>
'
http://www.w3.org/2001/XMLSchema-instance
',
'
xmlns
'
=>
'
http://datacite.org/schema/kernel-4
',
'
xsi:schemaLocation
'
=>
$datacite_schemaLocation
);
print
__LINE__
,
"
json:
",
main::
Dumper
(
$json
);
json_2_xml_rec
(
undef
,
$json
,
$writer
);
$writer
->
endTag
('
resource
');
...
...
@@ -118,36 +119,46 @@ sub json_2_xml_rec
my
$children
=
shift
;
my
$writer
=
shift
;
foreach
my
$child
(
@
{
$children
})
{
foreach
my
$child
(
@
{
$children
})
{
my
$children_size
=
defined
(
$child
->
{
children
})
?
scalar
(
@
{
$child
->
{
children
}})
:
0
;
my
$attributes_size
=
defined
(
$child
->
{
attributes
})
?
scalar
(
@
{
$child
->
{
attributes
}})
:
0
;
if
((
!
defined
(
$child
->
{
value
})
||
(
$child
->
{
value
}
eq
''))
&&
$children_size
==
0
&&
$attributes_size
==
0
){
if
((
!
defined
(
$child
->
{
value
})
||
(
$child
->
{
value
}
eq
''))
&&
$children_size
==
0
&&
$attributes_size
==
0
)
{
next
;
}
if
(
defined
(
$child
->
{
attributes
})
&&
(
scalar
@
{
$child
->
{
attributes
}}
>
0
)){
if
(
defined
(
$child
->
{
attributes
})
&&
(
scalar
@
{
$child
->
{
attributes
}}
>
0
))
{
my
@attrs
;
foreach
my
$a
(
@
{
$child
->
{
attributes
}}){
if
(
defined
(
$a
->
{
value
})
&&
$a
->
{
value
}
ne
'')
{
if
(
$a
->
{
xmlname
}
eq
'
DOESNOTWORK_lang
'){
if
(
$a
->
{
xmlname
}
eq
'
DOESNOTWORK_lang
')
{
push
@attrs
,
['
http://www.w3.org/XML/1998/namespace
',
'
lang
']
=>
$a
->
{
value
};
}
else
{
}
else
{
push
@attrs
,
$a
->
{
xmlname
}
=>
$a
->
{
value
};
}
}
}
$writer
->
startTag
(
$child
->
{
xmlname
},
@attrs
);
}
else
{
}
else
{
$writer
->
startTag
(
$child
->
{
xmlname
});
}
if
(
$children_size
>
0
){
if
(
$children_size
>
0
)
{
json_2_xml_rec
(
$child
,
$child
->
{
children
},
$writer
);
}
else
{
}
else
{
$writer
->
characters
(
$child
->
{
value
});
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment