Skip to content
Snippets Groups Projects
Commit dcb166af authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

first shot at the Utheses API

parent 5240ee73
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/perl
use strict;
use utf8;
package Univie::Utheses::API;
use base 'Module';
use LWP;
use JSON -convert_blessed_universally;
use Encode;
=head2 my $utapi= new Univie::Utheses::API( api_url => 'https://utheses-api-utheses-prod.cprod.univie.ac.at/' );
=cut
sub getContainerPublicMetadata
{
my $self= shift;
my $utheses_id= shift;
my ($code1, $res1)= $self->utheses_request('GET', 'container/get/public', $utheses_id);
print __LINE__, " code1=[$code1] res1=[$res1]\n";
my $info;
if ($code1 =~ m#^2#)
{
eval
{
# utf8::upgrade($res1);
$info= from_json(decode("utf8", $res1));
};
if ($@)
{
die $@;
}
}
$info;
}
sub utheses_request
{
my $self= shift;
my $method= shift;
my $what= shift;
my $id= shift;
my ($api_url)= map { $self->{config}->{$_} } qw(api_url);
my $req_url= join ('/', $api_url, $what, $id);
print __LINE__, " req_url=[$req_url]\n";
my $req = HTTP::Request->new(
$method => $req_url,
);
my $ua= LWP::UserAgent->new;
my $res= $ua->request($req);
# print __LINE__, " res: ", main::Dumper($res);
my $txt= $res->content();
return ($res->code(), $txt);
}
1;
ut1.pl 0 → 100755
#!/usr/bin/perl
use strict;
use FileHandle;
binmode( STDOUT, ':utf8' ); autoflush STDOUT 1;
binmode( STDERR, ':utf8' ); autoflush STDERR 1;
binmode( STDIN, ':utf8' );
use Data::Dumper;
$Data::Dumper::Indent= 1;
use lib 'lib';
use Univie::Utheses::API;
use Phaidra::Utils::iso639;
# my %LANGUAGES= ( 'deu' => 'de', 'eng' => 'en' ); not used, see Phaidra::Utils::iso639
my @TSV_COLUMNS= qw( utheses_id suffix doi nbn ac_number langs language persistent_link errors );
my $op_mode= 'fetch_metadata';
my $fnm_tsv= 'utheses/utheses_info.tsv'; # TODO: timestamp!
my @pars= ();
while (my $arg= shift (@ARGV))
{
if ($arg =~ /^-/)
{
}
else
{
push (@pars, $arg);
}
}
if ($op_mode eq 'fetch_metadata')
{
fetch_metadata(\@pars);
}
exit(0);
sub fetch_metadata
{
my $utheses_ids= shift;
my $utapi= new Univie::Utheses::API( config => { api_url => 'https://utheses-api-utheses-prod.cprod.univie.ac.at' } );
open (TSV, '>:utf8', $fnm_tsv) or die;
print TSV join("\t", @TSV_COLUMNS), "\n";
foreach my $utheses_id (@$utheses_ids)
{
next if ($utheses_id eq 'utheses_id'); # CSV column name...
print __LINE__, " utheses_id=[$utheses_id]\n";
my $info= $utapi->getContainerPublicMetadata($utheses_id);
print __LINE__, " info: ", Dumper($info);
if ($info->{status} eq '200' && exists ($info->{metadata}->{thesis}->{doi}))
{
my ($row, $xml)= utheses2datacite($info->{metadata}, $utheses_id);
print __LINE__, " row: ", Dumper($row);
print __LINE__, " xml=[$xml]\n";
print TSV join("\t", map { $row->{$_} } @TSV_COLUMNS), "\n";
if (defined ($row->{doi}))
{
my $xml_fnm= 'utheses/DataCite_XML/'.$row->{doi}.'.xml';
open (XML, '>:utf8', $xml_fnm) or die;
print XML $xml;
close (XML);
}
}
}
}
sub utheses2datacite
{
my $md= shift;
my $utheses_id= shift;
my @errors;
my ($th, $authors)= map { $md->{$_} } qw(thesis authors);
my ($doi, $nbn, $ac_number, $persistent_link)= map { $th->{$_} } qw(doi urn ac_number persistent_link);
# TODO: check doi, nbn, ac_number for syntactic validity
my $suffix;
if ($doi =~ m#^10.25365/(thesis\.\d+)$#)
{
$suffix= $1;
}
else
{
push (@errors, 'bad_doi');
}
push (@errors, 'bad_nbn') unless ($nbn =~ m#^urn:nbn:at:at-ubw:1-\d{5}\.\d{5}\.\d{6}-\d$#);
push (@errors, 'bad_ac_number') unless ($ac_number =~ m#^AC\d{8}$#);
my ($titles, $abstracts, $publication_date, $langs)= map { $th->{$_} } qw(titels abstracts publication_date languages);
my $language= Phaidra::Utils::iso639::iso_639_2_to_1($langs->[0]);
unless (defined ($language))
{
$language= 'UNKNOWN';
push (@errors, 'language');
}
my $xml= << "EOX";
<?xml version="1.0" encoding="utf-8"?>
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
<identifier identifierType="DOI">$doi</identifier>
<creators>
EOX
foreach my $author (@$authors)
{
$xml .= << "EOX";
<creator>
<creatorName>$author->{family_name}, $author->{given_name}</creatorName>
</creator>
EOX
}
$xml .= << "EOX";
</creators>
<titles>
EOX
$xml .= << "EOX";
</titles>
<publisher>(:none)</publisher>
<publicationYear>2002</publicationYear>
<language>$language</language>
<resourceType resourceTypeGeneral="Text">Thesis</resourceType>
<alternateIdentifiers>
<alternateIdentifier alternateIdentifierType="nbn">$nbn</alternateIdentifier>
</alternateIdentifiers>
<descriptions>
EOX
foreach my $abstract (@$abstracts)
{
$xml .= << "EOX";
<description descriptionType="Abstract">
$abstract->{text}
</description>
EOX
}
$xml .= << "EOX";
</descriptions>
</resource>
EOX
my %row=
(
utheses_id => $utheses_id,
suffix => $suffix,
doi => $doi,
nbn => $nbn,
ac_number => $ac_number,
langs => join(',', @$langs),
language => $language,
persistent_link => $persistent_link,
errors => join(',',@errors),
);
(\%row, $xml);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment