Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
irma2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Gerhard Gonter
irma2
Commits
c3d44902
Commit
c3d44902
authored
2 years ago
by
Gerhard Gonter
Browse files
Options
Downloads
Patches
Plain Diff
latest version of ut1.pl
parent
3059a218
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
ut1.pl
+175
-40
175 additions, 40 deletions
ut1.pl
with
175 additions
and
40 deletions
ut1.pl
+
175
−
40
View file @
c3d44902
...
@@ -13,6 +13,8 @@ use Data::Dumper;
...
@@ -13,6 +13,8 @@ use Data::Dumper;
$
Data::Dumper::
Indent
=
1
;
$
Data::Dumper::
Indent
=
1
;
use
Digest::MD5::
File
qw(file_md5_hex)
;
use
Digest::MD5::
File
qw(file_md5_hex)
;
use
Digest::
MD5
qw(md5_hex)
;
use
Encode
qw(encode_utf8)
;
use
Util::
ts
;
use
Util::
ts
;
use
Util::
JSON
;
use
Util::
JSON
;
...
@@ -28,7 +30,6 @@ use Alma::MARC_Extractor;
...
@@ -28,7 +30,6 @@ use Alma::MARC_Extractor;
my
@TSV_COLUMNS
=
qw( utheses_id fulltext_locked suffix doi nbn ac_number langs language persistent_link xml_fnm errors )
;
my
@TSV_COLUMNS
=
qw( utheses_id fulltext_locked suffix doi nbn ac_number langs language persistent_link xml_fnm errors )
;
# my $op_mode= 'fetch_metadata_bulk';
my
$op_mode
=
'
process
';
my
$op_mode
=
'
process
';
my
$fnm_tsv
=
'
utheses/utheses_info.tsv
';
# TODO: timestamp!
my
$fnm_tsv
=
'
utheses/utheses_info.tsv
';
# TODO: timestamp!
...
@@ -38,6 +39,8 @@ my $agent_config_file= '/etc/irma/pidagent.json';
...
@@ -38,6 +39,8 @@ my $agent_config_file= '/etc/irma/pidagent.json';
my
$MAX_MARC_AGE
=
86400
*
60
;
my
$MAX_MARC_AGE
=
86400
*
60
;
my
$MAX_MARC_REQUESTS
=
10_000
;
my
$MAX_MARC_REQUESTS
=
10_000
;
my
%metrics
;
my
$do_register_doi
=
0
;
my
$do_register_doi
=
0
;
my
$agent_name
=
'
pidagent
';
my
$agent_name
=
'
pidagent
';
my
$agent_id
=
$$
;
my
$agent_id
=
$$
;
...
@@ -53,6 +56,7 @@ while (my $arg= shift (@ARGV))
...
@@ -53,6 +56,7 @@ while (my $arg= shift (@ARGV))
if
(
$opt
eq
'
help
')
{
usage
();
}
if
(
$opt
eq
'
help
')
{
usage
();
}
elsif
(
$opt
eq
'
register-doi
')
{
$do_register_doi
=
1
;
}
elsif
(
$opt
eq
'
register-doi
')
{
$do_register_doi
=
1
;
}
elsif
(
$opt
eq
'
fix
')
{
$fix_problems
=
1
;
}
elsif
(
$opt
eq
'
fix
')
{
$fix_problems
=
1
;
}
elsif
(
$opt
eq
'
fmb
')
{
my
$op_mode
=
'
fetch_metadata_bulk
';
}
else
{
usage
();
}
else
{
usage
();
}
}
}
elsif
(
$arg
=~
/^-(.+)/
)
elsif
(
$arg
=~
/^-(.+)/
)
...
@@ -89,12 +93,30 @@ die "no reg_obj" unless (defined ($reg_obj));
...
@@ -89,12 +93,30 @@ die "no reg_obj" unless (defined ($reg_obj));
# get handles for various databases
# get handles for various databases
my
$marc_db
=
IRMA::db::
get_any_db
(
$agent_cnf
,
'
marc_database
');
my
$marc_db
=
IRMA::db::
get_any_db
(
$agent_cnf
,
'
marc_database
');
my
$
agent_db
=
IRMA::db::
get_any_db
(
$a
ge
n
t_c
nf
,
'
pidagent_database
');
my
$
marc_col
=
$marc_db
->
get_c
ollection
('
alma.marc
');
print
__LINE__
,
"
agent_db=[
$a
ge
n
t_
db
]
\n
"
;
my
$req_col
=
$marc_db
->
get_
collection
('
requests
')
;
my
@marc_fields
=
qw(ac_number mms_id fetched lib_code)
;
my
@marc_fields
=
qw(ac_number mms_id fetched lib_code)
;
my
$mex
=
Alma::
MARC_Extractor
->
new
(
\
@marc_fields
);
my
$mex
=
Alma::
MARC_Extractor
->
new
(
\
@marc_fields
);
my
$agent_db
=
IRMA::db::
get_any_db
(
$agent_cnf
,
'
pidagent_database
');
# print __LINE__, " agent_db=[$agent_db]\n";
my
$ut_col
=
$agent_db
->
get_collection
('
utheses
');
my
$dc_col
=
$agent_db
->
get_collection
('
datacite
');
my
$q_col
=
$agent_db
->
get_collection
('
queue
');
my
$problems_col
=
$agent_db
->
get_collection
('
problems
');
my
(
$nx_metrics_temp
,
$nx_metrics_prom
);
my
@REPORT
;
if
(
exists
(
$agent_cnf
->
{'
collector-textfile-directory
'})
&&
defined
(
my
$p
=
$agent_cnf
->
{'
collector-textfile-directory
'})
)
{
$nx_metrics_temp
=
join
('
/
',
$p
,
'
ut1.temp
');
$nx_metrics_prom
=
join
('
/
',
$p
,
'
ut1.prom
');
}
my
$running
=
0
;
my
$running
=
0
;
if
(
$op_mode
eq
'
process
')
if
(
$op_mode
eq
'
process
')
{
{
...
@@ -109,6 +131,7 @@ if ($op_mode eq 'process')
...
@@ -109,6 +131,7 @@ if ($op_mode eq 'process')
my
$utheses_id
=
$
1
;
my
$utheses_id
=
$
1
;
my
@actions
=
process_utheses_item
(
$utheses_id
);
my
@actions
=
process_utheses_item
(
$utheses_id
);
print
__LINE__
,
"
process_utheses_item(
$utheses_id
) ==> [
",
join
('
,
',
@actions
),
"
]
\n
";
print
__LINE__
,
"
process_utheses_item(
$utheses_id
) ==> [
",
join
('
,
',
@actions
),
"
]
\n
";
push
(
@REPORT
,
join
('
',
$utheses_id
,
@actions
));
}
}
elsif
(
$par
=~
m#^(\d+)\.\.(\d+)$#
||
$par
=~
m#^(\d+)\-(\d+)$#
||
$par
=~
m#(blk)(\d+)$#
)
elsif
(
$par
=~
m#^(\d+)\.\.(\d+)$#
||
$par
=~
m#^(\d+)\-(\d+)$#
||
$par
=~
m#(blk)(\d+)$#
)
{
{
...
@@ -141,8 +164,10 @@ if ($op_mode eq 'process')
...
@@ -141,8 +164,10 @@ if ($op_mode eq 'process')
}
}
elsif
(
$par
eq
'
gpdcr
')
elsif
(
$par
eq
'
gpdcr
')
{
{
my
@utheses_ids
=
gpdcr
();
my
$utheses_ids1
=
gpdcr
();
# push (@pars, @utheses_ids);
my
$utheses_ids2
=
remove_registered_DOIs
(
$utheses_ids1
);
print
__LINE__
,
"
gpdcr utheses_ids2:
",
join
('
',
@$utheses_ids2
),
"
\n
";
push
(
@pars
,
@$utheses_ids2
);
}
}
sleep
(
2
)
if
(
@pars
);
sleep
(
2
)
if
(
@pars
);
...
@@ -153,17 +178,26 @@ elsif ($op_mode eq 'fetch_metadata_bulk')
...
@@ -153,17 +178,26 @@ elsif ($op_mode eq 'fetch_metadata_bulk')
{
{
fetch_metadata_bulk
(
\
@pars
);
fetch_metadata_bulk
(
\
@pars
);
}
}
print
__LINE__
,
"
metrics:
",
Dumper
(
\
%metrics
);
write_metrics
(
\
%metrics
);
print
__LINE__
,
"
REPORT:
\n
";
foreach
my
$l
(
@REPORT
)
{
print
$l
,
"
\n
";
}
exit
(
0
);
exit
(
0
);
sub
gpdcr
sub
gpdcr
{
{
my
(
$status
,
$txt
,
$info
)
=
$ut_api
->
getPendingDoisCreateRequest
();
my
(
$status
,
$txt
,
$info
)
=
$ut_api
->
getPendingDoisCreateRequest
();
print
__LINE__
,
"
gpdcr: info:
",
Dumper
(
$info
);
#
print __LINE__, " gpdcr: info: ", Dumper($info);
my
@utheses_ids
=
();
my
@utheses_ids
=
();
if
(
$status
eq
'
200
')
if
(
$status
eq
'
200
')
{
{
my
$p
=
$info
->
{
pendingDois
};
my
$p
=
$info
->
{
pendingDois
};
my
$fnm
=
'
Pending_DOI_Create_Requests_
'
.
Util::ts::
ts_ISO_gmt
()
.
'
.json
';
Util::JSON::
write_json_file
(
$fnm
,
$info
);
print
__LINE__
,
"
saved
$fnm
\n
";
if
(
defined
(
$p
)
&&
ref
(
$p
)
eq
'
HASH
')
if
(
defined
(
$p
)
&&
ref
(
$p
)
eq
'
HASH
')
{
{
push
(
@utheses_ids
,
map
{
$p
->
{
$_
}
->
{
utheses_id
}
}
keys
%$p
);
push
(
@utheses_ids
,
map
{
$p
->
{
$_
}
->
{
utheses_id
}
}
keys
%$p
);
...
@@ -174,13 +208,47 @@ sub gpdcr
...
@@ -174,13 +208,47 @@ sub gpdcr
}
}
}
}
@utheses_ids
;
$metrics
{
ut1_gpdcr_count
}
=
@utheses_ids
;
(
wantarray
)
?
@utheses_ids
:
\
@utheses_ids
;
}
}
sub
cleanup_queue
sub
remove_registered_DOIs
{
{
my
$
q_col
=
$agent_db
->
get_collection
('
queue
')
;
my
$
utheses_id_list
=
shift
;
print
__LINE__
,
"
remove_registered_DOIs()
\n
";
# this is not useful, because new IDs are not listed here yet
# my $cur= $dc_col->find({ utheses_id => { '$in' => $utheses_id_list }, reg_status => { '$ne' => 1} });
my
@utheses_ids
=
();
my
@registered_ids
=
();
my
(
$gpdcr_checked
,
$gpdcr_queued
)
=
(
0
,
0
);
foreach
my
$ut_id
(
@$utheses_id_list
)
{
my
$dc_info
=
$dc_col
->
find_one
({
utheses_id
=>
"
$ut_id
"
});
$gpdcr_checked
++
;
if
(
$dc_info
->
{
reg_status
}
==
1
)
{
push
(
@registered_ids
,
$ut_id
);
next
;
}
print
__LINE__
,
"
ut_id=[
$ut_id
]
",
Dumper
(
$dc_info
);
push
(
@utheses_ids
,
$ut_id
);
$gpdcr_queued
++
;
}
print
__LINE__
,
"
registered ids:
",
join
('
',
@registered_ids
),
"
\n
";
$metrics
{
ut1_gpdcr_already_registered
}
=
@registered_ids
;
$metrics
{
ut1_gpdcr_checked
}
=
$gpdcr_checked
;
$metrics
{
ut1_gpdcr_queued
}
=
$gpdcr_queued
;
(
wantarray
)
?
@utheses_ids
:
\
@utheses_ids
;
}
sub
cleanup_queue
{
my
$j
=
$q_col
->
find_one
({
status
=>
"
in_progress
",
agent_name
=>
$agent_name
,
agent_id
=>
$agent_id
});
my
$j
=
$q_col
->
find_one
({
status
=>
"
in_progress
",
agent_name
=>
$agent_name
,
agent_id
=>
$agent_id
});
if
(
defined
(
$j
))
if
(
defined
(
$j
))
{
{
...
@@ -192,8 +260,6 @@ sub cleanup_queue
...
@@ -192,8 +260,6 @@ sub cleanup_queue
sub
get_job_from_queue
sub
get_job_from_queue
{
{
my
$q_col
=
$agent_db
->
get_collection
('
queue
');
my
$j
=
$q_col
->
find_one
({
status
=>
"
in_progress
",
agent_name
=>
$agent_name
});
my
$j
=
$q_col
->
find_one
({
status
=>
"
in_progress
",
agent_name
=>
$agent_name
});
if
(
defined
(
$j
))
if
(
defined
(
$j
))
{
{
...
@@ -222,8 +288,13 @@ sub process_utheses_item
...
@@ -222,8 +288,13 @@ sub process_utheses_item
print
__LINE__
,
"
process_utheses_item: utheses_id=[
$utheses_id
]
",
'
=
'
x50
,
"
\n
";
print
__LINE__
,
"
process_utheses_item: utheses_id=[
$utheses_id
]
",
'
=
'
x50
,
"
\n
";
my
$register_doi_ok
=
1
;
my
$register_doi_ok
=
1
;
if
(
$fix_problems
)
{
remove_problem
('
utheses
',
$utheses_id
);
}
# first: compare existing utheses record (if it exists) with data from utheses
my
$ut_data
=
$ut_col
->
find_one
(
{
utheses_id
=>
$utheses_id
}
);
my
$row
=
{};
my
$row
=
{};
my
(
$error_code
,
$status
,
$xml
)
=
get_utheses_metadata
(
$row
,
$utheses_id
);
my
(
$error_code
,
$status
,
$xml
)
=
get_utheses_metadata
(
$row
,
$utheses_id
,
$ut_data
);
print
__LINE__
,
"
error_code=[
$error_code
] status=[
$status
]
\n
";
print
__LINE__
,
"
error_code=[
$error_code
] status=[
$status
]
\n
";
print
__LINE__
,
"
xml=[
$xml
]
\n
";
print
__LINE__
,
"
xml=[
$xml
]
\n
";
...
@@ -233,6 +304,7 @@ sub process_utheses_item
...
@@ -233,6 +304,7 @@ sub process_utheses_item
unless
(
$status
eq
'
200
')
unless
(
$status
eq
'
200
')
{
{
push
(
@actions
,
'
no_utheses_record
');
push
(
@actions
,
'
no_utheses_record
');
$metrics
{
ut1_no_record
}
++
;
return
@actions
;
return
@actions
;
}
}
...
@@ -241,6 +313,7 @@ sub process_utheses_item
...
@@ -241,6 +313,7 @@ sub process_utheses_item
push
(
@actions
,
"
error_code=[
$error_code
]
");
push
(
@actions
,
"
error_code=[
$error_code
]
");
report_problem
(
{
area
=>
'
utheses
',
problem
=>
'
error_code
',
utheses_id
=>
$utheses_id
,
error_code
=>
$error_code
}
);
report_problem
(
{
area
=>
'
utheses
',
problem
=>
'
error_code
',
utheses_id
=>
$utheses_id
,
error_code
=>
$error_code
}
);
$register_doi_ok
=
0
;
$register_doi_ok
=
0
;
$metrics
{
ut1_utheses_errors
}
++
;
}
}
my
@x
=
analyze_marc_record
(
$row
);
my
@x
=
analyze_marc_record
(
$row
);
...
@@ -248,14 +321,10 @@ sub process_utheses_item
...
@@ -248,14 +321,10 @@ sub process_utheses_item
unless
(
defined
(
$agent_db
))
unless
(
defined
(
$agent_db
))
{
{
$metrics
{
ut1_missing_agent_db
}
++
;
return
@actions
;
return
@actions
;
}
}
my
$ut_col
=
$agent_db
->
get_collection
('
utheses
');
if
(
$fix_problems
)
{
remove_problem
('
utheses
',
$row
->
{
utheses_id
});
}
# first: compare existing utheses record (if it exists) with data from utheses
my
$ut_data
=
$ut_col
->
find_one
(
{
utheses_id
=>
$row
->
{
utheses_id
}
}
);
if
(
defined
(
$ut_data
))
if
(
defined
(
$ut_data
))
{
{
my
@cmp_fields
=
qw(doi urn nbn ac_number)
;
my
@cmp_fields
=
qw(doi urn nbn ac_number)
;
...
@@ -272,6 +341,7 @@ sub process_utheses_item
...
@@ -272,6 +341,7 @@ sub process_utheses_item
{
{
report_problem
(
{
area
=>
'
utheses
',
problem
=>
'
utheses_data_missmatch
',
utheses_id
=>
$utheses_id
,
problems
=>
\
@problems
}
);
report_problem
(
{
area
=>
'
utheses
',
problem
=>
'
utheses_data_missmatch
',
utheses_id
=>
$utheses_id
,
problems
=>
\
@problems
}
);
push
(
@actions
,
'
problem report utheses
');
push
(
@actions
,
'
problem report utheses
');
$metrics
{
ut1_utheses_data_missmatch
}
++
;
return
(
@actions
);
return
(
@actions
);
}
}
}
}
...
@@ -289,8 +359,7 @@ sub process_utheses_item
...
@@ -289,8 +359,7 @@ sub process_utheses_item
remove_problem
('
datacite
',
$row
->
{
doi
});
remove_problem
('
datacite
',
$row
->
{
doi
});
}
}
my
$dc_col
=
$agent_db
->
get_collection
('
datacite
');
print
__LINE__
,
"
row:
",
Dumper
(
$row
);
my
(
$utheses_id
,
$doi
,
$xml_md5
,
$url
)
=
map
{
$row
->
{
$_
}
}
qw(utheses_id doi xml_md5 persistent_link)
;
my
(
$utheses_id
,
$doi
,
$xml_md5
,
$url
)
=
map
{
$row
->
{
$_
}
}
qw(utheses_id doi xml_md5 persistent_link)
;
my
$dc_record
=
$dc_col
->
find_one
(
{
doi
=>
$doi
}
);
my
$dc_record
=
$dc_col
->
find_one
(
{
doi
=>
$doi
}
);
...
@@ -304,6 +373,7 @@ sub process_utheses_item
...
@@ -304,6 +373,7 @@ sub process_utheses_item
utheses_id
=>
$utheses_id
,
utheses_id
=>
$utheses_id
,
ac_number
=>
$row
->
{
ac_number
},
ac_number
=>
$row
->
{
ac_number
},
xml_md5
=>
$xml_md5
,
xml_md5
=>
$xml_md5
,
# xml_fnm => $row->{xml_fnm},
ts_epoch
=>
time
(),
ts_epoch
=>
time
(),
ts_iso_gmt
=>
Util::ts::
ts_ISO_gmt
(),
ts_iso_gmt
=>
Util::ts::
ts_ISO_gmt
(),
);
);
...
@@ -336,6 +406,7 @@ sub process_utheses_item
...
@@ -336,6 +406,7 @@ sub process_utheses_item
{
{
report_problem
(
{
area
=>
'
datacite
',
problem
=>
'
datacite_data_missmatch
',
utheses_id
=>
$utheses_id
,
doi
=>
$doi
,
problems
=>
\
@problems
}
);
report_problem
(
{
area
=>
'
datacite
',
problem
=>
'
datacite_data_missmatch
',
utheses_id
=>
$utheses_id
,
doi
=>
$doi
,
problems
=>
\
@problems
}
);
push
(
@actions
,
'
problem report datacite
');
push
(
@actions
,
'
problem report datacite
');
$metrics
{
ut1_datacite_data_missmatch
}
++
;
return
(
@actions
);
return
(
@actions
);
}
}
...
@@ -343,6 +414,7 @@ sub process_utheses_item
...
@@ -343,6 +414,7 @@ sub process_utheses_item
{
{
print
__LINE__
,
"
datacite metadata unchanged; not updating!
\n
";
print
__LINE__
,
"
datacite metadata unchanged; not updating!
\n
";
push
(
@actions
,
'
datacite doi metadata unchanged
');
push
(
@actions
,
'
datacite doi metadata unchanged
');
$metrics
{
ut1_datacite_doi_metadata_unchanged
}
++
;
return
@actions
;
return
@actions
;
}
}
}
}
...
@@ -360,9 +432,14 @@ sub process_utheses_item
...
@@ -360,9 +432,14 @@ sub process_utheses_item
my
$res
=
$dc_col
->
update
(
{
doi
=>
$doi
},
\
%reg_info
,
{
upsert
=>
1
}
);
my
$res
=
$dc_col
->
update
(
{
doi
=>
$doi
},
\
%reg_info
,
{
upsert
=>
1
}
);
print
__LINE__
,
"
insert res:
",
Dumper
(
$res
);
print
__LINE__
,
"
insert res:
",
Dumper
(
$res
);
unless
(
$ok
)
if
(
$ok
)
{
$metrics
{
ut1_datacite_doi_registration_ok
}
++
;
}
else
{
{
report_problem
(
{
area
=>
'
datacite
',
problem
=>
'
registration failure
',
utheses_id
=>
$utheses_id
,
doi
=>
$doi
,
reg_info
=>
\
%reg_info
}
);
report_problem
(
{
area
=>
'
datacite
',
problem
=>
'
registration failure
',
utheses_id
=>
$utheses_id
,
doi
=>
$doi
,
reg_info
=>
\
%reg_info
}
);
$metrics
{
ut1_datacite_doi_registration_failure
}
++
;
}
}
}
}
...
@@ -383,10 +460,12 @@ sub process_ac_number
...
@@ -383,10 +460,12 @@ sub process_ac_number
{
{
my
$utheses_id
=
$
1
;
my
$utheses_id
=
$
1
;
my
(
$error_code
,
$status
,
$xml
)
=
get_utheses_metadata
(
$row
,
$utheses_id
);
my
(
$error_code
,
$status
,
$xml
)
=
get_utheses_metadata
(
$row
,
$utheses_id
);
$metrics
{
ut1_alma_utheses_link_ok
}
++
;
}
}
else
else
{
{
# TODO: report bad link in Alma
# TODO: report bad link in Alma
$metrics
{
ut1_alma_utheses_link_bad
}
++
;
}
}
}
}
...
@@ -398,11 +477,18 @@ sub analyze_marc_record
...
@@ -398,11 +477,18 @@ sub analyze_marc_record
my
$row
=
shift
;
my
$row
=
shift
;
my
$ac_number
=
shift
||
$row
->
{
ac_number
};
my
$ac_number
=
shift
||
$row
->
{
ac_number
};
return
(
$row
->
{
marc_record
}
=
'
no_marc_db
')
unless
(
defined
(
$marc_db
));
unless
(
defined
(
$marc_db
))
return
(
$row
->
{
marc_record
}
=
'
invalid_ac_number
')
unless
(
$ac_number
=~
m#^AC\d{8}$#
);
{
return
(
$row
->
{
marc_record
}
=
'
no_marc_db
');
$metrics
{
ut1_no_marc_db
}
++
;
}
unless
(
$ac_number
=~
m#^AC\d{8}$#
)
{
return
(
$row
->
{
marc_record
}
=
'
invalid_ac_number
');
$metrics
{
ut1_invalid_ac_number
}
++
;
}
my
@actions
=
();
my
@actions
=
();
my
$marc_col
=
$marc_db
->
get_collection
('
alma.marc
');
my
$marc_rec
=
$marc_col
->
find_one
({
ac_number
=>
$ac_number
});
my
$marc_rec
=
$marc_col
->
find_one
({
ac_number
=>
$ac_number
});
print
__LINE__
,
"
marc_rec:
",
Dumper
(
$marc_rec
);
print
__LINE__
,
"
marc_rec:
",
Dumper
(
$marc_rec
);
my
$request_marc_rec
=
0
;
my
$request_marc_rec
=
0
;
...
@@ -418,12 +504,14 @@ sub analyze_marc_record
...
@@ -418,12 +504,14 @@ sub analyze_marc_record
if
(
$best_before
>
$now
)
if
(
$best_before
>
$now
)
{
{
$row
->
{
marc_record
}
=
'
ok
';
$row
->
{
marc_record
}
=
'
ok
';
$metrics
{
ut1_alma_marc_record_fresh
}
++
;
}
}
else
else
{
{
print
__LINE__
,
"
marc_record too old
\n
";
print
__LINE__
,
"
marc_record too old
\n
";
$row
->
{
marc_record
}
=
'
too_old
';
$row
->
{
marc_record
}
=
'
too_old
';
$request_marc_rec
=
1
;
$request_marc_rec
=
1
;
$metrics
{
ut1_alma_marc_record_requested
}
++
;
}
}
# check marc record, even when it is too old
# check marc record, even when it is too old
...
@@ -435,12 +523,11 @@ sub analyze_marc_record
...
@@ -435,12 +523,11 @@ sub analyze_marc_record
{
{
$row
->
{
marc_record
}
=
'
not_found
';
$row
->
{
marc_record
}
=
'
not_found
';
$request_marc_rec
=
1
;
$request_marc_rec
=
1
;
$metrics
{
ut1_alma_marc_record_not_found
}
++
;
}
}
if
(
$request_marc_rec
)
if
(
$request_marc_rec
)
{
{
my
$req_col
=
$marc_db
->
get_collection
('
requests
');
my
$req
=
my
$req
=
{
{
agent
=>
'
alma_cat
',
agent
=>
'
alma_cat
',
...
@@ -556,6 +643,7 @@ sub get_utheses_metadata
...
@@ -556,6 +643,7 @@ sub get_utheses_metadata
{
{
my
$row
=
shift
;
my
$row
=
shift
;
my
$utheses_id
=
shift
;
my
$utheses_id
=
shift
;
my
$ut_data
=
shift
;
print
__LINE__
,
"
utheses_id=[
$utheses_id
]
\n
";
print
__LINE__
,
"
utheses_id=[
$utheses_id
]
\n
";
my
$info
=
$ut_api
->
getContainerPublicMetadata
(
$utheses_id
);
my
$info
=
$ut_api
->
getContainerPublicMetadata
(
$utheses_id
);
...
@@ -567,20 +655,43 @@ sub get_utheses_metadata
...
@@ -567,20 +655,43 @@ sub get_utheses_metadata
if
(
$status
eq
'
200
')
if
(
$status
eq
'
200
')
{
{
$xml
=
utheses2datacite
(
$row
,
$info
,
$utheses_id
);
$xml
=
utheses2datacite
(
$row
,
$info
,
$utheses_id
);
# print __LINE__, " row: ", Dumper($row);
print
__LINE__
,
"
row:
",
Dumper
(
$row
);
$row
->
{
xml_md5
}
=
my
$md5_hex
=
md5_hex
(
encode_utf8
(
$xml
));
if
(
defined
(
$row
->
{
doi
}))
if
(
defined
(
$row
->
{
doi
}))
{
{
if
(
defined
(
$xml
)
&&
defined
(
my
$xml_fnm
=
$row
->
{
xml_fnm
})
)
if
(
defined
(
$xml
))
{
{
my
$save_xml
=
1
;
# print __LINE__, " xml=[$xml]\n";
$row
->
{
xml_fnm
}
=
my
$xml_fnm
=
'
utheses/DataCite_XML/
'
.
$row
->
{
doi
}
.
'
.xml
';
$row
->
{
xml_fnm
}
=
my
$xml_fnm
=
'
utheses/DataCite_XML/
'
.
$row
->
{
doi
}
.
'
.xml
';
# print __LINE__, " DataCite_XML=[$xml_fnm] xml=[$xml]\n";
# print __LINE__, " DataCite_XML=[$xml_fnm] xml=[$xml]\n";
open
(
XML
,
'
>:utf8
',
$xml_fnm
)
or
die
"
can't write to
$xml_fnm
";
# TODO: or do something else...
print
XML
$xml
;
print
__LINE__
,
"
ut_data=[
$ut_data
]
\n
";
close
(
XML
);
if
(
defined
(
$ut_data
))
my
$md5
=
file_md5_hex
(
$xml_fnm
);
{
# we have xml info in the database, check if they match
print
__LINE__
,
"
xml_fnm=[
$xml_fnm
] md5=[
$md5
]
\n
";
print
__LINE__
,
"
ut_data:
",
Dumper
(
$ut_data
);
$row
->
{
xml_md5
}
=
$md5
;
if
(
defined
(
my
$xml_fnm
=
$ut_data
->
{
xml_fnm
})
&&
defined
(
my
$xml_md5
=
$ut_data
->
{
xml_md5
}))
{
print
__LINE__
,
"
xml_fnm=[
$xml_fnm
] xml_md5=[
$xml_md5
] md5_hex=[
$md5_hex
]
\n
";
if
(
$md5_hex
eq
$xml_md5
)
{
$save_xml
=
0
;
print
__LINE__
,
"
md5 unchanged, will not save it again; save_xml=0
\n
";
}
}
}
if
(
$save_xml
)
{
print
__LINE__
,
"
save_xml=[
$save_xml
], saving DataCite XML to [
$xml_fnm
]
\n
";
open
(
XML
,
'
>:utf8
',
$xml_fnm
)
or
die
"
can't write to
$xml_fnm
";
# TODO: or do something else...
print
XML
$xml
;
close
(
XML
);
$row
->
{
xml_md5
}
=
my
$md5
=
file_md5_hex
(
$xml_fnm
);
print
__LINE__
,
"
xml_fnm=[
$xml_fnm
] md5=[
$md5
]
\n
";
}
}
}
if
(
$row
->
{
datacite_conversion_error_count
}
==
0
)
if
(
$row
->
{
datacite_conversion_error_count
}
==
0
)
...
@@ -684,9 +795,10 @@ EOX
...
@@ -684,9 +795,10 @@ EOX
foreach
my
$author
(
@$authors
)
foreach
my
$author
(
@$authors
)
{
{
my
(
$family
,
$given
)
=
map
{
xml_escape
(
$_
)
}
(
$author
->
{
family_name
},
$author
->
{
given_name
});
$xml
.=
<<
"
EOX
";
$xml
.=
<<
"
EOX
";
<
creator
>
<
creator
>
<
creatorName
>
$
author
->
{
family_name
},
$author
->
{
given_name
}
</
creatorName
>
<
creatorName
>
$
family
,
$given
</
creatorName
>
</
creator
>
</
creator
>
EOX
EOX
}
}
...
@@ -785,8 +897,6 @@ sub report_problem
...
@@ -785,8 +897,6 @@ sub report_problem
{
{
my
$problem
=
shift
;
my
$problem
=
shift
;
my
$ut_col
=
$agent_db
->
get_collection
('
problems
');
$problem
->
{
ts_iso_gmt
}
=
Util::ts::
ts_ISO_gmt
();
$problem
->
{
ts_iso_gmt
}
=
Util::ts::
ts_ISO_gmt
();
my
$area
=
$problem
->
{
area
};
my
$area
=
$problem
->
{
area
};
...
@@ -795,7 +905,7 @@ sub report_problem
...
@@ -795,7 +905,7 @@ sub report_problem
elsif
(
$area
eq
'
marc
')
{
$check_id
=
'
ac_number
'
}
elsif
(
$area
eq
'
marc
')
{
$check_id
=
'
ac_number
'
}
elsif
(
$area
eq
'
datacite
')
{
$check_id
=
'
doi
'
}
elsif
(
$area
eq
'
datacite
')
{
$check_id
=
'
doi
'
}
$
ut
_col
->
update
(
{
area
=>
$area
,
$check_id
=>
my
$id
=
$problem
->
{
$check_id
}
},
$problem
,
{
upsert
=>
1
}
);
# replace problem report, if one already exists
$
problems
_col
->
update
(
{
area
=>
$area
,
$check_id
=>
my
$id
=
$problem
->
{
$check_id
}
},
$problem
,
{
upsert
=>
1
}
);
# replace problem report, if one already exists
print
__LINE__
,
"
ATTN: problem reported for
$check_id
=
$id
:
",
Dumper
(
$problem
);
print
__LINE__
,
"
ATTN: problem reported for
$check_id
=
$id
:
",
Dumper
(
$problem
);
}
}
...
@@ -806,8 +916,6 @@ sub remove_problem
...
@@ -806,8 +916,6 @@ sub remove_problem
my
$id
=
shift
;
my
$id
=
shift
;
my
$check_id
=
shift
;
my
$check_id
=
shift
;
my
$ut_col
=
$agent_db
->
get_collection
('
problems
');
unless
(
defined
(
$check_id
))
unless
(
defined
(
$check_id
))
{
{
if
(
$area
eq
'
utheses
')
{
$check_id
=
'
utheses_id
'
}
if
(
$area
eq
'
utheses
')
{
$check_id
=
'
utheses_id
'
}
...
@@ -815,7 +923,34 @@ sub remove_problem
...
@@ -815,7 +923,34 @@ sub remove_problem
elsif
(
$area
eq
'
datacite
')
{
$check_id
=
'
doi
'
}
elsif
(
$area
eq
'
datacite
')
{
$check_id
=
'
doi
'
}
}
}
$ut_col
->
remove
({
area
=>
$area
,
$check_id
=>
$id
});
$problems_col
->
remove
({
area
=>
$area
,
$check_id
=>
$id
});
}
sub
write_metrics
{
my
$metrics
=
shift
;
open
(
FO
,
'
>:utf8
',
$nx_metrics_temp
)
or
die
"
can't write to
$nx_metrics_temp
";
my
$now
=
time
();
print
FO
<<"EOX";
# HELP ut1_last_run last time ut1 ran
# TYPE ut1_last_run counter
ut1_last_run $now
EOX
foreach
my
$mn
(
sort
keys
%$metrics
)
{
my
$cnt
=
$metrics
->
{
$mn
};
print
FO
<<"EOX";
# HELP $mn ut1 gauge
# TYPE $mn gauge
$mn $cnt
EOX
}
close
(
FO
);
rename
(
$nx_metrics_temp
,
$nx_metrics_prom
);
print
__LINE__
,
"
metrics written to
$nx_metrics_prom
\n
";
}
}
__END__
__END__
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment