Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
irma2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Gerhard Gonter
irma2
Commits
141adce9
Commit
141adce9
authored
Jul 12, 2020
by
Gerhard Gonter
Browse files
Options
Downloads
Patches
Plain Diff
ot2ut optimizations
parent
ce7906e6
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
eprints1.pl
+38
-40
38 additions, 40 deletions
eprints1.pl
with
38 additions
and
40 deletions
eprints1.pl
+
38
−
40
View file @
141adce9
...
@@ -108,6 +108,7 @@ my $delete_bad_ac_entries= 0; # TODO: should be an option?
...
@@ -108,6 +108,7 @@ my $delete_bad_ac_entries= 0; # TODO: should be an option?
my
$show_TODOs
=
0
;
my
$show_TODOs
=
0
;
my
$die_nbn_already_defined
=
0
;
my
$die_nbn_already_defined
=
0
;
my
$op_mode
=
'
unknown
';
# ======================================================================
# ======================================================================
# BEGIN OT2UT: Othes to Utheses
# BEGIN OT2UT: Othes to Utheses
...
@@ -155,22 +156,28 @@ my %map_ot2ut_thesis_type=
...
@@ -155,22 +156,28 @@ my %map_ot2ut_thesis_type=
phd
=>
'
https://pid.phaidra.org/vocabulary/1PHE-7VMS
',
# 'Dissertation'
phd
=>
'
https://pid.phaidra.org/vocabulary/1PHE-7VMS
',
# 'Dissertation'
);
);
my
@ot2ut_synced_columns
=
qw( eprint_id eprint_status lastmod context ts_upload td_total error_code error_cnt utheses_id uploaded_fnm )
;
# Fields currently not available:
# container_pid container_result document_pid document_result
# activate_result import_code response_msg import_note
my
$ot2ut_eprint_status
=
'
archive
';
my
$silent_upload_success
=
0
;
my
$do_upload
=
0
;
my
$no_doi
=
0
;
my
$ignore_errors
=
0
;
if
(
$
0
eq
'
./ot2ut.pl
')
{
$op_mode
=
'
ot2ut
';
$do_upload
=
1
;
$MAX_SYNC
=
1
;
}
if
(
$
0
eq
'
./oma.pl
')
{
$op_mode
=
'
oma
';
$do_upload
=
1
;
}
# END OT2UT: Othesis to Utheses
# END OT2UT: Othesis to Utheses
# ======================================================================
# ======================================================================
my
$op_mode
=
'
unknown
';
my
@db_tables
=
();
my
@db_tables
=
();
my
@PARS
;
my
@PARS
;
my
$debug_level
=
0
;
my
$debug_level
=
0
;
my
$force
=
0
;
my
$force
=
0
;
my
$do_upload
=
0
;
my
$db_name
;
my
$db_name
;
my
$no_doi
=
0
;
my
$ignore_errors
=
0
;
my
$ot2ut_eprint_status
=
'
archive
';
my
$silent_upload_success
=
0
;
if
(
$
0
eq
'
./ot2ut.pl
')
{
$op_mode
=
'
ot2ut
';
$MAX_SYNC
=
1
;
$do_upload
=
1
;
}
my
$arg
;
my
$arg
;
while
(
defined
(
$arg
=
shift
(
@ARGV
)))
while
(
defined
(
$arg
=
shift
(
@ARGV
)))
...
@@ -2320,16 +2327,6 @@ sub get_othes_timestamp
...
@@ -2320,16 +2327,6 @@ sub get_othes_timestamp
my
$row
=
shift
;
my
$row
=
shift
;
my
$name
=
shift
;
my
$name
=
shift
;
=begin comment
old format...
my $ts= sprintf("%4d-%02d-%02d", map { $row->{$name . '_' . $_} } qw(year month day));
$ts .= sprintf("T%02d%02d%02d", map { $row->{$name . '_' . $_} } qw(hour minute second)) if (exists ($row->{$name . '_hour'}));
=end comment
=cut
my
@ts
;
my
@ts
;
foreach
my
$el
(
qw(year month day)
)
foreach
my
$el
(
qw(year month day)
)
{
{
...
@@ -2361,8 +2358,8 @@ sub doigen
...
@@ -2361,8 +2358,8 @@ sub doigen
my
$canonical_url
=
sprintf
("
https://othes.univie.ac.at/%s/
",
$eprintid
);
my
$canonical_url
=
sprintf
("
https://othes.univie.ac.at/%s/
",
$eprintid
);
my
$datacite_xml_path
=
'
othes/DataCite_XML/
'
.
$doi
.
'
.xml
';
my
$datacite_xml_path
=
'
othes/DataCite_XML/
'
.
$doi
.
'
.xml
';
my
$lastmod
=
sprintf
("
%4d-%02d-%02dT%02d%02d%02d
",
map
{
$row
->
{
$_
}
}
qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)
);
#
my $lastmod= sprintf("%4d-%02d-%02dT%02d%02d%02d", map { $row->{$_} } qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second));
#
my $lastmod= get_othes_timestamp($row, 'lastmod');
my
$lastmod
=
get_othes_timestamp
(
$row
,
'
lastmod
');
# TODO: utheses and datacite metadata should be considered independently
# TODO: utheses and datacite metadata should be considered independently
if
(
-
f
$datacite_xml_path
)
if
(
-
f
$datacite_xml_path
)
...
@@ -2591,6 +2588,7 @@ sub ot2ut
...
@@ -2591,6 +2588,7 @@ sub ot2ut
{
{
my
@fetch_pars
;
my
@fetch_pars
;
push
(
@fetch_pars
,
{
doi
=>
1
})
unless
(
$no_doi
);
push
(
@fetch_pars
,
{
doi
=>
1
})
unless
(
$no_doi
);
print
__LINE__
,
"
no_doi=[
$no_doi
] fetch_pars:
",
Dumper
(
\
@fetch_pars
);
sleep
(
3
);
$res1
=
$epr
->
fetch_data
('
archive
',
@fetch_pars
);
$res1
=
$epr
->
fetch_data
('
archive
',
@fetch_pars
);
}
}
elsif
(
$ot2ut_eprint_status
eq
'
buffer
')
elsif
(
$ot2ut_eprint_status
eq
'
buffer
')
...
@@ -2604,12 +2602,9 @@ sub ot2ut
...
@@ -2604,12 +2602,9 @@ sub ot2ut
}
}
else
else
{
{
die
"
no eprints objects found
";
print
__LINE__
,
"
ATTN: no eprints objects found
";
return
undef
;
}
}
# TODO, future ...
# my $res2= $epr->fetch_data('buffer');
# push (@eprint_ids, keys %$res2);
}
}
my
@synced
=
();
my
@synced
=
();
...
@@ -2620,7 +2615,7 @@ sub ot2ut
...
@@ -2620,7 +2615,7 @@ sub ot2ut
sleep
(
3
);
sleep
(
3
);
foreach
my
$eprint_id
(
@eprint_ids
)
foreach
my
$eprint_id
(
@eprint_ids
)
{
{
last
if
(
!
$running
);
last
unless
(
$running
);
last
if
(
defined
(
$MAX_SYNC
)
&&
$cnt_synced
>=
$MAX_SYNC
);
last
if
(
defined
(
$MAX_SYNC
)
&&
$cnt_synced
>=
$MAX_SYNC
);
activity
({
activity
=>
'
ot2ut
'})
if
(
$last_activity
+
$activity_period
<=
time
());
activity
({
activity
=>
'
ot2ut
'})
if
(
$last_activity
+
$activity_period
<=
time
());
...
@@ -2824,19 +2819,22 @@ old format 2019-11..2020-01
...
@@ -2824,19 +2819,22 @@ old format 2019-11..2020-01
}
}
$cnt_synced
++
;
$cnt_synced
++
;
last
unless
(
$running
);
}
}
my
@columns
=
qw( eprint_id eprint_status lastmod context ts_upload td_total error_code error_cnt utheses_id uploaded_fnm )
;
my
$res
;
if
(
$cnt_synced
)
# Fields currently not available: container_pid container_result document_pid document_result activate_result import_code response_msg import_note
{
$res
=
"
synced
$cnt_synced
objects in context
$ot2ut_context
;
$cnt_errors
objects with errors
";
my
$fnm
=
sprintf
('
ot2ut_%s.tsv
',
ts_ISO
());
my
$fnm
=
sprintf
('
ot2ut_%s.tsv
',
ts_ISO
());
Util::Matrix::
save_hash_as_csv
(
\
@columns
,
\
@synced
,
$fnm
,
"
\t
",
'',
"
\n
",
1
);
Util::Matrix::
save_hash_as_csv
(
\
@ot2ut_synced_columns
,
\
@synced
,
$fnm
,
"
\t
",
'',
"
\n
",
1
);
my
$res
=
"
synced
$cnt_synced
objects in context
$ot2ut_context
;
$cnt_errors
objects with errors
";
print
__LINE__
,
"
$res
, see [
$fnm
]
\n
";
print
__LINE__
,
"
$res
, see [
$fnm
]
\n
";
}
else
{
print
__LINE__
,
"
$res
\n
";
$res
=
"
synced no objects in context
$ot2ut_context
";
}
(
\
@synced
,
$res
);
(
\
@synced
,
$res
);
}
}
...
@@ -2883,13 +2881,14 @@ sub generate_utheses_metadata
...
@@ -2883,13 +2881,14 @@ sub generate_utheses_metadata
push
(
@errors
,
{
error
=>
'
no_file
'
});
push
(
@errors
,
{
error
=>
'
no_file
'
});
}
}
my
$doi
=
$row
->
{
doi
}
;
my
(
$doi
,
$eprint_status
)
=
map
{
$row
->
{
$_
}
}
qw(doi eprint_status)
;
push
(
@errors
,
{
error
=>
'
no_doi
',
note
=>
'
for now...
'
})
unless
(
defined
(
$doi
)
||
$no_doi
);
push
(
@errors
,
{
error
=>
'
no_doi
',
note
=>
'
for now...
'
})
unless
(
defined
(
$doi
)
||
$no_doi
);
my
$utheses_json_path
=
'
othes/utheses_json/
'
.
$eprintid
.
'
.json
';
my
$utheses_json_path
=
'
othes/utheses_json/
'
.
$eprintid
.
'
.json
';
my
$utheses_upload_result_json_path
=
'
othes/utheses_json/
'
.
$eprintid
.
'
_upload_result.json
';
my
$utheses_upload_result_json_path
=
'
othes/utheses_json/
'
.
$eprintid
.
'
_upload_result.json
';
my
$lastmod
=
sprintf
("
%4d-%02d-%02dT%02d%02d%02d
",
map
{
$row
->
{
$_
}
}
qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)
);
my
$lastmod
=
sprintf
("
%4d-%02d-%02dT%02d%02d%02d
",
map
{
$row
->
{
$_
}
}
qw(lastmod_year lastmod_month lastmod_day lastmod_hour lastmod_minute lastmod_second)
);
# my $lastmod= get_othes_timestamp($row, 'lastmod'); that's a different format: yyyy-mm-ddTHH:MM:SSZ
if
(
-
f
$utheses_json_path
)
if
(
-
f
$utheses_json_path
)
{
{
...
@@ -2981,7 +2980,7 @@ sub generate_utheses_metadata
...
@@ -2981,7 +2980,7 @@ sub generate_utheses_metadata
}
}
# $utp->{utheses_id}= wird erzeugt beim Import, kennma ned wissn
# $utp->{utheses_id}= wird erzeugt beim Import, kennma ned wissn
$utp
->
{
utheses_status
}
=
(
$
row
->
{
eprint_status
}
eq
'
archive
')
$utp
->
{
utheses_status
}
=
(
$eprint_status
eq
'
archive
')
?
'
published
'
?
'
published
'
:
'
thesis_doc_added
';
# objects in eprint_status "buffer" are 'thesis_doc_added', formerly 'work_in_progress';
:
'
thesis_doc_added
';
# objects in eprint_status "buffer" are 'thesis_doc_added', formerly 'work_in_progress';
...
@@ -3012,7 +3011,7 @@ sub generate_utheses_metadata
...
@@ -3012,7 +3011,7 @@ sub generate_utheses_metadata
# TODO: add information about the uploaded files
# TODO: add information about the uploaded files
},
},
fields_processed
=>
fields_processed
=>
# TODO: may be obosolete
{
{
(
map
{
$_
=>
$row
->
{
$_
}
}
qw( thesis_type )
),
(
map
{
$_
=>
$row
->
{
$_
}
}
qw( thesis_type )
),
# thesis_type: various strings; see %map_ot2ut_thesis_type
# thesis_type: various strings; see %map_ot2ut_thesis_type
...
@@ -3027,7 +3026,6 @@ sub generate_utheses_metadata
...
@@ -3027,7 +3026,6 @@ sub generate_utheses_metadata
# always NULL: department
# always NULL: department
# various timestamps
# various timestamps
lastmod
=>
$lastmod
,
(
map
{
$_
=>
get_othes_timestamp
(
$row
,
$_
)
}
qw( datestamp )
),
(
map
{
$_
=>
get_othes_timestamp
(
$row
,
$_
)
}
qw( datestamp )
),
},
},
},
},
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment