Commit 0b8aeced
authored May 22, 2020 by Gerhard Gonter
parent e50bdf16

modifications for utheses uploads

Showing 2 changed files with 130 additions and 25 deletions:
  eprints1.pl          129 additions, 24 deletions
  lib/IRMA/eprints.pm    1 addition,   1 deletion
eprints1.pl  +129 −24
@@ -115,8 +115,8 @@ my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
 my %map_ot2ut_roles=
 (
-  'advisors'   => [qw(betreuer betreuer_2 betreuer_3)],
-  'coadvisors' => [qw(mitbetreuer mitbetreuer_2)],
+  'advisers'   => [qw(betreuer betreuer_2 betreuer_3)],
+  'coadvisers' => [qw(mitbetreuer mitbetreuer_2)],
   'assessors'  => [qw(beurteiler_1 beurteiler_2 beurteiler_3)],
 );
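The hunk above only renames the role-map keys ('advisors'/'coadvisors' become 'advisers'/'coadvisers'); the map itself still points each utheses role at the othes columns that may hold a person for that role. A minimal, runnable sketch of how such a map could be applied; the sample %row and the collect_roles() helper are hypothetical and not part of eprints1.pl:

# Illustrative sketch only (not part of the commit): applying a role map like
# %map_ot2ut_roles to one othes row; the sample %row and collect_roles() are hypothetical.
use strict;
use warnings;

my %map_ot2ut_roles=
(
  'advisers'   => [qw(betreuer betreuer_2 betreuer_3)],
  'coadvisers' => [qw(mitbetreuer mitbetreuer_2)],
  'assessors'  => [qw(beurteiler_1 beurteiler_2 beurteiler_3)],
);

my %row= (betreuer => 'A. Adviser', beurteiler_1 => 'B. Assessor');   # hypothetical othes columns

sub collect_roles
{
  my ($map, $row)= @_;

  my %out;
  foreach my $role (keys %$map)
  {
    # keep only the columns that are actually filled in for this row
    my @names= grep { defined ($_) && $_ ne '' } map { $row->{$_} } @{$map->{$role}};
    $out{$role}= \@names if (@names);
  }
  \%out;
}

my $roles= collect_roles (\%map_ot2ut_roles, \%row);
foreach my $role (sort keys %$roles)
{
  print "$role: ", join (', ', @{$roles->{$role}}), "\n";   # prints advisers and assessors, coadvisers is empty
}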
@@ -203,6 +203,7 @@ while (defined ($arg= shift (@ARGV)))
  elsif ($opt eq 'debug_names')           { $op_mode= 'debug_names'; }
  elsif ($opt eq 'debug_classifications' || $opt eq 'DC') { $op_mode= 'debug_classifications'; }
  elsif ($opt eq 'debug_keywords')        { $op_mode= 'debug_keywords'; }
  elsif ($opt eq 'debug_abstracts')       { $op_mode= 'debug_abstracts'; }
  elsif ($opt eq 'debug_stkz')            { $op_mode= 'debug_stkz'; }
  elsif ($opt eq 'max')     { $MAX_SYNC=     $val || shift (@ARGV); }
  elsif ($opt eq 'mab-age') { $MAX_MAB_AGE=  $val || shift (@ARGV); } # in seconds
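The max and mab-age options above take their value either attached to the option or from the next argument, via the "$val || shift (@ARGV)" idiom. A small self-contained sketch of that idiom with a minimal, hypothetical option loop (not the script's actual parser):

# Illustrative sketch only: the "$val || shift (@ARGV)" idiom used above,
# demonstrated with a minimal, hypothetical option loop (not the script's real parser).
use strict;
use warnings;

my ($MAX_SYNC, $MAX_MAB_AGE);
my @args= ('--max=10', '--mab-age', '3600');
while (defined (my $arg= shift (@args)))
{
  next unless ($arg =~ m/^--?([\w-]+)(?:=(.*))?$/);
  my ($opt, $val)= ($1, $2);

  if    ($opt eq 'max')     { $MAX_SYNC=    $val || shift (@args); }   # value attached with '=' ...
  elsif ($opt eq 'mab-age') { $MAX_MAB_AGE= $val || shift (@args); }   # ... or taken from the next argument
}
print "MAX_SYNC=[$MAX_SYNC] MAX_MAB_AGE=[$MAX_MAB_AGE]\n";             # MAX_SYNC=[10] MAX_MAB_AGE=[3600]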
@@ -341,6 +342,10 @@ elsif ($op_mode eq 'debug_keywords')
{
  debug_keywords ();
}
elsif ($op_mode eq 'debug_abstracts')
{
  debug_abstracts ();
}
elsif ($op_mode eq 'debug_classifications')
{
  # print "cnf: ", Dumper ($cnf);
@@ -2453,6 +2458,7 @@ sub ot2ut
  my $ot2ut= get_any_db ($cnf, 'ot2ut_database');
  my $col_sync= $ot2ut->get_collection ('sync');
  my $col_msg=  $ot2ut->get_collection ('messages');

  unless (defined ($utheses_faculty_map))
  {
@@ -2484,6 +2490,8 @@ sub ot2ut
  {
    last if (defined ($MAX_SYNC) && $cnt_synced >= $MAX_SYNC);

    my $t_start= time ();
    my $sync_info= $col_sync->find_one ({ eprint_id => $eprint_id });
    my ($errors, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path)= generate_utheses_metadata ($epr, $eprint_id);
@@ -2511,7 +2519,7 @@ sub ot2ut
       }
       else
       {
-        print __LINE__, " earlier sync attempt had errors, retgrying...\n";
+        print __LINE__, " earlier sync attempt had errors, retrying...\n";
         $col_sync->remove ( { _id => $sync_info->{_id} } );
         $sync_info= undef;
       }
@@ -2529,11 +2537,20 @@ sub ot2ut
       print __LINE__, " ERRORS; ut: ", Dumper ($ut);
       print __LINE__, " generate_utheses_metadata: errors: ", Dumper ($errors) if (@$errors);

-      my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, error_code => 'conversion_errors', error_cnt => scalar @$errors };
+      my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, context => $ot2ut_context, error_code => 'conversion_errors', error_cnt => scalar @$errors };
       push (@synced, $el);
       $el->{errors}= $errors;
       $col_sync->insert ($el);

+      my $msg= { message => "upload error: eprint_id=[$eprint_id] lastmod=[$lastmod] [conversion errors]", priority => 'normal', state => 'new', to => 'oma' };
+      $col_msg->insert ($msg);

       my $utheses_errors_json_path= 'othes/utheses_json/errors/' . $eprint_id . '.json';
       Util::JSON::write_json_file ($utheses_errors_json_path, $errors);
@@ -2564,15 +2581,7 @@ sub ot2ut
      print __LINE__, " upload_cmd: [", join (' ', @upload_cmd), "]\n";

      if ($do_upload)
      {

=begin comment
my $upload_result= `@upload_cmd`;
print __LINE__, " upload_result=[$upload_result]\n";
=end comment
=cut

        my $t_curl= time ();
        system (@upload_cmd);

        my $result_data;
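The backtick capture of the upload command is commented out (as a POD block) and replaced by system(@upload_cmd), which lets the command's output pass through instead of capturing it, while the new $t_curl timestamp measures the upload duration. A runnable sketch of that pattern with a placeholder command (echo stands in for the real upload command line):

# Illustrative sketch only: running the upload command with system() and timing it,
# as the block above does; 'echo' is a placeholder for the real upload command line.
use strict;
use warnings;

my @upload_cmd= ('echo', 'pretend-upload');
print "upload_cmd: [", join (' ', @upload_cmd), "]\n";

my $t_curl= time ();
my $rc= system (@upload_cmd);        # unlike backticks, system() lets the command's output pass through
my $td_curl= time () - $t_curl;

print "exit_status=[", $rc >> 8, "] time_upload=[$td_curl]\n";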
@@ -2585,7 +2594,7 @@ sub ot2ut
         {
           print __LINE__, " can't parse upload_result; error=[$@]\n";
           push (@$errors, { error => 'upload_error', error_info => $@ });
-          my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, error_code => 'upload_error', 1 };
+          my $el= { eprint_id => $eprint_id, lastmod => $lastmod, ts_upload => $ts_upload, context => $ot2ut_context, error_code => 'upload_error', 1 };
           push (@synced, $el);
           $el->{errors}= $errors;
           $col_sync->insert ($el);
@@ -2609,6 +2618,7 @@ old format 2019-11..2020-01
eprint_id => $eprint_id,
lastmod => $lastmod,
ts_upload => $ts_upload,
context => $ot2ut_context,
error_code => 'ok',
error_cnt => 0,
utheses_id => $utheses_id,
@@ -2631,6 +2641,7 @@ old format 2019-11..2020-01
        eprint_id => $eprint_id,
        lastmod => $lastmod,
        ts_upload => $ts_upload,
        context => $ot2ut_context,
        error_code => 'ok',
        error_cnt => 0,
        utheses_id => $utheses_id,
@@ -2639,6 +2650,17 @@ old format 2019-11..2020-01
      push (@synced, $out_row);
      $col_sync->insert ($out_row);

      my $td_start= time () - $t_start;
      my $td_curl=  time () - $t_curl;
      my $msg= { message => "upload success: eprint_id=[$eprint_id] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl", priority => 'normal', state => 'new', to => 'oma' };
      $col_msg->insert ($msg);
    }

    sleep (5);
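On success the loop now records a context-aware entry in the 'sync' collection and a notification in the 'messages' collection. A sketch of the record shapes involved, built as plain Perl hashes with hypothetical values; the $col_sync/$col_msg objects and their insert() come from the project's own database wrapper:

# Illustrative sketch only: the shape of the records inserted into the 'sync' and
# 'messages' collections above; the values are hypothetical.
use strict;
use warnings;
use Data::Dumper;

my ($eprint_id, $lastmod, $ts_upload, $ot2ut_context, $utheses_id)=
   (12345, '2020-05-22 10:00:00', time (), 'ot2ut-entw', 999);

my $out_row=
{
  eprint_id  => $eprint_id,
  lastmod    => $lastmod,
  ts_upload  => $ts_upload,
  context    => $ot2ut_context,     # field added by this commit
  error_code => 'ok',
  error_cnt  => 0,
  utheses_id => $utheses_id,
};

my $msg=
{
  message  => "upload success: eprint_id=[$eprint_id] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id]",
  priority => 'normal',
  state    => 'new',
  to       => 'oma',
};

print Dumper ($out_row, $msg);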
@@ -2687,6 +2709,9 @@ sub generate_utheses_metadata
  my $row= $all_rows->{$eprintid};

  my $history= get_history ($epr_db, $eprintid);
  # print __LINE__, " history: ", Dumper($history); exit;

  my ($lang_pdf, $files)= analyze_files (map { $row->{$_} } qw(fileinfo dir));
  print __LINE__, " lang_pdf=[$lang_pdf] files: ", Dumper ($files);
  my $main_file;
@@ -2731,8 +2756,7 @@ sub generate_utheses_metadata
   my $utp= $ut->{public};
   $utp->{origin}= 'import';
-  $utp->{datamodel}= 'container';
   $utp->{uploaded_by}= 'ot2ut';
+  # not needed/wanted 2020-05-14: $utp->{datamodel}= 'container';
   $utp->{rights_statement}= 'http://rightsstatements.org/vocab/InC/1.0/'; # "In Copyright" or "Alle Rechte vorbehalten"

   if (defined ($row->{matr}))
@@ -2795,7 +2819,12 @@ sub generate_utheses_metadata
   $utp->{utheses_status}= ($row->{eprint_status} eq 'archive') ? 'published' : 'work_in_progress'; # objects in eprint_status "buffer" are 'work_in_progress';
-  $utp->{utheses_status_last_modified}= get_othes_timestamp ($row, 'status_changed');
+  # $utp->{utheses_status_last_modified}= get_othes_timestamp($row, 'status_changed');
+  # $utp->{phaidra_thesis_doc_added_date}= get_othes_timestamp($history->{create}, 'timestamp');
+  $utp->{othes}->{history_create}= get_othes_timestamp ($history->{create}, 'timestamp');
+  $utp->{utheses_status_last_modified}= get_othes_timestamp ($history->{move_buffer_to_archive}, 'timestamp');
   $utp->{last_modified}= get_othes_timestamp ($row, 'lastmod');
   $utp->{import}=   # stored verbatim in mysql table utheses_import in column import_info
   {
@@ -2862,6 +2891,7 @@ sub generate_utheses_metadata
  my ($errors2, $classifications)= $epr->get_classifications ($eprintid);
  push (@errors, @$errors2) if (@$errors2);
  # 2020-05-14 nd: not needed: $thesis->{uploaded_by}= 'ot2ut';
  $thesis->{subject_classifications}= $classifications;
  $thesis->{number_of_pages}= "$main_file->{page_count}"; # Phaidra expects this as a string
@@ -2869,7 +2899,8 @@ sub generate_utheses_metadata
   # Mon May 11 22:12:38 CEST 2020 asked nd about this, especially thesis_doc_added_date:
   my %phaidra= map { $_ => '' } qw(container_pid container_status container_created_date thesis_doc_pid thesis_doc_status);
-  $phaidra{thesis_doc_added_date}= get_othes_timestamp ($row, 'datestamp');
+  $phaidra{thesis_doc_added_date}= get_othes_timestamp ($history->{create}, 'timestamp');
+  # $phaidra{thesis_doc_added_date}= get_othes_timestamp($row, 'datestamp');
   $ut->public ('phaidra', \%phaidra);
@@ -2878,6 +2909,43 @@ sub generate_utheses_metadata
  (\@errors, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path);
}

sub get_history
{
  my $epr_db= shift;
  my $eprintid= shift;

  my $history_rows= $epr_db->get_all_x ('history', ['objectid=?', $eprintid]);
  # print __LINE__, " history_rows: ", Dumper($history_rows);
  my %historyids;
  my ($create, $move_buffer_to_archive);
  foreach my $historyid (keys %$history_rows)
  {
    my $row= $history_rows->{$historyid};
    # print __LINE__, " history_row: ", Dumper($row);
    # NOTE: a revision can be present multiple times, so we need to sort by historyid
    # $revisions{$row->{revision}}= $row;
    $historyids{$historyid}= $row;
    $create= $row if ($row->{action} eq 'create' && !defined ($create));
    $move_buffer_to_archive= $row if ($row->{action} eq 'move_buffer_to_archive' && !defined ($move_buffer_to_archive));
  }
  my @historyids= sort { $a <=> $b } keys %historyids;
  print __LINE__, " historyids: ", join (' ', @historyids), "\n";
  my @events= map { $historyids{$_} } @historyids;
  my $history=
  {
    events => \@events,
    create => $create,
    move_buffer_to_archive => $move_buffer_to_archive,
  };

  $history;
}

sub get_study_id
{
  my $matr= shift;
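The new get_history() sub collects all history rows for an eprint, keeps them ordered by historyid, and picks out the first 'create' and 'move_buffer_to_archive' events. A runnable sketch of that selection logic and of how callers read the result; %$history_rows is a hypothetical sample standing in for get_all_x ('history', ['objectid=?', $eprintid]):

# Illustrative sketch only: the selection logic of the new get_history() with a
# hypothetical sample instead of a database query.
use strict;
use warnings;

my $history_rows=
{
  1 => { action => 'create',                 timestamp => '2020-01-10 09:00:00' },
  2 => { action => 'modify',                 timestamp => '2020-02-01 12:00:00' },
  3 => { action => 'move_buffer_to_archive', timestamp => '2020-03-05 08:30:00' },
};

my ($create, $move_buffer_to_archive);
foreach my $historyid (sort { $a <=> $b } keys %$history_rows)
{
  my $row= $history_rows->{$historyid};
  $create=                 $row if ($row->{action} eq 'create'                 && !defined ($create));
  $move_buffer_to_archive= $row if ($row->{action} eq 'move_buffer_to_archive' && !defined ($move_buffer_to_archive));
}

# callers read the timestamps of these two events, e.g. for
# $utp->{othes}->{history_create} and $utp->{utheses_status_last_modified}
print "create:                 $create->{timestamp}\n";
print "move_buffer_to_archive: $move_buffer_to_archive->{timestamp}\n";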
@@ -2963,11 +3031,16 @@ sub get_thesis_data
   push (@j_titles, { type => 'parallel', title_lang => $lang, title_text => $title,        origin => 'title' })        if ($title);
   push (@j_titles, { type => 'parallel', title_lang => 'deu', title_text => $title_ger,    origin => 'title_ger' })    if ($title_ger);
   push (@j_titles, { type => 'parallel', title_lang => 'eng', title_text => $title_eng,    origin => 'title_eng' })    if ($title_eng);
-  push (@j_titles, { type => 'parallel', title_lang => $lang, title_text => $title_zusatz, origin => 'title_zusatz' }) if ($title_zusatz);
-  @j_titles[0]->{type}= 'main';
+  # push (@j_titles, { type => 'parallel', title_lang => $lang, title_text => $title_zusatz, origin => 'title_zusatz' }) if ($title_zusatz);
+  $j_titles[0]->{type}= 'main';
+  if ($title_zusatz)
+  {
+    $j_titles[0]->{subtitle_text}= $title_zusatz;
+    $j_titles[0]->{subtitle_lang}= $lang;
+  };

   my @j_abstracts;
-  push (@j_abstracts, { language => $lang, text => $abstract,     origin => 'abstract' })     if ($abstract);
+  push (@j_abstracts, { language => 'deu', text => $abstract,     origin => 'abstract' })     if ($abstract);
   push (@j_abstracts, { language => 'eng', text => $abstract_eng, origin => 'abstract_eng' }) if ($abstract_eng);

   my @keywords= split (/\s*\/\s*/, $keywords);
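The change above makes the first title entry the 'main' title via $j_titles[0] (the first element; @j_titles[0] was a one-element array slice) and folds title_zusatz into that entry as a subtitle instead of pushing it as a parallel title. A runnable sketch of the resulting structure with hypothetical title values:

# Illustrative sketch only: the title structure produced by the changed code, with
# hypothetical title values.
use strict;
use warnings;
use Data::Dumper;

my ($lang, $title, $title_eng, $title_zusatz)= ('deu', 'Ein Titel', 'A Title', 'Ein Untertitel');

my @j_titles;
push (@j_titles, { type => 'parallel', title_lang => $lang, title_text => $title,     origin => 'title' })     if ($title);
push (@j_titles, { type => 'parallel', title_lang => 'eng', title_text => $title_eng, origin => 'title_eng' }) if ($title_eng);

$j_titles[0]->{type}= 'main';
if ($title_zusatz)
{
  $j_titles[0]->{subtitle_text}= $title_zusatz;
  $j_titles[0]->{subtitle_lang}= $lang;
}

print Dumper (\@j_titles);   # first entry is the 'main' title and carries the subtitle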
@@ -2994,7 +3067,7 @@ sub get_thesis_data
   (
     languages => [ $lang ],
     titles    => \@j_titles,
-    abstract  => \@j_abstracts,
+    abstracts => \@j_abstracts,
     keywords  => \@j_keywords,
     type      => $map_ot2ut_thesis_type{$row->{thesis_type}},
@@ -3210,7 +3283,7 @@ sub debug_keywords
   my $epr_db= $epr->connect ();

   my @col_names_db= qw( eprintid eprint_status sprache keywords keywords_eng );
-  my $search_term= "eprint_status in ('archive', 'buffer') and sprache<>'ger' and sprache<>'eng'";
+  # my $search_term= "eprint_status in ('archive', 'buffer') and sprache<>'ger' and sprache<>'eng'";
   my $search_term= "eprint_status in ('archive', 'buffer')";
   my $keys= $epr_db->get_all_x ('eprint', [$search_term], join (',', @col_names_db));
@@ -3273,6 +3346,38 @@ sub debug_keywords
  Util::JSON::write_json_file ('/backup/othes/eprints/test/othes_keywords.json', \%all_othes);
}

sub debug_abstracts
{
  my $epr= get_eprints_db ($cnf);
  my $epr_db= $epr->connect ();

  my @col_names_db= qw( eprintid eprint_status sprache abstract abstract_eng );
  # my $search_term= "eprint_status in ('archive', 'buffer') and sprache<>'ger' and sprache<>'eng'";
  # my $search_term= "eprint_status in ('archive', 'buffer') and sprache='ger'";
  # my $search_term= "eprint_status in ('archive', 'buffer') and sprache='eng'";
  # my $search_term= "eprint_status in ('archive', 'buffer')";
  my $search_term= "eprintid in (2276, 3432, 8314, 9358, 10236, 10941, 15148, 15934, 18224, 23898, 27575, 28791, 30614, 32692, 35111, 38069, 40982, 42122, 43078, 44504, 44510, 46380, 46381, 49927, 51776, 52780, 52925, 56916, 60835)";
  my $keys= $epr_db->get_all_x ('eprint', [$search_term], join (',', @col_names_db));

  open (FO, '>:utf8', 'all_keywords.tsv') or die;
  # print FO join("\t", qw( eprintid eprint_status lang n_kw kw n_kwe kwe )), "\n";
  print FO join ("\t", qw( eprintid eprint_status lang lang_kw n_kw kw )), "\n";

  my (%all_keywords_de, %all_keywords_en);
  my %all_othes;
  foreach my $key (keys %$keys)
  {
    my $r= $keys->{$key};
    print __LINE__, " key=[$key] ", Dumper ($r);
    my ($id, $es, $lang, $abs, $abse)= map { $r->{$_} } @col_names_db;

    # $abs =~ tr/ \t\r\n/ /s;
    # print join("\t", $id, $abs), "\n";
  }
}

sub debug_stkz
{
  my $epr= get_eprints_db ($cnf);
lib/IRMA/eprints.pm  +1 −1
@@ -43,7 +43,7 @@ sub fetch_data
     $conditions .= ' AND doi IS NULL' if ($c eq 'doi' && !$other_conditions->{doi});
   }

-  $m->show_query (1);
+  # $m->show_query(1);
   my $res= $m->get_all_x ('eprint', [$conditions, $eprint_status], 'eprintid,eprint_status,ac_nummer,type,matr,urn,uri,sperre,einverstaendnis,rev_number');