Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
irma2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Gerhard Gonter
irma2
Commits
4d63f0ff
Commit
4d63f0ff
authored
2 years ago
by
Gerhard Gonter
Browse files
Options
Downloads
Patches
Plain Diff
factored code out int Perl modele Univie::EoD::CrossReference
parent
e566b09e
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
eod1.pl
+104
-212
104 additions, 212 deletions
eod1.pl
lib/Univie/EoD/CrossReference.pm
+358
-0
358 additions, 0 deletions
lib/Univie/EoD/CrossReference.pm
with
462 additions
and
212 deletions
eod1.pl
+
104
−
212
View file @
4d63f0ff
...
...
@@ -29,16 +29,29 @@ use Util::MongoDB;
use
lib
'
lib
';
use
Alma::
MARC_Extractor
;
use
IRMA::
NA
;
# use IRMA::NA;
use
Univie::EoD::
CrossReference
;
my
$agent_config_file
=
'
/etc/irma/eodagent.json
';
my
$op_mode
=
'
complete
';
my
@PARS
;
my
@tsv_columns
=
qw( pid verdict ownerId state model
ac_number alma_notes ac_number_note aleph_url
marc_record ts_fetched fetched ts_marc mms_id lib_code
ticket ticket_status vt
df_doi val_doi doi update_doi
df_urn val_urn urn update_urn
df_hdl val_hdl hdl update_hdl
df_phaidra val_phaidra phaidra_url update_phaidra_url
)
;
my
(
@pars
,
@ac_numbers
,
@pids
);
my
$arg
;
while
(
defined
(
$arg
=
shift
(
@ARGV
)))
{
if
(
$arg
eq
'
-
')
{
push
(
@
PARS
,
'
-
');
}
elsif
(
$arg
eq
'
--
')
{
push
(
@
PARS
,
@ARGV
);
@ARGV
=
();
}
if
(
$arg
eq
'
-
')
{
push
(
@
pars
,
'
-
');
}
elsif
(
$arg
eq
'
--
')
{
push
(
@
pars
,
@ARGV
);
@ARGV
=
();
}
elsif
(
$arg
=~
/^--(.+)/
)
{
my
(
$opt
,
$val
)
=
split
('
=
',
$
1
,
2
);
...
...
@@ -56,7 +69,9 @@ while (defined ($arg= shift (@ARGV)))
}
else
{
push
(
@PARS
,
$arg
);
if
(
$arg
=~
m#^[\w\d\-]+:\d+$#
)
{
push
(
@pids
,
$arg
)
}
elsif
(
$arg
=~
m#^AC\d{8}$#
)
{
push
(
@ac_numbers
,
$arg
)
}
else
{
push
(
@pars
,
$arg
);
}
}
}
...
...
@@ -65,236 +80,87 @@ print join (' ', __FILE__, __LINE__, 'caller=['. caller() . ']'), "\n";
my
$agent_cnf
=
Util::JSON::
read_json_file
(
$agent_config_file
);
# print __LINE__, " agent_cnf: ", main::Dumper ($agent_cnf); exit(0);
# Step 1: get list of books from inventory
# get handles for various databases
my
$inv_db
=
IRMA::db::
get_any_db
(
$agent_cnf
,
'
inventory_database
');
my
$foxml_col
=
$inv_db
->
get_collection
('
foxml.data
');
# print __LINE__, " foxml_col: ", Dumper($foxml_col);
print
__LINE__
,
"
foxml_col=[
$foxml_col
]
\n
";
my
%counters
;
my
@books_duplicate_ac_number
;
my
@books_problems
;
my
@books_ok
;
my
@foxml_columns
=
qw(ownerId state model ac_number aleph_url pid)
;
# my %foxml_columns= map { $_ => 1 } @foxml_columns;
my
$search
=
{
ownerId
=>
'
ondemae7
',
state
=>
'
Active
',
model
=>
'
Book
'
};
my
$crf
=
Univie::EoD::
CrossReference
->
new
(
agent_cnf
=>
$agent_cnf
,
counters
=>
\
%counters
);
my
%counters
;
my
$cur
=
$foxml_col
->
find
(
$search
);
# print __LINE__, ' cur: ', Dumper($cur);
my
%ac_numbers
=
();
my
%pids
=
();
my
@books
=
();
while
(
my
$rec
=
$cur
->
next
())
my
$count_objects
=
0
;
if
(
@ac_numbers
)
{
# print __LINE__, " rec: ", Dumper($rec);
my
%book
=
map
{
$_
=>
$rec
->
{
$_
}
}
@foxml_columns
;
# print __LINE__, " book: ", Dumper(\%book);
$book
{
phaidra_url
}
=
'
https://phaidra.univie.ac.at/
'
.
$book
{
pid
};
push
(
@
{
$ac_numbers
{
$book
{
ac_number
}}}
=>
\
%book
);
$pids
{
$book
{
pid
}}
=
\
%book
;
$crf
->
get_book_by_ac_number
(
\
@ac_numbers
);
$count_objects
+=
@ac_numbers
;
}
push
(
@books
,
\
%book
);
if
(
@pids
)
{
$crf
->
get_book_by_pid
(
\
@pids
);
$count_objects
+=
@pids
;
}
print
__LINE__
,
"
checking for duplicate ac_numbers
\n
";
foreach
my
$ac_number
(
keys
%ac_numbers
)
if
(
@pars
)
{
my
$x
=
$ac_numbers
{
$ac_number
};
# print __LINE__, " ac_number=[$ac_number] pids=[", join(', ', map { $_->{pid} } @$x), "]\n";
if
(
@$x
!=
1
)
{
# either this is a duplicate or a member of a collection (ZS) where the Alma record should point to the collection instead
print
__LINE__
,
"
duplicate_ac_number=[
$ac_number
] pids=[
",
join
('
,
',
map
{
$_
->
{
pid
}
}
@$x
),
"
]
\n
";
$counters
{
duplicate_ac_number
}
++
;
# TODO: find out, why this is a duplicate, possibly do not mark the "canonical" version which should be identified in a ticket or so
# for now, just leave them out of further processing.
foreach
my
$book
(
@$x
)
{
$book
->
{
ac_number_note
}
=
'
dup
';
}
while
(
my
$par
=
shift
(
@pars
))
{
# TODO: ???
}
}
my
$book_count
=
@books
;
print
__LINE__
,
"
found
$book_count
books
\n
";
if
(
$count_objects
==
0
)
{
$crf
->
get_book_inventory
();
}
# BEGIN step 2: check IRMA records
my
$irma_db
=
IRMA::db::
get_any_db
(
$agent_cnf
,
'
irma_database
');
my
$irma_col
=
$irma_db
->
get_collection
('
irma.map
');
$crf
->
flag_duplicate_ac_numbers
();
process_book_list
(
$crf
->
get_book_list
());
# BEGIN step 3: check Alma records
my
$marc_db
=
IRMA::db::
get_any_db
(
$agent_cnf
,
'
marc_database
');
my
$marc_col
=
$marc_db
->
get_collection
('
alma.marc
');
my
$req_col
=
$marc_db
->
get_collection
('
requests
');
print
__LINE__
,
"
counters:
",
Dumper
(
\
%counters
);
my
@marc_mex_fields
=
qw(df_doi val_doi df_phaidra val_phaidra df_urn val_urn ts_marc)
;
my
@marc_fields
=
qw(ac_number mms_id fetched lib_code)
;
my
@marc_extra_fields
=
qw(marc_record ts_fetched)
;
my
$mex
=
Alma::
MARC_Extractor
->
new
(
\
@marc_fields
);
$mex
->
{
mex_ot2ut
}
=
0
;
$mex
->
{
mex_phaidra
}
=
1
;
Util::JSON::
write_json_file
('
eod_problems.json
',
\
@books_problems
);
my
@books_duplicate_ac_number
;
my
@books_problems
;
my
@books_ok
;
BOOK3:
foreach
my
$book
(
@books
)
write_tsv_file
('
eod_data.tsv
',
\
@books_ok
);
write_tsv_file
('
duplicate_eod_data.tsv
',
\
@books_duplicate_ac_number
);
write_tsv_file
('
problems_eod_data.tsv
',
\
@books_problems
);
exit
(
0
);
sub
process_book_list
{
my
@irma_notes
=
();
my
(
$ac_number
,
$pid
)
=
map
{
$book
->
{
$_
}
}
qw(ac_number pid)
;
my
$book_list
=
shift
;
unless
(
defined
(
$ac_number
)
&&
$ac_number
=~
m#^AC\d{8}$#
)
{
$book
->
{
problem
}
=
"
invalid ac_number=[
$ac_number
]
";
push
(
@books_problems
,
$book
);
$book
->
{
problem
}
=
'
invalid_ac_number
';
$counters
{
invalid_ac_number
}
++
;
next
BOOK3
;
}
my
$book_count
=
@$book_list
;
print
__LINE__
,
"
found
$book_count
books
\n
";
my
@irma_records
=
$irma_col
->
find
({
ac_number
=>
$ac_number
})
->
all
();
print
__LINE__
,
"
ac_number=[
$ac_number
] irma_records:
",
scalar
@irma_records
,
'
',
Dumper
(
\
@irma_records
);
# BEGIN step 2: check IRMA records
my
$marc
=
$marc_col
->
find_one
({
ac_number
=>
$ac_number
});
# print __LINE__, " marc: ", Dumper($marc);
unless
(
defined
(
$marc
))
{
$counters
{
missing_marc_record
}
++
;
$book
->
{
problem
}
=
'
missing_marc_record
';
push
(
@books_problems
,
$book
);
next
BOOK3
;
}
# BEGIN step 3: check Alma records
# my $req_col= $marc_db->get_collection('requests');
if
(
$book
->
{
ac_number_note
})
{
$counters
{
has_ac_number_note
}
++
;
push
(
@books_duplicate_ac_number
,
$book
);
# save for later, see above near the duplicate check
next
BOOK3
;
}
# my @marc_mex_fields= qw(df_doi val_doi df_phaidra val_phaidra df_urn val_urn ts_marc);
# my @marc_extra_fields= qw(marc_record ts_fetched);
my
@alma_notes
=
();
$book
->
{
ts_fetched
}
=
Util::ts::
ts_ISO_gmt
(
$marc
->
{
fetched
});
$mex
->
extract_identifiers2
(
$marc
,
$book
);
if
(
exists
(
$book
->
{
mex
}))
BOOK3:
foreach
my
$book
(
@$book_list
)
{
my
$mex
=
$book
->
{
mex
};
print
__LINE__
,
"
ac_number=[
$ac_number
] mex:
",
Dumper
(
$mex
);
foreach
my
$mf
(
keys
%$mex
)
my
(
$verdict
)
=
$crf
->
check_book
(
$book
);
$book
->
{
verdict
}
=
$verdict
;
$counters
{
$verdict
}
++
;
if
(
$verdict
eq
'
ok
')
{
my
$mfa
=
$mex
->
{
$mf
};
if
(
@$mfa
!=
1
)
{
# this should be inspected
print
__LINE__
,
"
ac_number=[
$ac_number
] multiple entries for
$mf
mex:
",
Dumper
(
$mex
);
$counters
{
multiple_mex_entries
}
++
;
$book
->
{
problems
}
=
'
multiple_mex_entries
';
push
(
@books_problems
,
$book
);
next
BOOK3
;
}
foreach
my
$mfe
(
@$mfa
)
{
my
(
$df
,
$val
)
=
@$mfe
;
my
$copy
=
0
;
if
(
$mf
eq
'
phaidra
')
{
if
(
$val
=~
m#^https?://phaidra.univie.ac.at/(.*)(o:\d+)$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
my
(
$extra
,
$found_pid
)
=
(
$
1
,
$
2
);
my
$correct_url
=
'
https://phaidra.univie.ac.at/
'
.
$found_pid
;
if
(
$correct_url
ne
$book
->
{
phaidra_url
}
||
$extra
ne
'')
{
$book
->
{
update_phaidra_url
}
=
$book
->
{
phaidra_url
};
$counters
{
update_phaidra_url
}
++
;
push
(
@alma_notes
,
'
update_phaidra_url
');
}
}
}
elsif
(
$mf
eq
'
hdl
')
{
if
(
$val
=~
m#^11353/10\.(\d+)$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
my
$found_pid
=
'
o:
'
.
$
1
;
if
(
$found_pid
ne
$pid
)
{
# TODO:
$book
->
{
update_hdl
}
=
$book
->
{
hdl
};
$counters
{
update_hdl
}
++
;
push
(
@alma_notes
,
'
update_hdl
');
}
}
}
elsif
(
$mf
eq
'
doi
')
{
if
(
$val
=~
m#^10.25365/digital-copy\.(\d+)$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
# TODO: DOI checken
# if ($found_pid ne $pid)
# { # TODO:
# $book->{update_hdl}= $book->{hdl};
# $counters{update_hdl}++;
# push(@alma_notes, 'update_hdl');
# }
}
}
elsif
(
$mf
eq
'
urn
')
{
if
(
$val
=~
m#^urn:nbn:at:at-ubw-\d{5}\.\d{5}\.\d{6}-\d$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
# TODO: check if this is the known nbn!
}
}
if
(
$copy
)
{
$book
->
{'
df_
'
.
$mf
}
=
$df
;
$book
->
{'
val_
'
.
$mf
}
=
$val
;
}
else
{
push
(
@alma_notes
,
"
junk_
${mf}
_in_marc
");
print
__LINE__
,
"
ATTN: ac_number=[
$ac_number
] pid=[
$pid
] junk data mf=[
$mf
] in marc record: df=[
$df
] val=[
$val
]
\n
";
}
}
push
(
@books_ok
,
$book
);
}
elsif
(
$verdict
eq
'
has_ac_number_note
')
{
push
(
@books_duplicate_ac_number
,
$book
);
}
else
{
push
(
@books_problems
,
$book
);
}
}
else
{
# this is ok, not a bug, Alma simply does not know anything about this phaidra object
print
__LINE__
,
"
ac_number=[
$ac_number
] mex missing
\n
";
}
unless
(
exists
(
$book
->
{
df_phaidra
}))
{
# TODO: if there is no phaidra_url at all, set it...
$counters
{
set_phaidra_url
}
++
;
$book
->
{
update_phaidra_url
}
=
$book
->
{
phaidra_url
};
push
(
@alma_notes
,
'
set_phaidra_url
');
}
push
(
@alma_notes
,
'
ok
')
unless
(
@alma_notes
);
$book
->
{
alma_notes
}
=
join
('
,
',
@alma_notes
);
push
(
@books_ok
,
$book
);
# print __LINE__, " book: ", Dumper($book);
$counters
{
ok
}
++
;
}
# END check Alma records
print
__LINE__
,
"
counters:
",
Dumper
(
\
%counters
);
Util::JSON::
write_json_file
('
eod_problems.json
',
\
@books_problems
);
my
@tsv_columns
=
qw( pid alma_notes ownerId state model ac_number ac_number_note aleph_url
marc_record ts_fetched fetched ts_marc mms_id lib_code
df_doi val_doi
df_urn val_urn
df_hdl val_hdl update_hdl
df_phaidra val_phaidra phaidra_url update_phaidra_url
)
;
write_tsv_file
('
eod_data.tsv
',
\
@books_ok
);
write_tsv_file
('
duplicate_eod_data.tsv
',
\
@books_duplicate_ac_number
);
sub
write_tsv_file
{
...
...
@@ -314,7 +180,32 @@ sub write_tsv_file
__END__
=head1 EXAMPLES
=head1 PROBLEMS
=head2 AC06947549
-[ RECORD 1 ]----------------------
pid | o:1024961
verdict | ok
ownerId | ondemae7
state | Active
model | Book
ac_number | AC06947549
alma_notes | update_hdl
aleph_url | https://ubdata.univie.ac.at/AC06947549
marc_record | marc_data_found
ts_fetched | 2023-03-18T172423
fetched | 1679160263
ts_marc | 20230210194745.0
mms_id | 990074078270203332
lib_code | 43ACC_UBW
df_hdl | 776:0:8:o
val_hdl | 11353/10.1027250
df_phaidra | 856:4:1:u
val_phaidra | https://phaidra.univie.ac.at/o:1024961
phaidra_url | https://phaidra.univie.ac.at/o:1024961
the handle value in 776:0:8:o is 11353/10.1027250 but it should be 11353/10.1024961
=head2 AC02901724
...
...
@@ -338,6 +229,7 @@ phaidra_url | https://phaidra.univie.ac.at/o:90496
update_phaidra_url | https://phaidra.univie.ac.at/o:90496
o:90495 is a collection, o:90496 is the only member; this is a ZS record,
so this is ok and the suggested change is not correct.
so this is ok and the suggested change is not correct. Filtered for now
This diff is collapsed.
Click to expand it.
lib/Univie/EoD/CrossReference.pm
0 → 100644
+
358
−
0
View file @
4d63f0ff
package
Univie::EoD::
CrossReference
;
use
strict
;
use
Data::
Dumper
;
use
IRMA::
db
;
use
Util::
ts
;
my
@foxml_columns
=
qw(ownerId state model ac_number aleph_url pid)
;
my
@marc_fields
=
qw(ac_number mms_id fetched lib_code)
;
sub
new
{
my
$class
=
shift
;
my
$self
=
{
ac_numbers
=>
{},
pids
=>
{},
books
=>
[]
,
};
bless
(
$self
,
$class
);
$self
->
set
(
@
_
);
$self
;
}
sub
set
{
my
$self
=
shift
;
my
%par
=
@_
;
foreach
my
$par
(
keys
%par
)
{
$self
->
{
$par
}
=
$par
{
$par
};
}
$self
;
}
sub
get_book_list
{
my
$self
=
shift
;
$self
->
{
books
};
}
sub
get_db_col
{
my
$self
=
shift
;
my
$db
=
shift
;
my
$col_name
=
shift
;
return
$self
->
{
_db
}
->
{
$db
}
->
{
$col_name
}
if
(
defined
(
$self
->
{
_db
}
->
{
$db
}
->
{
$col_name
}));
my
$inv_db
=
IRMA::db::
get_any_db
(
$self
->
{
agent_cnf
},
$db
);
$self
->
{
_db
}
->
{
$db
}
->
{
$col_name
}
=
my
$col
=
$inv_db
->
get_collection
(
$col_name
);
print
__LINE__
,
"
db=[
$db
] col_name=[
$col_name
] col=[
$col
]
\n
";
# print __LINE__, " col: ", Dumper($col);
$col
;
}
sub
get_book_inventory
{
my
$self
=
shift
;
my
$search
=
{
ownerId
=>
$self
->
{
agent_cnf
}
->
{
ownerId
},
state
=>
'
Active
',
model
=>
'
Book
'
};
$self
->
find_books
(
$search
);
}
sub
get_book_by_ac_number
{
my
$self
=
shift
;
my
$ac_numbers
=
shift
;
my
$search
=
{
ownerId
=>
$self
->
{
agent_cnf
}
->
{
ownerId
},
state
=>
'
Active
',
model
=>
'
Book
'
};
_modify_search
(
$search
,
'
ac_number
',
$ac_numbers
);
$self
->
find_books
(
$search
);
}
sub
get_book_by_pid
{
my
$self
=
shift
;
my
$pids
=
shift
;
my
$search
=
{
ownerId
=>
$self
->
{
agent_cnf
}
->
{
ownerId
},
state
=>
'
Active
',
model
=>
'
Book
'
};
_modify_search
(
$search
,
'
pid
',
$pids
);
$self
->
find_books
(
$search
);
}
sub
_modify_search
{
my
$s
=
shift
;
my
$what
=
shift
;
my
$val
=
shift
;
if
(
ref
(
$val
)
eq
'
ARRAY
')
{
$s
->
{
$what
}
=
{
'
$in
'
=>
$val
}
}
else
{
$s
->
{
$what
}
=
$val
;
}
}
sub
find_books
{
my
$self
=
shift
;
my
$search
=
shift
;
print
__LINE__
,
"
search:
",
join
('
,
',
%$search
),
"
\n
";
my
$foxml_col
=
$self
->
get_db_col
('
inventory_database
',
'
foxml.data
');
my
$cur
=
$foxml_col
->
find
(
$search
);
# print __LINE__, ' cur: ', Dumper($cur);
while
(
my
$foxml_rec
=
$cur
->
next
())
{
# print __LINE__, " foxml_rec=[$foxml_rec]\n";
$self
->
add_book
(
$foxml_rec
);
}
}
sub
add_book
{
my
$self
=
shift
;
my
$foxml_rec
=
shift
;
# print __LINE__, " foxml_rec: ", Dumper($foxml_rec);
my
%book
=
map
{
$_
=>
$foxml_rec
->
{
$_
}
}
@foxml_columns
;
# print __LINE__, " book: ", Dumper(\%book);
$book
{
phaidra_url
}
=
'
https://phaidra.univie.ac.at/
'
.
$book
{
pid
};
push
(
@
{
$self
->
{
ac_numbers
}
->
{
$book
{
ac_number
}}}
=>
\
%book
);
$self
->
{
pids
}
->
{
$book
{
pid
}}
=
\
%book
;
push
(
@
{
$self
->
{
books
}},
\
%book
);
\
%book
;
}
sub
flag_duplicate_ac_numbers
{
my
$self
=
shift
;
my
$ac_numbers
=
$self
->
{
ac_numbers
};
print
__LINE__
,
"
checking for duplicate ac_numbers
\n
";
foreach
my
$ac_number
(
keys
%$ac_numbers
)
{
my
$x
=
$ac_numbers
->
{
$ac_number
};
# print __LINE__, " ac_number=[$ac_number] pids=[", join(', ', map { $_->{pid} } @$x), "]\n";
if
(
@$x
!=
1
)
{
# either this is a duplicate or a member of a collection (ZS) where the Alma record should point to the collection instead
print
__LINE__
,
"
duplicate_ac_number=[
$ac_number
] pids=[
",
join
('
,
',
map
{
$_
->
{
pid
}
}
@$x
),
"
]
\n
";
# TODO: find out, why this is a duplicate, possibly do not mark the "canonical" version which should be identified in a ticket or so
# for now, just leave them out of further processing.
foreach
my
$book
(
@$x
)
{
$book
->
{
ac_number_note
}
=
'
dup
';
}
}
}
}
sub
check_book
{
my
$self
=
shift
;
my
$book
=
shift
;
my
@irma_notes
=
();
my
(
$ac_number
,
$pid
,
$ac_number_note
)
=
map
{
$book
->
{
$_
}
}
qw(ac_number pid ac_number_note)
;
unless
(
defined
(
$ac_number
)
&&
$ac_number
=~
m#^AC\d{8}$#
)
{
$book
->
{
problem
}
=
"
invalid ac_number=[
$ac_number
]
";
$book
->
{
problem
}
=
return
('
invalid_ac_number
');
}
return
'
ac_number_note
'
if
(
$ac_number_note
ne
'');
return
'
filtered_ac_numbers
'
if
(
exists
(
$self
->
{
agent_cnf
}
->
{
filtered_ac_numbers
}
->
{
$ac_number
}));
return
'
filtered_pids
'
if
(
exists
(
$self
->
{
agent_cnf
}
->
{
filtered_pids
}
->
{
$pid
}));
# check tickets
my
$ticket_col
=
$self
->
get_db_col
('
irma_database
',
'
eod.tickets
');
my
@tickets
=
$ticket_col
->
find
({
ac_number
=>
$ac_number
})
->
all
();
print
__LINE__
,
"
ac_number=[
$ac_number
] tickets:
",
scalar
@tickets
,
"
\n
";
# print __LINE__, Dumper(\@tickets);
if
(
@tickets
>
1
)
{
$book
->
{
ticket
}
=
join
('
,
',
map
{
$_
->
{
ticket
}
}
@tickets
);
return
'
multiple_tickets
';
}
elsif
(
@tickets
==
1
)
{
my
$t0
=
@tickets
[
0
];
$book
->
{
ticket
}
=
$t0
->
{
ticket
};
$book
->
{
ticket_status
}
=
$t0
->
{
status
};
$book
->
{
vt
}
=
$t0
->
{
custom_fields
}
->
{
vt
}
->
[
1
];
}
# check IRMA for registered identifiers
my
$irma_col
=
$self
->
get_db_col
('
irma_database
',
'
irma.map
');
my
@irma_records
=
$irma_col
->
find
({
ac_number
=>
$ac_number
})
->
all
();
# my @irma_records= $irma_col->find({ pid => $pid })->all(); # NOTE: there is no field named "pid" in the irma record!
print
__LINE__
,
"
ac_number=[
$ac_number
] irma_records:
",
scalar
@irma_records
,
"
\n
";
# print __LINE__, Dumper(\@irma_records);
if
(
@irma_records
>
1
)
{
return
'
multiple_irma_records
'
}
elsif
(
@irma_records
==
1
)
{
my
$ir0
=
$irma_records
[
0
];
foreach
my
$field
(
qw(hdl urn doi)
)
{
$book
->
{
$field
}
=
$ir0
->
{
$field
}
if
(
exists
(
$ir0
->
{
$field
}));
}
}
elsif
(
@irma_records
==
0
)
{
print
__LINE__
,
"
ATTN: ac_number=[
$ac_number
] no irma_record!
\n
";
$self
->
{
counters
}
->
{
missing_irma_record
}
++
;
}
my
$marc_col
=
$self
->
get_db_col
('
marc_database
',
'
alma.marc
');
my
$marc
=
$marc_col
->
find_one
({
ac_number
=>
$ac_number
});
# print __LINE__, " marc: ", Dumper($marc);
return
'
missing_marc_record
'
unless
(
defined
(
$marc
));
my
@alma_notes
=
();
$book
->
{
ts_fetched
}
=
Util::ts::
ts_ISO_gmt
(
$marc
->
{
fetched
});
my
$mex
=
Alma::
MARC_Extractor
->
new
(
\
@marc_fields
);
$mex
->
{
mex_ot2ut
}
=
0
;
$mex
->
{
mex_phaidra
}
=
1
;
$mex
->
extract_identifiers2
(
$marc
,
$book
);
if
(
exists
(
$book
->
{
mex
}))
{
my
$mex
=
$book
->
{
mex
};
print
__LINE__
,
"
ac_number=[
$ac_number
] mex:
",
Dumper
(
$mex
);
foreach
my
$mf
(
keys
%$mex
)
{
my
$mfa
=
$mex
->
{
$mf
};
if
(
@$mfa
!=
1
)
{
# this should be inspected
print
__LINE__
,
"
ATTN: ac_number=[
$ac_number
] multiple mex entries for
$mf
; mex:
",
Dumper
(
$mex
);
return
'
multiple_mex_entries
';
}
foreach
my
$mfe
(
@$mfa
)
{
my
(
$df
,
$val
)
=
@$mfe
;
my
$copy
=
0
;
if
(
$mf
eq
'
phaidra
')
{
if
(
$val
=~
m#^https?://phaidra.univie.ac.at/(.*)(o:\d+)$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
my
(
$extra
,
$found_pid
)
=
(
$
1
,
$
2
);
my
$correct_url
=
'
https://phaidra.univie.ac.at/
'
.
$found_pid
;
if
(
$correct_url
ne
$book
->
{
phaidra_url
}
||
$extra
ne
'')
{
$book
->
{
update_phaidra_url
}
=
$book
->
{
phaidra_url
};
$self
->
{
counters
}
->
{
update_phaidra_url
}
++
;
push
(
@alma_notes
,
'
update_phaidra_url
');
}
}
}
elsif
(
$mf
eq
'
hdl
')
{
if
(
$val
=~
m#^11353/10\.(\d+)$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
if
(
$val
ne
$book
->
{
hdl
})
{
# TODO:
$book
->
{
update_hdl
}
=
$book
->
{
hdl
};
$self
->
{
counters
}
->
{
update_hdl
}
++
;
push
(
@alma_notes
,
'
update_hdl
');
}
}
}
elsif
(
$mf
eq
'
doi
')
{
if
(
$val
=~
m#^10.25365/digital-copy\.(\d+)$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
# TODO: DOI checken
if
(
$val
ne
$book
->
{
doi
})
{
# TODO:
$book
->
{
update_doi
}
=
$book
->
{
doi
};
$self
->
{
counters
}
->
{
update_doi
}
++
;
push
(
@alma_notes
,
'
update_doi
');
}
}
else
{
print
__LINE__
,
"
ATTN: unknown DOI format in marc record: df=[
$df
] val=[
$val
]
\n
";
$self
->
{
counters
}
->
{
bad_doi
}
++
;
}
}
elsif
(
$mf
eq
'
urn
')
{
if
(
$val
=~
m#^urn:nbn:at:at-ubw-\d{5}\.\d{5}\.\d{6}-\d$#
)
{
$copy
=
1
;
# only transcribe those URLs that really look like some
# TODO: check if this is the known nbn!
if
(
$val
ne
$book
->
{
urn
})
{
# TODO:
$book
->
{
update_urn
}
=
$book
->
{
urn
};
$self
->
{
counters
}
->
{
update_urn
}
++
;
push
(
@alma_notes
,
'
update_urn
');
}
}
else
{
print
__LINE__
,
"
ATTN: unknown NBN (urn) format in marc record: df=[
$df
] val=[
$val
]
\n
";
$self
->
{
counters
}
->
{
bad_urn
}
++
;
}
}
if
(
$copy
)
{
$book
->
{'
df_
'
.
$mf
}
=
$df
;
$book
->
{'
val_
'
.
$mf
}
=
$val
;
}
else
{
push
(
@alma_notes
,
"
junk_
${mf}
_in_marc
");
print
__LINE__
,
"
ATTN: ac_number=[
$ac_number
] pid=[
$pid
] junk data mf=[
$mf
] in marc record: df=[
$df
] val=[
$val
]
\n
";
}
}
}
}
else
{
# this is ok, not a bug, Alma simply does not know anything about this phaidra object
print
__LINE__
,
"
ac_number=[
$ac_number
] mex missing
\n
";
}
if
(
!
exists
(
$book
->
{
val_urn
})
&&
exists
(
$book
->
{
urn
}))
{
$book
->
{
update_urn
}
=
$book
->
{
urn
};
$self
->
{
counters
}
->
{
set_urn
}
++
;
push
(
@alma_notes
,
'
set_urn
');
}
unless
(
exists
(
$book
->
{
df_phaidra
}))
{
# TODO: if there is no phaidra_url at all, set it...
$self
->
{
counters
}
->
{
set_phaidra_url
}
++
;
$book
->
{
update_phaidra_url
}
=
$book
->
{
phaidra_url
};
push
(
@alma_notes
,
'
set_phaidra_url
');
}
push
(
@alma_notes
,
'
ok
')
unless
(
@alma_notes
);
$book
->
{
alma_notes
}
=
join
('
,
',
@alma_notes
);
return
'
ok
';
}
=head1 INTERNAL? METHODS
=cut
1
;
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment