From 5240ee7346322c30bc98b1317158949530e653bf Mon Sep 17 00:00:00 2001 From: Gerhard Gonter <ggonter@gmail.com> Date: Mon, 17 Jan 2022 13:03:35 +0100 Subject: [PATCH] migration table with more information from alma --- eprints1.pl | 82 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/eprints1.pl b/eprints1.pl index d7cae7a..771df7f 100755 --- a/eprints1.pl +++ b/eprints1.pl @@ -3155,9 +3155,9 @@ sub ot2ut while (my $attachment= shift(@docs)) { $attachment_number++; - my $fnm_attachment_md= 'othes/utheses_json/' . $eprint_id . '_' . $attachment_number . '_md.json'; - my $fnm_attachment_res= 'othes/utheses_json/' . $eprint_id . '_' . $attachment_number . '_res.json'; - my $fnm_attachment_chg= 'othes/utheses_json/' . $eprint_id . '_' . $attachment_number . '_chg.json'; + my $fnm_attachment_md= 'othes/utheses_json/' . $ot2ut_context . '/' . $eprint_id . '_' . $attachment_number . '_md.json'; + my $fnm_attachment_res= 'othes/utheses_json/' . $ot2ut_context . '/' . $eprint_id . '_' . $attachment_number . '_res.json'; + my $fnm_attachment_chg= 'othes/utheses_json/' . $ot2ut_context . '/' . $eprint_id . '_' . $attachment_number . '_chg.json'; my $attachment_md= { @@ -5211,7 +5211,6 @@ sub policies_stats <p><font color="red">$msg</font></p> <p><a href="buckets.html" target="buckets">$bucket_cnt buckets</a></p> <h2>upload counters</h2> -<p><a href="http://xx2.test.univie.ac.at:3001/html/metablocks" target="opa">Live Counter</a></p> <table border="1"> <tr> <th>metablock</th> @@ -5228,6 +5227,7 @@ sub policies_stats <th>count</th><th>pct</th><th>err</th> </tr> EOX +# <p><a href="http://xx2.test.univie.ac.at:3001/html/metablocks" target="opa">Live Counter</a></p> my %incomplete_blocks= (); @@ -5571,6 +5571,7 @@ sub export_migration_data my %data; # pass 1: get data from sync db + print __LINE__, " get data from sync db\n"; my @sync_fields= qw(eprint_id eprint_status utheses_id ts_upload upload_status); { my $col_sync= $db_ot2ut->get_collection('sync'); @@ -5589,6 +5590,7 @@ sub export_migration_data } # pass 2: get data from utheses.policies + print __LINE__, " get data from utheses.policies\n"; my @utp_fields= qw(eprint_id ac_nummer urn doi lastmod); { my $col_utp= $db_ot2ut->get_collection('utheses.policies'); @@ -5609,6 +5611,7 @@ sub export_migration_data } # 3: get data from alma.marc + print __LINE__, " get data from alma.marc\n"; my @marc_fields= qw(ac_number mms_id fetched lib_code); my @marc_extra_fields= qw(marc_record ts_fetched); { @@ -5647,6 +5650,67 @@ sub export_migration_data next MARC; } $rec->{marc_record}= 'marc_data_found'; + + # print __LINE__, " mrd: ", Dumper($mrd); + my ($df_urn, $val_urn, $df_doi, $val_doi, $df_othes, $val_othes, $df_utheses, $val_utheses); + foreach my $df (@{$mrd->{datafield}}) + { + foreach my $sf (@{$df->{subfield}}) + { + if ($sf->{content} =~ m#urn:nbn#) + { + $df_urn= join(':', (map { $df->{$_} } qw(tag ind1 ind2)), $sf->{code}); + $val_urn= $sf->{content}; + } + elsif ($sf->{content} =~ m#10\.25365#) + { + $df_doi= join(':', (map { $df->{$_} } qw(tag ind1 ind2)), $sf->{code}); + $val_doi= $sf->{content}; + } + elsif ($sf->{content} =~ m#othes\.univie#) + { + $df_othes= join(':', (map { $df->{$_} } qw(tag ind1 ind2)), $sf->{code}); + $val_othes= $sf->{content}; + } + elsif ($sf->{content} =~ m#utheses\.univie#) + { + $df_utheses= join(':', (map { $df->{$_} } qw(tag ind1 ind2)), $sf->{code}); + $val_utheses= $sf->{content}; + } + } + } + $rec->{ df_urn}= $df_urn; + $rec->{val_urn}= $val_urn; + $rec->{ df_doi}= $df_doi; + $rec->{val_doi}= $val_doi; + $rec->{ df_othes}= $df_othes; + $rec->{val_othes}= $val_othes; + $rec->{ df_utheses}= $df_utheses; + $rec->{val_utheses}= $val_utheses; + } + } + + # 4: get bucket codes from bucket.lists + print __LINE__, " get data from bucket.lists\n"; + { + $db_ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database') unless (defined ($db_ot2ut)); + my $col_bl= $db_ot2ut->get_collection('bucket.lists'); + my $cur_bl= $col_bl->find(); + print __LINE__, " cur_bl=[$cur_bl]\n"; + + BUCKET: while ($running) + { + my $bucket= $cur_bl->next(); + last BUCKET unless (defined ($bucket)); + my ($bucket_code, $eprint_ids)= map { $bucket->{$_} } qw( code ids ); + # print __LINE__, " bucket_code=[bucket_code] eprint_ids=[$eprint_ids]\n"; + + foreach my $id (keys %$eprint_ids) + { + # print __LINE__, " id=[$id]\n"; + next unless (exists ($data{$id})); + $data{$id}->{bucket_code}= $bucket_code; + } } } @@ -5655,7 +5719,15 @@ sub export_migration_data my $tsv_fnm= sprintf ("%s/sync_%s.tsv", $base_path, $ot2ut_context); print __LINE__, " writing migration table [$tsv_fnm]\n"; - my @tsv_columns= (@sync_fields, @utp_fields, @marc_fields, @marc_extra_fields); # NOTE: eprint_id is there several times + # my @tsv_columns= (@sync_fields, @utp_fields, @marc_fields, @marc_extra_fields); # NOTE: eprint_id is there several times + + my @tsv_columns= qw( eprint_id eprint_status lastmod + urn doi ac_nummer bucket_code + utheses_id ts_upload upload_status + ac_number mms_id fetched lib_code marc_record ts_fetched + df_urn val_urn df_doi val_doi df_othes val_othes df_utheses val_utheses + ); + open (TSV, '>:utf8', $tsv_fnm) or die; print TSV join("\t", @tsv_columns), "\n"; foreach my $eprint_id (sort { $a <=> $b } keys %data) -- GitLab