factored code out int Perl modele Univie::EoD::CrossReference

4d63f0ff · Gerhard Gonter · e566b09e · 4d63f0ff · 4d63f0ff
Commit 4d63f0ff authored Mar 24, 2023 by Gerhard Gonter
--- a/eod1.pl
+++ b/eod1.pl
@@ -29,16 +29,29 @@ use Util::MongoDB;
 use lib 'lib';

 use Alma::MARC_Extractor;
-use IRMA::NA;
+# use IRMA::NA;
+use Univie::EoD::CrossReference;

 my $agent_config_file= '/etc/irma/eodagent.json';
+my $op_mode= 'complete';

-my @PARS;
+my @tsv_columns=
+qw( pid verdict ownerId state model
+    ac_number alma_notes ac_number_note aleph_url
+    marc_record ts_fetched fetched ts_marc mms_id lib_code
+    ticket ticket_status vt
+    df_doi val_doi doi update_doi
+    df_urn val_urn urn update_urn
+    df_hdl val_hdl hdl update_hdl
+    df_phaidra val_phaidra phaidra_url update_phaidra_url
+    );
+
+my (@pars, @ac_numbers, @pids);
 my $arg;
 while (defined ($arg= shift (@ARGV)))
 {
-     if ($arg eq '-')  { push (@PARS, '-'); }
-  elsif ($arg eq '--') { push (@PARS, @ARGV); @ARGV= (); }
+     if ($arg eq '-')  { push (@pars, '-'); }
+  elsif ($arg eq '--') { push (@pars, @ARGV); @ARGV= (); }
  elsif ($arg =~ /^--(.+)/)
  {
    my ($opt, $val)= split ('=', $1, 2); 
@@ -56,7 +69,9 @@ while (defined ($arg= shift (@ARGV)))
  }
  else
  {
-    push (@PARS, $arg);
+       if ($arg =~ m#^[\w\d\-]+:\d+$#) { push (@pids, $arg) }
+    elsif ($arg =~ m#^AC\d{8}$#)       { push (@ac_numbers, $arg) }
+    else  { push (@pars, $arg); }
  }
 }

@@ -65,236 +80,87 @@ print join (' ', __FILE__, __LINE__, 'caller=['. caller() . ']'), "\n";
 my $agent_cnf= Util::JSON::read_json_file ($agent_config_file);
 # print __LINE__, " agent_cnf: ", main::Dumper ($agent_cnf); exit(0);

-# Step 1: get list of books from inventory
-# get handles for various databases
-my $inv_db=  IRMA::db::get_any_db($agent_cnf, 'inventory_database');
-my $foxml_col= $inv_db->get_collection('foxml.data');
-# print __LINE__, " foxml_col: ", Dumper($foxml_col);
-print __LINE__, " foxml_col=[$foxml_col]\n";
+my %counters;
+my @books_duplicate_ac_number;
+my @books_problems;
+my @books_ok;

-my @foxml_columns= qw(ownerId state model ac_number aleph_url pid);
-# my %foxml_columns= map { $_ => 1 } @foxml_columns;
-my $search= { ownerId => 'ondemae7', state => 'Active', model => 'Book' };
+my $crf= Univie::EoD::CrossReference->new( agent_cnf => $agent_cnf, counters => \%counters );

-my %counters;
-my $cur= $foxml_col->find( $search );
-# print __LINE__, ' cur: ', Dumper($cur);
-my %ac_numbers= ();
-my %pids= ();
-my @books= ();
-while (my $rec= $cur->next())
+my $count_objects= 0;
+if (@ac_numbers)
 {
-  # print __LINE__, " rec: ", Dumper($rec);
-  my %book= map { $_ => $rec->{$_} } @foxml_columns;
-  # print __LINE__, " book: ", Dumper(\%book);
-
-  $book{phaidra_url}= 'https://phaidra.univie.ac.at/'. $book{pid};
-  push (@{$ac_numbers{$book{ac_number}}} => \%book);
-  $pids{$book{pid}}= \%book;
+  $crf->get_book_by_ac_number(\@ac_numbers);
+  $count_objects += @ac_numbers;
+}

-  push (@books, \%book);
+if (@pids)
+{
+  $crf->get_book_by_pid(\@pids);
+  $count_objects += @pids;
 }

-print __LINE__, " checking for duplicate ac_numbers\n";
-foreach my $ac_number (keys %ac_numbers)
+if (@pars)
 {
-  my $x= $ac_numbers{$ac_number};
-  # print __LINE__, " ac_number=[$ac_number] pids=[", join(', ', map { $_->{pid} } @$x), "]\n";
-  if (@$x != 1)
-  { # either this is a duplicate or a member of a collection (ZS) where the Alma record should point to the collection instead
-    print __LINE__, " duplicate_ac_number=[$ac_number] pids=[", join(', ', map { $_->{pid} } @$x), "]\n";
-    $counters{duplicate_ac_number}++;
-
-    # TODO: find out, why this is a duplicate, possibly do not mark the "canonical" version which should be identified in a ticket or so
-    # for now, just leave them out of further processing.
-    foreach my $book (@$x)
+  while (my $par= shift (@pars))
  {
-      $book->{ac_number_note}= 'dup';
+    # TODO: ???
  }
 }
+
+if ($count_objects == 0)
+{
+  $crf->get_book_inventory();
 }

-my $book_count= @books;
-print __LINE__, " found $book_count books\n";
+$crf->flag_duplicate_ac_numbers();
+process_book_list($crf->get_book_list());

-# BEGIN step 2: check IRMA records
-my $irma_db=  IRMA::db::get_any_db($agent_cnf, 'irma_database');
-my $irma_col= $irma_db->get_collection('irma.map');
+print __LINE__, " counters: ", Dumper(\%counters);

-# BEGIN step 3: check Alma records
-my $marc_db=  IRMA::db::get_any_db($agent_cnf, 'marc_database');
-my $marc_col= $marc_db->get_collection('alma.marc');
-my $req_col= $marc_db->get_collection('requests');
+Util::JSON::write_json_file('eod_problems.json', \@books_problems);

-  my @marc_mex_fields= qw(df_doi val_doi df_phaidra val_phaidra df_urn val_urn ts_marc);
-  my @marc_fields= qw(ac_number mms_id fetched lib_code);
-  my @marc_extra_fields= qw(marc_record ts_fetched);
-  my $mex= Alma::MARC_Extractor->new(\@marc_fields);
-  $mex->{mex_ot2ut}= 0; $mex->{mex_phaidra}= 1;
+write_tsv_file('eod_data.tsv', \@books_ok);
+write_tsv_file('duplicate_eod_data.tsv', \@books_duplicate_ac_number);
+write_tsv_file('problems_eod_data.tsv', \@books_problems);

-my @books_duplicate_ac_number;
-my @books_problems;
-my @books_ok;
-BOOK3: foreach my $book (@books)
-{
-  my @irma_notes= ();
-  my ($ac_number, $pid)= map { $book->{$_} } qw(ac_number pid);
+exit(0);

-  unless (defined ($ac_number) && $ac_number =~ m#^AC\d{8}$#)
+sub process_book_list
 {
-    $book->{problem}= "invalid ac_number=[$ac_number]";
-    push (@books_problems, $book);
-    $book->{problem}= 'invalid_ac_number';
-    $counters{invalid_ac_number}++;
-    next BOOK3;
-  }
-
-  my @irma_records= $irma_col->find({ ac_number => $ac_number })->all();
-  print __LINE__, " ac_number=[$ac_number] irma_records: ", scalar @irma_records, ' ', Dumper(\@irma_records);
+  my $book_list= shift;

-  my $marc= $marc_col->find_one({ ac_number => $ac_number });
-  # print __LINE__, " marc: ", Dumper($marc);
-  unless (defined ($marc))
-  {
-    $counters{missing_marc_record}++;
-    $book->{problem}= 'missing_marc_record';
-    push (@books_problems, $book);
-    next BOOK3;
-  }
+my $book_count= @$book_list;
+print __LINE__, " found $book_count books\n";

-  if ($book->{ac_number_note})
-  {
-    $counters{has_ac_number_note}++;
-    push (@books_duplicate_ac_number, $book); # save for later, see above near the duplicate check
-    next BOOK3;
-  }
+# BEGIN step 2: check IRMA records

-  my @alma_notes= ();
-  $book->{ts_fetched}= Util::ts::ts_ISO_gmt($marc->{fetched});
-  $mex->extract_identifiers2($marc, $book);
-  if (exists($book->{mex}))
-  {
-    my $mex= $book->{mex};
-    print __LINE__, " ac_number=[$ac_number] mex: ", Dumper($mex);
-    foreach my $mf (keys %$mex)
-    {
-      my $mfa= $mex->{$mf};
+# BEGIN step 3: check Alma records
+# my $req_col= $marc_db->get_collection('requests');

-      if (@$mfa != 1)
-      { # this should be inspected
-        print __LINE__, " ac_number=[$ac_number] multiple entries for $mf mex: ", Dumper($mex);
-        $counters{multiple_mex_entries}++;
-        $book->{problems}= 'multiple_mex_entries';
-        push (@books_problems, $book);
-        next BOOK3;
-      }
+  # my @marc_mex_fields= qw(df_doi val_doi df_phaidra val_phaidra df_urn val_urn ts_marc);
+  # my @marc_extra_fields= qw(marc_record ts_fetched);

-      foreach my $mfe (@$mfa)
+  BOOK3: foreach my $book (@$book_list)
  {
-        my ($df, $val)= @$mfe;
-        my $copy= 0;
-        if ($mf eq 'phaidra')
-        {
-          if ($val =~ m#^https?://phaidra.univie.ac.at/(.*)(o:\d+)$#)
-          {
-            $copy= 1; # only transcribe those URLs that really look like some
-            my ($extra, $found_pid)= ($1, $2);
-            my $correct_url= 'https://phaidra.univie.ac.at/'. $found_pid;
-            if ($correct_url ne $book->{phaidra_url} || $extra ne '')
-            {
-              $book->{update_phaidra_url}= $book->{phaidra_url};
-              $counters{update_phaidra_url}++;
-              push(@alma_notes, 'update_phaidra_url');
-					  }
-          }
-        }
-        elsif ($mf eq 'hdl')
-        {
-          if ($val =~ m#^11353/10\.(\d+)$#)
-          {
-            $copy= 1; # only transcribe those URLs that really look like some
-            my $found_pid= 'o:'.$1;
-            if ($found_pid ne $pid)
-            { # TODO:
-              $book->{update_hdl}= $book->{hdl};
-              $counters{update_hdl}++;
-              push(@alma_notes, 'update_hdl');
-            }
-          }
-        }
-        elsif ($mf eq 'doi')
-        {
-          if ($val =~ m#^10.25365/digital-copy\.(\d+)$#)
-          {
-            $copy= 1; # only transcribe those URLs that really look like some
-# TODO: DOI checken
-#           if ($found_pid ne $pid)
-#           { # TODO:
-#             $book->{update_hdl}= $book->{hdl};
-#             $counters{update_hdl}++;
-#             push(@alma_notes, 'update_hdl');
-#           }
-          }
-        }
-        elsif ($mf eq 'urn')
-        {
-          if ($val =~ m#^urn:nbn:at:at-ubw-\d{5}\.\d{5}\.\d{6}-\d$#)
-          {
-            $copy= 1; # only transcribe those URLs that really look like some
-            # TODO: check if this is the known nbn!
-          }
-        }
-
+    my ($verdict)= $crf->check_book($book);
+    $book->{verdict}= $verdict;
+    $counters{$verdict}++;

-        if ($copy)
+    if ($verdict eq 'ok')
    {
-          $book->{'df_'.$mf}= $df;
-          $book->{'val_'.$mf}= $val;
+      push (@books_ok, $book);
    }
-        else
+    elsif ($verdict eq 'has_ac_number_note')
    {
-          push(@alma_notes, "junk_${mf}_in_marc");
-          print __LINE__, " ATTN: ac_number=[$ac_number] pid=[$pid] junk data mf=[$mf] in marc record: df=[$df] val=[$val]\n";
-        }
-      }
-    }
+      push (@books_duplicate_ac_number, $book);
    }
    else
    {
-    # this is ok, not a bug, Alma simply does not know anything about this phaidra object
-    print __LINE__, " ac_number=[$ac_number] mex missing\n";
+      push (@books_problems, $book);
    }
-
-  unless (exists($book->{df_phaidra}))
-  { # TODO: if there is no phaidra_url at all, set it...
-    $counters{set_phaidra_url}++;
-    $book->{update_phaidra_url}= $book->{phaidra_url};
-    push(@alma_notes, 'set_phaidra_url');
  }
-
-  push (@alma_notes, 'ok') unless (@alma_notes);
-  $book->{alma_notes}= join(',', @alma_notes);
-  push (@books_ok, $book);
-  # print __LINE__, " book: ", Dumper($book);
-  $counters{ok}++;
 }
-# END check Alma records
-
-print __LINE__, " counters: ", Dumper(\%counters);
-
-Util::JSON::write_json_file('eod_problems.json', \@books_problems);
-
-my @tsv_columns=
-qw( pid alma_notes ownerId state model ac_number ac_number_note aleph_url
-    marc_record ts_fetched fetched ts_marc mms_id lib_code
-    df_doi val_doi
-    df_urn val_urn
-    df_hdl val_hdl update_hdl
-    df_phaidra val_phaidra phaidra_url update_phaidra_url
-    );
-
-write_tsv_file('eod_data.tsv', \@books_ok);
-write_tsv_file('duplicate_eod_data.tsv', \@books_duplicate_ac_number);

 sub write_tsv_file
 {
@@ -314,7 +180,32 @@ sub write_tsv_file

 __END__

-=head1 EXAMPLES
+=head1 PROBLEMS
+
+=head2 AC06947549
+
+-[ RECORD 1 ]----------------------
+pid         | o:1024961
+verdict     | ok
+ownerId     | ondemae7
+state       | Active
+model       | Book
+ac_number   | AC06947549
+alma_notes  | update_hdl
+aleph_url   | https://ubdata.univie.ac.at/AC06947549
+marc_record | marc_data_found
+ts_fetched  | 2023-03-18T172423
+fetched     | 1679160263
+ts_marc     | 20230210194745.0
+mms_id      | 990074078270203332
+lib_code    | 43ACC_UBW
+df_hdl      | 776:0:8:o
+val_hdl     | 11353/10.1027250
+df_phaidra  | 856:4:1:u
+val_phaidra | https://phaidra.univie.ac.at/o:1024961
+phaidra_url | https://phaidra.univie.ac.at/o:1024961
+
+the handle value in 776:0:8:o is 11353/10.1027250 but it should be 11353/10.1024961

 =head2 AC02901724

@@ -338,6 +229,7 @@ phaidra_url        | https://phaidra.univie.ac.at/o:90496
 update_phaidra_url | https://phaidra.univie.ac.at/o:90496

 o:90495 is a collection, o:90496 is the only member; this is a ZS record,
-so this is ok and the suggested change is not correct.
+so this is ok and the suggested change is not correct.  Filtered for now
+


--- a/lib/Univie/EoD/CrossReference.pm
+++ b/lib/Univie/EoD/CrossReference.pm
+package Univie::EoD::CrossReference;
+
+use strict;
+
+use Data::Dumper;
+
+use IRMA::db;
+use Util::ts;
+
+my @foxml_columns= qw(ownerId state model ac_number aleph_url pid);
+my @marc_fields= qw(ac_number mms_id fetched lib_code);
+
+sub new
+{
+  my $class= shift;
+
+  my $self=
+  {
+    ac_numbers => {},
+    pids => {},
+    books => [],
+  };
+
+  bless ($self, $class);
+  $self->set(@_);
+
+  $self;
+}
+
+sub set
+{
+  my $self= shift;
+  my %par= @_;
+  foreach my $par (keys %par)
+  {
+    $self->{$par}= $par{$par};
+  }
+  $self;
+}
+
+sub get_book_list
+{
+  my $self= shift;
+  $self->{books};
+}
+
+sub get_db_col
+{
+  my $self= shift;
+  my $db= shift;
+  my $col_name= shift;
+
+  return $self->{_db}->{$db}->{$col_name} if (defined ($self->{_db}->{$db}->{$col_name}));
+
+  my $inv_db=  IRMA::db::get_any_db($self->{agent_cnf}, $db);
+  $self->{_db}->{$db}->{$col_name}= my $col= $inv_db->get_collection($col_name);
+  print __LINE__, " db=[$db] col_name=[$col_name] col=[$col]\n";
+  # print __LINE__, " col: ", Dumper($col);
+  $col;
+}
+
+sub get_book_inventory
+{
+  my $self= shift;
+  my $search= { ownerId => $self->{agent_cnf}->{ownerId}, state => 'Active', model => 'Book' };
+  $self->find_books($search);
+}
+
+sub get_book_by_ac_number
+{
+  my $self= shift;
+  my $ac_numbers= shift;
+
+  my $search= { ownerId => $self->{agent_cnf}->{ownerId}, state => 'Active', model => 'Book' };
+  _modify_search ($search, 'ac_number', $ac_numbers);
+  $self->find_books($search);
+}
+
+sub get_book_by_pid
+{
+  my $self= shift;
+  my $pids= shift;
+
+  my $search= { ownerId => $self->{agent_cnf}->{ownerId}, state => 'Active', model => 'Book' };
+  _modify_search ($search, 'pid', $pids);
+  $self->find_books($search);
+}
+
+sub _modify_search
+{
+  my $s= shift;
+  my $what= shift;
+  my $val= shift;
+
+  if (ref($val) eq 'ARRAY') { $s->{$what} = { '$in' => $val } }
+  else { $s->{$what}= $val; }
+}
+
+sub find_books
+{
+  my $self= shift;
+  my $search= shift;
+  print __LINE__, " search: ", join(', ', %$search), "\n";
+
+  my $foxml_col= $self->get_db_col('inventory_database', 'foxml.data');
+  my $cur= $foxml_col->find( $search );
+  # print __LINE__, ' cur: ', Dumper($cur);
+
+  while (my $foxml_rec= $cur->next())
+  {
+    # print __LINE__, " foxml_rec=[$foxml_rec]\n";
+    $self->add_book($foxml_rec);
+  }
+}
+
+sub add_book
+{
+  my $self= shift;
+  my $foxml_rec= shift;
+
+  # print __LINE__, " foxml_rec: ", Dumper($foxml_rec);
+  my %book= map { $_ => $foxml_rec->{$_} } @foxml_columns;
+  # print __LINE__, " book: ", Dumper(\%book);
+
+  $book{phaidra_url}= 'https://phaidra.univie.ac.at/'. $book{pid};
+  push (@{$self->{ac_numbers}->{$book{ac_number}}} => \%book);
+  $self->{pids}->{$book{pid}}= \%book;
+
+  push (@{$self->{books}}, \%book);
+
+  \%book;
+}
+
+sub flag_duplicate_ac_numbers
+{
+  my $self= shift;
+
+  my $ac_numbers= $self->{ac_numbers};
+  print __LINE__, " checking for duplicate ac_numbers\n";
+  foreach my $ac_number (keys %$ac_numbers)
+  {
+    my $x= $ac_numbers->{$ac_number};
+    # print __LINE__, " ac_number=[$ac_number] pids=[", join(', ', map { $_->{pid} } @$x), "]\n";
+
+    if (@$x != 1)
+    { # either this is a duplicate or a member of a collection (ZS) where the Alma record should point to the collection instead
+      print __LINE__, " duplicate_ac_number=[$ac_number] pids=[", join(', ', map { $_->{pid} } @$x), "]\n";
+
+      # TODO: find out, why this is a duplicate, possibly do not mark the "canonical" version which should be identified in a ticket or so
+      # for now, just leave them out of further processing.
+      foreach my $book (@$x)
+      {
+        $book->{ac_number_note}= 'dup';
+      }
+    }
+  }
+}
+
+sub check_book
+{
+  my $self= shift;
+  my $book= shift;
+
+  my @irma_notes= ();
+  my ($ac_number, $pid, $ac_number_note)= map { $book->{$_} } qw(ac_number pid ac_number_note);
+
+  unless (defined ($ac_number) && $ac_number =~ m#^AC\d{8}$#)
+  {
+    $book->{problem}= "invalid ac_number=[$ac_number]";
+    $book->{problem}= 
+    return ('invalid_ac_number');
+  }
+
+  return 'ac_number_note'      if ($ac_number_note ne '');
+  return 'filtered_ac_numbers' if (exists ($self->{agent_cnf}->{filtered_ac_numbers}->{$ac_number}));
+  return 'filtered_pids'       if (exists ($self->{agent_cnf}->{filtered_pids}->{$pid}));
+
+  # check tickets
+  my $ticket_col= $self->get_db_col('irma_database', 'eod.tickets');
+  my @tickets= $ticket_col->find({ ac_number => $ac_number })->all();
+  print __LINE__, " ac_number=[$ac_number] tickets: ", scalar @tickets, "\n";
+  # print __LINE__, Dumper(\@tickets);
+  if (@tickets  > 1)
+  {
+    $book->{ticket}= join(',', map { $_->{ticket} } @tickets);
+    return 'multiple_tickets';
+  }
+  elsif (@tickets == 1)
+  {
+    my $t0= @tickets[0];
+    $book->{ticket}= $t0->{ticket};
+    $book->{ticket_status}= $t0->{status};
+    $book->{vt}= $t0->{custom_fields}->{vt}->[1];
+  }
+
+  # check IRMA for registered identifiers
+  my $irma_col= $self->get_db_col('irma_database', 'irma.map');
+  my @irma_records= $irma_col->find({ ac_number => $ac_number })->all();
+  # my @irma_records= $irma_col->find({ pid => $pid })->all(); # NOTE: there is no field named "pid" in the irma record!
+  print __LINE__, " ac_number=[$ac_number] irma_records: ", scalar @irma_records, "\n";
+  # print __LINE__, Dumper(\@irma_records);
+  if (@irma_records > 1) { return 'multiple_irma_records' }
+  elsif (@irma_records == 1)
+  {
+    my $ir0= $irma_records[0];
+    foreach my $field (qw(hdl urn doi))
+    {
+      $book->{$field}= $ir0->{$field} if (exists ($ir0->{$field}));
+    }
+  }
+  elsif (@irma_records == 0)
+  {
+    print __LINE__, " ATTN: ac_number=[$ac_number] no irma_record!\n";
+    $self->{counters}->{missing_irma_record}++;
+  }
+
+  my $marc_col= $self->get_db_col('marc_database', 'alma.marc');
+  my $marc= $marc_col->find_one({ ac_number => $ac_number });
+  # print __LINE__, " marc: ", Dumper($marc);
+  return 'missing_marc_record' unless (defined ($marc));
+
+  my @alma_notes= ();
+  $book->{ts_fetched}= Util::ts::ts_ISO_gmt($marc->{fetched});
+
+  my $mex= Alma::MARC_Extractor->new(\@marc_fields);
+  $mex->{mex_ot2ut}= 0; $mex->{mex_phaidra}= 1;
+  $mex->extract_identifiers2($marc, $book);
+
+  if (exists($book->{mex}))
+  {
+    my $mex= $book->{mex};
+    print __LINE__, " ac_number=[$ac_number] mex: ", Dumper($mex);
+    foreach my $mf (keys %$mex)
+    {
+      my $mfa= $mex->{$mf};
+
+      if (@$mfa != 1)
+      { # this should be inspected
+        print __LINE__, " ATTN: ac_number=[$ac_number] multiple mex entries for $mf; mex: ", Dumper($mex);
+        return 'multiple_mex_entries';
+      }
+
+      foreach my $mfe (@$mfa)
+      {
+        my ($df, $val)= @$mfe;
+        my $copy= 0;
+        if ($mf eq 'phaidra')
+        {
+          if ($val =~ m#^https?://phaidra.univie.ac.at/(.*)(o:\d+)$#)
+          {
+            $copy= 1; # only transcribe those URLs that really look like some
+            my ($extra, $found_pid)= ($1, $2);
+            my $correct_url= 'https://phaidra.univie.ac.at/'. $found_pid;
+            if ($correct_url ne $book->{phaidra_url} || $extra ne '')
+            {
+              $book->{update_phaidra_url}= $book->{phaidra_url};
+              $self->{counters}->{update_phaidra_url}++;
+              push(@alma_notes, 'update_phaidra_url');
+					  }
+          }
+        }
+        elsif ($mf eq 'hdl')
+        {
+          if ($val =~ m#^11353/10\.(\d+)$#)
+          {
+            $copy= 1; # only transcribe those URLs that really look like some
+            if ($val ne $book->{hdl})
+            { # TODO:
+              $book->{update_hdl}= $book->{hdl};
+              $self->{counters}->{update_hdl}++;
+              push(@alma_notes, 'update_hdl');
+            }
+          }
+        }
+        elsif ($mf eq 'doi')
+        {
+          if ($val =~ m#^10.25365/digital-copy\.(\d+)$#)
+          {
+            $copy= 1; # only transcribe those URLs that really look like some
+            # TODO: DOI checken
+            if ($val ne $book->{doi})
+            { # TODO:
+              $book->{update_doi}= $book->{doi};
+              $self->{counters}->{update_doi}++;
+              push(@alma_notes, 'update_doi');
+            }
+          }
+          else
+          {
+            print __LINE__, " ATTN: unknown DOI format in marc record: df=[$df] val=[$val]\n";
+            $self->{counters}->{bad_doi}++;
+          }
+        }
+        elsif ($mf eq 'urn')
+        {
+          if ($val =~ m#^urn:nbn:at:at-ubw-\d{5}\.\d{5}\.\d{6}-\d$#)
+          {
+            $copy= 1; # only transcribe those URLs that really look like some
+            # TODO: check if this is the known nbn!
+            if ($val ne $book->{urn})
+            { # TODO:
+              $book->{update_urn}= $book->{urn};
+              $self->{counters}->{update_urn}++;
+              push(@alma_notes, 'update_urn');
+            }
+          }
+          else
+          {
+            print __LINE__, " ATTN: unknown NBN (urn) format in marc record: df=[$df] val=[$val]\n";
+            $self->{counters}->{bad_urn}++;
+          }
+        }
+
+        if ($copy)
+        {
+          $book->{'df_'.$mf}= $df;
+          $book->{'val_'.$mf}= $val;
+        }
+        else
+        {
+          push(@alma_notes, "junk_${mf}_in_marc");
+          print __LINE__, " ATTN: ac_number=[$ac_number] pid=[$pid] junk data mf=[$mf] in marc record: df=[$df] val=[$val]\n";
+        }
+      }
+    }
+  }
+  else
+  {
+    # this is ok, not a bug, Alma simply does not know anything about this phaidra object
+    print __LINE__, " ac_number=[$ac_number] mex missing\n";
+  }
+
+  if (!exists($book->{val_urn}) && exists ($book->{urn}))
+  {
+    $book->{update_urn}= $book->{urn};
+    $self->{counters}->{set_urn}++;
+    push(@alma_notes, 'set_urn');
+  }
+
+  unless (exists($book->{df_phaidra}))
+  { # TODO: if there is no phaidra_url at all, set it...
+    $self->{counters}->{set_phaidra_url}++;
+    $book->{update_phaidra_url}= $book->{phaidra_url};
+    push(@alma_notes, 'set_phaidra_url');
+  }
+
+  push (@alma_notes, 'ok') unless (@alma_notes);
+  $book->{alma_notes}= join(',', @alma_notes);
+
+  return 'ok';
+}
+
+=head1 INTERNAL? METHODS
+
+=cut
+
+1;
+