From 64bfebb8e8589dc958d4a587acfc83f925543e7a Mon Sep 17 00:00:00 2001
From: Gerhard Gonter <ggonter@gmail.com>
Date: Wed, 19 Aug 2020 17:57:22 +0200
Subject: [PATCH] fixup for names and document order

---
 eprints1.pl | 99 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 76 insertions(+), 23 deletions(-)

diff --git a/eprints1.pl b/eprints1.pl
index 52175d3..d7ff4b1 100755
--- a/eprints1.pl
+++ b/eprints1.pl
@@ -113,8 +113,8 @@ my $op_mode= 'unknown';
 
 # ======================================================================
 # BEGIN OT2UT: Othes to Utheses 
-# my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
-my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize
+my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
+# my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize
 my $oma_sleep_time= 10;
 
 my %map_ot2ut_roles=
@@ -214,29 +214,29 @@ my $fn_cnt= 0;
 # This should be factored out!!
 my @first_names= qw(
   Adam Adelheid Alejandro Alexa Alexander Alexandra Alfred Alice Alina
-  Alois Amadou-Lamine Ana András Andre Andrea Andreas Angela Angeles
+  Alois Alojz Amadou-Lamine Ana András Andre Andrea Andreas Angela Angeles
   Ania Anika Anjte Anke Anna Annegret Annemarie Anton Antonio Aprile
   Armando Armin Arnd Arno Arnold Arthur Aslan Astrid Awad Axel Barbara
   Barbara-Amina Beatrice Berhard Bernd Bernd-Christian Bernhard Berta
   Bertrand Bettecke Bettina Birgit Birgitta Björn Bodo Bodomo Boris
-  Brigitta Brigitte Caecilie Cesare Chris Christa Christian Christiane
+  Brigitta Brigitte Caecilie Cengiz Cesare Chris Christa Christian Christiane
   Christian-Hubert Christine Christof Christoph Christopher Claire
   Claudia Claus Clemens Constanze Dagmar Daiana Daniel Daniela David
   Diamantopoulos Diana Dieter Dietmar Dimitris Dirim Donald Doris Dorothea
   Dorothée Eberhard Eckhard Edit Eduard Elisabeth Elke Elmar Erhard
-  Erich Erik Ernst Estella Esther Eugen Eva Eva-Maria Eveline Evrim Ewald
+  Erich Erik Ernst Erwin Estella Esther Eugen Eva Eva-Maria Eveline Evrim Ewald
   Federico Fedor Ferdinand Floortje Florian France Frank Franz
   Franz-Markus Franz-Stefan Friederike Friedhelm Friedrich Frigerio
-  Fritz Gabriela Gabriele Garcia Georg George Georgios Gerald Gerda
+  Fritz Gabriel Gabriela Gabriele Garcia Georg George Georgios Gerald Gerda
   Gerhard Gerit Germain Gernot Gero Gerte Gertraud Gil Gordon Gottfried
-  Grandner Gualtiero Gunda Gunnar Gunter Günter Günther Gustav Gyongyi
+  Gualtiero Gunda Gunnar Gunter Günter Günther Gustav Gyongyi
   Gyöngyi Hanna Hannes Hanno Hans Hans-Georg Hans-Jürgen Hanspeter
   Harald Heidemarie Heiner Heinrich Heinz Helen Helga Helmut Helmuth
   Henk Henning Henry Herbert Hermann Hermine Herwig Hilde Hildegard
   Horst Hristov Iacopo Igor Ilona Ilse Ines Ingeborg Ingfrid Ingo Ingrid Irene
-  Irmgard Irmtraud Isabella Isabelle Item Jadranka Jakiša Jan Jan-Heiner
+  Irmgard Irmtraud Isabella Isabelle Item Jadranka Jan Jan-Heiner
   Jens Jiří Joan Joao Johann Johanna Johannes Jörg Jorinde Josef
-  Josipovic József Judith Julia Julius Jürg Jürgen Jutta Karel Karen
+  József Judith Julia Julius Jürg Jürgen Jutta Karel Karen
   Karin Karl Karl-Heinz Katharina Katharine Kathrin Katja Katrin Kerstin
   Kim Kirsten Klara Klára Klaus Klaus-Börge Klaus-Dieter Klemens Konrad
   Konstanze Korina Kornelia Kristina Kurt Larisa Leopold Lieselotte Lorenz
@@ -245,23 +245,23 @@ my @first_names= qw(
   Maria Marianne Marie-France Marija Mario Marion Marko Markus Martin
   Martina Mathilde Matthias Maximilian Melissa Meta Michael Michaela
   Michaela-Maria Michal Michela Michele Michèle Michelle Milena Mira
-  Miranda Moga Mona Monika Monje Nadine Nadja Natalia Nicole Nikolaus
-  Norbert Nuno Oliver Oskar Oskár Otmar Otto Patricia Patrick Paul Paulus
+  Miranda Mitchell Moga Mona Monika Monje Murray Nadine Nadja Natalia Nicole Nikolaus
+  Norbert Nuno Oliver Oskar Oskár Otmar Otto Patricia Patrick Paul
   Peter Petra Philip Philipp Pia Qi Rainer Ralf Ramon Ramón Raphael
   Regina Regine Reingard Reinhard Reinhold Renate René Richard Robert
   Robin Roland Ronald Rosa Rüdiger Rüdiger Rudolf Rupert Ruth Sabina
   Sabine Sami Sandra Sarah Sascha Saskia Savvas Sebastian Segeja Serge
   Sergey Sergio Siegfried Sieglinde Sigmar Sigrid Simon Sonia Sonja
-  Sophie Stefan Stefanie Stefan-Michael Steffen Stephan Stephanie Susanne
-  Susi Suzanne Sylvia Tamara Taťána Tatjana Tecumseh Theodoros Thierry
+  Sophie Stefan Stefanie Stefan-Michael Steffen Stephan Stephanie Steven Susanne
+  Susi Suzanne Sylvia Tamara Taťána Tatjana Tecumseh Theodoros Thierry Thilo
   Thomas Timothy Tobias Tom Udo Ulf Ulrich Ulrike Urs Ursula Ute Verena
   Veronika Viera Viktor Vincente Violetta Vittorio Vlastimil Waldemar Walter
-  Walther Werner Wieland Wilhelm Wolfgang Wolfram Wynfrid Yan Yulia
+  Werner Wieland Wilfried Wilhelm Wolfgang Wolfram Wynfrid Yan Yulia
 );
 
 my %first_names= map { $_ => 1 } @first_names;
 
-my %special_name_mapping=
+my %name_fixup_by_name=
 (
   'THEIS Lioba' => { fn => 'Theis, Lioba' },
   'Univ. Prof in. Dr. in med.univ. Margarethe Geiger' => { fn => 'Geiger, Margarethe' },
@@ -302,10 +302,48 @@ my %special_name_mapping=
   'Uni.-Prof. Dr. Ulrich Teichler' => { fn => 'Teichler, Ulrich' },
   'Ao. Univ.-Porf. Dr. REINPRECHT Christoph' => { fn => 'Reinprecht, Christoph' },
   'ao. Univ.-Prof. Dr. Fritz (Friedrich) Hausjell' => { fn => 'Hausjell, Friedrich' }, # laut ufind ist der Vorname "Friedrich", siehe https://ufind.univie.ac.at/en/person.html?id=1872
+  'Frank Christa' => { fn => 'Frank, Christa' }, # keys in this hash are the raw othes strings (typos, odd spacing and all) and must stay verbatim; fn is the corrected 'Family, Given' form
+  'Uni. Prof. Dr. Dr. hc. Peter Fischer' => { fn => 'Fischer, Peter' },
+  'MR Dr. Andreas SOMMER' => { fn => 'Sommer, Andreas' },
+  'Univ.-Prof. DI Dr. Wolfgang Kainz' => { fn => 'Kainz, Wolfgang' },
+  'Mag.a art. Dr.in.phil Sabine Prokop' => { fn => 'Prokop, Sabine' },
+  'Univ.-Prov. Dr. Biewer Gottfried' => { fn => 'Biewer, Gottfried' }, # "Univ.-Prov." in the key is how the source data spells it
+  'Kosman Admial ' => { fn => 'Admial, Kosman' }, # that's a guess because: eprint_id 62899 has mitbetreuer='Pokorny Lukas', betreuer='Langer Gerhard', beurteiler_1='Kosman Admial', beurteiler_2='Ruzer Serge'; NOTE(review): the other three names of that record are in "family given" order, which would rather suggest 'Kosman, Admial' -- please verify; the trailing blank in the key is part of the source data
+  'Privatdozent Mag. Dr. Ulrich Tran' => { fn => 'Tran, Ulrich' },
+  'Lueger - Schuster Brigitte' => { fn => 'Lueger-Schuster, Brigitte' },
 
 # 'PD DDr Wippersberg Julia' => { fn => 'Wippersberg, Julia' },
 );
 
+my %name_fixup_by_eprint_id= # eprint_id => { column_name => 'Family, Given' }; fixup_name() checks this table first and only then falls back to %name_fixup_by_name
+(
+  '23255' => { betreuer => 'Frank, Christa' },
+  '46724' => { betreuer => 'Leitner, Manuela', beurteiler_1 => 'Leitner, Manuela' }, # othes only shows "Manuela", see https://ubdata.univie.ac.at/AC14545271
+  '47697' => { betreuer => 'Fischer, Peter' },
+  '49466' => { betreuer => 'Frank, Rüdiger' },
+  '58166' => { betreuer => 'Marko, Doris' }, # othes says "Marko Doris", which results in a wrong pick; other objects say "Doris Marko", which leads to a correct pick
+  '55849' => { betreuer => 'Kirchmayr-Schliesselberger, Sabine' },
+  '60229' => { betreuer => 'Slunecko, Thomas' },
+  '60326' => { mitbetreuer => 'Liebhart, Karin' },
+  '60552' => { betreuer => 'Hashi, Hisaki' }, # https://ufind.univie.ac.at/en/person.html?id=13985
+  '60679' => { betreuer => 'Reichertz, Jo' },
+  '60745' => { betreuer => 'Fusani, Leonida' }, # see: https://ufind.univie.ac.at/en/person.html?id=56183
+  '60808' => { beurteiler_1 => 'Weigelin-Schwiedrzik, Susanne', betreuer => 'Weigelin-Schwiedrzik, Susanne' },
+  '60977' => { betreuer => 'Hofmann, Thilo' },
+  '61076' => { betreuer => 'Heinz, Sarah' },
+  '61589' => { betreuer => 'Hofmann, Christa-Ch.' },
+  '61603' => { beurteiler_2 => 'Sejdini, Zekirija' },
+  '61899' => { betreuer => 'Heinz, Sarah' },
+  '62393' => { betreuer => 'Rudolf, Claudia' },
+  '62397' => { betreuer => 'Rudolf, Claudia' },
+  '62568' => { mitbetreuer => 'Montaño, Manuel D.', betreuer => 'Hofmann, Thilo' },
+  '62705' => { betreuer => 'Mathes, Klaus-Dieter' },
+  '62734' => { betreuer => 'Kirchmayr-Schliesselberger, Sabine' },
+  '63391' => { mitbetreuer => 'Garcia, Daniel', beurteiler_2 => 'Garcia, Daniel' },
+  '63404' => { betreuer => 'Rachwald, Arthur R.' },
+  '63461' => { betreuer => 'Marko, Doris' }, # othes says "Marko Doris", which results in a wrong pick; other objects say "Doris Marko", which leads to a correct pick
+);
+
 my %doc_embargo_dates;
 my $base_path= '/var/www/ot2ut'; # TODO(maybe): get this from the config...
 # END OT2UT: Othes to Utheses 
@@ -3450,7 +3488,12 @@ sub get_documents
     my $row= $document_rows->{$document_id};
 
     # print __LINE__, " document_row: ", Dumper($row);
-    my ($main, $idx)= map { $row->{$_} } qw(main pos);
+    my ($main, $idx, $docid)= map { $row->{$_} } qw(main pos docid);
+
+    # FIXUP: modifying database rows; this should be done in the database by the application!
+    # eprint_id 7355: the first pdf is the attachment, the second one is the actual thesis; UB can not fix this in the app
+    $row->{placement}= 2 if ($docid == 7538);
+    $row->{placement}= 1 if ($docid == 7539);
 
     if ($row->{main} eq 'indexcodes.txt' || $row->{main} eq 'preview.png')
     { # ignore these ...
@@ -3822,27 +3865,37 @@ sub fixup_name
 
   unless ($fn_init)
   {
-    open(FN_lst, '>:utf8', 'fixup_names.tsv');
+    my $fnm= 'fixup_names_'.time().'.tsv';
+    open(FN_lst, '>:utf8', $fnm);
+    print __LINE__, " opening $fnm for writing\n";
     print FN_lst join("\t", @fn_lst), "\n";
     $fn_init= 1;
   }
 
   # let the generic fixups begin
   my @n;
-  my $f= $c->{name};
+  my ($f, $ei, $cn)= map { $c->{$_ } } qw(name eprint_id column_name);
 
   my $info;
 
   my ($rc, $nn, $vn);
 
-  if (exists ($special_name_mapping{$f}))
+  if (exists ($name_fixup_by_eprint_id{$ei}) && exists ($name_fixup_by_eprint_id{$ei}->{$cn}))
   {
-    my $x= $special_name_mapping{$f};
+    $f= $name_fixup_by_eprint_id{$ei}->{$cn};
+    $rc= 'by_id';
+    push (@n, 'by_id');
+  }
+  elsif (exists ($name_fixup_by_name{$f}))
+  {
+    my $x= $name_fixup_by_name{$f};
     $f= $x->{fn} if (exists ($x->{fn}));
-    $rc= 'special';
+
+    $rc= 'by_name';
+    push (@n, 'by_name');
   }
   else
-  {
+  { # TODO: refactor this ...
     push (@n, 'aoup')  if ($f=~ s#\ba\.o\s+univ\.\s*prof\.\s*##i);
     push (@n, 'hr')    if ($f=~ s#\bhr\.\s*##i);
     push (@n, 'tit')   if ($f=~ s#\btit\.\s*##i);
@@ -3997,7 +4050,7 @@ sub get_names_for_role
         my ($rc, $nn1, $vn1)= fixup_name({ name => $name, ac_number => $row->{ac_nummer}, eprint_id => $row->{eprintid}, column_name => $column_name });
         print __LINE__, " fixup_name: rc=[$rc] nn1=[$nn1] vn1=[$vn1]\n";
 
-        if (defined ($rc) && ($rc eq 'special' || $rc eq 'split' || $rc eq 'picked'))
+        if (defined ($rc) && ($rc eq 'by_id' || $rc eq 'by_name' || $rc eq 'split' || $rc eq 'picked'))
         {
           push (@result, { family_name => $nn1, given_name => $vn1 });
         }
-- 
GitLab