diff --git a/eprints1.pl b/eprints1.pl
index 4f96c50b2dcd0ee08d3beefb80552209b73a9756..cb21900ffbcb172b6e1708ef96df6220fd9a9cad 100755
--- a/eprints1.pl
+++ b/eprints1.pl
@@ -74,6 +74,7 @@ $Data::Dumper::Sortkeys= 1;
 
 use FileHandle;
 
+use utf8;
 binmode( STDIN,  ':utf8' );
 binmode( STDOUT, ':utf8' ); autoflush STDOUT 1;
 binmode( STDERR, ':utf8' ); autoflush STDERR 1;
@@ -112,8 +113,8 @@ my $op_mode= 'unknown';
 
 # ======================================================================
 # BEGIN OT2UT: Othes to Utheses 
-# my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
-my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize
+my $ot2ut_context= 'ot2ut-entw'; # TODO: parametrize
+# my $ot2ut_context= 'ot2ut-test'; # TODO: parametrize
 my $oma_sleep_time= 10;
 
 my %map_ot2ut_roles=
@@ -172,6 +173,36 @@ if ($0 eq './oma.pl')   { $op_mode= 'oma';   $do_upload= 1; }
 my $flag_add_utheses_policies= 1;
 my $flag_add_identifiers= 1;
 
+my %bucketlist_column_descriptions=
+(
+  bucket => 'Kategorie bzw. Klassifizierung - wird dynamisch erstellt - somit keine ID',
+  eprint_status => 'Status [Archiv/Buffer]',
+  einverstaendnis => 'Werknutzung [TRUE/FALSE/NULL=nicht vorhanden]',
+  sperre => 'Sperre nach §86 gesetzt [TRUE/FALSE/NULL=nicht vorhanden]',
+  hds => 'Has Date Sperre [future/past/no]',
+  fts => 'Full-Text-Status [public/restricted]',
+  docs => 'Verfügbare Dokumente [1 = Thesis]',
+  pub => 'Zahl der zugaenglichen Dokumente',
+  restr => 'Zahl der gesperrten Dokumente',
+  embargo =>  'Zahl der Dokumente mit Embargo',
+  errors => 'z.B. Datensatz entspricht nicht den Erwartungen',
+
+  # Abbildung bzw. Status in utheses
+  lrq => 'utheses policy flag: Lock-Request (wird ergaenzt, wenn Sperrdatum vorhanden: wird in Folge für den Import aus Hopla verwendet.)',
+  lst => 'utheses policy flag: Lock-Status nach §86',
+  kwl => 'utheses policy flag: Keywords-Locked',
+  absl => 'utheses policy flag: Abstract-Locked',
+  ftl => 'utheses policy flag: Full-Text-Locked',
+  urn => '[ID present/NULL]',
+  doi => '[ID present/NULL]',
+
+  # Anzahl auf Instanz
+  othes => 'Zahl der Objekte in Eprints DB (othes)',
+  entw => 'Zahl der Objekte in utheses Entw',
+  test => 'Zahl der Objekte in utheses Test',
+  prod => 'Zahl der Objekte in utheses Prod',
+);
+
 my %doc_embargo_dates;
 # END OT2UT: Othes to Utheses 
 # ======================================================================
@@ -2520,14 +2551,16 @@ sub oma
     {
       $ignore_errors= 1;
       $col_req->update({ _id => $row->{_id}}, { '$set' => { status => 'in_progress' }});
-      my $cnt= @{$row->{ids}};
+      my @ids;
+      foreach my $id (@{$row->{ids}}) { push (@ids, $id) if ($id =~ m#^\d+$#); }
+      my $cnt= @ids;
 
       my $msg= "send_ids: sending $cnt objects to $ot2ut_context";
       activity({ activity => 'send_batch', msg => $msg});
       send_message($msg);
       $silent_upload_success= 0;
 
-      my ($synced, $res)= ot2ut(@{$row->{ids}});
+      my ($synced, $res)= ot2ut(@ids);
       send_message("send_ids: $res");
       $new_status= 'done' if (@$synced);
     }
@@ -2651,7 +2684,7 @@ sub ot2ut
       }
     }
 
-    my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path)=
+    my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $docs, $utheses_upload_result_json_path)=
        generate_utheses_metadata($epr, $eprint_id);
     my ($eprint_status)= map { $row->{$_} } qw(eprint_status);
 
@@ -2665,7 +2698,7 @@ sub ot2ut
 
         if ($sync_info->{lastmod} eq $lastmod)
         {
-          print __LINE__, " already synced; skipping...\n";
+          print __LINE__, " eprint_id=[$eprint_id] already synced; skipping...\n";
           next;
         }
         else
@@ -2680,7 +2713,10 @@ sub ot2ut
 
     my $ts_upload= ts_ISO_gmt();
 
-    if (!defined ($files) || ref($files) ne 'ARRAY' ||  @$files != 1)
+    if (!defined ($files) || ref($files) ne 'ARRAY' || @$files < 1
+        # || @$files != 1  # no attachments allowed yet
+        || @$files > 10 # testing ...
+       )
     {
       push (@$errors, { error => 'num_files', note => 'currently limited to objects with exactly one file' } );
     }
@@ -2722,8 +2758,9 @@ sub ot2ut
       my $upload_cnf= $cnf->{$ot2ut_context};
       die "no valid ot2ut context" unless (defined ($upload_cnf));
 
-      my $main_file= $files->[0];
-      my ($local_filename, $lfnm)= map { $main_file->{$_} } qw(path upl_fnm);
+      my @docs= @{$docs->{documents}};
+      my $main_file= shift(@docs);
+      my ($local_filename, $lfnm)= map { $main_file->{$_} } qw(path_doc main);
 
       # TODO: use curl for now
       my @upload_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F), 'metadata=@' . $utheses_json_path,
@@ -2739,6 +2776,7 @@ sub ot2ut
       }
 
       print __LINE__, " upload_cmd: [", join(' ', @upload_cmd), "]\n";
+      my $utheses_id;
       if ($do_upload)
       {
         my $t_curl= time();
@@ -2804,11 +2842,13 @@ old format 2019-11..2020-01
 =end comment
 =cut
 
-          my ($status, $import_status, $utheses_id, $response_msg, $alerts)= map { $result_data->{$_} } qw(status importStatus uthesesId responseMsg alerts);
+          my ($status, $import_status, $utheses_id1, $response_msg, $alerts)= map { $result_data->{$_} } qw(status importStatus uthesesId responseMsg alerts);
+          print __LINE__, " status=[$status] response_msg=[$response_msg]\n";
 
           my $td_start= time()-$t_start;
           my $td_curl= time()-$t_curl;
 
+          my $upload_success;
           my $out_row=
           {
             eprint_id        => $eprint_id,
@@ -2817,20 +2857,134 @@ old format 2019-11..2020-01
             context          => $ot2ut_context,
             ts_upload        => $ts_upload,
             td_total         => $td_start,
-            error_code       => 'ok',
-            error_cnt        => 0,
-            utheses_id       => $utheses_id,
+            error_code       => 'unknown',
+            utheses_id       => $utheses_id1,
             uploaded_fnm     => $lfnm,
+            upload_status    => $status,
             response_msg     => $response_msg,
           };
 
+          if (defined ($utheses_id1) && $status eq '200')
+          {
+            $utheses_id= $utheses_id1;
+            $out_row->{error_code}= $upload_success= 'ok';
+            $out_row->{error_cnt}= 0;
+          }
+          else
+          {
+            $out_row->{error_code}= $upload_success= 'error';
+          }
+
           push (@synced, $out_row);
           $col_sync->insert($out_row);
 
-          send_message("upload success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl") unless ($silent_upload_success);
+          send_message("upload $upload_success: eprint_id=[$eprint_id] eprint_status=[$eprint_status] lastmod=[$lastmod] context=[$ot2ut_context] utheses_id=[$utheses_id] time_total=$td_start time_upload=$td_curl") unless ($silent_upload_success && $upload_success eq 'ok');
         }
 
         sleep(2);
+
+        if (defined ($utheses_id))
+        {
+          # process remaining documents as attachments
+          my $attachment_number= 0;
+          while (my $attachment= shift(@docs))
+          {
+            $attachment_number++;
+            my $fnm_attachment_md=  'othes/utheses_json/' . $eprint_id . '_' . $attachment_number . '_md.json';
+            my $fnm_attachment_res= 'othes/utheses_json/' . $eprint_id . '_' . $attachment_number . '_res.json';
+            my $fnm_attachment_chg= 'othes/utheses_json/' . $eprint_id . '_' . $attachment_number . '_chg.json';
+
+            my $attachment_md=
+            {
+              attachment => my $ai=
+              {
+                file_name => $attachment->{main},
+                file_mime_type => $attachment->{mime_type},
+                lock_status => ($attachment->{security} eq 'public') ? 0 : 1,
+                upload_date => Util::ts::ts_gmdate(),
+                uploaded_by => 'ot2ut',
+                size => int($attachment->{stat_doc}->[7] / 1024) . ' KB',
+              },
+              origin => "admin",
+              rights_statement => "http://rightsstatements.org/vocab/InC/1.0/",
+              data_model_type => "attachment",
+            };
+
+            $ai->{embargo_until_date}= join ('-', map { $attachment->{$_} } qw(date_embargo_year date_embargo_month date_embargo_day)) if (exists ($attachment->{date_embargo}));
+            $ai->{description}= $attachment->{formatdesc} if (defined ($attachment->{formatdate}));
+
+            Util::JSON::write_json_file($fnm_attachment_md, $attachment_md);
+
+# curl -X POST http://localhost:3000/attachment/add/#uthesesId/fromOthes -F "metadata=@uthesesDM.json" -F "file=@attachment.jpg" 
+            my $url1= join ('/', $upload_cnf->{api_url}, qw(attachment add), $utheses_id, 'fromOthes');
+            my @attachment_add_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F),
+               'metadata=@' . $fnm_attachment_md, '-F', 'file=@' . $attachment->{path_doc}, $url1, '-o' . $fnm_attachment_res);
+
+            if (exists ($upload_cnf->{headers}))
+            {
+              foreach my $header (@{$upload_cnf->{headers}})
+              {
+                push (@attachment_add_cmd, '--header', $header );
+              }
+            }
+
+            print __LINE__, " attachment_add_cmd: [", join(' ', @attachment_add_cmd), "]\n";
+            if ($do_upload)
+            {
+              my $t_curl= time();
+              system(@attachment_add_cmd);
+
+              my $result_data1;
+              eval { $result_data1= Util::JSON::read_json_file($fnm_attachment_res); };
+              if ($@)
+              {
+                print __LINE__, " can't parse upload_result; error=[$@]\n";
+              }
+              else
+              {
+                print __LINE__, " upload attchment [$attachment_number] result: ", Dumper($result_data1);
+                my ($attachment_pid, $curl_status, $response_msg)= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg);
+
+                send_message("upload attachment success: eprint_id=[$eprint_id] context=[$ot2ut_context] utheses_id=[$utheses_id] attachment_number=[$attachment_number] attachment_pid=[$attachment_pid] curl_status=[$curl_status]"); # unless ($silent_upload_success);
+
+# curl -X POST http://localhost:3000/attachment/changeStatus/#attachmentPid -F "status=A" 
+                my $url2= join ('/', $upload_cnf->{api_url}, qw(attachment changeStatus), $attachment_pid);
+                my @attachment_chg_cmd= (qw(/usr/bin/curl -X POST -v -H Content-Type:multipart/form-data -F status=A),
+                  $url2, '-o' . $fnm_attachment_chg);
+
+                if (exists ($upload_cnf->{headers}))
+                {
+                  foreach my $header (@{$upload_cnf->{headers}})
+                  {
+                    push (@attachment_chg_cmd, '--header', $header );
+                  }
+                }
+
+                print __LINE__, " attachment_chg_cmd: [", join(' ', @attachment_chg_cmd), "]\n";
+                my $t_curl= time();
+                system(@attachment_chg_cmd);
+
+                my $result_data2;
+                eval { $result_data2= Util::JSON::read_json_file($fnm_attachment_res); };
+                if ($@)
+                {
+                  print __LINE__, " can't parse upload_result; error=[$@]\n";
+                }
+                else
+                {
+                  print __LINE__, " change attchment [$attachment_number] result: ", Dumper($result_data2);
+                  # my $attachment_pid= map { $result_data1->{$_ } } qw(attachmentPid status responseMsg);
+                }
+
+              }
+
+            }
+          }
+        }
+        else
+        {
+          # no utheses_id defined
+        }
       }
     }
 
@@ -2885,19 +3039,33 @@ sub generate_utheses_metadata
   my $history= get_history($epr_db, $eprintid);
   # print __LINE__, " history: ", Dumper($history); exit;
 
-  my ($lang_pdf, $files)= analyze_files(map { $row->{$_} } qw(eprintid fileinfo dir));
+  my ($eprint_status, $fileinfo, $dir)= map { $row->{$_} } qw(eprint_status fileinfo dir);
+
+  my $docs= get_documents($epr_db, $eprintid, $dir);
+  print __LINE__, " docs: ", Dumper($docs);
+
+  my ($lang_pdf, $files)= analyze_files($eprintid, $fileinfo, $dir);
   print __LINE__, " lang_pdf=[$lang_pdf] files: ", Dumper($files);
-  my $main_file;
-  if (@$files)
-  {
-    $main_file= $files->[0];
-  }
-  else
+
+  # merge info from get_documents() and analyze_files()
+  my @d= @{$docs->{documents}};
+  push (@errors, { error => 'no_file' }) unless (@d);
+  my $main_file= $d[0]; # documents are ordered by their "pos" or "placement" attribute; the first on is the main document
+  my %docs= map { $_->{path_doc} => $_ } @d;
+
+  foreach my $fileinfo (@$files)
   {
-    push (@errors, { error => 'no_file' });
+    my $doc= $docs{$fileinfo->{path}};
+    unless (defined ($doc))
+    {
+      push (@errors, { error => 'document_mismatch', fileinfo => $fileinfo });
+      next;
+    }
+    $doc->{fileinfo}= $fileinfo;
   }
 
-  my ($eprint_status)= map { $row->{$_} } qw(eprint_status);
+  print __LINE__, " docs: ", Dumper($docs);
+  print __LINE__, " main_file: ", Dumper($main_file);
 
   my $utheses_json_path= 'othes/utheses_json/' . $eprintid . '.json';
   my $utheses_upload_result_json_path= 'othes/utheses_json/' . $eprintid . '_upload_result.json';
@@ -2920,8 +3088,9 @@ sub generate_utheses_metadata
 
   print __LINE__, " row: ", Dumper($row) if ($debug_level > 2);
 
-  my ($err_st_id, $study_id, $studies_codes)= get_study_id($row->{matr}, $row->{studienkennzahl});
-  push (@errors, @$err_st_id) if (@$err_st_id);
+  my ($err_st_id, $warn_st_id, $study_id, $studies_codes)= get_study_id($row->{matr}, $row->{studienkennzahl});
+  push (@errors,   @$err_st_id)  if (@$err_st_id);
+  push (@warnings, @$warn_st_id) if (@$warn_st_id);
 
   my $ut= new Univie::Utheses::Container();
 
@@ -3074,11 +3243,11 @@ sub generate_utheses_metadata
 
   # 2020-05-14 nd: not needed: $thesis->{uploaded_by}= 'ot2ut';
   $thesis->{subject_classifications}= $classifications;
-  $thesis->{number_of_pages}= "$main_file->{page_count}";  # Phaidra expects this as a string
+  $thesis->{number_of_pages}= "$main_file->{fileinfo}->{page_count}";  # Phaidra or utheses expects this as a string
 
   # 2020-06-30: modify filename for files containing sensitive information, but keep the original filename
-  $thesis->{original_filename}= $main_file->{orig_fnm};
-  $thesis->{upload_filename}= $main_file->{upl_fnm};
+  $thesis->{original_filename}= $main_file->{fileinfo}->{orig_fnm};
+  $thesis->{upload_filename}= $main_file->{fileinfo}->{upl_fnm};
 
   $ut->public('thesis', $thesis);
 
@@ -3091,23 +3260,25 @@ sub generate_utheses_metadata
 
   Util::JSON::write_json_file($utheses_json_path, $ut->{public});
 
-  (\@errors, \@warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path);
+  (\@errors, \@warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $docs, $utheses_upload_result_json_path);
 }
 
 sub get_documents
 {
   my $epr_db= shift;
   my $eprint_id= shift;
+  my $dir= shift;
 
   my $document_rows= $epr_db->get_all_x('document', ['eprintid=?', $eprint_id]);
   # print __LINE__, " document_rows: ", Dumper($document_rows);
 
-  my @documents;
+  my @dirs= split('/', $dir);
+  shift(@dirs); # remove "disk0" from the beginning
+
   my @notes;
   my $res=
   {
     eprint_id      => $eprint_id,
-    documents      => \@documents,
     cnt_docs       => 0,
     cnt_public     => 0,
     cnt_restricted => 0,
@@ -3116,17 +3287,25 @@ sub get_documents
     notes => \@notes,
   };
 
+  my @docs;
   DOCUMENT: foreach my $document_id (keys %$document_rows)
   {
     my $row= $document_rows->{$document_id};
 
+    # print __LINE__, " document_row: ", Dumper($row);
+    my ($main, $idx)= map { $row->{$_} } qw(main pos);
+
     if ($row->{main} eq 'indexcodes.txt' || $row->{main} eq 'preview.png')
     { # ignore these ...
       next DOCUMENT;
     }
 
-    # print __LINE__, " document_row: ", Dumper($row);
-    my $idx= $row->{pos};
+    my $rev_dir= sprintf("%02d", $idx);
+    my $path_doc= join('/', '/backup/othes/eprints', @dirs, $rev_dir, $main); # TODO: parametrize!
+    my (@stat_doc)= stat($path_doc);
+    $row->{path_doc}= $path_doc;
+    $row->{stat_doc}= \@stat_doc;
+
     if (defined ($row->{placement}) && $row->{pos} != $row->{placement})
     {
       push (@notes, "pos != placement: pos=[$row->{pos}] placement=[$row->{placement}]");
@@ -3136,21 +3315,21 @@ sub get_documents
 
     $idx--; # NOTE: pos and placement start at 1
     # print __LINE__, " idx=[$idx]\n";
-    if (defined($documents[$idx]))
+    if (defined($docs[$idx]))
     {
       push (@notes, "already a document at index=[$idx]");
       $res->{show}++;
-      push (@documents, $row);
+      push (@docs, $row);
     }
     elsif ($idx < 0)
     {
       push (@notes, "negative index=[$idx]");
       $res->{show}++;
-      push (@documents, $row);
+      push (@docs, $row);
     }
     else
     {
-      $documents[$idx]= $row;
+      $docs[$idx]= $row;
     }
 
     $res->{cnt_docs}++;
@@ -3178,12 +3357,15 @@ sub get_documents
     }
   }
 
-  my $idx_first_public;
-  foreach my $doc (@documents)
+  # document list may contain holes, that is, undefined items, so we remove these
+  my @docs2;
+  foreach my $doc (@docs)
   {
-    
+    push (@docs2, $doc) if (defined ($doc));
   }
 
+  $res->{documents}= \@docs2,
+
   $res;
 }
 
@@ -3232,9 +3414,7 @@ sub get_study_id
   my $matr= shift;
   my $stkz= shift;
 
-  my @errors= ();
-  my @stkz;
-  my $studies_codes;
+  my (@errors, @warnings, @stkz, $studies_codes);
 
   # NOTES:
   # * Bildungseinrichtungs-ID (here university_code) see
@@ -3255,9 +3435,9 @@ sub get_study_id
         while ($digits =~ s#^(\d\d\d)#{push(@stkz,$1);''}#ge){};
         if ($digits ne '')
         {
-          if ($digits =~ m#^[23]$# && !defined ($coop_kz))
+          if ($digits =~ m#^0?(2)$# && !defined ($coop_kz))
           {
-            $coop_kz= $digits;
+            $coop_kz= $1;
           }
           else
           {
@@ -3272,7 +3452,7 @@ sub get_study_id
         if (defined ($coop_kz))
         {
           $studies_codes->{cooperation_code}= 'U'. $coop_kz if ($coop_kz =~ m#[A-Z]#);
-          $studies_codes->{cooperation_code}= '0'. $coop_kz if ($coop_kz =~ m#[23]#);
+          $studies_codes->{cooperation_code}= '0'. $coop_kz if ($coop_kz =~ m#^2$#);
         }
       }
       else
@@ -3287,12 +3467,12 @@ sub get_study_id
   }
   else
   {
-    push (@errors, { error => 'stkz_empty' });
+    push (@warnings, { warning => 'stkz_empty' });
   }
 
   my $study_id= join ('', $matr, @stkz);
 
-  (\@errors, $study_id, $studies_codes);
+  (\@errors, \@warnings, $study_id, $studies_codes);
 }
 
 sub get_thesis_data
@@ -3499,7 +3679,7 @@ sub get_names_for_role
   {
     my $names= $row->{$column_name};
     print __LINE__, " names=[$names]\n";
-    next if ($names eq '');
+    next if ($names eq '' || $names eq '-' || $names eq 'k. A.' || $names =~ m#nicht angegeben *#);
 
     my @names= split (/\s*;\s*/, $names);
     foreach my $name (@names)
@@ -3546,6 +3726,9 @@ sub analyze_files
     $format= 'pdf' if ($icon eq '/style/images/fileicons/pdf.png' || $icon eq '/style/images/fileicons/application_pdf.png');
 
     $filepath =~ s#%([\dA-Fa-f]{2})#chr(hex($1))#ge; # filenames are URL encoded, see 19072 for an example
+    print __LINE__, " vor utf8::decode filepath=[$filepath]\n";
+    utf8::decode($filepath);
+    print __LINE__, " nach utf8::decode filepath=[$filepath]\n";
 
     my @filepath= split('/', $filepath);
     my $upl_fnm= my $fnm= pop(@filepath);
@@ -3568,29 +3751,35 @@ sub analyze_files
     print __LINE__, " dir=[$dir] fi=[$fi] fnm=[$fnm] ext=[$ext]\n";
 
     my $rev_dir= sprintf("%02d", pop(@filepath));
-    my $path_pdf= join('/', '/backup/othes/eprints', @dirs, $rev_dir, $fnm);
-    my $path_txt= join('/', '/backup/othes/eprints', @dirs, $rev_dir, join ('.', @fnm, 'txt'));
+    my $path_pdf= join('/', '/backup/othes/eprints', @dirs, $rev_dir, $fnm); # TODO: parametrize!
     print __LINE__, " path_pdf=[$path_pdf]\n";
-    print __LINE__, " path_txt=[$path_txt]\n";
 
-    my @st_pdf= stat($path_pdf);
-    my @st_txt= stat($path_txt);
-    if (!@st_txt || $st_txt[9] < $st_pdf[9])
-    {
-      system ('pdftotext', $path_pdf);
-    }
+    push (@files, my $f_obj= { format => $format, path => $path_pdf, orig_fnm => $fnm, upl_fnm => $upl_fnm,});
 
-    my $verdict= `/home/gg/bin/guesslang.py '$path_txt'`;
-    print __LINE__, " guesslang: verdict=[$verdict]\n";
-    my @pages= split("\n", $verdict);
-    foreach my $page (@pages)
+    if ($format eq 'pdf')
     {
-      my ($lang)= split(' ', $page);
-      # print __LINE__, " lang=[$lang]\n";
-      $lang{$lang}++;
-    }
+      my $path_txt= join('/', '/backup/othes/eprints', @dirs, $rev_dir, join ('.', @fnm, 'txt'));
+      print __LINE__, " path_txt=[$path_txt]\n";
+
+      my @st_pdf= stat($path_pdf);
+      my @st_txt= stat($path_txt);
+      if (!@st_txt || $st_txt[9] < $st_pdf[9])
+      {
+        system ('pdftotext', $path_pdf);
+      }
 
-    push (@files, { format => $format, path => $path_pdf, orig_fnm => $fnm, upl_fnm => $upl_fnm, page_count => scalar @pages });
+      my $verdict= `/home/gg/bin/guesslang.py '$path_txt'`;
+      print __LINE__, " guesslang: verdict=[$verdict]\n";
+      my @pages= split("\n", $verdict);
+      foreach my $page (@pages)
+      {
+        my ($lang)= split(' ', $page);
+        # print __LINE__, " lang=[$lang]\n";
+        $lang{$lang}++;
+      }
+      
+      $f_obj->{page_count}= scalar @pages;
+    }
   }
 
     my $max= 0;
@@ -3820,7 +4009,7 @@ sub debug_stkz
     my ($id, $stkz, $matr)= map { $r->{$_} } @col_names_db;
     push (@{$stkz{$stkz}}, $id);
 
-    my ($errors, $study_id, $studies_codes)= get_study_id($matr, $stkz);
+    my ($errors, $warnings, $study_id, $studies_codes)= get_study_id($matr, $stkz);
     my $stcd= join(':', map { $studies_codes->{$_} } qw(university_code code_1 code_2 code_3 cooperation_code));
     # print __LINE__, " id=[$id] stcd=[$stcd] ", Dumper($studies_codes);
 
@@ -3891,10 +4080,9 @@ sub update_policies
     print __LINE__, " eprint_id=[$eprint_id] x1_lastmod=[$x1_lastmod]\n";
 
     print __LINE__, ' ', '='x70, "\n";
-    my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $utheses_upload_result_json_path)=
+    my ($errors, $warnings, $row, $lastmod, $ut, $utheses_json_path, $files, $docs, $utheses_upload_result_json_path)=
        generate_utheses_metadata($epr, $eprint_id);
 
-    my $docs= get_documents($epr_db, $eprint_id);
     my $show= $docs->{show};
 
     my $cnt_errors=   @$errors;
@@ -3980,6 +4168,7 @@ sub policies_stats
   my $cur_sync= $col_sync->find({});
 
   my %synced;
+  my (@upload_errors, %upload_errors);
   while ($running)
   {
     my $row_sync= $cur_sync->next();
@@ -3987,7 +4176,12 @@ sub policies_stats
 
     # print __LINE__, " row_sync: ", Dumper($row_sync); last;
     my ($eprint_id, $lastmod, $context, $utheses_id)= map { $row_sync->{$_} } qw(eprint_id lastmod context utheses_id);
-    next unless (defined ($utheses_id));
+    unless (defined ($utheses_id))
+    {
+      # push (@upload_errors, $row_sync);
+      push (@{$upload_errors{$row_sync->{error_code}}}, $eprint_id);
+      next;
+    }
 
        if ($context eq 'ot2ut-test') { $context= 'test'; }
     elsif ($context eq 'ot2ut-entw') { $context= 'entw'; }
@@ -4000,10 +4194,10 @@ sub policies_stats
   my $col_utp= $db_ot2ut->get_collection('utheses.policies');
   my $cur_utp= $col_utp->find({});
   
-  my @columns= qw(eprint_status einverstaendnis sperre hds fts docs pub restr errors);
+  my @columns= qw(eprint_status einverstaendnis sperre hds fts docs pub restr embargo errors);
   if ($flag_add_utheses_policies)
   {
-    push (@columns, qw(lrq kwl absl ftl));
+    push (@columns, qw(lrq lst kwl absl ftl));
   }
   if ($flag_add_identifiers)
   {
@@ -4015,6 +4209,9 @@ sub policies_stats
   my $max; #= 1000;
   my @contexts= qw(entw test prod);
   my (%cnt_errors, %cnt_warnings, %all_errors);
+  my @lst_nonpublic_doc_first; # list of objects with cnt_public > 1 && cnt_restricted > 1 && first document is not public
+  my @lst_docs_with_notes;
+  my %docs_with_holes;
   while ($running)
   {
     my $row_utp= $cur_utp->next();
@@ -4025,8 +4222,26 @@ sub policies_stats
        map { $row_utp->{$_} } qw(eprint_id eprint_status date_sperre einverstaendnis lastmod full_text_status sperre cnt_errors cnt_warnings errors warnings docs ut_public urn doi ac_nummer);
 
     $eprint_id += 0;
-    my ($cnt_docs, $cnt_public, $cnt_restricted)=
-       map { $docs->{$_} } qw(cnt_docs cnt_public cnt_restricted);
+    my ($cnt_docs, $cnt_public, $cnt_restricted, $cnt_embargo)=
+       map { $docs->{$_} } qw(cnt_docs cnt_public cnt_restricted cnt_embargo);
+
+    push (@lst_nonpublic_doc_first, $eprint_id) if ($cnt_public > 1 && $cnt_restricted > 1 && $docs->{documents}->[0]->{security} ne 'public');
+    push (@lst_docs_with_notes, $eprint_id) if (@{$docs->{notes}} > 0);
+    
+    foreach my $doc (@{$docs->{documents}})
+    {
+      unless (defined ($doc))
+      {
+        $docs_with_holes{$eprint_id}++;
+        next;
+      }
+
+      if (exists ($doc->{date_embargo}))
+      {
+        my $d= $doc->{date_embargo};
+        if ($d eq '2099-12-31T00:00:00Z') { $doc_embargo_dates{$d}++; } else { push (@{$doc_embargo_dates{$d}}, $eprint_id); }
+      }
+    }
 
     my $has_errors= ($cnt_errors > 0) ? 'yes' : 'no';
     my $has_date_sperre= 'no';
@@ -4036,9 +4251,9 @@ sub policies_stats
       $has_date_sperre=($date_sperre lt $ts_now) ? 'past' : 'future';
     }
 
-    my @bucket_selectors= ($eprint_status, $einverstaendnis, $sperre, $has_date_sperre, $full_text_status, $cnt_docs, $cnt_public, $cnt_restricted, $has_errors);
+    my @bucket_selectors= ($eprint_status, $einverstaendnis, $sperre, $has_date_sperre, $full_text_status, $cnt_docs, $cnt_public, $cnt_restricted, $cnt_embargo, $has_errors);
 
-    my ($utptp, $lrq, $kwl, $absl, $ftl);
+    my ($utptp, $lrq, $lst, $kwl, $absl, $ftl);
     my $utpt= $utp->{thesis};
     if (defined ($utpt))
     {
@@ -4047,12 +4262,12 @@ sub policies_stats
 
     if (defined ($utptp))
     {
-      ($lrq, $kwl, $absl, $ftl)= map { $utptp->{$_} } qw(lock_request keywords_locked abstract_locked fulltext_locked);
+      ($lrq, $lst, $kwl, $absl, $ftl)= map { $utptp->{$_} } qw(lock_request lock_status keywords_locked abstract_locked fulltext_locked);
     }
 
     if ($flag_add_utheses_policies)
     {
-      push (@bucket_selectors, $lrq, $kwl, $absl, $ftl);
+      push (@bucket_selectors, $lrq, $lst, $kwl, $absl, $ftl);
     }
 
     if ($flag_add_identifiers)
@@ -4072,9 +4287,10 @@ sub policies_stats
       foreach my $error (@$errors)
       {
         my $error_name= $error->{error};
+        $error_name= 'bad_name_buffer' if ($error_name eq 'bad_name' && $eprint_status eq 'buffer');
         $cnt_errors{$error_name}++;
 
-        $error->{eprint_id}= $eprint_id;
+        $error->{eprint_id}= "$eprint_id";
         push (@{$all_errors{$error_name}}, $error);
       }
     }
@@ -4108,9 +4324,9 @@ sub policies_stats
   }
 
 # print __LINE__, " working: cctab: ", Dumper($cctab);
-  # annotations
-  my @s1= qw(archive FALSE FALSE no restricted 1 1 0 no);
-  push (@s1, qw( 0 0 0 1))     if ($flag_add_utheses_policies);
+  # BEGIN annotations
+  my @s1= qw(archive FALSE FALSE no restricted 1 1 0 0 no);
+  push (@s1, qw(0 0 0 0 1))    if ($flag_add_utheses_policies);
   push (@s1, qw(present NULL)) if ($flag_add_identifiers);
   my $b1= $cctab->bucket(0, @s1);
   if (defined ($b1))
@@ -4119,8 +4335,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b1);
   }
 
-  my @s2= qw(archive FALSE FALSE no public 1 1 0 no);
-  push (@s2, qw(0 0 0 0))      if ($flag_add_utheses_policies);
+  my @s2= qw(archive FALSE FALSE no public 1 1 0 0 no);
+  push (@s2, qw(0 0 0 0 0))    if ($flag_add_utheses_policies);
   push (@s2, qw(present NULL)) if ($flag_add_identifiers);
   my $b2= $cctab->bucket(0, @s2);
   if (defined ($b2))
@@ -4129,8 +4345,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b2);
   }
 
-  my @s2b= qw(archive TRUE FALSE no public 1 1 0 no);
-  push (@s2b, qw(0 1 1 0))      if ($flag_add_utheses_policies);
+  my @s2b= qw(archive TRUE FALSE no public 1 1 0 0 no);
+  push (@s2b, qw(0 0 1 1 0))    if ($flag_add_utheses_policies);
   push (@s2b, qw(present NULL)) if ($flag_add_identifiers);
   my $b2b= $cctab->bucket(0, @s2b);
   if (defined ($b2b))
@@ -4139,8 +4355,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b2b);
   }
 
-  my @s2c= qw(archive TRUE NULL no public 1 1 0 no);
-  push (@s2c, qw(0 1 1 0))      if ($flag_add_utheses_policies);
+  my @s2c= qw(archive TRUE NULL no public 1 1 0 0 no);
+  push (@s2c, qw(0 0 1 1 0))    if ($flag_add_utheses_policies);
   push (@s2c, qw(present NULL)) if ($flag_add_identifiers);
   my $b2c= $cctab->bucket(0, @s2c);
   if (defined ($b2c))
@@ -4148,9 +4364,10 @@ sub policies_stats
     $b2c->{annotation}= { bgcolor => 'pink', note => 'abstract und keywords locked, aber pdf public' };
     print __LINE__, " special bucket found: ", Dumper($b2c);
   }
+  # ZZZZ
 
-  my @s3= qw(archive TRUE FALSE no restricted 1 1 0 no);
-  push (@s3, qw(0 0 0 1))      if ($flag_add_utheses_policies);
+  my @s3= qw(archive TRUE FALSE no restricted 1 1 0 0 no);
+  push (@s3, qw(0 0 0 0 1))    if ($flag_add_utheses_policies);
   push (@s3, qw(present NULL)) if ($flag_add_identifiers);
   my $b3= $cctab->bucket(0, @s3);
   if (defined ($b3))
@@ -4159,8 +4376,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b3);
   }
 
-  my @s4a= qw(archive TRUE NULL no public 1 1 0 no);
-  push (@s4a, qw(0 0 0 0))         if ($flag_add_utheses_policies);
+  my @s4a= qw(archive TRUE NULL no public 1 1 0 0 no);
+  push (@s4a, qw(0 0 0 0 0))       if ($flag_add_utheses_policies);
   push (@s4a, qw(present present)) if ($flag_add_identifiers);
   my $b4a= $cctab->bucket(0, @s4a);
   if (defined ($b4a))
@@ -4169,8 +4386,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b4a);
   }
 
-  my @s4b= qw(archive TRUE NULL no public 1 1 0 yes);
-  push (@s4b, qw(0 0 0 0))   if ($flag_add_utheses_policies);
+  my @s4b= qw(archive TRUE NULL no public 1 1 0 0 yes);
+  push (@s4b, qw(0 0 0 0 0)) if ($flag_add_utheses_policies);
   push (@s4b, qw(NULL NULL)) if ($flag_add_identifiers);
   my $b4b= $cctab->bucket(0, @s4b);
   if (defined ($b4b))
@@ -4179,8 +4396,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b4b);
   }
 
-  my @s4c= qw(archive TRUE NULL no public 1 1 0 yes);
-  push (@s4c, qw(0 0 0 0))         if ($flag_add_utheses_policies);
+  my @s4c= qw(archive TRUE NULL no public 1 1 0 0 yes);
+  push (@s4c, qw(0 0 0 0 0))       if ($flag_add_utheses_policies);
   push (@s4c, qw(present present)) if ($flag_add_identifiers);
   my $b4c= $cctab->bucket(0, @s4c);
   if (defined ($b4c))
@@ -4189,8 +4406,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b4c);
   }
 
-  my @s5a= qw(archive TRUE FALSE no public 1 1 0 no);
-  push (@s5a, qw(0 0 0 0))         if ($flag_add_utheses_policies);
+  my @s5a= qw(archive TRUE FALSE no public 1 1 0 0 no);
+  push (@s5a, qw(0 0 0 0 0))       if ($flag_add_utheses_policies);
   push (@s5a, qw(present present)) if ($flag_add_identifiers);
   my $b5a= $cctab->bucket(0, @s5a);
   if (defined ($b5a))
@@ -4199,8 +4416,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b5a);
   }
 
-  my @s5b= qw(archive TRUE FALSE no public 1 1 0 no);
-  push (@s5b, qw(0 0 0 0))      if ($flag_add_utheses_policies);
+  my @s5b= qw(archive TRUE FALSE no public 1 1 0 0 no);
+  push (@s5b, qw(0 0 0 0 0))    if ($flag_add_utheses_policies);
   push (@s5b, qw(present NULL)) if ($flag_add_identifiers);
   my $b5b= $cctab->bucket(0, @s5b);
   if (defined ($b5b))
@@ -4209,8 +4426,8 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b5b);
   }
 
-  my @s5c= qw(archive TRUE FALSE no public 1 1 0 no);
-  push (@s5c, qw(0 0 0 0))      if ($flag_add_utheses_policies);
+  my @s5c= qw(archive TRUE FALSE no public 1 1 0 0 no);
+  push (@s5c, qw(0 0 0 0 0)) if ($flag_add_utheses_policies);
   push (@s5c, qw(NULL NULL)) if ($flag_add_identifiers);
   my $b5c= $cctab->bucket(0, @s5c);
   if (defined ($b5c))
@@ -4219,31 +4436,57 @@ sub policies_stats
     print __LINE__, " special bucket found: ", Dumper($b5c);
   }
 
-  my @s6a= qw(archive FALSE FALSE no restricted 1 0 1 no);
-  push (@s6a, qw(0 0 0 1))      if ($flag_add_utheses_policies);
+  my @s6a= qw(archive FALSE FALSE no restricted 1 0 1 1 no);
+  push (@s6a, qw(0 0 0 0 1))    if ($flag_add_utheses_policies);
   push (@s6a, qw(present NULL)) if ($flag_add_identifiers);
   my $b6a= $cctab->bucket(0, @s6a);
   if (defined ($b6a))
   {
-    $b6a->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext' };
+    $b6a->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext; NBN schon vergeben' };
     print __LINE__, " special bucket found: ", Dumper($b6a);
   }
 
-  my @s6b= qw(archive FALSE FALSE no restricted 1 0 1 no);
-  push (@s6b, qw(0 0 0 1))         if ($flag_add_utheses_policies);
+  my @s6a2= qw(archive FALSE FALSE no restricted 1 0 1 1 no);
+  push (@s6a2, qw(0 0 0 0 1))    if ($flag_add_utheses_policies);
+  push (@s6a2, qw(NULL NULL)) if ($flag_add_identifiers);
+  my $b6a2= $cctab->bucket(0, @s6a2);
+  if (defined ($b6a))
+  {
+    $b6a2->{annotation}= { bgcolor => 'lightblue', note => 'kein Volltext; NBN noch nicht vergeben' };
+    print __LINE__, " special bucket found: ", Dumper($b6a2);
+  }
+
+  my @s6b= qw(archive FALSE FALSE no restricted 1 0 1 1 no);
+  push (@s6b, qw(0 0 0 0 1))         if ($flag_add_utheses_policies);
   push (@s6b, qw(present present)) if ($flag_add_identifiers);
   my $b6b= $cctab->bucket(0, @s6b);
   if (defined ($b6b))
   {
-    $b6b->{annotation}= { bgcolor => ' #33a2ff', note => 'kein Volltext; DOI wurde offenbar vorher vergeben' };
+    $b6b->{annotation}= { bgcolor => '#33a2ff', note => 'Volltext gesperrt; DOI wurde offenbar vorher vergeben' };
+    print __LINE__, " special bucket found: ", Dumper($b6b);
+  }
+
+  my @s6c= qw(archive FALSE FALSE no restricted 1 0 1 0 no);
+  push (@s6c, qw(0 0 0 0 1))    if ($flag_add_utheses_policies);
+  push (@s6c, qw(present NULL)) if ($flag_add_identifiers);
+  my $b6c= $cctab->bucket(0, @s6c);
+  if (defined ($b6c))
+  {
+    $b6c->{annotation}= { bgcolor => 'pink', note => 'Volltext restrected, kein Embargo' };
     print __LINE__, " special bucket found: ", Dumper($b6b);
   }
+  # END annotations
 
   # print __LINE__, " cctab: ", Dumper($cctab);
   my $trailer= "<h2>errors</h2>\n". Dumper(\%cnt_errors);
   $trailer .= "<h2>warnings</h2>\n". Dumper(\%cnt_warnings);
   print __LINE__, " trailer=[",$trailer,"]\n";
 
+  $trailer .= "<h2>nonpublic_doc_first</h2>\n". Dumper(\@lst_nonpublic_doc_first) if (@lst_nonpublic_doc_first);
+  $trailer .= "<h2>docs with notes</h2>\n". Dumper(\@lst_docs_with_notes) if (@lst_docs_with_notes);
+  $trailer .= "<h2>embargo dates</h2>\n". Dumper(\%doc_embargo_dates);
+  $trailer .= "<h2>upload_errors</h2>\n". Dumper(\%upload_errors);
+
   $cctab->show_tsv(['othes', @contexts], 'counters.tsv', $trailer);
 
   # show objects which were uploaded to utheses but are no longer present in othes
@@ -4254,6 +4497,8 @@ sub policies_stats
   }
 
   Util::JSON::write_json_file('all_errors.json', \%all_errors);
+  # Util::JSON::write_json_file('upload_errors.json', \@upload_errors);
+  Util::JSON::write_json_file('docs_with_holes.json', \%docs_with_holes);
 }
 
 sub cleanup_keywords
@@ -4611,7 +4856,11 @@ EOX
     my $path_lst= join ('/', $base_path, $fnm_lst);
     open (LST, '>:utf8', $path_lst) or die;
     print LST <<"EOX";
+<head>
+<meta charset="UTF-8">
 <title>bucket $bucket_nr</title>
+</head>
+<body>
 <table>
 EOX
 
@@ -4619,7 +4868,8 @@ EOX
     foreach my $col (@columns)
     {
       my $val= shift (@vals);
-      print LST "<tr><th>$col</th><td>$val</td></tr>\n";
+      my $desc= $bucketlist_column_descriptions{$col} || &nbsp;
+      print LST "<tr><th>$col</th><td>$val</td><td>$desc</td></tr>\n";
     }
     print LST "</table>\n";
 
@@ -4713,9 +4963,12 @@ EOX
   if (defined($trailer))
   {
     print IDX $trailer;
-    print __LINE__, " trailer=[",$trailer,"]\n";
+    # print __LINE__, " trailer=[",$trailer,"]\n";
   }
 
+  print IDX <<EOX;
+</body>
+EOX
   close (IDX);
   close (TSV);
 }
@@ -4897,4 +5150,3 @@ Notes:
  * & in abstracts is encoded as &amp; in the DataCite.XML file, but DataCite itself seems to strip that out again
  * as of 2020-01-20, title is processed with strip_text()
 
-