From 8177bc6e94c37c9560858a8ad58df9fc858f2967 Mon Sep 17 00:00:00 2001
From: Gerhard Gonter <ggonter@gmail.com>
Date: Thu, 5 Sep 2013 14:22:45 +0200
Subject: [PATCH] relocate vlib-related stuff out of TA::ObjReg

---
 .gitignore                   |  1 +
 textarchive/lib/TA/ObjReg.pm | 32 +++++---------
 textarchive/vlib001.pl       | 86 ++++++++++++++++++++++++------------
 3 files changed, 69 insertions(+), 50 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..90c254d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+@*
diff --git a/textarchive/lib/TA/ObjReg.pm b/textarchive/lib/TA/ObjReg.pm
index 4196eef..9811e0e 100644
--- a/textarchive/lib/TA/ObjReg.pm
+++ b/textarchive/lib/TA/ObjReg.pm
@@ -233,27 +233,24 @@ sub load_toc_v1
 sub verify_toc
 {
   my $reg= shift;
+  my $check_item= shift; # callback: update TOC item
+  my $hdr= shift || [];
 
-print "sub verify_toc_v1\n";
-  # my $store= shift; this does not make sense, we need to verify verything anyway
+  my @hdr1= qw(seq found store_count);
 
-  # my @stores= (defined ($store)) ? $store : $reg->stores();
   my @stores= $reg->stores();
   # print "stores: ", join (', ', @stores), "\n"; exit;
-  my %stores;
-
-  my @extra_fields= (exists ($reg->{'toc_extra_fields'})) ? $reg->{'toc_extra_fields'} : ();
-
-  # TODO: this is specific for vlib001.pl, this should be a passed as code ref!
-  my @hdr= qw(seq found store_count path_count path mtime fs_size ino);
 
+  #### my @extra_fields= (exists ($reg->{'toc_extra_fields'})) ? $reg->{'toc_extra_fields'} : ();
   my $c= $reg->{'proj_cat'};
+
   # pick up current tocs to see if the sequence needs to be updated
+  my %stores;
   foreach my $s (@stores)
   {
     my $f= $c . '/' . $s . '.toc.json';
     my $t= TA::Util::slurp_file ($f, 'json');
-    $t= {} unless (defined ($t)); # we need an empty toc if there is none yet
+    $t= {} unless (defined ($t)); # we need an empty toc if there is no toc yet
 
     $stores{$s}= $t;
   }
@@ -309,18 +306,10 @@ print "sub verify_toc_v1\n";
       }
       $ster->{'found'}= 1;
 
-      # TODO: this is specific for vlib001.pl, this should be a passed as code ref!
       my $jj= $j->{'store'}->{$store};
-      my @paths= keys %{$jj->{'path'}};
-      $ster->{'path_count'}= scalar @paths;
       $ster->{'store_count'}= scalar @i_stores;
-      my $p1= shift (@paths);
-      my $px1= $jj->{'path'}->{$p1};
 
-      $ster->{'path'}= $p1;
-      $ster->{'mtime'}= $px1->{'mtime'};
-      $ster->{'fs_size'}= $px1->{'fs_size'};
-      $ster->{'ino'}= $px1->{'ino'};
+      &$check_item($j, $jj, $ster) if (defined ($check_item));
     }
   }
 
@@ -347,13 +336,12 @@ print "sub verify_toc_v1\n";
       print STDERR "cant save toc file '$f'\n";
       next;
     }
-    print TOC join (';', 'key', @hdr), "\n";
+    print TOC join (';', 'key', @hdr1, @$hdr), "\n";
 
     foreach my $k (keys %$ss)
     {
       my $r= $ss->{$k};
-      # TODO: this is specific for vlib001.pl, this should be a passed as code ref!
-      print TOC join (';', $k, map { $r->{$_} } @hdr), "\n";
+      print TOC join (';', $k, map { $r->{$_} } @hdr1, @$hdr), "\n";
     }
 
     close (TOC);
diff --git a/textarchive/vlib001.pl b/textarchive/vlib001.pl
index 4dfb713..9385716 100755
--- a/textarchive/vlib001.pl
+++ b/textarchive/vlib001.pl
@@ -6,17 +6,19 @@
 
 =head1 USAGE
 
-  vlib001.pl -p project-name -s store-name
+  vlib001.pl -p project-name [-s store-name] [paraemters]*
 
   options:
   * -p <project-name>
   * -s <store-name>
+  * --verify   ... verify/create TOC structures
   * --fileinfo ... refresh file info
+  * --lookup   ... lookup for hashes given as parameters
   * -D ... increase debug level
 
 =head1 DESCRIPTION
 
-updates the _catalog file using md5cat scripts and registers the files in
+Updates the _catalog file using md5cat scripts and registers the files in
 the project's object registry.  The environment variable TABASE must point
 to the directory where the object registry's configuration is stored.
 
@@ -76,17 +78,17 @@ exit if ($STOP);
 
 if ($op_mode eq 'refresh')
 {
-my $catalog= $objreg->{'cfg'}->{'catalog'};
-&usage ('no catalog found in config') unless (defined ($catalog));
+  my $catalog= $objreg->{'cfg'}->{'catalog'};
+  &usage ('no catalog found in config') unless (defined ($catalog));
 
-my $stores_p= $objreg->{'cfg'}->{'stores'};
-my $store_cfg= $stores_p->{$store};
-unless (defined ($store_cfg))
-{
-  print "no store config found for '$store', check these: ", Dumper ($stores_p);
-  exit (-2);
-}
-print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG);
+  my $stores_p= $objreg->{'cfg'}->{'stores'};
+  my $store_cfg= $stores_p->{$store};
+  unless (defined ($store_cfg))
+  {
+    print "no store config found for '$store', check these: ", Dumper ($stores_p);
+    exit (-2);
+  }
+  print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG);
 
   if ($catalog->{'format'} eq 'md5cat')
   {
@@ -95,7 +97,9 @@ print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG);
 }
 elsif ($op_mode eq 'verify')
 {
-  $objreg->verify_toc ($store);
+  my @hdr= qw(path_count path mtime fs_size ino);
+
+  $objreg->verify_toc (\&verify_toc_item, \@hdr);
 }
 elsif ($op_mode eq 'lookup')
 {
@@ -159,14 +163,18 @@ sub process_file
       return undef;
     }
 
-    my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1] };
+    my $xdata=
+    {
+      'c_size' => $size, 'path' => $path, 'md5' => $md5,
+      'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1]
+    };
 
     my $reg= $objreg->lookup ($md5);
 
     my @upd;
     my $ydata;   # pointer to file catalog data within main datastructure
     if (defined ($reg))
-    { # we know something about this hash value but not in respect to the repository at hand
+    { # we know something about this key value but not in respect to the repository at hand
       # print "json read: ", main::Dumper ($reg);
       my $sb;
       if (defined ($sb= $reg->{'store'}->{$store})
@@ -175,7 +183,7 @@ sub process_file
           && $st[7] == $ydata->{'fs_size'}
           && $st[9] == $ydata->{'mtime'}
         )
-      { # TODO: compare stored and current information
+      { # compare stored and current information and update if necessary
         foreach my $an (keys %$xdata)
         {
           unless ($ydata->{$an} eq $xdata->{$an})
@@ -187,22 +195,14 @@ sub process_file
       }
       else
       {
-        # print "st: fs_size(7)=[$st[7]] mtime(9)=[$st[9]]\n";
-        # print "ydata: ", Dumper ($ydata);
-        # print "xdata: ", Dumper ($xdata);
-
         $reg->{'store'}->{$store}->{'path'}->{$path}= $ydata= $xdata;
-        # print __LINE__, " reg: ", Dumper ($reg);
-        # print "ydata: ", Dumper ($ydata);
-        # print "xdata: ", Dumper ($xdata);
-
         push (@upd, 'store upd');
       }
     }
     else
-    {
+    { # this key is new, so we simply place what we know in the newly created registry item
       $reg= { 'key' => $md5, 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } };
-      push (@upd, 'new md5');
+      push (@upd, 'new key');
     }
 
     # fill in some more information about that file
@@ -212,10 +212,12 @@ sub process_file
       $xpath=~ s#'#'\\''#g;
       my $res= `/usr/bin/file '$xpath'`;
       chop ($res);
+
       my ($xpath, $fileinfo)= split (/: */, $res, 2);
       $ydata->{'fileinfo'}= $fileinfo;
       push (@upd, 'fileinfo updated');
     }
+
     # TODO: some more information would probably be nice as well
     # e.g. mp3info or stuff
 
@@ -229,6 +231,24 @@ sub process_file
   (wantarray) ? @upd : \@upd;
 }
 
+# callback function for TA::ObjReg::verify
+sub verify_toc_item
+{
+  my $j= shift;     # currently not used, that's the complete json entry for this item
+  my $jj= shift;    # this is just the part refering to the store currently processed
+  my $ster= shift;  # TOC item to be updated
+
+  my @paths= keys %{$jj->{'path'}};
+  $ster->{'path_count'}= scalar @paths;
+  my $p1= shift (@paths);
+  my $px1= $jj->{'path'}->{$p1};
+
+  $ster->{'path'}= $p1;
+  $ster->{'mtime'}= $px1->{'mtime'};
+  $ster->{'fs_size'}= $px1->{'fs_size'};
+  $ster->{'ino'}= $px1->{'ino'};
+}
+
 __END__
 
 =head1 TODO
@@ -238,9 +258,19 @@ __END__
   * The project's config contains all the information that is needed to
     locate all the stores on a given machine, so there should be an option
     that updates everything.
-  * specifing the store should be optional
+  * specifing the store should be optional.
+
+=head2 misc
+
   * maybe it makes sense to offer an option to perform backups along the
     way, for instance, when the store is actually a git repository.
-  * Also, checking the VCS status might (if not committing updates)
+  * also, checking the VCS status (if not committing updates)
     might be useful.
+  * other hashing algorithms:
+    * currently we use md5 for hashing, however, this could should
+      be fairly simple to adopt for sha1, sha256 or something else.
+    * possibly, it makes sense to allow several hashing algorithms
+      in parallel, however, then it might be a good idea to store
+      file metadata in one place and let other hashes point to that
+      place.
 
-- 
GitLab