restructured data model to be compatible with MongoDB which was

also added as a possible backend for storage.

restructured data model to be compatible with MongoDB which was
10355f7c · Gerhard Gonter · 57a48fcb · 10355f7c · 10355f7c
Commit 10355f7c authored 11 years ago by Gerhard Gonter
--- a/textarchive/lib/TA/ObjReg.pm
+++ b/textarchive/lib/TA/ObjReg.pm
@@ -18,7 +18,8 @@ use JSON;
 use File::Find;
 use TA::Util;
-use TA::Hasher;
+my %plugins_loaded= ();
 sub new
 {
@@ -77,13 +78,35 @@ sub get_project
  # print "proj_cfg: ", main::Dumper ($proj_cfg);
  # TODO: check authorization (no need, if local, but for client-server, we need something
+  my $be= $proj_cfg->{'backend'};
+  unless (exists ($plugins_loaded{$be}))
+  {
+       if ($be eq 'TA::Hasher')  { require TA::Hasher; }
+    elsif ($be eq 'TA::UrxnBla') { require TA::UrxnBla; }
+    elsif ($be eq 'MongoDB')     { require MongoDB; }
+    else
+    {
+      print "ATTN: unknown backend '$be'\n";
+      return undef;
+    }
+    $plugins_loaded{$be}= 1;
+  }
+  if ($be eq 'TA::Hasher')
+  {
    # initialize hasher
-  my $base_dir= $obj->{'proj_cfg_dir'};
+    my $ta= $proj_cfg->{'TA::Hasher'};
-  $obj->{'proj_cat'}= my $proj_cat= join ('/', $base_dir, 'cat');
+    $ta->{'name'}= 'file';
-  $obj->{'hasher'}= my $hasher= new TA::Hasher ('algorithm' => $proj_cfg->{'algorithm'}, 'pfx' => $proj_cat, 'name' => 'file');
+    $ta->{'pfx'}= $obj->{'proj_cat'}= my $proj_cat= join ('/', $proj_cfg_dir, 'cat');
+    $obj->{'hasher'}= my $hasher= new TA::Hasher (%$ta);
+  }
+  elsif ($be eq 'MongoDB')
+  {
+    $obj->connect_MongoDB ($proj_cfg);
+  }
  # get sequence number
-  $obj->{'seq_file'}= my $fnm_seq= join ('/', $base_dir, 'sequence.json');
+  $obj->{'seq_file'}= my $fnm_seq= join ('/', $proj_cfg_dir, 'sequence.json');
  $obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json');
  # print "seq: ", main::Dumper ($seq);
  unless (defined ($seq))
@@ -121,20 +144,26 @@ returns that keys value, if present, otherwise, undef.
 sub lookup
 {
  my $obj= shift;
-  my $id_str= shift;
+  my $search= shift;
-  # print "lookup [$id_str]\n";
+  my $be= $obj->{'cfg'}->{'backend'};
-  my @r= $obj->{'hasher'}->check_file ($id_str, 0);
+  print "lookup [$search] be=[$be]\n";
-  # print "id_str=[$id_str] r=", main::Dumper (\@r);
+  print main::Dumper ($search);
-  my ($rc, $path)= @r;
-  my $fnm= $path . '/' . $id_str . '.json';
-  # print "description: [$fnm]\n";
-  my @st= stat ($fnm);
+  my $reg;
-  return undef unless (@st);
+  if ($be eq 'TA::Hasher')
+  {
-  my $reg= TA::Util::slurp_file ($fnm, 'json');
+    my $id_str= $search->{$obj->{'key'}};
+    my ($all_reg, $fnm)= $obj->ta_retrieve ($id_str, 0);
+    print "fnm=[$fnm] all_reg: ", main::Dumper ($all_reg);
+    return undef unless (defined ($all_reg));
+    ($reg)= ta_match ($all_reg, $search);
+  }
+  elsif ($be eq 'MongoDB')
+  {
+    $reg= $obj->{'_col'}->find_one ( $search );
+  }
+  # print "reg: ", main::Dumper ($reg);
  return $reg;
 }
@@ -142,26 +171,55 @@ sub lookup
 sub save
 {
  my $obj= shift;
-  my $id_str= shift;
+  my $search= shift;
  my $new_reg= shift;
-  print "save [$id_str]\n";
+  my $be= $obj->{'cfg'}->{'backend'};
-  my @r= $obj->{'hasher'}->check_file ($id_str, 1);
+  print "save [$new_reg] be=[$be]\n";
-  # print "id_str=[$id_str] r=", main::Dumper (\@r);
+  print main::Dumper ($new_reg);
-  my ($rc, $path)= @r;
+  if ($be eq 'TA::Hasher')
+  {
+    my $id_str= $search->{$obj->{'key'}};
+    my ($all_reg, $fnm)= $obj->ta_retrieve ($id_str, 1);
-  my $fnm= $path . '/' . $id_str . '.json';
+=begin comment
-  # print "description: [$fnm]\n";
    my @st= stat ($fnm);
    unless (@st)
    { # TODO: increment sequence and toc
    }
-  my $j= encode_json ($new_reg);
+=end comment
-  # print "generated json: [$j]\n";
+=cut
+    if (defined ($all_reg))
+    {
+      my ($reg, $idx)= ta_match ($all_reg, $search);
+      if (defined ($reg))
+      {
+        $all_reg->{'entries'}->[$idx]= $new_reg;
+      }
+      else
+      {
+        push (@{$all_reg->{'entries'}}, $new_reg);
+      }
+    }
+    else
+    {
+      $all_reg= { 'key' => $id_str, 'entries' => [ $new_reg ] }
+    }
+    my $j= encode_json ($all_reg);
+    print "fnm=[$fnm]\n";
+    print "generated json: [$j]\n";
    open (J, '>:utf8', $fnm); print J $j; close (J);
  }
+  elsif ($be eq 'MongoDB')
+  {
+    print "new_reg: ", main::Dumper ($new_reg);
+    $obj->{'_col'}->insert ($new_reg);
+  }
+}
 =head1 TOC: Table of Contents
@@ -385,7 +443,99 @@ sub next_seq
  $seq->{'seq'};
 }
-# =head1 INTERNAL FUNCTIONS
+=head1 INTERNAL METHODS
+=head2 $mongo_collection= $obj->connect_MongoDB ($config);
+Connect to MongoDB with connection parameters in hash_ref $config and
+returns the MongoDB collection object.
+$config needs the following attribues: host, db, user, pass, collection
+=cut
+sub connect_MongoDB
+{
+  my $obj= shift;
+  my $cfg= shift;
+  my $cmm= $cfg->{'MongoDB'};
+  print "cmm: ", main::Dumper ($cmm);
+  my $col;
+  eval
+  {
+    my $connection= MongoDB::Connection->new(host => $cmm->{'host'});
+    $connection->authenticate($cmm->{'db'}, $cmm->{'user'}, $cmm->{'pass'});
+    my $db= $connection->get_database($cmm->{'db'});
+    $col= $db->get_collection($cmm->{'collection'});
+    print "col: [$col]\n";
+  };
+  if ($@)
+  {
+    print "ATTN: can't connect to MongoDB ", (join ('/', map { $cmm->{$_} } qw(host user collection))), "\n";
+    return undef;
+  }
+  return $obj->{'_col'}= $col;
+}
+=head2 ($data, $fnm)= $objreg->ta_retrieve ($key, $create)
+Retrieve and return data referenced by $key and returns path name of
+that file.  If $create is true, the path leading to that file is created,
+when it is not already present.
+=cut
+sub ta_retrieve
+{
+  my $obj= shift;
+  my $id_str= shift;
+  my $create= shift;
+    my @r= $obj->{'hasher'}->check_file ($id_str, $create);
+    # print "id_str=[$id_str] r=", main::Dumper (\@r);
+    my ($rc, $path)= @r;
+    my $fnm= $path . '/' . $id_str . '.json';
+    # print "description: [$fnm]\n";
+    my @st= stat ($fnm);
+    return (undef, $fnm) unless (@st);
+    my $all_reg= TA::Util::slurp_file ($fnm, 'json');
+  return ($all_reg, $fnm);
+}
+=head1 INTERNAL FUNCTIONS
+=head2 ($entry, $index)= ta_match ($data, $search)
+Select first $entry from $data that matches hash ref $search.
+=cut
+sub ta_match
+{
+  my $all_reg= shift;
+  my $search= shift;
+  my @k= keys $search;
+  my @e= @{$all_reg->{'entries'}};
+  REG: for (my $i= 0; $i <= $#e; $i++)
+  {
+    my $reg= $e[$i];
+    foreach my $k (@k)
+    {
+      next REG unless ($reg->{$k} eq $search->{$k});
+    }
+    print "found match: ", main::Dumper ($reg);
+    return ($reg, $i);
+  }
+  return (undef, 0);
+}
 1;
 __END__

--- a/textarchive/vlib001.pl
+++ b/textarchive/vlib001.pl
@@ -77,7 +77,7 @@ unless (defined ($project))
 }
 # &usage ('no store specified') unless (defined ($store));
-my $objreg= new TA::ObjReg ('project' => $project, 'store' => $store);
+my $objreg= new TA::ObjReg ('project' => $project, 'store' => $store, 'key' => 'md5');
 &usage ('no config found') unless (defined ($objreg));
 print "objreg: ", Dumper ($objreg) if ($DEBUG || $STOP);
 exit if ($STOP);
@@ -171,48 +171,39 @@ sub process_file
    my $xdata=
    {
+      # 'key' => $md5, 'key_type' => 'md5',
+      'store' => $store,
      'c_size' => $size, 'path' => $path, 'md5' => $md5,
      'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1]
    };
-    my $reg= $objreg->lookup ($md5);
+    my $search= { 'md5' => $md5, 'store' => $store, 'path' => $path };
+    my $reg= $objreg->lookup ($search);
+    print __LINE__, " reg: ", Dumper ($reg);
    my @upd;
    my $ydata;   # pointer to file catalog data within main datastructure
    if (defined ($reg))
    { # we know something about this key value but not in respect to the repository at hand
      # print "json read: ", main::Dumper ($reg);
-      my $sb;
-      if (defined ($sb= $reg->{'store'}->{$store})
-          && exists ($sb->{'path'})
-          && defined ($ydata= $sb->{'path'}->{$path}) # we need to keep track of the path as well otherwise we can't handly duplicates in the same store
-          && $st[7] == $ydata->{'fs_size'}
-          && $st[9] == $ydata->{'mtime'}
-        )
-      { # compare stored and current information and update if necessary
        foreach my $an (keys %$xdata)
        {
-          unless ($ydata->{$an} eq $xdata->{$an})
+          unless ($reg->{$an} eq $xdata->{$an})
          {
-            $ydata->{$an}= $xdata->{$an};
+            $reg->{$an}= $xdata->{$an};
            push (@upd, $an);
          }
        }
    }
    else
-      {
-        $reg->{'store'}->{$store}->{'path'}->{$path}= $ydata= $xdata;
-        push (@upd, 'store upd');
-      }
-    }
-    else
    { # this key is new, so we simply place what we know in the newly created registry item
-      $reg= { 'key' => $md5, 'key_type' => 'md5', 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } };
+      # $reg= { 'key' => $md5, 'key_type' => 'md5', 'store' => { $store => $ydata= $xdata } };
+      $reg= $xdata;
      push (@upd, 'new key');
    }
    # fill in some more information about that file
-    if (!exists ($ydata->{'fileinfo'}) || $refresh_fileinfo)
+    if (!exists ($reg->{'fileinfo'}) || $refresh_fileinfo)
    {
      my $xpath= $path;
      $xpath=~ s#'#'\\''#g;
@@ -220,7 +211,7 @@ sub process_file
      chop ($res);
      my ($xpath, $fileinfo)= split (/: */, $res, 2);
-      $ydata->{'fileinfo'}= $fileinfo;
+      $reg->{'fileinfo'}= $fileinfo;
      push (@upd, 'fileinfo updated');
    }
@@ -231,7 +222,7 @@ sub process_file
  {
    print "saving (", join ('|', @upd), ")\n";
    # print __LINE__, " reg: ", Dumper ($reg);
-    $objreg->save ($md5, $reg);
+    $objreg->save ($search, $reg);
  }
  (wantarray) ? @upd : \@upd;