Skip to content
Snippets Groups Projects
Commit 10355f7c authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

restructured data model to be compatible with MongoDB which was

also added as a possible backend for storage.
parent 57a48fcb
No related branches found
No related tags found
No related merge requests found
...@@ -18,7 +18,8 @@ use JSON; ...@@ -18,7 +18,8 @@ use JSON;
use File::Find; use File::Find;
use TA::Util; use TA::Util;
use TA::Hasher;
my %plugins_loaded= ();
sub new sub new
{ {
...@@ -77,13 +78,35 @@ sub get_project ...@@ -77,13 +78,35 @@ sub get_project
# print "proj_cfg: ", main::Dumper ($proj_cfg); # print "proj_cfg: ", main::Dumper ($proj_cfg);
# TODO: check authorization (no need, if local, but for client-server, we need something # TODO: check authorization (no need, if local, but for client-server, we need something
my $be= $proj_cfg->{'backend'};
unless (exists ($plugins_loaded{$be}))
{
if ($be eq 'TA::Hasher') { require TA::Hasher; }
elsif ($be eq 'TA::UrxnBla') { require TA::UrxnBla; }
elsif ($be eq 'MongoDB') { require MongoDB; }
else
{
print "ATTN: unknown backend '$be'\n";
return undef;
}
$plugins_loaded{$be}= 1;
}
if ($be eq 'TA::Hasher')
{
# initialize hasher # initialize hasher
my $base_dir= $obj->{'proj_cfg_dir'}; my $ta= $proj_cfg->{'TA::Hasher'};
$obj->{'proj_cat'}= my $proj_cat= join ('/', $base_dir, 'cat'); $ta->{'name'}= 'file';
$obj->{'hasher'}= my $hasher= new TA::Hasher ('algorithm' => $proj_cfg->{'algorithm'}, 'pfx' => $proj_cat, 'name' => 'file'); $ta->{'pfx'}= $obj->{'proj_cat'}= my $proj_cat= join ('/', $proj_cfg_dir, 'cat');
$obj->{'hasher'}= my $hasher= new TA::Hasher (%$ta);
}
elsif ($be eq 'MongoDB')
{
$obj->connect_MongoDB ($proj_cfg);
}
# get sequence number # get sequence number
$obj->{'seq_file'}= my $fnm_seq= join ('/', $base_dir, 'sequence.json'); $obj->{'seq_file'}= my $fnm_seq= join ('/', $proj_cfg_dir, 'sequence.json');
$obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json'); $obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json');
# print "seq: ", main::Dumper ($seq); # print "seq: ", main::Dumper ($seq);
unless (defined ($seq)) unless (defined ($seq))
...@@ -121,20 +144,26 @@ returns that keys value, if present, otherwise, undef. ...@@ -121,20 +144,26 @@ returns that keys value, if present, otherwise, undef.
sub lookup sub lookup
{ {
my $obj= shift; my $obj= shift;
my $id_str= shift; my $search= shift;
# print "lookup [$id_str]\n"; my $be= $obj->{'cfg'}->{'backend'};
my @r= $obj->{'hasher'}->check_file ($id_str, 0); print "lookup [$search] be=[$be]\n";
# print "id_str=[$id_str] r=", main::Dumper (\@r); print main::Dumper ($search);
my ($rc, $path)= @r;
my $fnm= $path . '/' . $id_str . '.json';
# print "description: [$fnm]\n";
my @st= stat ($fnm); my $reg;
return undef unless (@st); if ($be eq 'TA::Hasher')
{
my $reg= TA::Util::slurp_file ($fnm, 'json'); my $id_str= $search->{$obj->{'key'}};
my ($all_reg, $fnm)= $obj->ta_retrieve ($id_str, 0);
print "fnm=[$fnm] all_reg: ", main::Dumper ($all_reg);
return undef unless (defined ($all_reg));
($reg)= ta_match ($all_reg, $search);
}
elsif ($be eq 'MongoDB')
{
$reg= $obj->{'_col'}->find_one ( $search );
}
# print "reg: ", main::Dumper ($reg);
return $reg; return $reg;
} }
...@@ -142,26 +171,55 @@ sub lookup ...@@ -142,26 +171,55 @@ sub lookup
sub save sub save
{ {
my $obj= shift; my $obj= shift;
my $id_str= shift; my $search= shift;
my $new_reg= shift; my $new_reg= shift;
print "save [$id_str]\n"; my $be= $obj->{'cfg'}->{'backend'};
my @r= $obj->{'hasher'}->check_file ($id_str, 1); print "save [$new_reg] be=[$be]\n";
# print "id_str=[$id_str] r=", main::Dumper (\@r); print main::Dumper ($new_reg);
my ($rc, $path)= @r; if ($be eq 'TA::Hasher')
{
my $id_str= $search->{$obj->{'key'}};
my ($all_reg, $fnm)= $obj->ta_retrieve ($id_str, 1);
my $fnm= $path . '/' . $id_str . '.json'; =begin comment
# print "description: [$fnm]\n";
my @st= stat ($fnm); my @st= stat ($fnm);
unless (@st) unless (@st)
{ # TODO: increment sequence and toc { # TODO: increment sequence and toc
} }
my $j= encode_json ($new_reg); =end comment
# print "generated json: [$j]\n"; =cut
if (defined ($all_reg))
{
my ($reg, $idx)= ta_match ($all_reg, $search);
if (defined ($reg))
{
$all_reg->{'entries'}->[$idx]= $new_reg;
}
else
{
push (@{$all_reg->{'entries'}}, $new_reg);
}
}
else
{
$all_reg= { 'key' => $id_str, 'entries' => [ $new_reg ] }
}
my $j= encode_json ($all_reg);
print "fnm=[$fnm]\n";
print "generated json: [$j]\n";
open (J, '>:utf8', $fnm); print J $j; close (J); open (J, '>:utf8', $fnm); print J $j; close (J);
} }
elsif ($be eq 'MongoDB')
{
print "new_reg: ", main::Dumper ($new_reg);
$obj->{'_col'}->insert ($new_reg);
}
}
=head1 TOC: Table of Contents =head1 TOC: Table of Contents
...@@ -385,7 +443,99 @@ sub next_seq ...@@ -385,7 +443,99 @@ sub next_seq
$seq->{'seq'}; $seq->{'seq'};
} }
# =head1 INTERNAL FUNCTIONS =head1 INTERNAL METHODS
=head2 $mongo_collection= $obj->connect_MongoDB ($config);
Connect to MongoDB with connection parameters in hash_ref $config and
returns the MongoDB collection object.
$config needs the following attribues: host, db, user, pass, collection
=cut
sub connect_MongoDB
{
my $obj= shift;
my $cfg= shift;
my $cmm= $cfg->{'MongoDB'};
print "cmm: ", main::Dumper ($cmm);
my $col;
eval
{
my $connection= MongoDB::Connection->new(host => $cmm->{'host'});
$connection->authenticate($cmm->{'db'}, $cmm->{'user'}, $cmm->{'pass'});
my $db= $connection->get_database($cmm->{'db'});
$col= $db->get_collection($cmm->{'collection'});
print "col: [$col]\n";
};
if ($@)
{
print "ATTN: can't connect to MongoDB ", (join ('/', map { $cmm->{$_} } qw(host user collection))), "\n";
return undef;
}
return $obj->{'_col'}= $col;
}
=head2 ($data, $fnm)= $objreg->ta_retrieve ($key, $create)
Retrieve and return data referenced by $key and returns path name of
that file. If $create is true, the path leading to that file is created,
when it is not already present.
=cut
sub ta_retrieve
{
my $obj= shift;
my $id_str= shift;
my $create= shift;
my @r= $obj->{'hasher'}->check_file ($id_str, $create);
# print "id_str=[$id_str] r=", main::Dumper (\@r);
my ($rc, $path)= @r;
my $fnm= $path . '/' . $id_str . '.json';
# print "description: [$fnm]\n";
my @st= stat ($fnm);
return (undef, $fnm) unless (@st);
my $all_reg= TA::Util::slurp_file ($fnm, 'json');
return ($all_reg, $fnm);
}
=head1 INTERNAL FUNCTIONS
=head2 ($entry, $index)= ta_match ($data, $search)
Select first $entry from $data that matches hash ref $search.
=cut
sub ta_match
{
my $all_reg= shift;
my $search= shift;
my @k= keys $search;
my @e= @{$all_reg->{'entries'}};
REG: for (my $i= 0; $i <= $#e; $i++)
{
my $reg= $e[$i];
foreach my $k (@k)
{
next REG unless ($reg->{$k} eq $search->{$k});
}
print "found match: ", main::Dumper ($reg);
return ($reg, $i);
}
return (undef, 0);
}
1; 1;
__END__ __END__
......
...@@ -77,7 +77,7 @@ unless (defined ($project)) ...@@ -77,7 +77,7 @@ unless (defined ($project))
} }
# &usage ('no store specified') unless (defined ($store)); # &usage ('no store specified') unless (defined ($store));
my $objreg= new TA::ObjReg ('project' => $project, 'store' => $store); my $objreg= new TA::ObjReg ('project' => $project, 'store' => $store, 'key' => 'md5');
&usage ('no config found') unless (defined ($objreg)); &usage ('no config found') unless (defined ($objreg));
print "objreg: ", Dumper ($objreg) if ($DEBUG || $STOP); print "objreg: ", Dumper ($objreg) if ($DEBUG || $STOP);
exit if ($STOP); exit if ($STOP);
...@@ -171,48 +171,39 @@ sub process_file ...@@ -171,48 +171,39 @@ sub process_file
my $xdata= my $xdata=
{ {
# 'key' => $md5, 'key_type' => 'md5',
'store' => $store,
'c_size' => $size, 'path' => $path, 'md5' => $md5, 'c_size' => $size, 'path' => $path, 'md5' => $md5,
'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1] 'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1]
}; };
my $reg= $objreg->lookup ($md5); my $search= { 'md5' => $md5, 'store' => $store, 'path' => $path };
my $reg= $objreg->lookup ($search);
print __LINE__, " reg: ", Dumper ($reg);
my @upd; my @upd;
my $ydata; # pointer to file catalog data within main datastructure my $ydata; # pointer to file catalog data within main datastructure
if (defined ($reg)) if (defined ($reg))
{ # we know something about this key value but not in respect to the repository at hand { # we know something about this key value but not in respect to the repository at hand
# print "json read: ", main::Dumper ($reg); # print "json read: ", main::Dumper ($reg);
my $sb;
if (defined ($sb= $reg->{'store'}->{$store})
&& exists ($sb->{'path'})
&& defined ($ydata= $sb->{'path'}->{$path}) # we need to keep track of the path as well otherwise we can't handly duplicates in the same store
&& $st[7] == $ydata->{'fs_size'}
&& $st[9] == $ydata->{'mtime'}
)
{ # compare stored and current information and update if necessary
foreach my $an (keys %$xdata) foreach my $an (keys %$xdata)
{ {
unless ($ydata->{$an} eq $xdata->{$an}) unless ($reg->{$an} eq $xdata->{$an})
{ {
$ydata->{$an}= $xdata->{$an}; $reg->{$an}= $xdata->{$an};
push (@upd, $an); push (@upd, $an);
} }
} }
} }
else else
{
$reg->{'store'}->{$store}->{'path'}->{$path}= $ydata= $xdata;
push (@upd, 'store upd');
}
}
else
{ # this key is new, so we simply place what we know in the newly created registry item { # this key is new, so we simply place what we know in the newly created registry item
$reg= { 'key' => $md5, 'key_type' => 'md5', 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } }; # $reg= { 'key' => $md5, 'key_type' => 'md5', 'store' => { $store => $ydata= $xdata } };
$reg= $xdata;
push (@upd, 'new key'); push (@upd, 'new key');
} }
# fill in some more information about that file # fill in some more information about that file
if (!exists ($ydata->{'fileinfo'}) || $refresh_fileinfo) if (!exists ($reg->{'fileinfo'}) || $refresh_fileinfo)
{ {
my $xpath= $path; my $xpath= $path;
$xpath=~ s#'#'\\''#g; $xpath=~ s#'#'\\''#g;
...@@ -220,7 +211,7 @@ sub process_file ...@@ -220,7 +211,7 @@ sub process_file
chop ($res); chop ($res);
my ($xpath, $fileinfo)= split (/: */, $res, 2); my ($xpath, $fileinfo)= split (/: */, $res, 2);
$ydata->{'fileinfo'}= $fileinfo; $reg->{'fileinfo'}= $fileinfo;
push (@upd, 'fileinfo updated'); push (@upd, 'fileinfo updated');
} }
...@@ -231,7 +222,7 @@ sub process_file ...@@ -231,7 +222,7 @@ sub process_file
{ {
print "saving (", join ('|', @upd), ")\n"; print "saving (", join ('|', @upd), ")\n";
# print __LINE__, " reg: ", Dumper ($reg); # print __LINE__, " reg: ", Dumper ($reg);
$objreg->save ($md5, $reg); $objreg->save ($search, $reg);
} }
(wantarray) ? @upd : \@upd; (wantarray) ? @upd : \@upd;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment