From 8177bc6e94c37c9560858a8ad58df9fc858f2967 Mon Sep 17 00:00:00 2001 From: Gerhard Gonter <ggonter@gmail.com> Date: Thu, 5 Sep 2013 14:22:45 +0200 Subject: [PATCH] relocate vlib-related stuff out of TA::ObjReg --- .gitignore | 1 + textarchive/lib/TA/ObjReg.pm | 32 +++++--------- textarchive/vlib001.pl | 86 ++++++++++++++++++++++++------------ 3 files changed, 69 insertions(+), 50 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..90c254d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +@* diff --git a/textarchive/lib/TA/ObjReg.pm b/textarchive/lib/TA/ObjReg.pm index 4196eef..9811e0e 100644 --- a/textarchive/lib/TA/ObjReg.pm +++ b/textarchive/lib/TA/ObjReg.pm @@ -233,27 +233,24 @@ sub load_toc_v1 sub verify_toc { my $reg= shift; + my $check_item= shift; # callback: update TOC item + my $hdr= shift || []; -print "sub verify_toc_v1\n"; - # my $store= shift; this does not make sense, we need to verify verything anyway + my @hdr1= qw(seq found store_count); - # my @stores= (defined ($store)) ? $store : $reg->stores(); my @stores= $reg->stores(); # print "stores: ", join (', ', @stores), "\n"; exit; - my %stores; - - my @extra_fields= (exists ($reg->{'toc_extra_fields'})) ? $reg->{'toc_extra_fields'} : (); - - # TODO: this is specific for vlib001.pl, this should be a passed as code ref! - my @hdr= qw(seq found store_count path_count path mtime fs_size ino); + #### my @extra_fields= (exists ($reg->{'toc_extra_fields'})) ? $reg->{'toc_extra_fields'} : (); my $c= $reg->{'proj_cat'}; + # pick up current tocs to see if the sequence needs to be updated + my %stores; foreach my $s (@stores) { my $f= $c . '/' . $s . 
'.toc.json'; my $t= TA::Util::slurp_file ($f, 'json'); - $t= {} unless (defined ($t)); # we need an empty toc if there is none yet + $t= {} unless (defined ($t)); # we need an empty toc if there is no toc yet $stores{$s}= $t; } @@ -309,18 +306,10 @@ print "sub verify_toc_v1\n"; } $ster->{'found'}= 1; - # TODO: this is specific for vlib001.pl, this should be a passed as code ref! my $jj= $j->{'store'}->{$store}; - my @paths= keys %{$jj->{'path'}}; - $ster->{'path_count'}= scalar @paths; $ster->{'store_count'}= scalar @i_stores; - my $p1= shift (@paths); - my $px1= $jj->{'path'}->{$p1}; - $ster->{'path'}= $p1; - $ster->{'mtime'}= $px1->{'mtime'}; - $ster->{'fs_size'}= $px1->{'fs_size'}; - $ster->{'ino'}= $px1->{'ino'}; + &$check_item($j, $jj, $ster) if (defined ($check_item)); } } @@ -347,13 +336,12 @@ print "sub verify_toc_v1\n"; print STDERR "cant save toc file '$f'\n"; next; } - print TOC join (';', 'key', @hdr), "\n"; + print TOC join (';', 'key', @hdr1, @$hdr), "\n"; foreach my $k (keys %$ss) { my $r= $ss->{$k}; - # TODO: this is specific for vlib001.pl, this should be a passed as code ref! - print TOC join (';', $k, map { $r->{$_} } @hdr), "\n"; + print TOC join (';', $k, map { $r->{$_} } @hdr1, @$hdr), "\n"; } close (TOC); diff --git a/textarchive/vlib001.pl b/textarchive/vlib001.pl index 4dfb713..9385716 100755 --- a/textarchive/vlib001.pl +++ b/textarchive/vlib001.pl @@ -6,17 +6,19 @@ =head1 USAGE - vlib001.pl -p project-name -s store-name + vlib001.pl -p project-name [-s store-name] [parameters]* options: * -p <project-name> * -s <store-name> + * --verify ... verify/create TOC structures * --fileinfo ... refresh file info + * --lookup ... lookup for hashes given as parameters * -D ... increase debug level =head1 DESCRIPTION -updates the _catalog file using md5cat scripts and registers the files in +Updates the _catalog file using md5cat scripts and registers the files in the project's object registry. 
The environment variable TABASE must point to the directory where the object registry's configuration is stored. @@ -76,17 +78,17 @@ exit if ($STOP); if ($op_mode eq 'refresh') { -my $catalog= $objreg->{'cfg'}->{'catalog'}; -&usage ('no catalog found in config') unless (defined ($catalog)); + my $catalog= $objreg->{'cfg'}->{'catalog'}; + &usage ('no catalog found in config') unless (defined ($catalog)); -my $stores_p= $objreg->{'cfg'}->{'stores'}; -my $store_cfg= $stores_p->{$store}; -unless (defined ($store_cfg)) -{ - print "no store config found for '$store', check these: ", Dumper ($stores_p); - exit (-2); -} -print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG); + my $stores_p= $objreg->{'cfg'}->{'stores'}; + my $store_cfg= $stores_p->{$store}; + unless (defined ($store_cfg)) + { + print "no store config found for '$store', check these: ", Dumper ($stores_p); + exit (-2); + } + print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG); if ($catalog->{'format'} eq 'md5cat') { @@ -95,7 +97,9 @@ print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG); } elsif ($op_mode eq 'verify') { - $objreg->verify_toc ($store); + my @hdr= qw(path_count path mtime fs_size ino); + + $objreg->verify_toc (\&verify_toc_item, \@hdr); } elsif ($op_mode eq 'lookup') { @@ -159,14 +163,18 @@ sub process_file return undef; } - my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1] }; + my $xdata= + { + 'c_size' => $size, 'path' => $path, 'md5' => $md5, + 'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1] + }; my $reg= $objreg->lookup ($md5); my @upd; my $ydata; # pointer to file catalog data within main datastructure if (defined ($reg)) - { # we know something about this hash value but not in respect to the repository at hand + { # we know something about this key value but not in respect to the repository at hand # print "json read: ", main::Dumper ($reg); my $sb; if (defined ($sb= $reg->{'store'}->{$store}) @@ -175,7 +183,7 @@ sub 
process_file && $st[7] == $ydata->{'fs_size'} && $st[9] == $ydata->{'mtime'} ) - { # TODO: compare stored and current information + { # compare stored and current information and update if necessary foreach my $an (keys %$xdata) { unless ($ydata->{$an} eq $xdata->{$an}) @@ -187,22 +195,14 @@ sub process_file } else { - # print "st: fs_size(7)=[$st[7]] mtime(9)=[$st[9]]\n"; - # print "ydata: ", Dumper ($ydata); - # print "xdata: ", Dumper ($xdata); - $reg->{'store'}->{$store}->{'path'}->{$path}= $ydata= $xdata; - # print __LINE__, " reg: ", Dumper ($reg); - # print "ydata: ", Dumper ($ydata); - # print "xdata: ", Dumper ($xdata); - push (@upd, 'store upd'); } } else - { + { # this key is new, so we simply place what we know in the newly created registry item $reg= { 'key' => $md5, 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } }; - push (@upd, 'new md5'); + push (@upd, 'new key'); } # fill in some more information about that file @@ -212,10 +212,12 @@ sub process_file $xpath=~ s#'#'\\''#g; my $res= `/usr/bin/file '$xpath'`; chop ($res); + my ($xpath, $fileinfo)= split (/: */, $res, 2); $ydata->{'fileinfo'}= $fileinfo; push (@upd, 'fileinfo updated'); } + # TODO: some more information would probably be nice as well # e.g. mp3info or stuff @@ -229,6 +231,24 @@ sub process_file (wantarray) ? 
@upd : \@upd; } +# callback function for TA::ObjReg::verify_toc +sub verify_toc_item +{ + my $j= shift; # currently not used, that's the complete json entry for this item + my $jj= shift; # this is just the part referring to the store currently processed + my $ster= shift; # TOC item to be updated + + my @paths= keys %{$jj->{'path'}}; + $ster->{'path_count'}= scalar @paths; + my $p1= shift (@paths); + my $px1= $jj->{'path'}->{$p1}; + + $ster->{'path'}= $p1; + $ster->{'mtime'}= $px1->{'mtime'}; + $ster->{'fs_size'}= $px1->{'fs_size'}; + $ster->{'ino'}= $px1->{'ino'}; +} + __END__ =head1 TODO @@ -238,9 +258,19 @@ __END__ * The project's config contains all the information that is needed to locate all the stores on a given machine, so there should be an option that updates everything. - * specifing the store should be optional + * specifying the store should be optional. + +=head2 misc + * maybe it makes sense to offer an option to perform backups along the way, for instance, when the store is actually a git repository. - * Also, checking the VCS status might (if not committing updates) + * also, checking the VCS status (if not committing updates) might be useful. + * other hashing algorithms: + * currently we use md5 for hashing, however, this should + be fairly simple to adapt for sha1, sha256 or something else. + * possibly, it makes sense to allow several hashing algorithms + in parallel, however, then it might be a good idea to store + file metadata in one place and let other hashes point to that + place. -- GitLab