diff --git a/textarchive/lib/TA/ObjReg.pm b/textarchive/lib/TA/ObjReg.pm index 98a2813fcc46b69fba8071a7716877c03882f0c3..e851ad49e6aa8eb8924732aa168f0844896a5077 100644 --- a/textarchive/lib/TA/ObjReg.pm +++ b/textarchive/lib/TA/ObjReg.pm @@ -64,45 +64,44 @@ sub get_project $proj_cfg; } - -sub add +sub lookup { my $obj= shift; my $id_str= shift; - my $max_age= shift; # TODO: add code to skip processing if registry data was not updated; but this is tricky because it really depends on the subproject... - my $new_reg= shift; - - my @r= $obj->{'hasher'}->check_file ($id_str, 1); - print "id_str=[$id_str] r=", main::Dumper (\@r); + print "lookup [$id_str]\n"; + my @r= $obj->{'hasher'}->check_file ($id_str, 0); + # print "id_str=[$id_str] r=", main::Dumper (\@r); my ($rc, $path)= @r; my $fnm= $path . '/' . $id_str . '.json'; print "description: [$fnm]\n"; - my $reg; - my $upd= 0; - my @st= stat ($fnm); + return undef unless (defined (@st)); - if (@st) - { - $reg= TA::Util::slurp_file ($fnm, 'json'); - print "json read: ", main::Dumper ($reg); - } - else - { - $reg= $new_reg; - $upd= 1; - } + my $reg= TA::Util::slurp_file ($fnm, 'json'); + + return $reg; +} - if ($upd) - { - print "reg: ", main::Dumper ($reg); - my $j= encode_json ($reg); - print "generated json: [$j]\n"; - open (J, '>:utf8', $fnm); print J $j; close (J); - } +sub save +{ + my $obj= shift; + my $id_str= shift; + my $new_reg= shift; + + print "save [$id_str]\n"; + my @r= $obj->{'hasher'}->check_file ($id_str, 1); + # print "id_str=[$id_str] r=", main::Dumper (\@r); + my ($rc, $path)= @r; + + my $fnm= $path . '/' . $id_str . '.json'; + print "description: [$fnm]\n"; + + my $j= encode_json ($new_reg); + print "generated json: [$j]\n"; + open (J, '>:utf8', $fnm); print J $j; close (J); } # =head1 INTERNAL FUNCTIONS diff --git a/textarchive/vlib001.pl b/textarchive/vlib001.pl index 4558af61fc1a8b5c0a9d5abea5698d976e077606..9159da15d4f5092bc3cbe341182fd99d00c00f21 100755 --- a/textarchive/vlib001.pl +++ b/textarchive/vlib001.pl @@ -6,12 +6,13 @@ use Data::Dumper; $Data::Dumper::Indent= 1; use TA::ObjReg; -use TA::Hasher; -use TA::Util; +# use TA::Hasher; +# use TA::Util; my @PAR= (); my $project; my $subproject; +my $refresh_fileinfo= 0; while (my $arg= shift (@ARGV)) { @@ -20,6 +21,7 @@ while (my $arg= shift (@ARGV)) { if ($arg eq '--project') { $project= shift (@ARGV); } elsif ($arg eq '--subproject') { $subproject= shift (@ARGV); } + elsif ($arg eq '--fileinfo') { $refresh_fileinfo= 1; } } elsif ($arg =~ /^-/) { @@ -49,16 +51,17 @@ unless (defined ($catalog)) if ($catalog->{'format'} eq 'md5cat') { - refresh_md5cat ($objreg, 'subproject' => $subproject); + refresh_md5cat ($objreg, $subproject); } -print "objreg: (after refresh)", Dumper ($objreg); +# print "objreg: (after refresh)", Dumper ($objreg); exit (0); sub refresh_md5cat { my $objreg= shift; + my $subproject= shift; my %extra= @_; # my $catalog= $objreg->{'cfg'}->{'catalog'}; @@ -67,13 +70,62 @@ sub refresh_md5cat # my $hasher= $objreg->{'hasher'}; open (CAT, '_catalog') or die "cant read catalog"; - while (<CAT>) + CAT: while (<CAT>) { chop; my ($md5, $xf, $size, $path)= split (' ', $_, 4); + $path=~ s#^\.\/##; print "md5=[$md5] size=[$size] path=[$path]\n"; - $objreg->add ($md5, 0, { 'md5' => $md5, 'size' => $size, 'path' => $path, %extra } ); + my @st= stat ($path); + + unless (@st) + { + print STDERR "ATTN: could not stat file '$path'\n"; + next CAT; + } + + my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7] }; + # $objreg->add ($md5, 0, { 'md5' => $md5, $subproject => { 'size' => $size, 'path' => $path, %extra } } ); + + my $reg= $objreg->lookup ($md5); + print "json read: ", main::Dumper ($reg); + + my $upd= 0; + my $ydata; # pointer to file catalog data within main datastructure + if (defined ($reg)) + { # we know something about this hash value but not in respect to the repository at hand + if (defined ($ydata= $reg->{$subproject}) + && $st[7] == $ydata->{'fs_size'} + && $st[9] == $ydata->{'mtime'} + ) + { # TODO: compare stored and current information + } + else + { + $ydata= $reg->{$subproject}= $xdata; + $upd= 1; + } + } + else + { + $reg= { 'md5' => $md5, $subproject => $ydata= $xdata }; + $upd= 1; + } + + # fill in some more information about that file + if (!exists ($ydata->{'fileinfo'}) || $refresh_fileinfo) + { + my $res= `/usr/bin/file '$path'`; + chop ($res); + my ($xpath, $fileinfo)= split (/: */, $res, 2); + $ydata->{'fileinfo'}= $fileinfo; + $upd= 1; + } + # TODO: some more information would probably be nice as well + # e.g. mp3info or stuff + + $objreg->save ($md5, $reg) if ($upd); } close (CAT); }