diff --git a/textarchive/lib/TA/ObjReg.pm b/textarchive/lib/TA/ObjReg.pm index cb76270cabdc979a8fdeacfd9d456b9e6b758b39..7f8c697f7356d6ddd031d37c761a461340ea281d 100644 --- a/textarchive/lib/TA/ObjReg.pm +++ b/textarchive/lib/TA/ObjReg.pm @@ -83,7 +83,6 @@ sub get_project $obj->{'proj_cat'}= my $proj_cat= join ('/', $base_dir, 'cat'); $obj->{'hasher'}= my $hasher= new TA::Hasher ('algorithm' => $proj_cfg->{'algorithm'}, 'pfx' => $proj_cat, 'name' => 'file'); - # get sequence number $obj->{'seq_file'}= my $fnm_seq= join ('/', $base_dir, 'sequence.json'); $obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json'); @@ -193,7 +192,7 @@ if $store is undef, returns a toc of all stores =cut -sub load_toc +sub load_toc_v1 { my $reg= shift; my $store= shift; @@ -236,6 +235,7 @@ sub verify_toc { my $reg= shift; +print "sub verify_toc_v1\n"; # my $store= shift; this does not make sense, we need to verify verything anyway # my @stores= (defined ($store)) ? $store : $reg->stores(); @@ -243,6 +243,11 @@ sub verify_toc # print "stores: ", join (', ', @stores), "\n"; exit; my %stores; + my @extra_fields= (exists ($reg->{'toc_extra_fields'})) ? $reg->{'toc_extra_fields'} : (); + + # TODO: this is specific for vlib001.pl, this should be a passed as code ref! + my @hdr= qw(seq found paths path mtime fs_size ino); + my $c= $reg->{'proj_cat'}; # pick up current tocs to see if the sequence needs to be updated foreach my $s (@stores) @@ -258,6 +263,127 @@ sub verify_toc sub item_files { next if ($_ =~ /\.toc\.json$/); + next if ($_ =~ /\.toc\.csv$/); + next unless ($_ =~ /\.json$/ && -f (my $x= $File::Find::name)); + + # print "file=[$_] path=[$x]\n"; + + $items{$_}= [ $x ]; + } + + my $d= $reg->{'proj_cat'}; + print "proj_cat=[$d]\n"; + find (\&item_files, $d); + + # print "items: ", main::Dumper (\%items); + foreach my $item (keys %items) + { + my $p= $items{$item}; + my $j= TA::Util::slurp_file ($p->[0], 'json'); + # print "[$p->[0]] j: ", main::Dumper ($j); + my @i_stores= keys %{$j->{'store'}}; + my $key= $j->{'key'}; + # print join (' ', $key, @i_stores), "\n"; + + # search for a key's sequence number in all known stores, not only + # in those that are *currently* used for this store + my $seq; + S1: foreach my $store (@stores) + { + if (exists ($stores{$store}->{$key})) + { + $seq= $stores{$store}->{$key}->{'seq'}; + last S1; + } + } + + S2: foreach my $store (@i_stores) + { + my $ster; # store's toc entry record ;) + unless (defined ($ster= $stores{$store}->{$key})) + { + $ster= $stores{$store}->{$key}= + { + 'seq' => $reg->next_seq(), + 'upd' => time (), + }; + } + $ster->{'found'}= 1; + + # TODO: this is specific for vlib001.pl, this should be a passed as code ref! + my $jj= $j->{'store'}->{$store}; + my @paths= keys %{$jj->{'path'}}; + $ster->{'path_count'}= scalar @paths; + my $p1= shift (@paths); + my $px1= $jj->{'path'}->{$p1}; + + $ster->{'path'}= $p1; + $ster->{'mtime'}= $px1->{'mtime'}; + $ster->{'fs_size'}= $px1->{'fs_size'}; + $ster->{'ino'}= $px1->{'ino'}; + } + } + + print "finishing\n"; + # save all tocs now + foreach my $s (@stores) + { + my $ss= $stores{$s}; + + my $f= $c . '/' . $s . '.toc.json'; + print "saving toc to [$f]\n"; + unless (open (TOC, '>:utf8', $f)) + { + print STDERR "cant save toc file '$f'\n"; + next; + } + print TOC encode_json ($ss), "\n"; + close (TOC); + + $f= $c . '/' . $s . '.toc.csv'; + print "saving toc to [$f]\n"; + unless (open (TOC, '>:utf8', $f)) + { + print STDERR "cant save toc file '$f'\n"; + next; + } + print TOC join (';', 'key', @hdr), "\n"; + + foreach my $k (keys %$ss) + { + my $r= $ss->{$k}; + # TODO: this is specific for vlib001.pl, this should be a passed as code ref! + print TOC join (';', $k, map { $r->{$_} } @hdr), "\n"; + } + + close (TOC); + } + + # TODO: return something meaningful +} + +sub verify_toc_v2 +{ + my $reg= shift; + + # my $store= shift; this does not make sense, we need to verify verything anyway + + # my @stores= (defined ($store)) ? $store : $reg->stores(); + my @stores= $reg->stores(); + # print "stores: ", join (', ', @stores), "\n"; exit; + my %stores; + + my $c= $reg->{'proj_cfg_dir'}; + # pick up current tocs to see if the sequence needs to be updated + my $f= $c . '/' . 'TOC.csv'; + my ($toc_hdr, $toc_data)= TA::Util::slurp_file ($f, 'csv'); + $toc_data= [] unless (defined ($toc_data)); # we need an empty toc if there is none yet + + my %items; + sub item_files_2 + { + next if ($_ =~ /\.toc\.json$/); + # next if ($_ eq 'TOC\.csv'); my $x; next unless ($_ =~ /\.json$/ && -f ($x= $File::Find::name)); @@ -274,11 +400,13 @@ sub verify_toc { my $p= $items{$item}; my $j= TA::Util::slurp_file ($p->[0], 'json'); - # print "j: ", main::Dumper ($j); + print "j: ", main::Dumper ($j); my @i_stores= keys %{$j->{'store'}}; my $key= $j->{'key'}; print join (' ', $key, @i_stores), "\n"; +=begin comment + # search for a key's sequence number in all known stores, not only # in those that are *currently* used for this store my $seq; @@ -304,10 +432,17 @@ sub verify_toc } $ster->{'found'}= 1; } + +=end comment +=cut + } print "finishing\n"; # save all tocs now + +=begin comment + foreach my $s (@stores) { my $f= $c . '/' . $s . '.toc.json'; @@ -321,9 +456,15 @@ sub verify_toc close (TOC); } + +=end comment +=cut + # TODO: return something meaningful } +# *verify_toc= *verify_toc_v1; + =head1 sequence number =head2 $reg->next_seq() diff --git a/textarchive/lib/TA/Util.pm b/textarchive/lib/TA/Util.pm index 69e8e4f73126c5e4c5441a30e4e883b38d0bc4c3..a0df6eb02118355028dc93c986598eeebf4eefc4 100644 --- a/textarchive/lib/TA/Util.pm +++ b/textarchive/lib/TA/Util.pm @@ -37,6 +37,16 @@ sub slurp_file my $str= join ('', @lines); return decode_json ($str); } + elsif ($format eq 'csv') + { + my $hdr= split (';', shift (@lines)); + my @d; + while (my $l= shift (@lines)) + { + push (@d, split (';', $l)); + } + return [$hdr, \@d]; + } print STDERR "unknown slurp format '$format'\n"; return undef; diff --git a/textarchive/vlib001.pl b/textarchive/vlib001.pl index 19e818f38a7184deba782fefeecd67bde0809040..4dfb713bd230ca87276e1a936e23c6e0d92a3fe2 100755 --- a/textarchive/vlib001.pl +++ b/textarchive/vlib001.pl @@ -159,7 +159,7 @@ sub process_file return undef; } - my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7] }; + my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1] }; my $reg= $objreg->lookup ($md5); @@ -173,9 +173,17 @@ sub process_file && exists ($sb->{'path'}) && defined ($ydata= $sb->{'path'}->{$path}) # we need to keep track of the path as well otherwise we can't handly duplicates in the same store && $st[7] == $ydata->{'fs_size'} - && $st[9] == $ydata->{'mtime'} - ) + && $st[9] == $ydata->{'mtime'} + ) { # TODO: compare stored and current information + foreach my $an (keys %$xdata) + { + unless ($ydata->{$an} eq $xdata->{$an}) + { + $ydata->{$an}= $xdata->{$an}; + push (@upd, $an); + } + } } else { @@ -188,7 +196,7 @@ sub process_file # print "ydata: ", Dumper ($ydata); # print "xdata: ", Dumper ($xdata); - push (@upd, 'store upd'); + push (@upd, 'store upd'); } } else