Skip to content
Snippets Groups Projects
Commit a7825b20 authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

play around with toc structure

parent 8c29f939
No related branches found
No related tags found
No related merge requests found
...@@ -83,7 +83,6 @@ sub get_project ...@@ -83,7 +83,6 @@ sub get_project
$obj->{'proj_cat'}= my $proj_cat= join ('/', $base_dir, 'cat'); $obj->{'proj_cat'}= my $proj_cat= join ('/', $base_dir, 'cat');
$obj->{'hasher'}= my $hasher= new TA::Hasher ('algorithm' => $proj_cfg->{'algorithm'}, 'pfx' => $proj_cat, 'name' => 'file'); $obj->{'hasher'}= my $hasher= new TA::Hasher ('algorithm' => $proj_cfg->{'algorithm'}, 'pfx' => $proj_cat, 'name' => 'file');
# get sequence number # get sequence number
$obj->{'seq_file'}= my $fnm_seq= join ('/', $base_dir, 'sequence.json'); $obj->{'seq_file'}= my $fnm_seq= join ('/', $base_dir, 'sequence.json');
$obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json'); $obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json');
...@@ -193,7 +192,7 @@ if $store is undef, returns a toc of all stores ...@@ -193,7 +192,7 @@ if $store is undef, returns a toc of all stores
=cut =cut
sub load_toc sub load_toc_v1
{ {
my $reg= shift; my $reg= shift;
my $store= shift; my $store= shift;
...@@ -236,6 +235,7 @@ sub verify_toc ...@@ -236,6 +235,7 @@ sub verify_toc
{ {
my $reg= shift; my $reg= shift;
print "sub verify_toc_v1\n";
# my $store= shift; this does not make sense, we need to verify verything anyway # my $store= shift; this does not make sense, we need to verify verything anyway
# my @stores= (defined ($store)) ? $store : $reg->stores(); # my @stores= (defined ($store)) ? $store : $reg->stores();
...@@ -243,6 +243,11 @@ sub verify_toc ...@@ -243,6 +243,11 @@ sub verify_toc
# print "stores: ", join (', ', @stores), "\n"; exit; # print "stores: ", join (', ', @stores), "\n"; exit;
my %stores; my %stores;
my @extra_fields= (exists ($reg->{'toc_extra_fields'})) ? $reg->{'toc_extra_fields'} : ();
# TODO: this is specific for vlib001.pl, this should be a passed as code ref!
my @hdr= qw(seq found paths path mtime fs_size ino);
my $c= $reg->{'proj_cat'}; my $c= $reg->{'proj_cat'};
# pick up current tocs to see if the sequence needs to be updated # pick up current tocs to see if the sequence needs to be updated
foreach my $s (@stores) foreach my $s (@stores)
...@@ -258,6 +263,127 @@ sub verify_toc ...@@ -258,6 +263,127 @@ sub verify_toc
sub item_files sub item_files
{ {
next if ($_ =~ /\.toc\.json$/); next if ($_ =~ /\.toc\.json$/);
next if ($_ =~ /\.toc\.csv$/);
next unless ($_ =~ /\.json$/ && -f (my $x= $File::Find::name));
# print "file=[$_] path=[$x]\n";
$items{$_}= [ $x ];
}
my $d= $reg->{'proj_cat'};
print "proj_cat=[$d]\n";
find (\&item_files, $d);
# print "items: ", main::Dumper (\%items);
foreach my $item (keys %items)
{
my $p= $items{$item};
my $j= TA::Util::slurp_file ($p->[0], 'json');
# print "[$p->[0]] j: ", main::Dumper ($j);
my @i_stores= keys %{$j->{'store'}};
my $key= $j->{'key'};
# print join (' ', $key, @i_stores), "\n";
# search for a key's sequence number in all known stores, not only
# in those that are *currently* used for this store
my $seq;
S1: foreach my $store (@stores)
{
if (exists ($stores{$store}->{$key}))
{
$seq= $stores{$store}->{$key}->{'seq'};
last S1;
}
}
S2: foreach my $store (@i_stores)
{
my $ster; # store's toc entry record ;)
unless (defined ($ster= $stores{$store}->{$key}))
{
$ster= $stores{$store}->{$key}=
{
'seq' => $reg->next_seq(),
'upd' => time (),
};
}
$ster->{'found'}= 1;
# TODO: this is specific for vlib001.pl, this should be a passed as code ref!
my $jj= $j->{'store'}->{$store};
my @paths= keys %{$jj->{'path'}};
$ster->{'path_count'}= scalar @paths;
my $p1= shift (@paths);
my $px1= $jj->{'path'}->{$p1};
$ster->{'path'}= $p1;
$ster->{'mtime'}= $px1->{'mtime'};
$ster->{'fs_size'}= $px1->{'fs_size'};
$ster->{'ino'}= $px1->{'ino'};
}
}
print "finishing\n";
# save all tocs now
foreach my $s (@stores)
{
my $ss= $stores{$s};
my $f= $c . '/' . $s . '.toc.json';
print "saving toc to [$f]\n";
unless (open (TOC, '>:utf8', $f))
{
print STDERR "cant save toc file '$f'\n";
next;
}
print TOC encode_json ($ss), "\n";
close (TOC);
$f= $c . '/' . $s . '.toc.csv';
print "saving toc to [$f]\n";
unless (open (TOC, '>:utf8', $f))
{
print STDERR "cant save toc file '$f'\n";
next;
}
print TOC join (';', 'key', @hdr), "\n";
foreach my $k (keys %$ss)
{
my $r= $ss->{$k};
# TODO: this is specific for vlib001.pl, this should be a passed as code ref!
print TOC join (';', $k, map { $r->{$_} } @hdr), "\n";
}
close (TOC);
}
# TODO: return something meaningful
}
sub verify_toc_v2
{
my $reg= shift;
# my $store= shift; this does not make sense, we need to verify verything anyway
# my @stores= (defined ($store)) ? $store : $reg->stores();
my @stores= $reg->stores();
# print "stores: ", join (', ', @stores), "\n"; exit;
my %stores;
my $c= $reg->{'proj_cfg_dir'};
# pick up current tocs to see if the sequence needs to be updated
my $f= $c . '/' . 'TOC.csv';
my ($toc_hdr, $toc_data)= TA::Util::slurp_file ($f, 'csv');
$toc_data= [] unless (defined ($toc_data)); # we need an empty toc if there is none yet
my %items;
sub item_files_2
{
next if ($_ =~ /\.toc\.json$/);
# next if ($_ eq 'TOC\.csv');
my $x; my $x;
next unless ($_ =~ /\.json$/ && -f ($x= $File::Find::name)); next unless ($_ =~ /\.json$/ && -f ($x= $File::Find::name));
...@@ -274,11 +400,13 @@ sub verify_toc ...@@ -274,11 +400,13 @@ sub verify_toc
{ {
my $p= $items{$item}; my $p= $items{$item};
my $j= TA::Util::slurp_file ($p->[0], 'json'); my $j= TA::Util::slurp_file ($p->[0], 'json');
# print "j: ", main::Dumper ($j); print "j: ", main::Dumper ($j);
my @i_stores= keys %{$j->{'store'}}; my @i_stores= keys %{$j->{'store'}};
my $key= $j->{'key'}; my $key= $j->{'key'};
print join (' ', $key, @i_stores), "\n"; print join (' ', $key, @i_stores), "\n";
=begin comment
# search for a key's sequence number in all known stores, not only # search for a key's sequence number in all known stores, not only
# in those that are *currently* used for this store # in those that are *currently* used for this store
my $seq; my $seq;
...@@ -304,10 +432,17 @@ sub verify_toc ...@@ -304,10 +432,17 @@ sub verify_toc
} }
$ster->{'found'}= 1; $ster->{'found'}= 1;
} }
=end comment
=cut
} }
print "finishing\n"; print "finishing\n";
# save all tocs now # save all tocs now
=begin comment
foreach my $s (@stores) foreach my $s (@stores)
{ {
my $f= $c . '/' . $s . '.toc.json'; my $f= $c . '/' . $s . '.toc.json';
...@@ -321,9 +456,15 @@ sub verify_toc ...@@ -321,9 +456,15 @@ sub verify_toc
close (TOC); close (TOC);
} }
=end comment
=cut
# TODO: return something meaningful # TODO: return something meaningful
} }
# *verify_toc= *verify_toc_v1;
=head1 sequence number =head1 sequence number
=head2 $reg->next_seq() =head2 $reg->next_seq()
......
...@@ -37,6 +37,16 @@ sub slurp_file ...@@ -37,6 +37,16 @@ sub slurp_file
my $str= join ('', @lines); my $str= join ('', @lines);
return decode_json ($str); return decode_json ($str);
} }
elsif ($format eq 'csv')
{
my $hdr= split (';', shift (@lines));
my @d;
while (my $l= shift (@lines))
{
push (@d, split (';', $l));
}
return [$hdr, \@d];
}
print STDERR "unknown slurp format '$format'\n"; print STDERR "unknown slurp format '$format'\n";
return undef; return undef;
......
...@@ -159,7 +159,7 @@ sub process_file ...@@ -159,7 +159,7 @@ sub process_file
return undef; return undef;
} }
my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7] }; my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7], 'ino' => $st[1] };
my $reg= $objreg->lookup ($md5); my $reg= $objreg->lookup ($md5);
...@@ -173,9 +173,17 @@ sub process_file ...@@ -173,9 +173,17 @@ sub process_file
&& exists ($sb->{'path'}) && exists ($sb->{'path'})
&& defined ($ydata= $sb->{'path'}->{$path}) # we need to keep track of the path as well otherwise we can't handly duplicates in the same store && defined ($ydata= $sb->{'path'}->{$path}) # we need to keep track of the path as well otherwise we can't handly duplicates in the same store
&& $st[7] == $ydata->{'fs_size'} && $st[7] == $ydata->{'fs_size'}
&& $st[9] == $ydata->{'mtime'} && $st[9] == $ydata->{'mtime'}
) )
{ # TODO: compare stored and current information { # TODO: compare stored and current information
foreach my $an (keys %$xdata)
{
unless ($ydata->{$an} eq $xdata->{$an})
{
$ydata->{$an}= $xdata->{$an};
push (@upd, $an);
}
}
} }
else else
{ {
...@@ -188,7 +196,7 @@ sub process_file ...@@ -188,7 +196,7 @@ sub process_file
# print "ydata: ", Dumper ($ydata); # print "ydata: ", Dumper ($ydata);
# print "xdata: ", Dumper ($xdata); # print "xdata: ", Dumper ($xdata);
push (@upd, 'store upd'); push (@upd, 'store upd');
} }
} }
else else
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment