From 3ed357a1284669005f13698230155555c7b30cc4 Mon Sep 17 00:00:00 2001 From: Gerhard Gonter <ggonter@gmail.com> Date: Tue, 3 Sep 2013 09:41:37 +0200 Subject: [PATCH] added code for toc and seq --- textarchive/lib/TA/ObjReg.pm | 259 ++++++++++++++++++++++++++++++++++- textarchive/vlib001.pl | 38 +++-- 2 files changed, 282 insertions(+), 15 deletions(-) diff --git a/textarchive/lib/TA/ObjReg.pm b/textarchive/lib/TA/ObjReg.pm index 827acd5..cb76270 100644 --- a/textarchive/lib/TA/ObjReg.pm +++ b/textarchive/lib/TA/ObjReg.pm @@ -1,9 +1,22 @@ +# +# File: lib/TA/ObjReg.pm +# package TA::ObjReg; +=head1 NAME + + TA::ObjReg -- Text-Archive Object Registry + +=head1 DESCRIPTION + +=cut + + use strict; use JSON; +use File::Find; use TA::Util; use TA::Hasher; @@ -15,7 +28,7 @@ sub new # check the presence of all required parameters my $stopit= 0; - foreach my $k (qw(project store)) + foreach my $k (qw(project)) { unless (exists ($par{$k})) { @@ -39,6 +52,14 @@ sub new $obj; } +=head1 project level methods + +=head2 $reg->get_project() + +(re)loads the project related data structures + +=cut + sub get_project { my $obj= shift; @@ -55,15 +76,50 @@ sub get_project } # print "proj_cfg: ", main::Dumper ($proj_cfg); - # TODO: check authorization (no need, if local, but for client-server, we need something1 + # TODO: check authorization (no need, if local, but for client-server, we need something + # initialize hasher my $base_dir= $obj->{'proj_cfg_dir'}; $obj->{'proj_cat'}= my $proj_cat= join ('/', $base_dir, 'cat'); $obj->{'hasher'}= my $hasher= new TA::Hasher ('algorithm' => $proj_cfg->{'algorithm'}, 'pfx' => $proj_cat, 'name' => 'file'); + + # get sequence number + $obj->{'seq_file'}= my $fnm_seq= join ('/', $base_dir, 'sequence.json'); + $obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json'); + # print "seq: ", main::Dumper ($seq); + unless (defined ($seq)) + { + $obj->{'seq'}= $seq= { 'seq' => 0, 'upd' => time () }; + $obj->_save_seq (); + } + $proj_cfg; } +=head2 $reg->stores() + +returns a list of all stores in the project + +=cut + +sub stores +{ + my $reg= shift; + + my @stores= keys %{$reg->{'cfg'}->{'stores'}}; + + (wantarray) ? @stores : \@stores; +} + +=head1 item related methods + +=head2 $reg->lookup($key) + +returns that keys value, if present, otherwise, undef. + +=cut + sub lookup { my $obj= shift; @@ -99,11 +155,210 @@ sub save my $fnm= $path . '/' . $id_str . '.json'; # print "description: [$fnm]\n"; + my @st= stat ($fnm); + unless (@st) + { # TODO: increment sequence and toc + } + my $j= encode_json ($new_reg); # print "generated json: [$j]\n"; open (J, '>:utf8', $fnm); print J $j; close (J); } +=head1 TOC: Table of Contents + +single TOC format: + key: + { + "seq": number, # this items sequence number + "upd": epoch # + } + +global TOC format: + key: + { + "seq": number, + "stores": [ { store-id: ..., "upd": epoch } ] + } + +The toc file is stored in: + + <project>/cat/<store-id>.toc.json + +=head2 $reg->load_toc ($store) + +returns toc hashed by key. + +if $store is undef, returns a toc of all stores + +=cut + +sub load_toc +{ + my $reg= shift; + my $store= shift; + my $cache= shift; + + my $c= $reg->{'proj_cat'}; + return undef unless (defined ($c)); # not initialized? + + my @stores= (defined ($store)) ? $store : $reg->stores(); + + return undef unless (@stores); # return nothing if there is nothing... + + my $toc= {}; + foreach my $s (@stores) + { + my $f= $c . '/' . $s . '.toc.json'; + my $t= TA::Util::slurp_file ($f, 'json'); + if ($cache) + { + $reg->{'tocs'}->{$s}= $t; + } + + foreach my $k (keys %$t) + { + my $r; + + unless (defined ($r= $toc->{$k})) + { # not yet present in the toc + $toc->{$k}= $r= { 'sequence' => $k->{'sequence'} }; + } + + push (@{$r->{'stores'}}, { 'store' => $s, 'upd' => $k->{'upd'} }); + } + } + + $toc; +} + +sub verify_toc +{ + my $reg= shift; + + # my $store= shift; this does not make sense, we need to verify verything anyway + + # my @stores= (defined ($store)) ? $store : $reg->stores(); + my @stores= $reg->stores(); + # print "stores: ", join (', ', @stores), "\n"; exit; + my %stores; + + my $c= $reg->{'proj_cat'}; + # pick up current tocs to see if the sequence needs to be updated + foreach my $s (@stores) + { + my $f= $c . '/' . $s . '.toc.json'; + my $t= TA::Util::slurp_file ($f, 'json'); + $t= {} unless (defined ($t)); # we need an empty toc if there is none yet + + $stores{$s}= $t; + } + + my %items; + sub item_files + { + next if ($_ =~ /\.toc\.json$/); + my $x; + next unless ($_ =~ /\.json$/ && -f ($x= $File::Find::name)); + + # print "file=[$_] path=[$x]\n"; + $items{$_}= [ $x ]; + } + + my $d= $reg->{'proj_cat'}; + print "proj_cat=[$d]\n"; + find (\&item_files, $d); + + # print "items: ", main::Dumper (\%items); + foreach my $item (keys %items) + { + my $p= $items{$item}; + my $j= TA::Util::slurp_file ($p->[0], 'json'); + # print "j: ", main::Dumper ($j); + my @i_stores= keys %{$j->{'store'}}; + my $key= $j->{'key'}; + print join (' ', $key, @i_stores), "\n"; + + # search for a key's sequence number in all known stores, not only + # in those that are *currently* used for this store + my $seq; + S1: foreach my $store (@stores) + { + if (exists ($stores{$store}->{$key})) + { + $seq= $stores{$store}->{$key}->{'seq'}; + last S1; + } + } + + S2: foreach my $store (@i_stores) + { + my $ster; # store's toc entry record ;) + unless (defined ($ster= $stores{$store}->{$key})) + { + $ster= $stores{$store}->{$key}= + { + 'seq' => $reg->next_seq(), + 'upd' => time (), + }; + } + $ster->{'found'}= 1; + } + } + + print "finishing\n"; + # save all tocs now + foreach my $s (@stores) + { + my $f= $c . '/' . $s . '.toc.json'; + print "saving toc to [$f]\n"; + unless (open (TOC, '>:utf8', $f)) + { + print STDERR "cant save toc file '$f'\n"; + next; + } + print TOC encode_json ($stores{$s}), "\n"; + close (TOC); + } + + # TODO: return something meaningful +} + +=head1 sequence number + +=head2 $reg->next_seq() + +=cut + +sub flush +{ + my $reg= shift; + + $reg->_save_seq (); +} + +sub _save_seq +{ + my $reg= shift; + + my $f= $reg->{'seq_file'}; + open (F_SEQ, '>:utf8', $f) or die "cant write sequence to '$f'"; + print F_SEQ encode_json ($reg->{'seq'}), "\n"; + close (F_SEQ); +} + +sub next_seq +{ + my $reg= shift; + + my $seq= $reg->{'seq'}; + $seq->{'seq'}++; + $seq->{'upd'}= time (); + $reg->_save_seq (); # TODO: optionally delay that until $reg->flush(); + + $seq->{'seq'}; +} + # =head1 INTERNAL FUNCTIONS 1; diff --git a/textarchive/vlib001.pl b/textarchive/vlib001.pl index 0e54aba..5f121ff 100755 --- a/textarchive/vlib001.pl +++ b/textarchive/vlib001.pl @@ -33,9 +33,11 @@ use TA::ObjReg; my @PAR= (); my $project; -my $store= '<none>'; +my $store; my $refresh_fileinfo= 0; my $DEBUG= 0; +my $STOP= 0; +my $op_mode= 'refresh'; while (my $arg= shift (@ARGV)) { @@ -45,6 +47,7 @@ while (my $arg= shift (@ARGV)) if ($arg eq '--project') { $project= shift (@ARGV); } elsif ($arg eq '--store') { $store= shift (@ARGV); } elsif ($arg eq '--fileinfo') { $refresh_fileinfo= 1; } + elsif ($arg =~ /^--(refresh|verify)$/) { $op_mode= $1; } } elsif ($arg =~ /^-/) { @@ -54,6 +57,7 @@ while (my $arg= shift (@ARGV)) if ($a eq 'p') { $project= shift (@ARGV); } elsif ($a eq 's') { $store= shift (@ARGV); } elsif ($a eq 'D') { $DEBUG++; } + elsif ($a eq 'X') { $STOP= 1; } } } else { push (@PAR, $arg); } @@ -67,8 +71,11 @@ print "debug level: $DEBUG\n"; my $objreg= new TA::ObjReg ('project' => $project, 'store' => $store); &usage ('no config found') unless (defined ($objreg)); -print "objreg: ", Dumper ($objreg) if ($DEBUG); +print "objreg: ", Dumper ($objreg) if ($DEBUG || $STOP); +exit if ($STOP); +if ($op_mode eq 'refresh') +{ my $catalog= $objreg->{'cfg'}->{'catalog'}; &usage ('no catalog found in config') unless (defined ($catalog)); @@ -81,15 +88,28 @@ unless (defined ($store_cfg)) } print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG); -if ($catalog->{'format'} eq 'md5cat') + if ($catalog->{'format'} eq 'md5cat') + { + refresh_md5cat ($objreg, $store); + } +} +elsif ($op_mode eq 'verify') { - refresh_md5cat ($objreg, $store); + $objreg->verify_toc ($store); } # print "objreg: (after refresh)", Dumper ($objreg); exit (0); +sub usage +{ + my $msg= shift; + print $msg, "\n"; + system ("perldoc $0"); + exit -1; +} + sub refresh_md5cat { my $objreg= shift; @@ -165,7 +185,7 @@ sub process_file } else { - $reg= { 'md5' => $md5, 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } }; + $reg= { 'key' => $md5, 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } }; push (@upd, 'new md5'); } @@ -193,14 +213,6 @@ sub process_file (wantarray) ? @upd : \@upd; } -sub usage -{ - my $msg= shift; - print $msg, "\n"; - system ("perldoc $0"); - exit -1; -} - __END__ =head1 TODO -- GitLab