Skip to content
Snippets Groups Projects
Commit 3ed357a1 authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

added code for toc and seq

parent ae621533
Branches
No related tags found
No related merge requests found
#
# File: lib/TA/ObjReg.pm
#
package TA::ObjReg;
=head1 NAME
TA::ObjReg -- Text-Archive Object Registry
=head1 DESCRIPTION
=cut
use strict;
use JSON;
use File::Find;
use TA::Util;
use TA::Hasher;
......@@ -15,7 +28,7 @@ sub new
# check the presence of all required parameters
my $stopit= 0;
foreach my $k (qw(project store))
foreach my $k (qw(project))
{
unless (exists ($par{$k}))
{
......@@ -39,6 +52,14 @@ sub new
$obj;
}
=head1 project level methods
=head2 $reg->get_project()
(re)loads the project related data structures
=cut
sub get_project
{
my $obj= shift;
......@@ -55,15 +76,50 @@ sub get_project
}
# print "proj_cfg: ", main::Dumper ($proj_cfg);
# TODO: check authorization (no need, if local, but for client-server, we need something1
# TODO: check authorization (no need, if local, but for client-server, we need something
# initialize hasher
my $base_dir= $obj->{'proj_cfg_dir'};
$obj->{'proj_cat'}= my $proj_cat= join ('/', $base_dir, 'cat');
$obj->{'hasher'}= my $hasher= new TA::Hasher ('algorithm' => $proj_cfg->{'algorithm'}, 'pfx' => $proj_cat, 'name' => 'file');
# get sequence number
$obj->{'seq_file'}= my $fnm_seq= join ('/', $base_dir, 'sequence.json');
$obj->{'seq'}= my $seq= TA::Util::slurp_file ($fnm_seq, 'json');
# print "seq: ", main::Dumper ($seq);
unless (defined ($seq))
{
$obj->{'seq'}= $seq= { 'seq' => 0, 'upd' => time () };
$obj->_save_seq ();
}
$proj_cfg;
}
=head2 $reg->stores()
returns a list of all stores in the project
=cut
sub stores
{
my $reg= shift;
my @stores= keys %{$reg->{'cfg'}->{'stores'}};
(wantarray) ? @stores : \@stores;
}
=head1 item related methods
=head2 $reg->lookup($key)
returns that keys value, if present, otherwise, undef.
=cut
sub lookup
{
my $obj= shift;
......@@ -99,11 +155,210 @@ sub save
my $fnm= $path . '/' . $id_str . '.json';
# print "description: [$fnm]\n";
my @st= stat ($fnm);
unless (@st)
{ # TODO: increment sequence and toc
}
my $j= encode_json ($new_reg);
# print "generated json: [$j]\n";
open (J, '>:utf8', $fnm); print J $j; close (J);
}
=head1 TOC: Table of Contents
single TOC format:
key:
{
"seq": number, # this items sequence number
"upd": epoch #
}
global TOC format:
key:
{
"seq": number,
"stores": [ { store-id: ..., "upd": epoch } ]
}
The toc file is stored in:
<project>/cat/<store-id>.toc.json
=head2 $reg->load_toc ($store)
returns toc hashed by key.
if $store is undef, returns a toc of all stores
=cut
sub load_toc
{
my $reg= shift;
my $store= shift;
my $cache= shift;
my $c= $reg->{'proj_cat'};
return undef unless (defined ($c)); # not initialized?
my @stores= (defined ($store)) ? $store : $reg->stores();
return undef unless (@stores); # return nothing if there is nothing...
my $toc= {};
foreach my $s (@stores)
{
my $f= $c . '/' . $s . '.toc.json';
my $t= TA::Util::slurp_file ($f, 'json');
if ($cache)
{
$reg->{'tocs'}->{$s}= $t;
}
foreach my $k (keys %$t)
{
my $r;
unless (defined ($r= $toc->{$k}))
{ # not yet present in the toc
$toc->{$k}= $r= { 'sequence' => $k->{'sequence'} };
}
push (@{$r->{'stores'}}, { 'store' => $s, 'upd' => $k->{'upd'} });
}
}
$toc;
}
sub verify_toc
{
my $reg= shift;
# my $store= shift; this does not make sense, we need to verify verything anyway
# my @stores= (defined ($store)) ? $store : $reg->stores();
my @stores= $reg->stores();
# print "stores: ", join (', ', @stores), "\n"; exit;
my %stores;
my $c= $reg->{'proj_cat'};
# pick up current tocs to see if the sequence needs to be updated
foreach my $s (@stores)
{
my $f= $c . '/' . $s . '.toc.json';
my $t= TA::Util::slurp_file ($f, 'json');
$t= {} unless (defined ($t)); # we need an empty toc if there is none yet
$stores{$s}= $t;
}
my %items;
sub item_files
{
next if ($_ =~ /\.toc\.json$/);
my $x;
next unless ($_ =~ /\.json$/ && -f ($x= $File::Find::name));
# print "file=[$_] path=[$x]\n";
$items{$_}= [ $x ];
}
my $d= $reg->{'proj_cat'};
print "proj_cat=[$d]\n";
find (\&item_files, $d);
# print "items: ", main::Dumper (\%items);
foreach my $item (keys %items)
{
my $p= $items{$item};
my $j= TA::Util::slurp_file ($p->[0], 'json');
# print "j: ", main::Dumper ($j);
my @i_stores= keys %{$j->{'store'}};
my $key= $j->{'key'};
print join (' ', $key, @i_stores), "\n";
# search for a key's sequence number in all known stores, not only
# in those that are *currently* used for this store
my $seq;
S1: foreach my $store (@stores)
{
if (exists ($stores{$store}->{$key}))
{
$seq= $stores{$store}->{$key}->{'seq'};
last S1;
}
}
S2: foreach my $store (@i_stores)
{
my $ster; # store's toc entry record ;)
unless (defined ($ster= $stores{$store}->{$key}))
{
$ster= $stores{$store}->{$key}=
{
'seq' => $reg->next_seq(),
'upd' => time (),
};
}
$ster->{'found'}= 1;
}
}
print "finishing\n";
# save all tocs now
foreach my $s (@stores)
{
my $f= $c . '/' . $s . '.toc.json';
print "saving toc to [$f]\n";
unless (open (TOC, '>:utf8', $f))
{
print STDERR "cant save toc file '$f'\n";
next;
}
print TOC encode_json ($stores{$s}), "\n";
close (TOC);
}
# TODO: return something meaningful
}
=head1 sequence number
=head2 $reg->next_seq()
=cut
sub flush
{
my $reg= shift;
$reg->_save_seq ();
}
sub _save_seq
{
my $reg= shift;
my $f= $reg->{'seq_file'};
open (F_SEQ, '>:utf8', $f) or die "cant write sequence to '$f'";
print F_SEQ encode_json ($reg->{'seq'}), "\n";
close (F_SEQ);
}
sub next_seq
{
my $reg= shift;
my $seq= $reg->{'seq'};
$seq->{'seq'}++;
$seq->{'upd'}= time ();
$reg->_save_seq (); # TODO: optionally delay that until $reg->flush();
$seq->{'seq'};
}
# =head1 INTERNAL FUNCTIONS
1;
......
......@@ -33,9 +33,11 @@ use TA::ObjReg;
my @PAR= ();
my $project;
my $store= '<none>';
my $store;
my $refresh_fileinfo= 0;
my $DEBUG= 0;
my $STOP= 0;
my $op_mode= 'refresh';
while (my $arg= shift (@ARGV))
{
......@@ -45,6 +47,7 @@ while (my $arg= shift (@ARGV))
if ($arg eq '--project') { $project= shift (@ARGV); }
elsif ($arg eq '--store') { $store= shift (@ARGV); }
elsif ($arg eq '--fileinfo') { $refresh_fileinfo= 1; }
elsif ($arg =~ /^--(refresh|verify)$/) { $op_mode= $1; }
}
elsif ($arg =~ /^-/)
{
......@@ -54,6 +57,7 @@ while (my $arg= shift (@ARGV))
if ($a eq 'p') { $project= shift (@ARGV); }
elsif ($a eq 's') { $store= shift (@ARGV); }
elsif ($a eq 'D') { $DEBUG++; }
elsif ($a eq 'X') { $STOP= 1; }
}
}
else { push (@PAR, $arg); }
......@@ -67,8 +71,11 @@ print "debug level: $DEBUG\n";
my $objreg= new TA::ObjReg ('project' => $project, 'store' => $store);
&usage ('no config found') unless (defined ($objreg));
print "objreg: ", Dumper ($objreg) if ($DEBUG);
print "objreg: ", Dumper ($objreg) if ($DEBUG || $STOP);
exit if ($STOP);
if ($op_mode eq 'refresh')
{
my $catalog= $objreg->{'cfg'}->{'catalog'};
&usage ('no catalog found in config') unless (defined ($catalog));
......@@ -85,11 +92,24 @@ if ($catalog->{'format'} eq 'md5cat')
{
refresh_md5cat ($objreg, $store);
}
}
elsif ($op_mode eq 'verify')
{
$objreg->verify_toc ($store);
}
# print "objreg: (after refresh)", Dumper ($objreg);
exit (0);
sub usage
{
my $msg= shift;
print $msg, "\n";
system ("perldoc $0");
exit -1;
}
sub refresh_md5cat
{
my $objreg= shift;
......@@ -165,7 +185,7 @@ sub process_file
}
else
{
$reg= { 'md5' => $md5, 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } };
$reg= { 'key' => $md5, 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } };
push (@upd, 'new md5');
}
......@@ -193,14 +213,6 @@ sub process_file
(wantarray) ? @upd : \@upd;
}
sub usage
{
my $msg= shift;
print $msg, "\n";
system ("perldoc $0");
exit -1;
}
__END__
=head1 TODO
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment