Skip to content
Snippets Groups Projects
Commit b72881aa authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

its taking on shape...

parent aad8c7f5
No related branches found
No related tags found
No related merge requests found
......@@ -9,7 +9,7 @@ package TA::Hasher;
use Data::Dumper;
$Data::Dumper::Indent= 1;
my %known_algorithms= map { $_ => 1 } qw(NULL S3C1L S3C2L P3C3L);
my %known_algorithms= map { $_ => 1 } qw(NULL S3C1L S3C2L P3C3L P3C2L P3C1L);
sub new
{
......@@ -43,6 +43,14 @@ sub new
{
$obj->{'mkpo'}= \&TA::Hasher::P3C3L::mkpo;
}
elsif ($algorithm eq 'P3C2L')
{
$obj->{'mkpo'}= \&TA::Hasher::P3C2L::mkpo;
}
elsif ($algorithm eq 'P3C1L')
{
$obj->{'mkpo'}= \&TA::Hasher::P3C1L::mkpo;
}
else
{
$obj->{'mkpo'}= \&TA::Hasher::NULL::mkpo;
......@@ -173,6 +181,42 @@ sub mkpo
return { 'L' => \@L };
}
package TA::Hasher::P3C2L;
sub mkpo
{
my $S= shift;
return undef unless (defined ($S));
my @L;
if ($S =~ m#^(...)(.{1,3})#)
{ @L= ( $1, $2 ); }
elsif ($S =~ m#^(.{1,3})#)
{ @L= ( $1, 'ZZZ' ); }
else { @L= ( $S, 'ZZZ' ); }
return { 'L' => \@L };
}
package TA::Hasher::P3C1L;
sub mkpo
{
my $S= shift;
return undef unless (defined ($S));
my @L;
if ($S =~ m#^(.{1,3})#)
{ @L= ( $1 ); }
else { @L= ( $S ); }
return { 'L' => \@L };
}
1;
__END__
......
......@@ -15,7 +15,7 @@ sub new
# check the presence of all required parameters
my $stopit= 0;
foreach my $k (qw(project subproject))
foreach my $k (qw(project store))
{
unless (exists ($par{$k}))
{
......@@ -69,16 +69,16 @@ sub lookup
my $obj= shift;
my $id_str= shift;
print "lookup [$id_str]\n";
# print "lookup [$id_str]\n";
my @r= $obj->{'hasher'}->check_file ($id_str, 0);
# print "id_str=[$id_str] r=", main::Dumper (\@r);
my ($rc, $path)= @r;
my $fnm= $path . '/' . $id_str . '.json';
print "description: [$fnm]\n";
# print "description: [$fnm]\n";
my @st= stat ($fnm);
return undef unless (defined (@st));
return undef unless (@st);
my $reg= TA::Util::slurp_file ($fnm, 'json');
......@@ -97,10 +97,10 @@ sub save
my ($rc, $path)= @r;
my $fnm= $path . '/' . $id_str . '.json';
print "description: [$fnm]\n";
# print "description: [$fnm]\n";
my $j= encode_json ($new_reg);
print "generated json: [$j]\n";
# print "generated json: [$j]\n";
open (J, '>:utf8', $fnm); print J $j; close (J);
}
......
......@@ -16,7 +16,7 @@ sub slurp_file
my $fnm= shift;
my $format= shift || 'lines';
open (FI, $fnm) or return undef;
open (FI, '<:utf8', $fnm) or return undef;
my @lines= <FI>;
close (FI);
......
......@@ -11,8 +11,9 @@ use TA::ObjReg;
my @PAR= ();
my $project;
my $subproject;
my $store= '<none>';
my $refresh_fileinfo= 0;
my $DEBUG= 0;
while (my $arg= shift (@ARGV))
{
......@@ -20,7 +21,7 @@ while (my $arg= shift (@ARGV))
elsif ($arg =~ /^--/)
{
if ($arg eq '--project') { $project= shift (@ARGV); }
elsif ($arg eq '--subproject') { $subproject= shift (@ARGV); }
elsif ($arg eq '--store') { $store= shift (@ARGV); }
elsif ($arg eq '--fileinfo') { $refresh_fileinfo= 1; }
}
elsif ($arg =~ /^-/)
......@@ -29,29 +30,37 @@ while (my $arg= shift (@ARGV))
foreach my $a (@a)
{
if ($a eq 'p') { $project= shift (@ARGV); }
elsif ($a eq 's') { $subproject= shift (@ARGV); }
elsif ($a eq 's') { $store= shift (@ARGV); }
elsif ($a eq 'D') { $DEBUG++; }
}
}
else { push (@PAR, $arg); }
}
unless (defined ($project))
{
&usage ('no project specified');
}
print "debug level: $DEBUG\n";
&usage ('no project specified') unless (defined ($project));
# &usage ('no store specified') unless (defined ($store));
my $objreg= new TA::ObjReg ('project' => $project, 'subproject' => $subproject);
print "objreg: ", Dumper ($objreg);
my $objreg= new TA::ObjReg ('project' => $project, 'store' => $store);
&usage ('no config found') unless (defined ($objreg));
print "objreg: ", Dumper ($objreg) if ($DEBUG);
my $catalog= $objreg->{'cfg'}->{'catalog'};
unless (defined ($catalog))
&usage ('no catalog found in config') unless (defined ($catalog));
my $stores_p= $objreg->{'cfg'}->{'stores'};
my $store_cfg= $stores_p->{$store};
unless (defined ($store_cfg))
{
&usage ('no catalog specified');
print "no store config found for '$store', check these: ", Dumper ($stores_p);
exit (-2);
}
print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG);
if ($catalog->{'format'} eq 'md5cat')
{
refresh_md5cat ($objreg, $subproject);
refresh_md5cat ($objreg, $store);
}
# print "objreg: (after refresh)", Dumper ($objreg);
......@@ -61,7 +70,7 @@ exit (0);
sub refresh_md5cat
{
my $objreg= shift;
my $subproject= shift;
my $store= shift;
my %extra= @_;
# my $catalog= $objreg->{'cfg'}->{'catalog'};
......@@ -69,33 +78,49 @@ sub refresh_md5cat
# my $hasher= $objreg->{'hasher'};
open (CAT, '_catalog') or die "cant read catalog";
open (CAT, '<:utf8', '_catalog') or die "cant read catalog";
my $cnt_processed= 0;
my $cnt_updated= 0;
CAT: while (<CAT>)
{
chop;
my ($md5, $xf, $size, $path)= split (' ', $_, 4);
$path=~ s#^\.\/##;
print "md5=[$md5] size=[$size] path=[$path]\n";
my @st= stat ($path);
# print "md5=[$md5] size=[$size] path=[$path]\n";
$cnt_processed++;
my @upd= process_file ($md5, $path, $size);
$cnt_updated++ if (@upd);
}
close (CAT);
printf ("%6d files processed; %6d files updated\n", $cnt_processed, $cnt_updated);
}
sub process_file
{
my ($md5, $path, $size)= @_;
my @st= stat ($path);
unless (@st)
{
print STDERR "ATTN: could not stat file '$path'\n";
next CAT;
return undef;
}
my $xdata= { 'c_size' => $size, 'path' => $path, 'mtime' => $st[9], 'fs_size' => $st[7] };
# $objreg->add ($md5, 0, { 'md5' => $md5, $subproject => { 'size' => $size, 'path' => $path, %extra } } );
my $reg= $objreg->lookup ($md5);
print "json read: ", main::Dumper ($reg);
my $upd= 0;
my @upd;
my $ydata; # pointer to file catalog data within main datastructure
if (defined ($reg))
{ # we know something about this hash value but not in respect to the repository at hand
if (defined ($ydata= $reg->{$subproject})
# print "json read: ", main::Dumper ($reg);
my $sb;
if (defined ($sb= $reg->{'store'}->{$store})
&& exists ($sb->{'path'})
&& defined ($ydata= $sb->{'path'}->{$path}) # we need to keep track of the path as well otherwise we can't handly duplicates in the same store
&& $st[7] == $ydata->{'fs_size'}
&& $st[9] == $ydata->{'mtime'}
)
......@@ -103,33 +128,47 @@ sub refresh_md5cat
}
else
{
$ydata= $reg->{$subproject}= $xdata;
$upd= 1;
# print "st: fs_size(7)=[$st[7]] mtime(9)=[$st[9]]\n";
# print "ydata: ", Dumper ($ydata);
# print "xdata: ", Dumper ($xdata);
$reg->{'store'}->{$store}->{'path'}->{$path}= $ydata= $xdata;
# print __LINE__, " reg: ", Dumper ($reg);
# print "ydata: ", Dumper ($ydata);
# print "xdata: ", Dumper ($xdata);
push (@upd, 'store upd');
}
}
else
{
$reg= { 'md5' => $md5, $subproject => $ydata= $xdata };
$upd= 1;
$reg= { 'md5' => $md5, 'store' => { $store => { 'path' => { $path => $ydata= $xdata } } } };
push (@upd, 'new md5');
}
# fill in some more information about that file
if (!exists ($ydata->{'fileinfo'}) || $refresh_fileinfo)
{
my $res= `/usr/bin/file '$path'`;
my $xpath= $path;
$xpath=~ s#'#'\\''#g;
my $res= `/usr/bin/file '$xpath'`;
chop ($res);
my ($xpath, $fileinfo)= split (/: */, $res, 2);
$ydata->{'fileinfo'}= $fileinfo;
$upd= 1;
push (@upd, 'fileinfo updated');
}
# TODO: some more information would probably be nice as well
# e.g. mp3info or stuff
$objreg->save ($md5, $reg) if ($upd);
}
close (CAT);
if (@upd)
{
print "saving (", join ('|', @upd), ")\n";
# print __LINE__, " reg: ", Dumper ($reg);
$objreg->save ($md5, $reg);
}
(wantarray) ? @upd : \@upd;
}
sub usage
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment