Skip to content
Snippets Groups Projects
Commit d6c12e8a authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

optimizations for selected directory or file uploads

parent 488ed322
Branches
No related tags found
No related merge requests found
...@@ -366,9 +366,10 @@ sub load_single_toc ...@@ -366,9 +366,10 @@ sub load_single_toc
{ {
my $reg= shift; my $reg= shift;
my $store= shift; my $store= shift;
my $cache= shift; my $cache= shift; # TODO: what is that intended for??
my $path_list= shift;
print "load_single_toc: store=[$store]\n"; # print __LINE__, " load_single_toc: store=[$store] path_list=[$path_list]\n";
if ((my $be= $reg->{'cfg'}->{'backend'}) eq 'TA::Hasher') if ((my $be= $reg->{'cfg'}->{'backend'}) eq 'TA::Hasher')
{ {
my $c= $reg->{'proj_cat'}; my $c= $reg->{'proj_cat'};
...@@ -384,7 +385,15 @@ print "load_single_toc: store=[$store]\n"; ...@@ -384,7 +385,15 @@ print "load_single_toc: store=[$store]\n";
} }
elsif ($be eq 'MongoDB') elsif ($be eq 'MongoDB')
{ {
my $cursor= $reg->{'_cat'}->find ( { 'store' => $store } ); my $search= { store => $store };
if (defined ($path_list))
{
print __LINE__, " load_single_toc; path_list DEFINED\n";
$search->{path}= { '$in' => $path_list };
}
# print __LINE__, " load_single_toc; search: ", main::Dumper ($search);
my $cursor= $reg->{'_cat'}->find ($search);
# print "cursor=[$cursor]\n"; # print "cursor=[$cursor]\n";
my @all= $cursor->all (); my @all= $cursor->all ();
return \@all; return \@all;
......
...@@ -97,7 +97,7 @@ sub save_catalog ...@@ -97,7 +97,7 @@ sub save_catalog
foreach my $xf (sort keys %$xFLIST) foreach my $xf (sort keys %$xFLIST)
{ {
my $xfo= $xFLIST->{$xf}; my $xfo= $xFLIST->{$xf};
printf CAT ("%s file %9d %s\n", $xfo->{'md5'}, $xfo->{'fs_size'}, $xf); printf CAT ("%s file %10d %s\n", $xfo->{'md5'}, $xfo->{'fs_size'}, $xf);
# print CAT $CAT{$entry}, "\n"; # print CAT $CAT{$entry}, "\n";
} }
close (CAT); close (CAT);
...@@ -134,9 +134,6 @@ sub read_flist ...@@ -134,9 +134,6 @@ sub read_flist
my $md5cat= shift; my $md5cat= shift;
my $fnm= shift; my $fnm= shift;
my $xFLIST= $md5cat->{'FLIST'};
my $INO= $md5cat->{'INO'};
print "reading reference list: [$fnm]\n"; print "reading reference list: [$fnm]\n";
unless (open (FI, $fnm)) unless (open (FI, $fnm))
{ {
...@@ -144,7 +141,7 @@ sub read_flist ...@@ -144,7 +141,7 @@ sub read_flist
return -1; return -1;
} }
my $ref_cnt= 0; my @files;
while (<FI>) while (<FI>)
{ {
s/\015//g; s/\015//g;
...@@ -164,6 +161,24 @@ sub read_flist ...@@ -164,6 +161,24 @@ sub read_flist
); );
next if ($md5cat->{'skip_vcs'} && $_ =~ m#(^|/)(CVS|\.git|\.svn|.bzr|RCS)/#); next if ($md5cat->{'skip_vcs'} && $_ =~ m#(^|/)(CVS|\.git|\.svn|.bzr|RCS)/#);
push (@files, $_);
}
close (FI);
$md5cat->process_flist (@files);
}
# ----------------------------------------------------------------------------
sub process_flist
{
my $md5cat= shift;
my $xFLIST= $md5cat->{'FLIST'};
my $INO= $md5cat->{'INO'};
my $ref_cnt= 0;
foreach (@_)
{
my @st= stat ($_); my @st= stat ($_);
my $ino= $st[1]; my $ino= $st[1];
...@@ -172,7 +187,6 @@ sub read_flist ...@@ -172,7 +187,6 @@ sub read_flist
push (@{$INO->{$ino}}, $_); push (@{$INO->{$ino}}, $_);
} }
close (FI);
$ref_cnt; $ref_cnt;
} }
...@@ -278,11 +292,11 @@ sub check_new_files ...@@ -278,11 +292,11 @@ sub check_new_files
) )
{ {
$last_mark= time(); $last_mark= time();
printf ("%9d items processed\n", $cnt); printf ("%10d items processed\n", $cnt);
} }
} }
printf ("%9d files to be checked\n", scalar (@tmp_2chk)); printf ("%10d files to be checked\n", scalar (@tmp_2chk));
if (@tmp_2chk) if (@tmp_2chk)
{ {
...@@ -451,7 +465,7 @@ sub check_md5_entries ...@@ -451,7 +465,7 @@ sub check_md5_entries
# Uh, this needs to be redesigned soon! # Uh, this needs to be redesigned soon!
my @stf= stat ($fnm); my @stf= stat ($fnm);
$size= $stf[7]; $size= $stf[7];
# $_= sprintf ("%s file %9d %s", $f[0], $size, $fnm); # $_= sprintf ("%s file %10d %s", $f[0], $size, $fnm);
} }
else else
{ {
...@@ -574,7 +588,7 @@ printf ("%10d %s %s\n", $st[7], $md5, $f); ...@@ -574,7 +588,7 @@ printf ("%10d %s %s\n", $st[7], $md5, $f);
) )
{ {
$last_mark= time(); $last_mark= time();
printf ("%9d items processed\n", $cnt); printf ("%10d items processed\n", $cnt);
} }
} }
...@@ -600,7 +614,7 @@ sub digest_md5_file ...@@ -600,7 +614,7 @@ sub digest_md5_file
} }
my $md5= Digest::MD5::File::file_md5_hex ($f); my $md5= Digest::MD5::File::file_md5_hex ($f);
printf FO ("%s file %9d %s\n", $md5, $st[7], $f); printf FO ("%s file %10d %s\n", $md5, $st[7], $f);
} }
close (FI); close (FI);
close (FO); close (FO);
......
...@@ -65,7 +65,6 @@ use TA::ObjReg; ...@@ -65,7 +65,6 @@ use TA::ObjReg;
use md5cat; use md5cat;
use Util::ts qw(ts_ISO); use Util::ts qw(ts_ISO);
my @PAR= ();
my $project; my $project;
my $store; my $store;
my $refresh_fileinfo= 0; my $refresh_fileinfo= 0;
...@@ -88,11 +87,20 @@ my $Dir_Pattern= '.'; ...@@ -88,11 +87,20 @@ my $Dir_Pattern= '.';
my $DEFAULT_file_list= "find $Dir_Pattern -xdev -type f -print|"; my $DEFAULT_file_list= "find $Dir_Pattern -xdev -type f -print|";
# --- >8 --- # --- >8 ---
my $par_mode= 'PAR';
my @PAR= ();
my @subdirs= (); my @subdirs= ();
my @files= ();
while (my $arg= shift (@ARGV)) while (my $arg= shift (@ARGV))
{ {
if ($arg eq '--') { push (@PAR, @ARGV); @ARGV= (); } if ($arg eq '--')
{
if ($par_mode eq 'subdir') { push (@subdirs, @ARGV); }
elsif ($par_mode eq 'file') { push (@files, @ARGV); }
else { push (@PAR, @ARGV); }
@ARGV= ();
}
elsif ($arg =~ /^--(.+)/) elsif ($arg =~ /^--(.+)/)
{ {
my ($opt, $val)= split ('=', $1, 2); my ($opt, $val)= split ('=', $1, 2);
...@@ -102,7 +110,8 @@ while (my $arg= shift (@ARGV)) ...@@ -102,7 +110,8 @@ while (my $arg= shift (@ARGV))
elsif ($opt eq 'limit') { $limit= $val || shift (@ARGV) ; } elsif ($opt eq 'limit') { $limit= $val || shift (@ARGV) ; }
elsif ($opt eq 'fileinfo') { $refresh_fileinfo= 1; } elsif ($opt eq 'fileinfo') { $refresh_fileinfo= 1; }
elsif ($opt eq 'noinode') { $check_inode= 0; } elsif ($opt eq 'noinode') { $check_inode= 0; }
elsif ($opt eq 'subdir') { push (@subdirs, $val || shift (@ARGV)); } elsif ($opt eq 'subdir') { $par_mode= 'subdir'; }
elsif ($opt eq 'file') { $par_mode= 'file'; }
elsif ($opt eq 'cd') { $cd_mode= 1; } elsif ($opt eq 'cd') { $cd_mode= 1; }
elsif ($arg =~ /^--(refresh|verify|lookup|edit|maint|next-seq|get-cat|policy)$/) { $op_mode= $1; } elsif ($arg =~ /^--(refresh|verify|lookup|edit|maint|next-seq|get-cat|policy)$/) { $op_mode= $1; }
else { &usage ("unknown option '$arg'"); } else { &usage ("unknown option '$arg'"); }
...@@ -120,7 +129,12 @@ while (my $arg= shift (@ARGV)) ...@@ -120,7 +129,12 @@ while (my $arg= shift (@ARGV))
else { &usage ("unknown option '-$a'"); } else { &usage ("unknown option '-$a'"); }
} }
} }
else { push (@PAR, $arg); } else
{
if ($par_mode eq 'subdir') { push (@subdirs, $arg); }
elsif ($par_mode eq 'file') { push (@files, $arg); }
else { push (@PAR, $arg); }
}
} }
print "debug level: $DEBUG\n"; print "debug level: $DEBUG\n";
...@@ -176,7 +190,6 @@ if ($op_mode eq 'refresh') ...@@ -176,7 +190,6 @@ if ($op_mode eq 'refresh')
exit (2); exit (2);
} }
# ZZZ
$DEBUG= 1; $DEBUG= 1;
print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG); print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG);
...@@ -319,7 +332,7 @@ sub refresh_md5cat ...@@ -319,7 +332,7 @@ sub refresh_md5cat
$cnt_updated++ if (@upd); $cnt_updated++ if (@upd);
} }
close (CAT); close (CAT);
printf ("%9d files processed; %9d files updated\n", $cnt_processed, $cnt_updated); printf ("%10d files processed; %10d files updated\n", $cnt_processed, $cnt_updated);
} }
sub refresh_internal sub refresh_internal
...@@ -334,46 +347,65 @@ sub refresh_internal ...@@ -334,46 +347,65 @@ sub refresh_internal
$objreg->verify_toc (\&verify_toc_item, \@hdr); $objreg->verify_toc (\&verify_toc_item, \@hdr);
print "TOC verified\n"; print "TOC verified\n";
my $toc= $objreg->load_single_toc ($store);
# print "toc: ", Dumper ($toc);
my $md5cat= new md5cat (); my $md5cat= new md5cat ();
my $quick_mode= 0;
if (@subdirs) if (@subdirs)
{ {
$quick_mode= 1;
foreach my $subdir (@subdirs) foreach my $subdir (@subdirs)
{ {
my $subdir_file_list= "find '$subdir' -xdev -type f -print|"; my $subdir_file_list= "find '$subdir' -xdev -type f -print|";
print __LINE__, " subdir_file_list: ", Dumper ($subdir_file_list);
$md5cat->read_flist ($subdir_file_list); $md5cat->read_flist ($subdir_file_list);
# print __LINE__, " md5cat: ", Dumper ($md5cat);
} }
} }
else
if (@files)
{ {
$md5cat->read_flist ($DEFAULT_file_list); $quick_mode= 1;
# TODO: check just that single file!
$md5cat->process_flist (@files);
# print __LINE__, " md5cat: ", Dumper ($md5cat);
} }
# print "md5cat: ", Dumper ($md5cat); $md5cat->read_flist ($DEFAULT_file_list) unless ($quick_mode);
# print __LINE__, " md5cat: ", Dumper ($md5cat);
print "flist processed\n"; print "flist processed\n";
my $fl= $md5cat->{'FLIST'};
# print __LINE__, " fl: ", Dumper ($fl);
my $path_list;
$path_list= [ sort keys %$fl ] if ($quick_mode);
# print __LINE__, " path_list: ", Dumper ($path_list);
my $toc= $objreg->load_single_toc ($store, undef, $path_list);
# print "toc: ", Dumper ($toc);
my @check_list= qw(mtime size); my @check_list= qw(mtime size);
push (@check_list, 'ino') if ($check_inode); push (@check_list, 'ino') if ($check_inode);
# compare TOC and reference filelist # compare TOC and reference filelist
my $fl= $md5cat->{'FLIST'};
my %key= (); my %key= ();
my $cnt= 0; my $cnt= 0;
if (defined ($toc)) if (defined ($toc))
{ {
# print "toc: ", Dumper ($toc); # print "toc: ", Dumper ($toc);
printf ("%9d items to be processed\n", scalar @$toc); printf ("%10d items to be processed\n", scalar @$toc);
print "\npass 1\n"; print "\npass 1\n";
foreach my $x (@$toc) foreach my $x (@$toc)
{ {
printf ("%9d items processed\n", $cnt) if ((++$cnt % 10000) == 0); printf ("%10d items processed\n", $cnt) if ((++$cnt % 10000) == 0); # TODO: or after a certain time passed
# print __LINE__, " k=[$k]\n"; # print __LINE__, " k=[$k]\n";
my $k= $x->{'key'}; my $k= $x->{'key'};
my $p= $x->{'path'}; my $p= $x->{'path'};
$key{$k}->{$p}= 0; $key{$k}->{$p}= 0;
if (exists ($fl->{$p})) if (exists ($fl->{$p}))
{ {
...@@ -402,10 +434,10 @@ sub refresh_internal ...@@ -402,10 +434,10 @@ sub refresh_internal
# print "file missing: ", Dumper ($x); # print "file missing: ", Dumper ($x);
$cnt_dropped++; $cnt_dropped++;
} }
} }
# my %paths= map { my $x= $toc->{$_}; $x->{'found'}= 0; $x->{'path'} => $x } keys %$toc; # my %paths= map { my $x= $toc->{$_}; $x->{'found'}= 0; $x->{'path'} => $x } keys %$toc;
# print "paths: ", Dumper (\%paths); # print "paths: ", Dumper (\%paths);
# print "fl: ", Dumper ($fl); # print "fl: ", Dumper ($fl);
} }
# print __LINE__, " check_new_files\n"; # print __LINE__, " check_new_files\n";
...@@ -422,11 +454,10 @@ sub refresh_internal ...@@ -422,11 +454,10 @@ sub refresh_internal
$md5cat->integrate_md5_sums ($new_files); $md5cat->integrate_md5_sums ($new_files);
# $md5cat->save_catalog (); # TODO: if save_catalog flag is true! # $md5cat->save_catalog (); # TODO: if save_catalog flag is true!
# ZZZ
# update the Object registry with new items # update the Object registry with new items
my $cnt_total= scalar @$new_files; my $cnt_total= scalar @$new_files;
my $cnt_done= 0; my $cnt_done= 0;
printf ("%9d new items to be processed\n", $cnt_total); printf ("%10d new items to be processed\n", $cnt_total);
foreach my $nf (@$new_files) foreach my $nf (@$new_files)
{ {
my ($md5, $path, $size, $mtime)= @$nf; my ($md5, $path, $size, $mtime)= @$nf;
...@@ -450,13 +481,13 @@ sub refresh_internal ...@@ -450,13 +481,13 @@ sub refresh_internal
# print __LINE__, " key: ", Dumper (\%key); # print __LINE__, " key: ", Dumper (\%key);
my @drop= (); my @drop= ();
if (@subdirs) if ($quick_mode)
{ {
=begin comment =begin comment
NOTE: we only inspected a subdirectory, but this inspects everything NOTE: we only inspected some subdirectories or files, but this inspects
and would remove items that were not even inspected everything and would remove items that were not even inspected
TODO: only drop the thing when it is in the right subdirectory! TODO: only drop the thing when it is in the right subdirectory!
...@@ -481,7 +512,7 @@ TODO: only drop the thing when it is in the right subdirectory! ...@@ -481,7 +512,7 @@ TODO: only drop the thing when it is in the right subdirectory!
$objreg->remove_from_store ($store, \@drop); $objreg->remove_from_store ($store, \@drop);
} }
printf ("files: %9d processed; %9d updated; %9d (%d) dropped\n", printf ("files: %10d processed; %10d updated; %10d (%d) dropped\n",
$cnt_processed, $cnt_updated, $cnt_dropped, scalar (@drop)); $cnt_processed, $cnt_updated, $cnt_dropped, scalar (@drop));
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment