Skip to content
Snippets Groups Projects
Commit d6c12e8a authored by Gerhard Gonter's avatar Gerhard Gonter :speech_balloon:
Browse files

optimizations for selected directory or file uploads

parent 488ed322
No related branches found
No related tags found
No related merge requests found
......@@ -366,9 +366,10 @@ sub load_single_toc
{
my $reg= shift;
my $store= shift;
my $cache= shift;
my $cache= shift; # TODO: what is that intended for??
my $path_list= shift;
print "load_single_toc: store=[$store]\n";
# print __LINE__, " load_single_toc: store=[$store] path_list=[$path_list]\n";
if ((my $be= $reg->{'cfg'}->{'backend'}) eq 'TA::Hasher')
{
my $c= $reg->{'proj_cat'};
......@@ -384,7 +385,15 @@ print "load_single_toc: store=[$store]\n";
}
elsif ($be eq 'MongoDB')
{
my $cursor= $reg->{'_cat'}->find ( { 'store' => $store } );
my $search= { store => $store };
if (defined ($path_list))
{
print __LINE__, " load_single_toc; path_list DEFINED\n";
$search->{path}= { '$in' => $path_list };
}
# print __LINE__, " load_single_toc; search: ", main::Dumper ($search);
my $cursor= $reg->{'_cat'}->find ($search);
# print "cursor=[$cursor]\n";
my @all= $cursor->all ();
return \@all;
......
......@@ -97,7 +97,7 @@ sub save_catalog
foreach my $xf (sort keys %$xFLIST)
{
my $xfo= $xFLIST->{$xf};
printf CAT ("%s file %9d %s\n", $xfo->{'md5'}, $xfo->{'fs_size'}, $xf);
printf CAT ("%s file %10d %s\n", $xfo->{'md5'}, $xfo->{'fs_size'}, $xf);
# print CAT $CAT{$entry}, "\n";
}
close (CAT);
......@@ -134,9 +134,6 @@ sub read_flist
my $md5cat= shift;
my $fnm= shift;
my $xFLIST= $md5cat->{'FLIST'};
my $INO= $md5cat->{'INO'};
print "reading reference list: [$fnm]\n";
unless (open (FI, $fnm))
{
......@@ -144,7 +141,7 @@ sub read_flist
return -1;
}
my $ref_cnt= 0;
my @files;
while (<FI>)
{
s/\015//g;
......@@ -164,6 +161,24 @@ sub read_flist
);
next if ($md5cat->{'skip_vcs'} && $_ =~ m#(^|/)(CVS|\.git|\.svn|.bzr|RCS)/#);
push (@files, $_);
}
close (FI);
$md5cat->process_flist (@files);
}
# ----------------------------------------------------------------------------
sub process_flist
{
my $md5cat= shift;
my $xFLIST= $md5cat->{'FLIST'};
my $INO= $md5cat->{'INO'};
my $ref_cnt= 0;
foreach (@_)
{
my @st= stat ($_);
my $ino= $st[1];
......@@ -172,7 +187,6 @@ sub read_flist
push (@{$INO->{$ino}}, $_);
}
close (FI);
$ref_cnt;
}
......@@ -278,11 +292,11 @@ sub check_new_files
)
{
$last_mark= time();
printf ("%9d items processed\n", $cnt);
printf ("%10d items processed\n", $cnt);
}
}
printf ("%9d files to be checked\n", scalar (@tmp_2chk));
printf ("%10d files to be checked\n", scalar (@tmp_2chk));
if (@tmp_2chk)
{
......@@ -451,7 +465,7 @@ sub check_md5_entries
# Uh, this needs to be redesigned soon!
my @stf= stat ($fnm);
$size= $stf[7];
# $_= sprintf ("%s file %9d %s", $f[0], $size, $fnm);
# $_= sprintf ("%s file %10d %s", $f[0], $size, $fnm);
}
else
{
......@@ -574,7 +588,7 @@ printf ("%10d %s %s\n", $st[7], $md5, $f);
)
{
$last_mark= time();
printf ("%9d items processed\n", $cnt);
printf ("%10d items processed\n", $cnt);
}
}
......@@ -600,7 +614,7 @@ sub digest_md5_file
}
my $md5= Digest::MD5::File::file_md5_hex ($f);
printf FO ("%s file %9d %s\n", $md5, $st[7], $f);
printf FO ("%s file %10d %s\n", $md5, $st[7], $f);
}
close (FI);
close (FO);
......
......@@ -65,7 +65,6 @@ use TA::ObjReg;
use md5cat;
use Util::ts qw(ts_ISO);
my @PAR= ();
my $project;
my $store;
my $refresh_fileinfo= 0;
......@@ -88,11 +87,20 @@ my $Dir_Pattern= '.';
my $DEFAULT_file_list= "find $Dir_Pattern -xdev -type f -print|";
# --- >8 ---
my $par_mode= 'PAR';
my @PAR= ();
my @subdirs= ();
my @files= ();
while (my $arg= shift (@ARGV))
{
if ($arg eq '--') { push (@PAR, @ARGV); @ARGV= (); }
if ($arg eq '--')
{
if ($par_mode eq 'subdir') { push (@subdirs, @ARGV); }
elsif ($par_mode eq 'file') { push (@files, @ARGV); }
else { push (@PAR, @ARGV); }
@ARGV= ();
}
elsif ($arg =~ /^--(.+)/)
{
my ($opt, $val)= split ('=', $1, 2);
......@@ -102,7 +110,8 @@ while (my $arg= shift (@ARGV))
elsif ($opt eq 'limit') { $limit= $val || shift (@ARGV) ; }
elsif ($opt eq 'fileinfo') { $refresh_fileinfo= 1; }
elsif ($opt eq 'noinode') { $check_inode= 0; }
elsif ($opt eq 'subdir') { push (@subdirs, $val || shift (@ARGV)); }
elsif ($opt eq 'subdir') { $par_mode= 'subdir'; }
elsif ($opt eq 'file') { $par_mode= 'file'; }
elsif ($opt eq 'cd') { $cd_mode= 1; }
elsif ($arg =~ /^--(refresh|verify|lookup|edit|maint|next-seq|get-cat|policy)$/) { $op_mode= $1; }
else { &usage ("unknown option '$arg'"); }
......@@ -120,8 +129,13 @@ while (my $arg= shift (@ARGV))
else { &usage ("unknown option '-$a'"); }
}
}
else
{
if ($par_mode eq 'subdir') { push (@subdirs, $arg); }
elsif ($par_mode eq 'file') { push (@files, $arg); }
else { push (@PAR, $arg); }
}
}
print "debug level: $DEBUG\n";
......@@ -176,7 +190,6 @@ if ($op_mode eq 'refresh')
exit (2);
}
# ZZZ
$DEBUG= 1;
print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG);
......@@ -319,7 +332,7 @@ sub refresh_md5cat
$cnt_updated++ if (@upd);
}
close (CAT);
printf ("%9d files processed; %9d files updated\n", $cnt_processed, $cnt_updated);
printf ("%10d files processed; %10d files updated\n", $cnt_processed, $cnt_updated);
}
sub refresh_internal
......@@ -334,42 +347,61 @@ sub refresh_internal
$objreg->verify_toc (\&verify_toc_item, \@hdr);
print "TOC verified\n";
my $toc= $objreg->load_single_toc ($store);
# print "toc: ", Dumper ($toc);
my $md5cat= new md5cat ();
my $quick_mode= 0;
if (@subdirs)
{
$quick_mode= 1;
foreach my $subdir (@subdirs)
{
my $subdir_file_list= "find '$subdir' -xdev -type f -print|";
print __LINE__, " subdir_file_list: ", Dumper ($subdir_file_list);
$md5cat->read_flist ($subdir_file_list);
# print __LINE__, " md5cat: ", Dumper ($md5cat);
}
}
else
if (@files)
{
$md5cat->read_flist ($DEFAULT_file_list);
$quick_mode= 1;
# TODO: check just that single file!
$md5cat->process_flist (@files);
# print __LINE__, " md5cat: ", Dumper ($md5cat);
}
# print "md5cat: ", Dumper ($md5cat);
$md5cat->read_flist ($DEFAULT_file_list) unless ($quick_mode);
# print __LINE__, " md5cat: ", Dumper ($md5cat);
print "flist processed\n";
my $fl= $md5cat->{'FLIST'};
# print __LINE__, " fl: ", Dumper ($fl);
my $path_list;
$path_list= [ sort keys %$fl ] if ($quick_mode);
# print __LINE__, " path_list: ", Dumper ($path_list);
my $toc= $objreg->load_single_toc ($store, undef, $path_list);
# print "toc: ", Dumper ($toc);
my @check_list= qw(mtime size);
push (@check_list, 'ino') if ($check_inode);
# compare TOC and reference filelist
my $fl= $md5cat->{'FLIST'};
my %key= ();
my $cnt= 0;
if (defined ($toc))
{
# print "toc: ", Dumper ($toc);
printf ("%9d items to be processed\n", scalar @$toc);
printf ("%10d items to be processed\n", scalar @$toc);
print "\npass 1\n";
foreach my $x (@$toc)
{
printf ("%9d items processed\n", $cnt) if ((++$cnt % 10000) == 0);
printf ("%10d items processed\n", $cnt) if ((++$cnt % 10000) == 0); # TODO: or after a certain time passed
# print __LINE__, " k=[$k]\n";
my $k= $x->{'key'};
my $p= $x->{'path'};
......@@ -422,11 +454,10 @@ sub refresh_internal
$md5cat->integrate_md5_sums ($new_files);
# $md5cat->save_catalog (); # TODO: if save_catalog flag is true!
# ZZZ
# update the Object registry with new items
my $cnt_total= scalar @$new_files;
my $cnt_done= 0;
printf ("%9d new items to be processed\n", $cnt_total);
printf ("%10d new items to be processed\n", $cnt_total);
foreach my $nf (@$new_files)
{
my ($md5, $path, $size, $mtime)= @$nf;
......@@ -450,13 +481,13 @@ sub refresh_internal
# print __LINE__, " key: ", Dumper (\%key);
my @drop= ();
if (@subdirs)
if ($quick_mode)
{
=begin comment
NOTE: we only inspected a subdirectory, but this inspects everything
and would remove items that were not even inspected
NOTE: we only inspected some subdirectories or files, but this inspects
everything and would remove items that were not even inspected
TODO: only drop the thing when it is in the right subdirectory!
......@@ -481,7 +512,7 @@ TODO: only drop the thing when it is in the right subdirectory!
$objreg->remove_from_store ($store, \@drop);
}
printf ("files: %9d processed; %9d updated; %9d (%d) dropped\n",
printf ("files: %10d processed; %10d updated; %10d (%d) dropped\n",
$cnt_processed, $cnt_updated, $cnt_dropped, scalar (@drop));
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment