diff --git a/.gitignore b/.gitignore index 3103fea892fb33f6ca1d9c3597db92f1750d281e..d647cc5db1ebe5e48f7556bfa8cd5bb63f70b628 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ @* tmp +_catalog +_catalog.inodes +_catalog.bup +_catalog.inodes.bup diff --git a/textarchive/lib/TA/ObjReg.pm b/textarchive/lib/TA/ObjReg.pm index 0f12710a99054ffe83382dae26759647ccc41a86..4800baf9f86887a104b181f648a28fc83cfd0256 100644 --- a/textarchive/lib/TA/ObjReg.pm +++ b/textarchive/lib/TA/ObjReg.pm @@ -10,6 +10,8 @@ package TA::ObjReg; =head1 DESCRIPTION +=head1 SYNOPSIS + =cut use strict; @@ -52,7 +54,7 @@ sub new $obj; } -=head1 project level methods +=head1 PROJECT LEVEL METHODS =head2 $reg->get_project() @@ -127,7 +129,7 @@ sub get_project } } - print "seq: [$seq] ", main::Dumper ($seq); + # print "seq: [$seq] ", main::Dumper ($seq); unless (defined ($seq)) { $obj->{'seq'}= $seq= { 'seq' => 0, 'upd' => time () }; @@ -366,7 +368,7 @@ print "load_single_toc: store=[$store]\n"; elsif ($be eq 'MongoDB') { my $cursor= $reg->{'_cat'}->find ( { 'store' => $store } ); - print "cursor=[$cursor]\n"; + # print "cursor=[$cursor]\n"; my @all= $cursor->all (); return \@all; } @@ -736,8 +738,9 @@ sub connect_MongoDB $col0= $db->get_collection($cmm->{'maint'}); $col1= $db->get_collection($cmm->{'catalog'}); $col2= $db->get_collection($cmm->{'keys'}); - print "col: [$col0] [$col1] [$col2]\n"; + # print "col: [$col0] [$col1] [$col2]\n"; }; + if ($@) { print "ATTN: can't connect to MongoDB ", (join ('/', map { $cmm->{$_} } qw(host user maint))), "\n"; diff --git a/textarchive/vlib001.pl b/textarchive/vlib001.pl index 9f90a92bdeb65fff58de75428e042492755c1dae..2130abadfae7ce2b523b7a24eacff2e0f5458e04 100755 --- a/textarchive/vlib001.pl +++ b/textarchive/vlib001.pl @@ -57,6 +57,8 @@ my $DEBUG= 0; my $STOP= 0; my $op_mode= 'refresh'; my $limit= undef; +my $cat_file= '_catalog'; +my $ino_file= '_catalog.inodes'; my @hdr= qw(md5 path mtime fs_size ino); @@ -75,7 +77,7 @@ while (my $arg= shift 
(@ARGV)) elsif ($arg eq '--store') { $store= shift (@ARGV); } elsif ($arg eq '--fileinfo') { $refresh_fileinfo= 1; } elsif ($arg eq '--limit') { $limit= shift (@ARGV); } - elsif ($arg =~ /^--(refresh|verify|lookup|edit|maint|next-seq)$/) { $op_mode= $1; } + elsif ($arg =~ /^--(refresh|verify|lookup|edit|maint|next-seq|get-cat)$/) { $op_mode= $1; } } elsif ($arg =~ /^-/) { @@ -150,11 +152,28 @@ elsif ($op_mode eq 'maint') =begin comment -For MongoDB backend: synchronize information about stores with maint collection +TODO: For MongoDB backend: synchronize information about stores with maint collection =end comment =cut +} +elsif ($op_mode eq 'get-cat') +{ + my $catalog= $objreg->{'cfg'}->{'catalog'}; + &usage ('no catalog found in config') unless (defined ($catalog)); + + my $stores_p= $objreg->{'cfg'}->{'stores'}; + my $store_cfg= $stores_p->{$store}; + unless (defined ($store_cfg)) + { + print "no store config found for '$store', check these: ", Dumper ($stores_p); + exit (-2); + } + print "store_cfg: ", Dumper ($store_cfg) if ($DEBUG); + + if ($catalog->{'format'} eq 'md5cat') { print "hmm... you should have a _catalog already!\n"; } + elsif ($catalog->{'format'} eq 'internal') { get_cat_internal ($objreg, $store); } } elsif ($op_mode eq 'next-seq') { @@ -396,6 +415,56 @@ sub process_file (wantarray) ? 
@upd : \@upd;
 }
 
+# Export the table of contents (TOC) of one store from the object registry
+# into two plain-text files.
+#
+# Parameters:
+#   $objreg ... registry object; only its load_single_toc() method is used
+#   $store .... name of the store whose TOC is exported
+#
+# Files written (names are taken from the file-level $cat_file, $ino_file):
+#   $cat_file ... one line per TOC entry: "<md5> file <fs_size> <path>"
+#   $ino_file ... one line per inode number, sorted numerically:
+#                 "<ino>|<path>|<path>..." with every path using that inode
+#
+# Returns the number of catalog lines written, or undef when the TOC is
+# missing or empty or when the catalog file could not be written completely.
+# A failure to write the inode file is reported but does not change the
+# return value.
+sub get_cat_internal
+{
+  my $objreg= shift;
+  my $store= shift;
+
+  my $toc= $objreg->load_single_toc ($store);
+  # print "toc: ", Dumper ($toc);
+
+  # Check for a value before dereferencing: @$toc on undef is a fatal error
+  # under strict refs, should load_single_toc ever return nothing.
+  unless (defined ($toc) && @$toc)
+  {
+    print "nothing found; exiting\n";
+    return undef;
+  }
+
+  # Lexical handles can not collide with bareword handles of the same name
+  # elsewhere in the script and are released on every exit path.
+  my $cat_fh;
+  unless (open ($cat_fh, '>:utf8', $cat_file))
+  {
+    print "can not write to '$cat_file': $!\n";
+    return undef;
+  }
+  print "writing new catalog '$cat_file'\n";
+
+  my $count= 0;
+  my %inodes;  # inode number => list of paths recorded with that inode
+  foreach my $t (@$toc)
+  {
+    my ($md5, $fs_size, $path, $ino)= map { $t->{$_} } qw(md5 fs_size path ino);
+    printf {$cat_fh} ("%s file %9ld %s\n", $md5, $fs_size, $path);
+    # print "t: ", Dumper ($t);
+
+    # Entries without an inode number can not be indexed by inode; leaving
+    # them out avoids a bogus line with an empty inode field.
+    push (@{$inodes{$ino}}, $path) if (defined ($ino) && $ino ne '');
+    $count++;
+  }
+
+  # Buffered write errors (e.g. a full disk) only surface when closing.
+  unless (close ($cat_fh))
+  {
+    print "can not finish writing '$cat_file': $!\n";
+    return undef;
+  }
+
+  my $ino_fh;
+  if (open ($ino_fh, '>:utf8', $ino_file))
+  {
+    print "writing new catalog '$ino_file'\n";
+    foreach my $ino (sort { $a <=> $b } keys %inodes)
+    {
+      print {$ino_fh} join ('|', $ino, @{$inodes{$ino}}), "\n";
+    }
+    close ($ino_fh) or print "can not finish writing '$ino_file': $!\n";
+  }
+  else
+  {
+    print "can not write to '$ino_file': $!\n";
+  }
+
+  return $count;
+}
+
 # callback function for TA::ObjReg::verify
 sub verify_toc_item
 {