Review notes (text before the "diff --git" line is ignored by git-apply/patch):
  * get_file_list: the counter now starts at 0 ("my $cnt++" started it at 1,
    so the reported/returned number of filenames was one too high).
  * get_file_list: chomp instead of chop, so a last line without a trailing
    newline no longer loses the final character of the filename.
  * update_file: increment the file-scoped $cnt_files_processed instead of
    declaring a shadowing lexical ("my $cnt_files_processed++"), which left
    the "files processed" statistic stuck at 0.
  * Leading whitespace of context lines was reconstructed; if it does not match
    the target file, apply with "git apply --ignore-whitespace".  The post-image
    blob id on the index line predates these fixes.

diff --git a/parse_othes_index.pl b/parse_othes_index.pl
index b45e9efba00d0b47e9617880f30b80dfac862e83..3eb5bcc30e113c060d753b1f722fb863d7fde4f0 100755
--- a/parse_othes_index.pl
+++ b/parse_othes_index.pl
@@ -32,8 +32,10 @@ while (defined ($arg= shift (@ARGV)))
   elsif ($arg =~ /^--(.+)/)
   {
     my ($opt, $val)= split ('=', $1, 2);
-    if ($opt eq 'help') { usage(); }
+
+    if ($opt eq 'help') { usage(); }
     elsif ($opt eq 'force') { $force= (defined ($val)) ? $val : 1; }
+    elsif ($opt eq 'file') { my $f= $val || shift(@ARGV); get_file_list($f); }
     else { usage(); }
   }
   elsif ($arg =~ /^-(.+)/)
@@ -42,6 +44,7 @@ while (defined ($arg= shift (@ARGV)))
       {
         if ($opt eq 'h') { usage(); exit (0); }
         elsif ($opt eq 'F') { $force= 1; }
+        elsif ($opt eq 'L') { my $f= shift(@ARGV); get_file_list($f); }
         else { usage(); }
       }
     }
@@ -51,27 +54,83 @@ while (defined ($arg= shift (@ARGV)))
   }
 }
 
+my $t_start= time();
 my $cnf= Util::JSON::read_json_file ($config_fnm);
 my $ot2ut= IRMA::db::get_any_db($cnf, 'ot2ut_database');
 my $col_othes_policy= $ot2ut->get_collection('othes.policy');
 # print __LINE__, " ot2ut=[$ot2ut]\n";
 
+my $cnt_files_analyzed= 0;
+my $cnt_files_ignored= 0;
+my $cnt_files_processed= 0;
+my $cnt_cant_stat= 0;
+my $cnt_unchanged= 0;
+my $cnt_policy_updated= 0;
+my $cnt_policy_inserted= 0;
+my $cnt_policy_invalid= 0;
+
 PAR: foreach my $fnm (@PARS)
 {
   update_file($fnm);
 }
+
+my $t_finish= time();
+my $t_total= $t_finish-$t_start;
+
+print "statistics:\n";
+print "files analyzed: $cnt_files_analyzed\n";
+print "files ignored: $cnt_files_ignored\n";
+print "files processed: $cnt_files_processed\n";
+print "files can't stat: $cnt_cant_stat\n";
+print "files unchanged: $cnt_unchanged\n";
+print "policy records updated: $cnt_policy_updated\n";
+print "policy records inserted: $cnt_policy_inserted\n";
+print "policy records invalid: $cnt_policy_invalid\n";
+print "ts_start: ", scalar localtime($t_start), "\n";
+print "ts_finish: ", scalar localtime($t_finish), "\n";
+print "total processing time: $t_total seconds\n";
 
 exit(0);
 
+sub usage
+{
+  system ('perldoc', $0);
+}
+
+sub get_file_list
+{
+  my $fnm_list= shift or return undef;
+
+  unless (open (LST, '<:utf8', $fnm_list))
+  {
+    print STDERR "can't read file list from [$fnm_list]\n";
+    return undef;
+  }
+
+  my $cnt= 0; # number of filenames read; must start at 0 to report the true count
+  while (<LST>)
+  {
+    chomp; # strip only the line terminator; chop would truncate an unterminated last line
+    push (@PARS, $_);
+    $cnt++;
+  }
+  close (LST);
+  print __LINE__, " read $cnt filenames from $fnm_list\n";
+  $cnt;
+}
+
 sub update_file
 {
   my $fnm= shift;
 
+  $cnt_files_analyzed++;
+
   my @fnm= split('/', $fnm);
   my $last= pop (@fnm);
   unless ($last eq 'index.html')
   {
     print __LINE__, " ATTN: not an index.html: last=[$last]\n";
+    $cnt_files_ignored++;
     return undef;
   }
 
@@ -80,7 +139,12 @@ sub update_file
   # print __LINE__, " last4=[$last4] [",join(':', @last4), "]\n"; return undef;
 
   my @st= stat($fnm);
-  return undef unless (@st);
+  unless (@st)
+  {
+    print __LINE__, " can't read filename=[$fnm]\n";
+    $cnt_cant_stat++;
+    return undef;
+  }
   my $mtime= $st[9];
 
   my $search= { eprint_id => "$eprint_id" };
@@ -91,10 +155,12 @@ sub update_file
   {
     # print __LINE__, " already processed; skipping\n";
     # print __LINE__, " ", Dumper($policy_rec);
+    $cnt_unchanged++;
     return undef;
   }
 
   my $policy_info= process_index_file($fnm);
+  $cnt_files_processed++; # file-scoped counter; a "my" here would shadow it and keep the statistic at 0
 
   $policy_info->{mtime}= $mtime;
 
@@ -105,6 +171,7 @@ sub update_file
   {
     my $rc_upd= $col_othes_policy->update( { _id => $policy_rec->{_id} }, $policy_info );
     print __LINE__, " policy update: rc_upd=[$rc_upd] ", Dumper($rc_upd);
+    $cnt_policy_updated++;
   }
   else
   {
@@ -112,10 +179,12 @@ sub update_file
     {
       my $rc_ins= $col_othes_policy->insert( $policy_info );
       print __LINE__, " policy insert: rc_ins=[$rc_ins] ", Dumper($rc_ins);
+      $cnt_policy_inserted++;
     }
     else
     {
       print __LINE__, " WARNING: eprint_id missing\n";
+      $cnt_policy_invalid++;
     }
   }
 }