Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
forge001
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Gerhard Gonter
forge001
Commits
3c2444aa
Commit
3c2444aa
authored
11 years ago
by
Gerhard Gonter
Browse files
Options
Downloads
Patches
Plain Diff
redesign toc structure
parent
10355f7c
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
textarchive/lib/TA/ObjReg.pm
+207
-39
207 additions, 39 deletions
textarchive/lib/TA/ObjReg.pm
textarchive/vlib001.pl
+123
-16
123 additions, 16 deletions
textarchive/vlib001.pl
with
330 additions
and
55 deletions
textarchive/lib/TA/ObjReg.pm
+
207
−
39
View file @
3c2444aa
...
...
@@ -175,8 +175,9 @@ sub save
my
$new_reg
=
shift
;
my
$be
=
$obj
->
{'
cfg
'}
->
{'
backend
'};
print
"
save [
$new_reg
] be=[
$be
]
\n
";
print
main::
Dumper
(
$new_reg
);
# print "save [$new_reg] be=[$be]\n";
# print main::Dumper ($new_reg);
if
(
$be
eq
'
TA::Hasher
')
{
my
$id_str
=
$search
->
{
$obj
->
{'
key
'}};
...
...
@@ -210,8 +211,8 @@ sub save
}
my
$j
=
encode_json
(
$all_reg
);
print
"
fnm=[
$fnm
]
\n
";
print
"
generated json: [
$j
]
\n
";
#
print "fnm=[$fnm]\n";
#
print "generated json: [$j]\n";
open
(
J
,
'
>:utf8
',
$fnm
);
print
J
$j
;
close
(
J
);
}
elsif
(
$be
eq
'
MongoDB
')
...
...
@@ -288,13 +289,77 @@ sub load_toc_v1
$toc
;
}
sub
load_single_toc
{
my
$reg
=
shift
;
my
$store
=
shift
;
my
$cache
=
shift
;
my
$c
=
$reg
->
{'
proj_cat
'};
return
undef
unless
(
defined
(
$c
));
# not initialized?
my
$f
=
$c
.
'
/
'
.
$store
.
'
.toc.json
';
my
$t
=
TA::Util::
slurp_file
(
$f
,
'
json
');
if
(
$cache
)
{
$reg
->
{'
tocs
'}
->
{
$store
}
=
$t
;
}
$t
;
}
sub
load_multi_tocs
{
my
$reg
=
shift
;
my
$store
=
shift
;
my
$cache
=
shift
;
my
$c
=
$reg
->
{'
proj_cat
'};
return
undef
unless
(
defined
(
$c
));
# not initialized?
my
@stores
=
(
defined
(
$store
))
?
$store
:
$reg
->
stores
();
return
undef
unless
(
@stores
);
# return nothing if there is nothing...
my
$toc
=
{};
foreach
my
$s
(
@stores
)
{
my
$f
=
$c
.
'
/
'
.
$s
.
'
.toc.json
';
my
$t
=
TA::Util::
slurp_file
(
$f
,
'
json
');
if
(
$cache
)
{
$reg
->
{'
tocs
'}
->
{
$s
}
=
$t
;
}
foreach
my
$k
(
keys
%$t
)
{
my
$tk
=
$t
->
{
$k
};
# print "k=[$k] item: ", main::Dumper ($tk);
my
$r
;
unless
(
defined
(
$r
=
$toc
->
{
$k
}))
{
# not yet present in the toc
$toc
->
{
$k
}
=
$r
=
{
'
seq
'
=>
$t
->
{
$k
}
->
{'
seq
'}
};
}
# print "r: ", main::Dumper ($r);
push
(
@
{
$r
->
{'
stores
'}},
{
'
store
'
=>
$s
,
'
upd
'
=>
$tk
->
{'
upd
'}
});
}
}
$toc
;
}
sub
verify_toc
{
my
$reg
=
shift
;
my
$check_item
=
shift
;
# callback: update TOC item
my
$hdr
=
shift
||
[]
;
my
$reset
=
shift
;
my
@hdr1
=
qw(seq found store_count)
;
my
@hdr1
=
qw(key seq found store_count)
;
# my @hdr1= qw(seq store_count);
my
@stores
=
$reg
->
stores
();
# print "stores: ", join (', ', @stores), "\n"; exit;
...
...
@@ -302,15 +367,38 @@ sub verify_toc
#### my @extra_fields= (exists ($reg->{'toc_extra_fields'})) ? $reg->{'toc_extra_fields'} : ();
my
$c
=
$reg
->
{'
proj_cat
'};
# get list of key to sequence mapping
my
$fnm_key_seq
=
$c
.
'
/KEY-SEQ.toc.json
';
my
$KEY_SEQ
;
$KEY_SEQ
=
TA::Util::
slurp_file
(
$fnm_key_seq
,
'
json
')
unless
(
$reset
);
$KEY_SEQ
=
{}
unless
(
defined
$KEY_SEQ
);
# pick up current tocs to see if the sequence needs to be updated
my
%stores
;
foreach
my
$s
(
@stores
)
{
my
$f
=
$c
.
'
/
'
.
$s
.
'
.toc.json
';
my
$t
=
TA::Util::
slurp_file
(
$f
,
'
json
');
$t
=
{}
unless
(
defined
(
$t
));
# we need an empty toc if there is no toc yet
=begin comment
my $f= $c . '/' . $s . '.toc.json';
my $t;
$t= TA::Util::slurp_file ($f, 'json') unless ($reset);
if (defined ($t))
{
foreach my $e (@$t) { $e->{'found'}= 0; }
}
else
{
$t= []; # we need an empty toc if there is no toc yet
}
$stores{$s}= $t;
... dunno ... do we need the old toc?
=end comment
=cut
$stores
{
$s
}
=
[]
;
}
my
%items
;
...
...
@@ -329,45 +417,45 @@ sub verify_toc
print
"
proj_cat=[
$d
]
\n
";
find
(
\
&item_files
,
$d
);
my
$key_seq_updated
=
0
;
# print "items: ", main::Dumper (\%items);
foreach
my
$item
(
keys
%items
)
{
my
$p
=
$items
{
$item
};
my
$j
=
TA::Util::
slurp_file
(
$p
->
[
0
],
'
json
');
#
print "[$p->[0]] j: ", main::Dumper ($j);
my
@i_stores
=
keys
%
{
$j
->
{'
store
'}};
print
"
[
$p
->[0]] j:
",
main::
Dumper
(
$j
);
my
$key
=
$j
->
{'
key
'};
# print join (' ', $key, @i_stores), "\n";
my
$seq
=
$KEY_SEQ
->
{
$key
};
unless
(
defined
(
$seq
))
{
$seq
=
$KEY_SEQ
->
{
$key
}
=
$reg
->
next_seq
();
$key_seq_updated
++
;
}
# search for a key's sequence number in all known stores, not only
# in those that are *currently* used for this store
my
$seq
;
S
1:
foreach
my
$
store
(
@stores
)
my
(
@i_stores
,
%i_stores
)
;
E
1:
foreach
my
$
jj
(
@
{
$j
->
{'
entries
'}}
)
{
if
(
exists
(
$stores
{
$store
}
->
{
$key
}))
{
$seq
=
$stores
{
$store
}
->
{
$key
}
->
{'
seq
'};
last
S1
;
}
}
my
$store
=
$jj
->
{'
store
'};
S2:
foreach
my
$store
(
@i_stores
)
{
my
$ster
;
# store's toc entry record ;)
unless
(
defined
(
$ster
=
$stores
{
$store
}
->
{
$key
}))
{
$ster
=
$stores
{
$store
}
->
{
$key
}
=
{
'
seq
'
=>
$reg
->
next_seq
(),
'
upd
'
=>
time
(),
};
}
$ster
->
{'
found
'}
=
1
;
print
join
('
/
',
$key
,
$seq
,
$store
),
"
\n
";
my
$jj
=
$j
->
{'
store
'}
->
{
$store
};
$ster
->
{'
store_count
'}
=
scalar
@i_stores
;
$i_stores
{
$store
}
=
$jj
;
push
(
@i_stores
,
$store
);
my
$ster
=
{
'
key
'
=>
$key
,
'
seq
'
=>
$seq
,
'
found
'
=>
0
,
# flag that indicates if object is present (not used here?)
'
upd
'
=>
time
(),
};
&$check_item
(
$j
,
$jj
,
$ster
)
if
(
defined
(
$check_item
));
print
"
ster:
",
main::
Dumper
(
$ster
);
push
(
@
{
$stores
{
$store
}},
$ster
);
}
}
...
...
@@ -377,6 +465,7 @@ sub verify_toc
{
my
$ss
=
$stores
{
$s
};
# save TOC in json format
my
$f
=
$c
.
'
/
'
.
$s
.
'
.toc.json
';
print
"
saving toc to [
$f
]
\n
";
unless
(
open
(
TOC
,
'
>:utf8
',
$f
))
...
...
@@ -387,6 +476,7 @@ sub verify_toc
print
TOC
encode_json
(
$ss
),
"
\n
";
close
(
TOC
);
# save TOC in CSV format
$f
=
$c
.
'
/
'
.
$s
.
'
.toc.csv
';
print
"
saving toc to [
$f
]
\n
";
unless
(
open
(
TOC
,
'
>:utf8
',
$f
))
...
...
@@ -394,20 +484,98 @@ sub verify_toc
print
STDERR
"
cant save toc file '
$f
'
\n
";
next
;
}
print
TOC
join
('
;
',
'
key
',
@hdr1
,
@$hdr
),
"
\n
";
print
TOC
join
('
;
',
@hdr1
,
@$hdr
),
"
\n
";
foreach
my
$
k
(
keys
%
$ss
)
foreach
my
$
r
(
@
$ss
)
{
my
$r
=
$ss
->
{
$k
};
print
TOC
join
('
;
',
$k
,
map
{
$r
->
{
$_
}
}
@hdr1
,
@$hdr
),
"
\n
";
}
print
__LINE__
,
"
r:
",
main::
Dumper
(
$r
);
print
TOC
join
('
;
',
map
{
$r
->
{
$_
}
}
@hdr1
),
'
;
';
if
(
1
||
$r
->
{'
found
'})
{
print
TOC
join
('
;
',
map
{
$r
->
{
$_
}
}
@$hdr
);
}
else
{
print
TOC
join
('
;
',
map
{
''
}
@$hdr
);
}
print
TOC
"
\n
";
}
close
(
TOC
);
}
if
(
$key_seq_updated
)
{
print
"
saving toc to [
$fnm_key_seq
]
\n
";
unless
(
open
(
KEY_SEQ
,
'
>:utf8
',
$fnm_key_seq
))
{
print
STDERR
"
cant save toc file '
$fnm_key_seq
'
\n
";
next
;
}
print
KEY_SEQ
encode_json
(
$KEY_SEQ
),
"
\n
";
close
(
KEY_SEQ
);
}
# TODO: return something meaningful
}
sub
remove_from_store
{
my
$reg
=
shift
;
my
$store
=
shift
;
my
$drop_list
=
shift
;
# array ref containing entries: [ $md5, $path ]
# TODO: maybe a more universial format could be useful
my
$be
=
$reg
->
{'
cfg
'}
->
{'
backend
'};
if
(
$be
eq
'
TA::Hasher
')
{
my
%drop
;
foreach
my
$item
(
@$drop_list
)
{
my
(
$id_str
,
$path
)
=
@$item
;
my
(
$r
,
$fnm
)
=
$reg
->
ta_retrieve
(
$id_str
);
# print "id_str=[$id_str] fnm=[$fnm] r: ", main::Dumper ($r);
next
unless
(
defined
(
$r
));
# this item has possibly been deleted already
my
@new_entries
=
();
my
@dropped_entries
=
();
foreach
my
$entry
(
@
{
$r
->
{'
entries
'}})
{
if
(
$entry
->
{'
store
'}
eq
$store
&&
$entry
->
{'
path
'}
eq
$path
)
{
push
(
@dropped_entries
,
$entry
);
}
else
{
push
(
@new_entries
,
$entry
);
}
}
$drop
{
$id_str
}
=
\
@dropped_entries
;
if
(
@new_entries
)
{
$r
->
{'
entries
'}
=
\
@new_entries
;
my
$j
=
encode_json
(
$r
);
# print "generated json: [$j]\n";
open
(
J
,
'
>:utf8
',
$fnm
);
print
J
$j
;
close
(
J
);
}
else
{
# print "nothing left to be saved; deleting file [$fnm]\n";
unlink
(
$fnm
);
}
}
return
\
%drop
;
}
elsif
(
$be
eq
'
MongoDB
')
{
die
("
implement MongoDB remove
");
}
}
=head1 sequence number
=head2 $reg->next_seq()
...
...
@@ -531,7 +699,7 @@ sub ta_match
{
next
REG
unless
(
$reg
->
{
$k
}
eq
$search
->
{
$k
});
}
print
"
found match:
",
main::
Dumper
(
$reg
);
#
print "found match: ", main::Dumper ($reg);
return
(
$reg
,
$i
);
}
return
(
undef
,
0
);
...
...
This diff is collapsed.
Click to expand it.
textarchive/vlib001.pl
+
123
−
16
View file @
3c2444aa
...
...
@@ -32,6 +32,7 @@ $Data::Dumper::Indent= 1;
use
TA::
ObjReg
;
# use TA::Hasher;
# use TA::Util;
use
md5cat
;
my
@PAR
=
();
my
$project
;
...
...
@@ -41,6 +42,14 @@ my $DEBUG= 0;
my
$STOP
=
0
;
my
$op_mode
=
'
refresh
';
my
@hdr
=
qw(md5 path mtime fs_size ino)
;
# --- 8< --- [from chkmd5.pl] ---
# my $Dir_Pattern= '[0-9_a-zA-Z]*';
my
$Dir_Pattern
=
'
.
';
my
$DEFAULT_file_list
=
"
find
$Dir_Pattern
-xdev -type f -print|
";
# --- >8 ---
while
(
my
$arg
=
shift
(
@ARGV
))
{
if
(
$arg
eq
'
--
')
{
push
(
@PAR
,
@ARGV
);
@ARGV
=
();
}
...
...
@@ -96,15 +105,11 @@ if ($op_mode eq 'refresh')
}
print
"
store_cfg:
",
Dumper
(
$store_cfg
)
if
(
$DEBUG
);
if
(
$catalog
->
{'
format
'}
eq
'
md5cat
')
{
refresh_md5cat
(
$objreg
,
$store
);
}
if
(
$catalog
->
{'
format
'}
eq
'
md5cat
')
{
refresh_md5cat
(
$objreg
,
$store
);
}
elsif
(
$catalog
->
{'
format
'}
eq
'
internal
')
{
refresh_internal
(
$objreg
,
$store
);
}
}
elsif
(
$op_mode
eq
'
verify
')
{
my
@hdr
=
qw(path_count path mtime fs_size ino)
;
$objreg
->
verify_toc
(
\
&verify_toc_item
,
\
@hdr
);
}
elsif
(
$op_mode
eq
'
lookup
')
...
...
@@ -158,6 +163,108 @@ sub refresh_md5cat
printf
("
%6d files processed; %6d files updated
\n
",
$cnt_processed
,
$cnt_updated
);
}
sub
refresh_internal
{
my
$objreg
=
shift
;
my
$store
=
shift
;
my
%extra
=
@_
;
my
$cnt_processed
=
0
;
my
$cnt_updated
=
0
;
my
$cnt_dropped
=
0
;
$objreg
->
verify_toc
(
\
&verify_toc_item
,
\
@hdr
);
my
$toc
=
$objreg
->
load_single_toc
(
$store
);
# print "toc: ", Dumper ($toc);
my
$md5cat
=
new
md5cat
();
$md5cat
->
read_flist
(
$DEFAULT_file_list
);
# print "md5cat: ", Dumper ($md5cat);
# compare TOC and reference filelist
my
$fl
=
$md5cat
->
{'
FLIST
'};
my
%key
=
();
foreach
my
$x
(
@$toc
)
{
# print __LINE__, " k=[$k]\n";
my
$k
=
$x
->
{'
key
'};
my
$p
=
$x
->
{'
path
'};
$key
{
$k
}
->
{
$p
}
=
0
;
if
(
exists
(
$fl
->
{
$p
}))
{
$cnt_processed
++
;
my
$f
=
$fl
->
{
$p
};
my
$matches
=
1
;
AN:
foreach
my
$an
(
qw(mtime size ino)
)
{
unless
(
$f
->
{
$an
}
eq
$x
->
{
$an
})
{
# print "mismatch in [$an]! x: ", Dumper ($x); print "f: ", Dumper ($f);
$matches
=
0
;
last
AN
;
}
}
# print "matches: $p $matches\n";
if
(
$matches
)
{
$f
->
{'
state
'}
=
'
nocheck
';
$f
->
{'
md5
'}
=
$x
->
{'
md5
'};
}
}
else
{
# print "file missing: ", Dumper ($x);
$cnt_dropped
++
;
}
}
# my %paths= map { my $x= $toc->{$_}; $x->{'found'}= 0; $x->{'path'} => $x } keys %$toc;
# print "paths: ", Dumper (\%paths);
# print "fl: ", Dumper ($fl);
my
$new_files
=
$md5cat
->
check_new_files
();
# print "new_files: ", Dumper ($new_files);
$md5cat
->
integrate_md5_sums
(
$new_files
);
# $md5cat->save_catalog (); # TODO: if save_catalog flag is true!
# ZZZ
# update the Object registry with new items
foreach
my
$nf
(
@$new_files
)
{
my
(
$md5
,
$path
,
$size
,
$mtime
)
=
@$nf
;
# print "md5=[$md5] size=[$size] path=[$path]\n";
$cnt_processed
++
;
my
@upd
=
process_file
(
$md5
,
$path
,
$size
);
$cnt_updated
++
if
(
@upd
);
}
# get filelist again after reintegration to find keys which are no longer in the catalog
$fl
=
$md5cat
->
{'
FLIST
'};
# print __LINE__, " fl: ", Dumper ($fl);
foreach
my
$p
(
keys
%$fl
)
{
$key
{
$fl
->
{
$p
}
->
{'
md5
'}}
->
{
$p
}
=
1
;
}
# print __LINE__, " key: ", Dumper (\%key);
my
@drop
=
();
foreach
my
$k
(
keys
%key
)
{
my
$x1
=
$key
{
$k
};
foreach
my
$p
(
keys
%$x1
)
{
push
(
@drop
,
[
$k
,
$p
])
if
(
$x1
->
{
$p
}
==
0
);
}
}
# print __LINE__, " drop: ", Dumper (\@drop);
$objreg
->
remove_from_store
(
$store
,
\
@drop
);
printf
("
files: %6d processed; %6d updated; %6d (%d) dropped
\n
",
$cnt_processed
,
$cnt_updated
,
$cnt_dropped
,
scalar
(
@drop
));
}
sub
process_file
{
my
(
$md5
,
$path
,
$size
)
=
@_
;
...
...
@@ -179,7 +286,7 @@ sub process_file
my
$search
=
{
'
md5
'
=>
$md5
,
'
store
'
=>
$store
,
'
path
'
=>
$path
};
my
$reg
=
$objreg
->
lookup
(
$search
);
print
__LINE__
,
"
reg:
",
Dumper
(
$reg
);
#
print __LINE__, " reg: ", Dumper ($reg);
my
@upd
;
my
$ydata
;
# pointer to file catalog data within main datastructure
...
...
@@ -220,7 +327,7 @@ sub process_file
if
(
@upd
)
{
print
"
saving (
",
join
('
|
',
@upd
),
"
)
\n
";
#
print "saving (", join ('|', @upd), ")\n";
# print __LINE__, " reg: ", Dumper ($reg);
$objreg
->
save
(
$search
,
$reg
);
}
...
...
@@ -235,15 +342,15 @@ sub verify_toc_item
my
$jj
=
shift
;
# this is just the part refering to the store currently processed
my
$ster
=
shift
;
# TOC item to be updated
my
@paths
=
keys
%
{
$jj
->
{'
path
'}}
;
$ster
->
{'
path_count
'}
=
scalar
@paths
;
my
$p1
=
shift
(
@
path
s
)
;
my
$px1
=
$jj
->
{'
path
'}
->
{
$p1
};
# print __LINE__, " verify_toc_item: j=", Dumper ($j)
;
print
__LINE__
,
"
verify_toc_item: jj=
",
Dumper
(
$jj
)
;
#
my
@paths= keys %{$jj->{'
path
'}}
;
# $ster->{'path_count'}= scalar @paths; ... we don't see that this way anymore
$ster
->
{'
path
'}
=
$p1
;
$ster
->
{'
mtime
'}
=
$px1
->
{'
mtime
'};
$ster
->
{
'
fs_size
'}
=
$px1
->
{'
fs_size
'
};
$ster
->
{'
ino
'}
=
$px1
->
{'
ino
'};
foreach
my
$k
(
qw(md5 path mtime fs_size ino)
)
{
$ster
->
{
$k
}
=
$jj
->
{
$k
};
}
}
__END__
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment