Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
forge001
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Gerhard Gonter
forge001
Commits
10355f7c
Commit
10355f7c
authored
11 years ago
by
Gerhard Gonter
Browse files
Options
Downloads
Patches
Plain Diff
restructured data model to be compatible with MongoDB which was
also added as a possible backend for storage.
parent
57a48fcb
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
textarchive/lib/TA/ObjReg.pm
+183
-33
183 additions, 33 deletions
textarchive/lib/TA/ObjReg.pm
textarchive/vlib001.pl
+13
-22
13 additions, 22 deletions
textarchive/vlib001.pl
with
196 additions
and
55 deletions
textarchive/lib/TA/ObjReg.pm
+
183
−
33
View file @
10355f7c
...
@@ -18,7 +18,8 @@ use JSON;
...
@@ -18,7 +18,8 @@ use JSON;
use
File::
Find
;
use
File::
Find
;
use
TA::
Util
;
use
TA::
Util
;
use
TA::
Hasher
;
my
%plugins_loaded
=
();
sub
new
sub
new
{
{
...
@@ -77,13 +78,35 @@ sub get_project
...
@@ -77,13 +78,35 @@ sub get_project
# print "proj_cfg: ", main::Dumper ($proj_cfg);
# print "proj_cfg: ", main::Dumper ($proj_cfg);
# TODO: check authorization (no need, if local, but for client-server, we need something
# TODO: check authorization (no need, if local, but for client-server, we need something
my
$be
=
$proj_cfg
->
{'
backend
'};
unless
(
exists
(
$plugins_loaded
{
$be
}))
{
if
(
$be
eq
'
TA::Hasher
')
{
require
TA::
Hasher
;
}
elsif
(
$be
eq
'
TA::UrxnBla
')
{
require
TA::
UrxnBla
;
}
elsif
(
$be
eq
'
MongoDB
')
{
require
MongoDB
;
}
else
{
print
"
ATTN: unknown backend '
$be
'
\n
";
return
undef
;
}
$plugins_loaded
{
$be
}
=
1
;
}
if
(
$be
eq
'
TA::Hasher
')
{
# initialize hasher
# initialize hasher
my
$base_dir
=
$obj
->
{'
proj_cfg_dir
'};
my
$ta
=
$proj_cfg
->
{'
TA::Hasher
'};
$obj
->
{'
proj_cat
'}
=
my
$proj_cat
=
join
('
/
',
$base_dir
,
'
cat
');
$ta
->
{'
name
'}
=
'
file
';
$obj
->
{'
hasher
'}
=
my
$hasher
=
new
TA::
Hasher
('
algorithm
'
=>
$proj_cfg
->
{'
algorithm
'},
'
pfx
'
=>
$proj_cat
,
'
name
'
=>
'
file
');
$ta
->
{'
pfx
'}
=
$obj
->
{'
proj_cat
'}
=
my
$proj_cat
=
join
('
/
',
$proj_cfg_dir
,
'
cat
');
$obj
->
{'
hasher
'}
=
my
$hasher
=
new
TA::
Hasher
(
%$ta
);
}
elsif
(
$be
eq
'
MongoDB
')
{
$obj
->
connect_MongoDB
(
$proj_cfg
);
}
# get sequence number
# get sequence number
$obj
->
{'
seq_file
'}
=
my
$fnm_seq
=
join
('
/
',
$
base
_dir
,
'
sequence.json
');
$obj
->
{'
seq_file
'}
=
my
$fnm_seq
=
join
('
/
',
$
proj_cfg
_dir
,
'
sequence.json
');
$obj
->
{'
seq
'}
=
my
$seq
=
TA::Util::
slurp_file
(
$fnm_seq
,
'
json
');
$obj
->
{'
seq
'}
=
my
$seq
=
TA::Util::
slurp_file
(
$fnm_seq
,
'
json
');
# print "seq: ", main::Dumper ($seq);
# print "seq: ", main::Dumper ($seq);
unless
(
defined
(
$seq
))
unless
(
defined
(
$seq
))
...
@@ -121,20 +144,26 @@ returns that keys value, if present, otherwise, undef.
...
@@ -121,20 +144,26 @@ returns that keys value, if present, otherwise, undef.
sub
lookup
sub
lookup
{
{
my
$obj
=
shift
;
my
$obj
=
shift
;
my
$
id_str
=
shift
;
my
$
search
=
shift
;
# print "lookup [$id_str]\n";
my
$be
=
$obj
->
{'
cfg
'}
->
{'
backend
'};
my
@r
=
$obj
->
{'
hasher
'}
->
check_file
(
$id_str
,
0
);
print
"
lookup [
$search
] be=[
$be
]
\n
";
# print "id_str=[$id_str] r=", main::Dumper (\@r);
print
main::
Dumper
(
$search
);
my
(
$rc
,
$path
)
=
@r
;
my
$fnm
=
$path
.
'
/
'
.
$id_str
.
'
.json
';
# print "description: [$fnm]\n";
my
@st
=
stat
(
$fnm
);
my
$reg
;
return
undef
unless
(
@st
);
if
(
$be
eq
'
TA::Hasher
')
{
my
$reg
=
TA::Util::
slurp_file
(
$fnm
,
'
json
');
my
$id_str
=
$search
->
{
$obj
->
{'
key
'}};
my
(
$all_reg
,
$fnm
)
=
$obj
->
ta_retrieve
(
$id_str
,
0
);
print
"
fnm=[
$fnm
] all_reg:
",
main::
Dumper
(
$all_reg
);
return
undef
unless
(
defined
(
$all_reg
));
(
$reg
)
=
ta_match
(
$all_reg
,
$search
);
}
elsif
(
$be
eq
'
MongoDB
')
{
$reg
=
$obj
->
{'
_col
'}
->
find_one
(
$search
);
}
# print "reg: ", main::Dumper ($reg);
return
$reg
;
return
$reg
;
}
}
...
@@ -142,26 +171,55 @@ sub lookup
...
@@ -142,26 +171,55 @@ sub lookup
sub
save
sub
save
{
{
my
$obj
=
shift
;
my
$obj
=
shift
;
my
$
id_str
=
shift
;
my
$
search
=
shift
;
my
$new_reg
=
shift
;
my
$new_reg
=
shift
;
print
"
save [
$id_str
]
\n
";
my
$be
=
$obj
->
{'
cfg
'}
->
{'
backend
'};
my
@r
=
$obj
->
{'
hasher
'}
->
check_file
(
$id_str
,
1
);
print
"
save [
$new_reg
] be=[
$be
]
\n
";
# print "id_str=[$id_str] r=", main::Dumper (\@r);
print
main::
Dumper
(
$new_reg
);
my
(
$rc
,
$path
)
=
@r
;
if
(
$be
eq
'
TA::Hasher
')
{
my
$id_str
=
$search
->
{
$obj
->
{'
key
'}};
my
(
$all_reg
,
$fnm
)
=
$obj
->
ta_retrieve
(
$id_str
,
1
);
my
$fnm
=
$path
.
'
/
'
.
$id_str
.
'
.json
';
=begin comment
# print "description: [$fnm]\n";
my @st= stat ($fnm);
my @st= stat ($fnm);
unless (@st)
unless (@st)
{ # TODO: increment sequence and toc
{ # TODO: increment sequence and toc
}
}
my
$j
=
encode_json
(
$new_reg
);
=end comment
# print "generated json: [$j]\n";
=cut
if
(
defined
(
$all_reg
))
{
my
(
$reg
,
$idx
)
=
ta_match
(
$all_reg
,
$search
);
if
(
defined
(
$reg
))
{
$all_reg
->
{'
entries
'}
->
[
$idx
]
=
$new_reg
;
}
else
{
push
(
@
{
$all_reg
->
{'
entries
'}},
$new_reg
);
}
}
else
{
$all_reg
=
{
'
key
'
=>
$id_str
,
'
entries
'
=>
[
$new_reg
]
}
}
my
$j
=
encode_json
(
$all_reg
);
print
"
fnm=[
$fnm
]
\n
";
print
"
generated json: [
$j
]
\n
";
open
(
J
,
'
>:utf8
',
$fnm
);
print
J
$j
;
close
(
J
);
open
(
J
,
'
>:utf8
',
$fnm
);
print
J
$j
;
close
(
J
);
}
}
elsif
(
$be
eq
'
MongoDB
')
{
print
"
new_reg:
",
main::
Dumper
(
$new_reg
);
$obj
->
{'
_col
'}
->
insert
(
$new_reg
);
}
}
=head1 TOC: Table of Contents
=head1 TOC: Table of Contents
...
@@ -385,7 +443,99 @@ sub next_seq
...
@@ -385,7 +443,99 @@ sub next_seq
$seq
->
{'
seq
'};
$seq
->
{'
seq
'};
}
}
# =head1 INTERNAL FUNCTIONS
=head1 INTERNAL METHODS
=head2 $mongo_collection= $obj->connect_MongoDB ($config);
Connect to MongoDB with connection parameters in hash_ref $config and
returns the MongoDB collection object.
$config needs the following attribues: host, db, user, pass, collection
=cut
sub
connect_MongoDB
{
my
$obj
=
shift
;
my
$cfg
=
shift
;
my
$cmm
=
$cfg
->
{'
MongoDB
'};
print
"
cmm:
",
main::
Dumper
(
$cmm
);
my
$col
;
eval
{
my
$connection
=
MongoDB::
Connection
->
new
(
host
=>
$cmm
->
{'
host
'});
$connection
->
authenticate
(
$cmm
->
{'
db
'},
$cmm
->
{'
user
'},
$cmm
->
{'
pass
'});
my
$db
=
$connection
->
get_database
(
$cmm
->
{'
db
'});
$col
=
$db
->
get_collection
(
$cmm
->
{'
collection
'});
print
"
col: [
$col
]
\n
";
};
if
(
$@
)
{
print
"
ATTN: can't connect to MongoDB
",
(
join
('
/
',
map
{
$cmm
->
{
$_
}
}
qw(host user collection)
)),
"
\n
";
return
undef
;
}
return
$obj
->
{'
_col
'}
=
$col
;
}
=head2 ($data, $fnm)= $objreg->ta_retrieve ($key, $create)
Retrieve and return data referenced by $key and returns path name of
that file. If $create is true, the path leading to that file is created,
when it is not already present.
=cut
sub
ta_retrieve
{
my
$obj
=
shift
;
my
$id_str
=
shift
;
my
$create
=
shift
;
my
@r
=
$obj
->
{'
hasher
'}
->
check_file
(
$id_str
,
$create
);
# print "id_str=[$id_str] r=", main::Dumper (\@r);
my
(
$rc
,
$path
)
=
@r
;
my
$fnm
=
$path
.
'
/
'
.
$id_str
.
'
.json
';
# print "description: [$fnm]\n";
my
@st
=
stat
(
$fnm
);
return
(
undef
,
$fnm
)
unless
(
@st
);
my
$all_reg
=
TA::Util::
slurp_file
(
$fnm
,
'
json
');
return
(
$all_reg
,
$fnm
);
}
=head1 INTERNAL FUNCTIONS
=head2 ($entry, $index)= ta_match ($data, $search)
Select first $entry from $data that matches hash ref $search.
=cut
sub
ta_match
{
my
$all_reg
=
shift
;
my
$search
=
shift
;
my
@k
=
keys
$search
;
my
@e
=
@
{
$all_reg
->
{'
entries
'}};
REG:
for
(
my
$i
=
0
;
$i
<=
$#e
;
$i
++
)
{
my
$reg
=
$e
[
$i
];
foreach
my
$k
(
@k
)
{
next
REG
unless
(
$reg
->
{
$k
}
eq
$search
->
{
$k
});
}
print
"
found match:
",
main::
Dumper
(
$reg
);
return
(
$reg
,
$i
);
}
return
(
undef
,
0
);
}
1
;
1
;
__END__
__END__
...
...
This diff is collapsed.
Click to expand it.
textarchive/vlib001.pl
+
13
−
22
View file @
10355f7c
...
@@ -77,7 +77,7 @@ unless (defined ($project))
...
@@ -77,7 +77,7 @@ unless (defined ($project))
}
}
# &usage ('no store specified') unless (defined ($store));
# &usage ('no store specified') unless (defined ($store));
my
$objreg
=
new
TA::
ObjReg
('
project
'
=>
$project
,
'
store
'
=>
$store
);
my
$objreg
=
new
TA::
ObjReg
('
project
'
=>
$project
,
'
store
'
=>
$store
,
'
key
'
=>
'
md5
'
);
&usage
('
no config found
')
unless
(
defined
(
$objreg
));
&usage
('
no config found
')
unless
(
defined
(
$objreg
));
print
"
objreg:
",
Dumper
(
$objreg
)
if
(
$DEBUG
||
$STOP
);
print
"
objreg:
",
Dumper
(
$objreg
)
if
(
$DEBUG
||
$STOP
);
exit
if
(
$STOP
);
exit
if
(
$STOP
);
...
@@ -171,48 +171,39 @@ sub process_file
...
@@ -171,48 +171,39 @@ sub process_file
my
$xdata
=
my
$xdata
=
{
{
# 'key' => $md5, 'key_type' => 'md5',
'
store
'
=>
$store
,
'
c_size
'
=>
$size
,
'
path
'
=>
$path
,
'
md5
'
=>
$md5
,
'
c_size
'
=>
$size
,
'
path
'
=>
$path
,
'
md5
'
=>
$md5
,
'
mtime
'
=>
$st
[
9
],
'
fs_size
'
=>
$st
[
7
],
'
ino
'
=>
$st
[
1
]
'
mtime
'
=>
$st
[
9
],
'
fs_size
'
=>
$st
[
7
],
'
ino
'
=>
$st
[
1
]
};
};
my
$reg
=
$objreg
->
lookup
(
$md5
);
my
$search
=
{
'
md5
'
=>
$md5
,
'
store
'
=>
$store
,
'
path
'
=>
$path
};
my
$reg
=
$objreg
->
lookup
(
$search
);
print
__LINE__
,
"
reg:
",
Dumper
(
$reg
);
my
@upd
;
my
@upd
;
my
$ydata
;
# pointer to file catalog data within main datastructure
my
$ydata
;
# pointer to file catalog data within main datastructure
if
(
defined
(
$reg
))
if
(
defined
(
$reg
))
{
# we know something about this key value but not in respect to the repository at hand
{
# we know something about this key value but not in respect to the repository at hand
# print "json read: ", main::Dumper ($reg);
# print "json read: ", main::Dumper ($reg);
my
$sb
;
if
(
defined
(
$sb
=
$reg
->
{'
store
'}
->
{
$store
})
&&
exists
(
$sb
->
{'
path
'})
&&
defined
(
$ydata
=
$sb
->
{'
path
'}
->
{
$path
})
# we need to keep track of the path as well otherwise we can't handly duplicates in the same store
&&
$st
[
7
]
==
$ydata
->
{'
fs_size
'}
&&
$st
[
9
]
==
$ydata
->
{'
mtime
'}
)
{
# compare stored and current information and update if necessary
foreach
my
$an
(
keys
%$xdata
)
foreach
my
$an
(
keys
%$xdata
)
{
{
unless
(
$
ydata
->
{
$an
}
eq
$xdata
->
{
$an
})
unless
(
$
reg
->
{
$an
}
eq
$xdata
->
{
$an
})
{
{
$
ydata
->
{
$an
}
=
$xdata
->
{
$an
};
$
reg
->
{
$an
}
=
$xdata
->
{
$an
};
push
(
@upd
,
$an
);
push
(
@upd
,
$an
);
}
}
}
}
}
}
else
else
{
$reg
->
{'
store
'}
->
{
$store
}
->
{'
path
'}
->
{
$path
}
=
$ydata
=
$xdata
;
push
(
@upd
,
'
store upd
');
}
}
else
{
# this key is new, so we simply place what we know in the newly created registry item
{
# this key is new, so we simply place what we know in the newly created registry item
$reg
=
{
'
key
'
=>
$md5
,
'
key_type
'
=>
'
md5
',
'
store
'
=>
{
$store
=>
{
'
path
'
=>
{
$path
=>
$ydata
=
$xdata
}
}
}
};
# $reg= { 'key' => $md5, 'key_type' => 'md5', 'store' => { $store => $ydata= $xdata } };
$reg
=
$xdata
;
push
(
@upd
,
'
new key
');
push
(
@upd
,
'
new key
');
}
}
# fill in some more information about that file
# fill in some more information about that file
if
(
!
exists
(
$
ydata
->
{'
fileinfo
'})
||
$refresh_fileinfo
)
if
(
!
exists
(
$
reg
->
{'
fileinfo
'})
||
$refresh_fileinfo
)
{
{
my
$xpath
=
$path
;
my
$xpath
=
$path
;
$xpath
=~
s#'#'\\''#g
;
$xpath
=~
s#'#'\\''#g
;
...
@@ -220,7 +211,7 @@ sub process_file
...
@@ -220,7 +211,7 @@ sub process_file
chop
(
$res
);
chop
(
$res
);
my
(
$xpath
,
$fileinfo
)
=
split
(
/: */
,
$res
,
2
);
my
(
$xpath
,
$fileinfo
)
=
split
(
/: */
,
$res
,
2
);
$
ydata
->
{'
fileinfo
'}
=
$fileinfo
;
$
reg
->
{'
fileinfo
'}
=
$fileinfo
;
push
(
@upd
,
'
fileinfo updated
');
push
(
@upd
,
'
fileinfo updated
');
}
}
...
@@ -231,7 +222,7 @@ sub process_file
...
@@ -231,7 +222,7 @@ sub process_file
{
{
print
"
saving (
",
join
('
|
',
@upd
),
"
)
\n
";
print
"
saving (
",
join
('
|
',
@upd
),
"
)
\n
";
# print __LINE__, " reg: ", Dumper ($reg);
# print __LINE__, " reg: ", Dumper ($reg);
$objreg
->
save
(
$
md5
,
$reg
);
$objreg
->
save
(
$
search
,
$reg
);
}
}
(
wantarray
)
?
@upd
:
\
@upd
;
(
wantarray
)
?
@upd
:
\
@upd
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment