diff --git a/modules/util/Util/Linux_lvm.pm b/modules/util/Util/Linux_lvm.pm index 2d8df644594c07063cf47dd174b6b3fecc46b26e..1e5bb50a789671b4fce97dda2ad27642a332c450 100755 --- a/modules/util/Util/Linux_lvm.pm +++ b/modules/util/Util/Linux_lvm.pm @@ -28,7 +28,7 @@ my @paths= qw(/usr/sbin /sbin); my @caller= caller (); # print "caller=[",join (':', @caller),"]\n"; # __PACKAGE__->main if (!defined (@caller) || ($caller[0] eq 'main' && $caller[1] eq '-')); -__PACKAGE__->main if (!defined (@caller)); +__PACKAGE__->main unless (@caller); __PACKAGE__->test if (($caller[0] eq 'main' && $caller[1] eq '-')); sub new diff --git a/modules/util/Util/MongoDB.pm b/modules/util/Util/MongoDB.pm index 3a09572562f6bf2790bf160875d7d4f63ffa1a4b..065e6a073ce4f2806916ab6c690038ee0b6a883f 100644 --- a/modules/util/Util/MongoDB.pm +++ b/modules/util/Util/MongoDB.pm @@ -95,7 +95,7 @@ sub disconnect_mongodb Retrieve MongoDB parameters from $config and connect to it and open named collection, if specified. -$config is a hash which provides the fillowing keys: +$config is a hash which provides the following keys: * host * db_name: Name of the MongoDB * username diff --git a/modules/util/Util/Simple_CSV.pm b/modules/util/Util/Simple_CSV.pm index f86f58525745c6da92bc4390f1a662d022c0fe5b..48c1dd295956c473b076858f3fc6bb565a4b6852 100644 --- a/modules/util/Util/Simple_CSV.pm +++ b/modules/util/Util/Simple_CSV.pm @@ -164,10 +164,12 @@ sub load_csv_file my $fi_open; (*FI, $fi_open)= $obj->open_csv_file ($fnm); - unless ($obj->{'no_headings'}) - { - $obj->load_csv_file_headings (*FI); + $obj->load_csv_file_headings (*FI) unless ($obj->{'no_headings'}); + if (@{$obj->{columns}} # NOTE: columns might have been defined using $obj->define_columns(...) + && exists ($obj->{fidef}) + ) + { my ($fidef)= map { $obj->{$_} } qw(fidef); if (defined ($fidef)) { diff --git a/modules/util/Util/XML/File.pm b/modules/util/Util/XML/File.pm index f55e48ce9b5a4dc48059eced04b46e87199b0e15..5072094c0bbf4e6745165b544c8a71f48815b8b0 100644 --- a/modules/util/Util/XML/File.pm +++ b/modules/util/Util/XML/File.pm @@ -8,7 +8,7 @@ use strict; # use XML::Simple; use XML::LibXML::Simple; -my $DEBUG= 1; +my $DEBUG= 0; sub fetch { @@ -57,7 +57,7 @@ print __FILE__, ' ', __LINE__, " cache: mtime=$st[9] max_age=$max_age use=$use_c } $xml= $res->decoded_content; -print __LINE__, " get_aleph_metadata: xml=[$xml]\n" if ($DEBUG >= 0); +print __LINE__, " fetch: xml=[$xml]\n" if ($DEBUG > 0); if (defined ($cache_file)) { @@ -75,11 +75,28 @@ print __LINE__, " get_aleph_metadata: xml=[$xml]\n" if ($DEBUG >= 0); } $xmlref= XMLin ($xml, ForceContent => 1, ForceArray => 1, KeyAttr => [ ] ); -print __LINE__, " xmlref: ", main::Dumper ($xmlref) if ($DEBUG >= 0); +print __LINE__, " xmlref: ", main::Dumper ($xmlref) if ($DEBUG > 0); return ($xmlref, $xml); } +sub save +{ + my $fnm= shift; + my $xml= shift; + + unless (open (XML, '>:utf8', $fnm)) + { + print "ATTN: can't write to fnm=[$fnm]"; + return undef; + } + + print "saving xml to $fnm\n"; + print XML $xml; + close (XML); + + 1; +} 1; diff --git a/modules/util/Util/hexdump.pm b/modules/util/Util/hexdump.pm index e9df0fac102273a542f878ffeae5d6e64510ebb1..0cce957d53fe935196757fbd5a00c2264bc834b4 100644 --- a/modules/util/Util/hexdump.pm +++ b/modules/util/Util/hexdump.pm @@ -20,35 +20,35 @@ sub hexdump my ($i, $c, $v); my $run= 1; + my $dl= length ($data); DATA: while ($run) { my $char= ''; my $hex= ''; my $offx= sprintf ('%08X', $off); - for ($i= 0; $i < 16; $i++) + BYTE: for ($i= 0; $i < 16; $i++) { - $c= substr ($data, $off+$i, 1); + $hex .= ' ' if ($i == 8); - if ($i == 8) + if ($off+$i < $dl) { - $hex .= ' '; - } + $c= substr ($data, $off+$i, 1); - if ($c ne '') - { - $v= unpack ('C', $c); - $c= '.' if ($v < 0x20 || $v >= 0x7F); + if (defined ($c) && $c ne '') + { + $v= unpack ('C', $c); + $c= '.' if ($v < 0x20 || $v >= 0x7F); - $char .= $c; - $hex .= sprintf (' %02X', $v); - } - else - { - $char .= ' '; - $hex .= ' '; - $run= 0; + $char .= $c; + $hex .= sprintf (' %02X', $v); + next BYTE; + } } + + $char .= ' '; + $hex .= ' '; + $run= 0; } print FX "$offx $hex |$char|\n"; diff --git a/modules/util/Util/tsv.pm b/modules/util/Util/tsv.pm new file mode 100644 index 0000000000000000000000000000000000000000..d566ede15b37e50219e0675db70fa74cfb1b61f2 --- /dev/null +++ b/modules/util/Util/tsv.pm @@ -0,0 +1,81 @@ +package Util::tsv; + +use strict; + +sub new +{ + my $class= shift; + my $label= shift; + my $columns= shift; + my @par= @_; + + my $obj= { label => $label, cols => $columns, rows => [] }; + bless ($obj, $class); + $obj->set(@par); + + $obj; +} + +sub set +{ + my $self= shift; + my %par= @_; + + foreach my $par (keys %par) + { + $self->{$par}= $par{$par}; + } +} + +sub add_items +{ + my $self= shift; + my $list= shift; + + my @tsv_cols= @{$self->{cols}}; + my ($rows, $label)= map { $self->{$_} } qw(rows label); + +=begin comment + + print <<"EOX"; + +h2. $label + +EOX + print "|_. ", join (" |_. ", @tsv_cols), "|\n"; + +=end comment +=cut + + foreach my $rec (@$list) + { + my %rec= map { $_ => $rec->{$_} } @tsv_cols; + # print "| ", join (" | ", map { $rec{$_} } @tsv_cols), "|\n"; + push (@$rows, \%rec); + } + + print "$label count: ", scalar (@$list), "\n"; + # print "$label: ", main::Dumper ($list); +} + +sub save_tsv +{ + my $tsv_data= shift; + my $tsv_name= shift; + + # print "tsv_data: ", main::Dumper ($tsv_data); + if (open (TSV, '>:utf8', $tsv_name)) # TODO: otherwise complain + { + my @cols= @{$tsv_data->{cols}}; + print TSV join ("\t", @cols), "\n"; + foreach my $row (@{$tsv_data->{rows}}) + { + print TSV join ("\t", map { $row->{$_} } @cols), "\n"; + } + close (TSV); + } +} + +1; + + diff --git a/modules/util/csv.pl b/modules/util/csv.pl index 52201f2d04340dea6ecfd429981bb7a58a4344e5..b488484d4bf0fd6d006624b11ab99e6d5708b2b0 100755 --- a/modules/util/csv.pl +++ b/modules/util/csv.pl @@ -62,6 +62,12 @@ print list of column names (CSV header) only display this many items +=head2 searching + + --select <field>=<value> + --find <pattern> + --in <field> <value-1> .. <value-n> + =cut use strict; @@ -91,16 +97,21 @@ my $view= 'matrix'; # values: matrix, extended, header, json, dumper my $all= 0; # for extend view, sofar... my $find_pattern= undef; # this is used for a pattern match my $search_string= undef; # this is used to select a certain value in a column + +# used for option --in <fieldname> <field_value>+ +my @search_strings; +my $search_field_name= + my $max_items= undef; -sub set_utf8 { $UTF8= 1; binmode (STDOUT, ':utf8'); } +sub set_utf8 { $UTF8= 1; binmode (STDIN, ':utf8'); binmode (STDOUT, ':utf8'); } sub usage { system ("perldoc '$0'"); exit (0); } -my @PAR= (); +my @PARS= (); while (defined (my $arg= shift (@ARGV))) { - if ($arg eq '--') { push (@PAR, @ARGV); @ARGV=(); } - elsif ($arg eq '-') { push (@PAR, $arg); } + if ($arg eq '--') { push (@PARS, @ARGV); @ARGV=(); } + elsif ($arg eq '-') { push (@PARS, $arg); } elsif ($arg =~ /^--(.+)/) { my ($opt, $val)= split ('=', $1, 2); @@ -112,6 +123,11 @@ while (defined (my $arg= shift (@ARGV))) elsif ($opt eq 'search' || $opt eq 'select') { # TODO: allow multiple searches! $search_string= $val || shift (@ARGV); + # print __LINE__, " search_string=[$search_string]\n"; + } + elsif ($opt eq 'in') + { + $search_field_name= $val || shift (@ARGV); } elsif ($opt eq 'max') { $max_items= $val || shift (@ARGV); } elsif ($opt eq 'hdr') { $view= 'header'; } @@ -159,12 +175,19 @@ while (defined (my $arg= shift (@ARGV))) } else { - push (@PAR, $arg); + if (defined ($search_field_name)) + { + push (@search_strings, $arg); + } + else + { + push (@PARS, $arg); + } } } -unless (@PAR) +unless (@PARS) { =begin comment @@ -177,7 +200,7 @@ EOX =end comment =cut - push (@PAR, '-'); + push (@PARS, '-'); } my $csv= new Util::Simple_CSV ('separator' => $CSV_SEP, @@ -193,7 +216,7 @@ if (@set_columns) $csv->{'no_headings'}= 1; } -# print "find_pattern=[$find_pattern] search_string=[$search_string]\n"; +# print __LINE__, " find_pattern=[$find_pattern] search_string=[$search_string]\n"; if (defined ($find_pattern)) { @@ -244,14 +267,15 @@ print "procssing find_pattern=[$find_pattern]\n"; if (defined ($search_string)) { -# print "procssing search_string=[$search_string]\n"; my ($field_name, $field_value)= split ('=', $search_string, 2); +# print __LINE__, " procssing search_string=[$search_string] field_name=[$field_name] field_value=[$field_value]\n"; # the filter is dynamically generated since the field number is only # known after the column names are identified! sub fidef2 { my $obj= shift; +# print __LINE__, " in fidef2\n"; my $cols= $obj->{'columns'}; my $col= 0; @@ -275,17 +299,51 @@ EOX $csv->set ('fidef' => \&fidef2); } +elsif (@search_strings) +{ + # the filter is dynamically generated since the field number is only + # known after the column names are identified! + + sub fidef3 + { + my $obj= shift; +# print __LINE__, " in fidef2\n"; + + my $cols= $obj->{'columns'}; + my $col= 0; + my %cols= map { $_ => $col++ } @$cols; + + # print "cols: ", Dumper ($cols); + # print "cols: ", Dumper (\%cols); + my %fidef3= map { $_ => 1 } @search_strings; + print STDERR __LINE__, " fidef3: ", main::Dumper (\%fidef3); + + my $sub= <<"EOX"; + my \$fidef= sub + { + my \$row= shift; + return (exists (\$fidef3{\$row->[$cols{$search_field_name}]})); + }; +EOX + print STDERR "sub: [$sub]\n"; # if ($debug_level ... ); + my $res= eval ($sub); + # print "res=[$res]\n"; + $res; + }; + + $csv->set ('fidef' => \&fidef3); +} if (defined ($max_items)) { $csv->set ( max_items => $max_items ); } -my $fnm= shift (@PAR); +my $fnm= shift (@PARS); $csv->load_csv_file ($fnm); # print "csv: ", Dumper ($csv); exit (0); -while (my $fnm= shift (@PAR)) +while (my $fnm= shift (@PARS)) { $csv->merge_csv_file ($fnm); } @@ -340,7 +398,7 @@ elsif ($view eq 'dumper') } elsif ($view eq 'no') { - # dont show anyhing + # dont show anything } else { @@ -372,7 +430,7 @@ __END__ =head1 Copyright -Copyright (c) 2006..2013 Gerhard Gonter. All rights reserved. This +Copyright (c) 2006..2018 Gerhard Gonter. All rights reserved. This is free software; you can redistribute it and/or modify it under the same terms as Perl itself. @@ -406,5 +464,5 @@ Together with tagging, this would be a powerful feature. Also: Add a method to tag (not just filter) rows via callback... -Uh... that's getting compplex! +Uh... that's getting complex! diff --git a/modules/util/json2tsv.pl b/modules/util/json2tsv.pl new file mode 100755 index 0000000000000000000000000000000000000000..b6e383588b81c88f6284a003d62af84d444a98a4 --- /dev/null +++ b/modules/util/json2tsv.pl @@ -0,0 +1,170 @@ +#!/usr/bin/perl + +=head1 USAGE + + cat data.json-lines | ./json2tsv.pl + +Reads individual lines which *each* contain a separate json structure +and saves the data in TSV format. + +This is useful to save data from MongoDB find() statement using +cut'n'paste. + +=cut + +use strict; + +use Data::Dumper; +$Data::Dumper::Indent= 1; +use JSON; +use Util::tsv; + +my $tsv_fnm= 'data.tsv'; +my @column_names; + +my @PARS; +my $arg; +while (defined ($arg= shift (@ARGV))) +{ + if ($arg eq '--') { push (@PARS, @ARGV); @ARGV= (); } + elsif ($arg =~ /^--(.+)/) + { + my ($opt, $val)= split ('=', $1, 2); + + if ($opt eq 'help') { usage(); } + elsif ($opt eq 'out') { $tsv_fnm= $val || shift (@ARGV); } + elsif ($opt eq 'col') + { + $val= shift (@ARGV) unless ($val); + push (@column_names, split (',', $val)); + } + else { usage(); } + } + elsif ($arg =~ /^-(.+)/) + { + foreach my $opt (split ('', $1)) + { + if ($opt eq 'h') { usage(); exit (0); } + # elsif ($opt eq 'x') { $x_flag= 1; } + else { usage(); } + } + } + else + { + push (@PARS, $arg); + } +} + +my @rows; +my %cols; + +if (@PARS) +{ + foreach my $fnm (@PARS) + { + if (open (FI, '<:utf8', $fnm)) + { + parse_stream(*FI, \@rows, \%cols); + close (FI); + } # TODO: else complain + } +} +else +{ + parse_stream(*STDIN, \@rows, \%cols); +} + +# print "rows: ", Dumper (\@rows); +# print "cols: ", Dumper (\%cols); + +my $cols= (@column_names) ? \@column_names : [ sort keys %cols ]; +my $data= new Util::tsv('data', $cols, rows => \@rows); +# print "data: ", Dumper ($data); +# $data->{rows}= \@rows; print "data: ", Dumper ($data); +$data->save_tsv ($tsv_fnm); + +exit(0); + +sub usage +{ + system ('perldoc', $0); + exit; +} + +sub parse_stream +{ + local *F= shift; + my $rows= shift; + my $columns= shift; + + my $count= 0; + LINE: while (my $l= <F>) + { + chop; + next LINE unless ($l); + # print ">> l=[$l]\n"; + my $data; + + eval { $data= from_json($l); }; + if ($@) + { + # print "error: ", $@, "\n"; + next LINE; + } + # print "data: ", Dumper ($data); + $count++; + + my %data2; + flatten ($columns, '', \%data2, $data); + push (@$rows, \%data2); + } + + $count; +} + +=head2 flatten ($columns, $prefix, $row, $data) + +transcribe a hash structure $data into $row. If elements of $data are hashes themselves, they are also folded into $row, where their column names are prefixed with the name of that element. + +Example: + + { profile => "pha", key => "62538ff3b0caccafeece073070a394e0a1e9bc84", pid => "o:123456", job_data => { date => "Fri, 18 May 2018 00:01:01 +0200" } } + +is transcribed into + + { profile => "pha", key => "62538ff3b0caccafeece073070a394e0a1e9bc84", pid => "o:123456", job_data_date => "Fri, 18 May 2018 00:01:01 +0200" } + +=cut + +sub flatten +{ + my $columns= shift; + my $prefix= shift; + my $row= shift; + my $data= shift; + + foreach my $e (keys %$data) + { + my $c= $prefix . $e; + my $d= $data->{$e}; + + if (ref ($d) eq 'HASH') + { + # print "flattening $e\n"; + flatten ($columns, $c.'_', $row, $d); + } + else + { + $columns->{$c}++; + $row->{$c}= $d; + } + } +} + +__END__ + +=head1 TODO + + * option to specify output filename + * option to specify column names in their expected order +