diff --git a/modules/util/Util/tsv.pm b/modules/util/Util/tsv.pm index 16f018f4bc3b933bea86bccdfa7b3aa3676da88c..d566ede15b37e50219e0675db70fa74cfb1b61f2 100644 --- a/modules/util/Util/tsv.pm +++ b/modules/util/Util/tsv.pm @@ -7,10 +7,10 @@ sub new my $class= shift; my $label= shift; my $columns= shift; - my @par= shift; + my @par= @_; my $obj= { label => $label, cols => $columns, rows => [] }; - bless $obj, $class; + bless ($obj, $class); $obj->set(@par); $obj; diff --git a/modules/util/json2tsv.pl b/modules/util/json2tsv.pl index 21c761aafbf09ef3448b8e586aeaef0ce6ba799c..e333ba99f2fa3736842eaa291c8cc3badf802300 100755 --- a/modules/util/json2tsv.pl +++ b/modules/util/json2tsv.pl @@ -10,7 +10,6 @@ and saves the data in TSV format. This is useful to save data from MongoDB find() statement using cut'n'paste. - =cut use strict; @@ -20,6 +19,9 @@ $Data::Dumper::Indent= 1; use JSON; use Util::tsv; +my $tsv_fnm= 'data.tsv'; +my @column_names; + my @PARS; my $arg; while (defined ($arg= shift (@ARGV))) @@ -28,7 +30,14 @@ while (defined ($arg= shift (@ARGV))) elsif ($arg =~ /^--(.+)/) { my ($opt, $val)= split ('=', $1, 2); - if ($opt eq 'help') { usage(); } + + if ($opt eq 'help') { usage(); } + elsif ($opt eq 'out') { $tsv_fnm= $val || shift (@ARGV); } + elsif ($opt eq 'col') + { + $val= shift (@ARGV) unless ($val); + push (@column_names, split (',', $val)); + } else { usage(); } } elsif ($arg =~ /^-(.+)/) @@ -36,7 +45,7 @@ while (defined ($arg= shift (@ARGV))) foreach my $opt (split ('', $1)) { if ($opt eq 'h') { usage(); exit (0); } - elsif ($opt eq 'x') { $x_flag= 1; } + # elsif ($opt eq 'x') { $x_flag= 1; } else { usage(); } } } @@ -46,18 +55,49 @@ while (defined ($arg= shift (@ARGV))) } } -my $x= parse_stream(*STDIN); -# print "x: ", Dumper ($x); -$x->save_tsv ('data.tsv'); +my @rows; +my %cols; + +if (@PARS) +{ + foreach my $fnm (@PARS) + { + if (open (FI, '<:utf8', $fnm)) + { + parse_stream(*FI, \@rows, \%cols); + close (FI); + } # TODO: else complain + } +} +else +{ + parse_stream(*STDIN, \@rows, \%cols); +} + +# print "rows: ", Dumper (\@rows); +# print "cols: ", Dumper (\%cols); + +my $cols= (@column_names) ? \@column_names : [ sort keys %cols ]; +my $data= new Util::tsv('data', $cols, rows => \@rows); +# print "data: ", Dumper ($data); +# $data->{rows}= \@rows; print "data: ", Dumper ($data); +$data->save_tsv ($tsv_fnm); exit(0); +sub usage +{ + system ('perldoc', $0); + exit; +} + sub parse_stream { local *F= shift; + my $rows= shift; + my $columns= shift; - my @rows; - my %columns; + my $count= 0; LINE: while (my $l= <F>) { chop; @@ -65,29 +105,20 @@ sub parse_stream # print ">> l=[$l]\n"; my $data; - eval { - $data= from_json($l); - }; + eval { $data= from_json($l); }; if ($@) { - print "error: ", $@, "\n"; + # print "error: ", $@, "\n"; next LINE; } # print "data: ", Dumper ($data); - push (@rows, $data); + $count++; + push (@$rows, $data); - foreach my $e (keys %$data) - { - $columns{$e}++; - } + foreach my $e (keys %$data) { $columns->{$e}++; } } - my $cols= [ sort keys %columns ]; - my $res= new Util::tsv('data', $cols); - # print "res: ", Dumper ($res); - $res->{rows}= \@rows; - - $res; + $count; } __END__