From 63ef07497c41de552e0b474d7ff348d6d80b6a70 Mon Sep 17 00:00:00 2001
From: Gerhard Gonter <ggonter@gmail.com>
Date: Tue, 30 Apr 2019 19:37:53 +0200
Subject: [PATCH] tried to implement option --load-json; not quite working as intended

---
 modules/util/Util/Simple_CSV.pm | 103 ++++++++++++++++++++++++++++++--
 modules/util/csv.pl             |  44 ++++++++++++--
 2 files changed, 136 insertions(+), 11 deletions(-)

diff --git a/modules/util/Util/Simple_CSV.pm b/modules/util/Util/Simple_CSV.pm
index 48c1dd2..5edd1d9 100644
--- a/modules/util/Util/Simple_CSV.pm
+++ b/modules/util/Util/Simple_CSV.pm
@@ -164,6 +164,8 @@ sub load_csv_file
   my $fi_open;
   (*FI, $fi_open)= $obj->open_csv_file ($fnm);
 
+  return undef unless (defined ($fi_open)); # open_csv_file() returns undef when the file can not be read
+
   $obj->load_csv_file_headings (*FI) unless ($obj->{'no_headings'});
 
   if (@{$obj->{columns}} # NOTE: columns might have been defined using $obj->define_columns(...)
@@ -178,9 +180,11 @@ sub load_csv_file
     }
   }
 
-  $obj->load_csv_file_body (*FI) unless ($obj->{'no_body'});
+  my $row_count= ($obj->{'no_body'}) ? undef : $obj->load_csv_file_body (*FI); # no "my ... unless": a my() under a statement modifier is undefined behaviour
 
   close (FI) if ($fi_open);
+
+  $row_count;
 }
 
 sub open_csv_file
@@ -213,7 +217,7 @@ sub open_csv_file
   }
   else
   {
-    $obj->{'ERROR'}= "cant read $fnm";
+    print STDERR "ATTN: ", ($obj->{'ERROR'}= "can't read $fnm"), "\n";
     return undef;
   }
   $fi_open= 1;
@@ -372,6 +376,93 @@ print __LINE__, " row: ", join (', ', @row), "\n" if ($DEBUG > 1);
   $row_count;
 }
 
+sub load_csv_data # load rows passed as an array ref of hash refs (e.g. decoded JSON); returns the row count, undef on bad input
+{
+  my $obj= shift;
+  my $rows= shift; # array reference; each element is dereferenced as a hash (column name => value)
+
+  unless (defined ($rows) && ref($rows) eq 'ARRAY')
+  {
+    print STDERR "need an array reference\n";
+    return undef;
+  }
+
+  my ($no_hash, $no_array, $columns, $filter, $max_items)=
+    map { $obj->{$_} } qw(no_hash no_array columns filter max_items);
+
+  my $mk_columns= 0;
+  my @columns;
+  my %columns;
+  if (defined ($columns) && @$columns) # an empty column list counts as "no columns known yet"
+  {
+    @columns= @$columns;
+  }
+  else
+  {
+    print STDERR "NOTE: columns undefined; creating them\n";
+    $mk_columns= 1;
+  }
+
+  # NOTE: the code in load_csv_file_body() assumes that $row is an array reference!
+  # so we need to determine the columns first
+
+  if ($mk_columns)
+  {
+    PASS1: foreach my $row (@$rows)
+    {
+      foreach my $column (keys %$row)
+      {
+        $columns{$column}++;
+      }
+    }
+
+    @columns= sort keys %columns; # yeah, we do not know any better way...
+    $obj->{columns}= \@columns;
+  }
+
+  my $idx= 0;
+  %columns= map { $_ => $idx++ } @columns; # from here on %columns maps column name => column index
+
+  print __LINE__, " columns list: ", join (' ', @columns), "\n" if ($DEBUG > 1);
+  print __LINE__, " columns indexes: ", join (' ', %columns), "\n" if ($DEBUG > 1);
+
+  my @rows= ();
+  my @data= ();
+  my $row_count= 0;
+
+  PASS2: foreach my $row (@$rows)
+  {
+    my (@row, %row);
+    foreach my $column (keys %$row) # keys which are not known columns are silently dropped
+    {
+      if (exists ($columns{$column}))
+      {
+        $row[$columns{$column}]= $row->{$column};
+        $row{$column}= $row->{$column};
+      }
+    }
+    print "row: ", Dumper (\%row) if ($DEBUG > 1);
+
+    if (defined ($filter))
+    {
+      my $take_it= $filter->($row); # NOTE: the filter receives the original input hash, not the column-mapped @row
+      next PASS2 unless ($take_it);
+    }
+
+    push (@rows, \@row) unless ($no_array);
+    push (@data, \%row) unless ($no_hash);
+    $row_count++;
+
+    last PASS2 if (defined ($max_items) && $row_count >= $max_items); # was "last ROW": no such label in this sub
+  }
+
+  $obj->{rows}= \@rows unless ($no_array);
+  $obj->{data}= \@data unless ($no_hash);
+  $obj->{row_count}= $row_count;
+
+  $row_count;
+}
+
 sub strip_row
 {
   my $sep= shift;
@@ -711,9 +802,9 @@ sub merge
     }
     else
     {
-	push (@$c1, $c_name); # additional colum
-	my $c1_num= $#$c1; # new column number is highest index
-	$i1->{$c_name}= $map[$c_num]= $c1_num;
+      push (@$c1, $c_name); # additional column
+      my $c1_num= $#$c1; # new column number is highest index
+      $i1->{$c_name}= $map[$c_num]= $c1_num;
     }
 
     $c_num++;
@@ -736,7 +827,7 @@ sub merge
     my @row= ();
     for (my $column= 0; $column <= $o2_columns; $column++)
     {
-	$row[$map[$column]]= $row->[$column];
+      $row[$map[$column]]= $row->[$column];
     }
 
     # add new row to existing csv row
diff --git a/modules/util/csv.pl b/modules/util/csv.pl
index b488484..5165fd4 100755
--- a/modules/util/csv.pl
+++ b/modules/util/csv.pl
@@ -76,6 +76,7 @@ BEGIN { my @b= split ('/', $0); pop @b; unshift (@INC, join ('/', @b, 'perl'));
 
 use Util::Simple_CSV;
 use Util::Matrix;
+use Util::JSON;
 use JSON;
 use Data::Dumper;
 $Data::Dumper::Indent= 1;
@@ -100,9 +101,10 @@ my $search_string= undef;
 # this is used to select a certain value in a column
 # used for option --in <fieldname> <field_value>+
 my @search_strings;
-my $search_field_name=
+my $search_field_name;
 
-my $max_items= undef;
+my $max_items;
+my $json_file_to_load;
 
 sub set_utf8 { $UTF8= 1; binmode (STDIN, ':utf8'); binmode (STDOUT, ':utf8'); }
 sub usage { system ("perldoc '$0'"); exit (0); }
@@ -142,6 +144,7 @@ while (defined (my $arg= shift (@ARGV)))
       elsif ($opt eq 'TAB' || $opt eq 'tab') { $CSV_SEP= "\t"; }
       elsif ($opt eq 'UTF8' || $opt eq 'utf8') { set_utf8(); }
       elsif ($opt eq 'border' || $opt eq 'style') { Util::Matrix::set_border_style ($val); }
+      elsif ($opt eq 'load-json') { $json_file_to_load= $val || shift (@ARGV); }
       elsif ($opt eq 'AWK')
       { # hmm... maybe this should be done in a completely different way
         Util::Matrix::set_header_style ('none');
@@ -341,6 +344,8 @@ if (defined ($max_items))
 
 my $fnm= shift (@PARS);
 $csv->load_csv_file ($fnm);
+
+exit (0) unless defined ($csv);
 # print "csv: ", Dumper ($csv); exit (0);
 
 while (my $fnm= shift (@PARS))
@@ -348,10 +353,31 @@ while (my $fnm= shift (@PARS))
   $csv->merge_csv_file ($fnm);
 }
 
-exit (0) unless defined ($csv);
+if (defined ($json_file_to_load))
+{
+  my $data= Util::JSON::read_json_file ($json_file_to_load);
+
+  my $csv2= Util::Simple_CSV->clone ($csv); # same call as before, without indirect object syntax
+  $csv2->load_csv_data($data);
+
+  $csv2->save_csv_file ('filename' => '@json_data.tsv',
+    'separator' => ((defined ($CSV_OUT_SEP)) ? $CSV_OUT_SEP : $CSV_SEP));
+
+  if (defined ($csv->{columns}))
+  {
+    $csv->merge($csv2, $csv->{no_array}, $csv->{no_hash});
+  }
+  else
+  {
+    $csv= $csv2;
+    print STDERR "NOTE: using csv2\n";
+    print "csv2: ", Dumper ($csv2);
+    @columns= @{$csv->{columns}};
+  }
+}
 
 # print "cols=", Dumper ($csv->{'columns'}), "\n";
-@columns= @{$csv->{'columns'}} if (!@columns && exists ($csv->{'columns'}) && defined ($csv->{'columns'}));
+@columns= @{$csv->{columns}} if (!@columns && exists ($csv->{columns}) && defined ($csv->{columns}));
 
 if (@sort_columns)
 {
@@ -430,7 +456,7 @@ __END__
 
 =head1 Copyright
 
-Copyright (c) 2006..2018 Gerhard Gonter. All rights reserved. This
+Copyright (c) 2006..2019 Gerhard Gonter. All rights reserved. This
 is free software; you can redistribute it and/or modify it under
 the same terms as Perl itself.
 
@@ -466,3 +492,11 @@ Also: Add a method to tag (not just filter) rows via callback...
 
 Uh... that's getting complex!
 
+=head2 --load-json
+
+This is not yet working properly. Right now, it is called like this:
+
+  tsv --load-json nodes.json dummy.tsv
+
+dummy.tsv should not exist; the result is a TSV file named @json_data.tsv
+
-- 
GitLab