diff --git a/lib/FDS.pm b/lib/FDS.pm
index 81fc9faa51dd045a3a4323ad4689d83fbd6a6504..5f441f6ca4f8b78ec0074ef05a7c35cbbc757be1 100644
--- a/lib/FDS.pm
+++ b/lib/FDS.pm
@@ -75,7 +75,7 @@ sub open
   my $fo_fnm= sprintf ($self->{out_pattern} . $self->{out_extension}, ++$self->{_count});
 
   local *FO_RECODED;
-  if ($self->{'compress'} == 1)
+  if ($self->{compress} == 1)
   {
     open (FO_RECODED, '|-', "gzip -c >'$fo_fnm'") or die "can't write to [$fo_fnm]";
   }
@@ -116,7 +116,10 @@ sub print
   if ($self->{compress} == 2)
   {
     # binmode (FO, ':raw');
-    $px= print FO compress($l);
+    utf8::encode($l);
+    my $compressed= compress($l);
+    # print __LINE__, " compressed=[$compressed]\n";
+    $px= print FO $compressed;
   }
   else
   {
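
Note: with compress == 2, FDS::print() now utf8-encodes each record before
handing it to Compress::Zlib::compress(), since compress() operates on
octets rather than on Perl character strings.  A minimal round-trip sketch
(the sample data is illustrative, not part of the patch):

  use Compress::Zlib;

  my $record= "wiki text with umlauts: \x{e4}\x{f6}\x{fc}";
  utf8::encode($record);                  # character string -> UTF-8 octets
  my $compressed= compress($record);      # what FDS::print() writes for compress == 2

  my $restored= uncompress($compressed);  # back to UTF-8 octets
  utf8::decode($restored);                # octets -> character string again
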
diff --git a/lib/Wiktionary/Text.pm b/lib/Wiktionary/Text.pm
new file mode 100644
index 0000000000000000000000000000000000000000..6b9f229a18c1f97231aab7f0c78ed307234f8b94
--- /dev/null
+++ b/lib/Wiktionary/Text.pm
@@ -0,0 +1,123 @@
+
+package Wiktionary::Text;
+
+use Data::Dumper;
+
+sub analyze_wiki_text
+{
+  my $lines= shift;
+
+  my $type= 'unknown';
+  my $language= 'unknown';
+
+  return ('empty', $language, []) unless (@$lines);
+
+  # print __LINE__, " analyze_wiki_text: ", Dumper($lines);
+
+  my @errors;
+
+  TEXT: foreach my $l (@$lines)
+  {
+    print __LINE__, " [$l]\n";
+
+    if ($l =~ m#^=#)
+    {
+      my @tokens= split(' ', $l);
+
+      my $hl_o= shift(@tokens);
+      my $hl_c= pop(@tokens);
+
+      if ($hl_o ne $hl_c)
+      {
+        push (@errors, ['heading mismatch', $l, "hl_o=[$hl_o] hl_c=[$hl_c]", \@tokens]);
+        next TEXT;
+      }
+
+      my $hl= length($hl_o);
+      print __LINE__, " heading level=[$hl] tokens: ", Dumper(\@tokens);
+
+      my ($words, $macro_infos)= analyze_heading_tokens(@tokens);
+      print __LINE__, " words: ", Dumper($words);
+      print __LINE__, " macro_infos: ", Dumper($macro_infos);
+    }
+  }
+
+  if (@errors)
+  {
+    print __LINE__, " errors: ", Dumper(\@errors);
+  }
+
+  return ($type, $language, \@errors);
+}
+
+sub analyze_heading_tokens
+{
+  my @tokens= @_;
+
+  my @words= ();
+  my @macro_infos= ();
+
+  while (my $token= shift(@tokens))
+  {
+    if ($token=~ m#^\(?\{\{(.+)}}\)?#)
+    {
+      my $macro= $1;
+      push (@macro_infos, process_macro($macro));
+    }
+    elsif ($token =~ m#^\(?\{\{(.+)#)
+    {
+      my $macro= $1;
+
+      T2: while (my $t2= shift(@tokens)) # find the end of the macro
+      {
+        if ($t2 =~ m#(.+)}}\)?,?$#) # there could be several macros, separated by ,
+        {
+          $macro .= ' ' . $1;
+          last T2;
+        }
+        else
+        {
+          $macro .= ' '. $t2;
+        }
+      }
+
+      print __LINE__, " macro=[$macro]\n";
+
+      push (@macro_infos, process_macro($macro));
+    }
+    else
+    {
+      push (@words, $token);
+    }
+  }
+
+  print __LINE__, " words: ", Dumper(\@words);
+  print __LINE__, " macro_infos: ", Dumper(\@macro_infos);
+
+  (\@words, \@macro_infos);
+}
+
+sub process_macro
+{
+  my $macro_string= shift;
+
+  my @elements= split (/\|/, $macro_string);
+  print __LINE__, " elements: ", Dumper(\@elements);
+
+  \@elements;
+}
+
+
+1;
+
+__END__
+
+=head1 NOTES
+
+=head2 heading level 2
+
+  format: == string ({{language_label|language}}) ==
+
+there can be several sections for the same title representing several languages
+
+
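
Note: analyze_wiki_text() walks the text of one page, checks that heading
markers balance (the opening and closing runs of = must match), and splits
the remaining heading tokens into plain words and {{...}} macro elements;
process_macro() then splits each macro on |.  $type and $language are still
returned as placeholders ('unknown') at this stage.  One caveat: the
while (my $token= shift(@tokens)) loops stop early when a token evaluates to
false, e.g. a literal 0 in a heading.  A minimal usage sketch (the sample
heading is made up, following the format documented in the NOTES section):

  use lib 'lib';
  use Wiktionary::Text;

  my @text=
  (
    '== Beispiel ({{Sprache|Deutsch}}) ==',
    'body text of the entry',
  );

  my ($type, $language, $errors)= Wiktionary::Text::analyze_wiki_text(\@text);
  print "type=[$type] language=[$language] errors=[", scalar @$errors, "]\n";
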
diff --git a/wkt1.pl b/wkt1.pl
index 9f9473b551ce9cb6ff8d924f76dba02dc343822c..20fc0b5de3c0d5303ee5ea1634e55303a31e3035 100755
--- a/wkt1.pl
+++ b/wkt1.pl
@@ -3,7 +3,6 @@
 use strict;
 
 use JSON;
-use FileHandle;
 
 use Util::JSON;
 use Util::Simple_CSV;
@@ -11,9 +10,16 @@ use Util::Simple_CSV;
 use Data::Dumper;
 $Data::Dumper::Indent= 1;
 
+use FileHandle;
+
+binmode( STDOUT, ':utf8' ); autoflush STDOUT 1;
+binmode( STDERR, ':utf8' ); autoflush STDERR 1;
+binmode( STDIN, ':utf8' );
+
 use lib 'lib';
 use wkutils;
 use Wiktionary::Utils;
+use Wiktionary::Text;
 
 use FDS;
 
@@ -34,8 +40,6 @@
 my $fo_compress= 2; # 1..compress output stream by piping into gzip; DO NOT USE
                     # 2..compress individual records using Compress::Zlib::compress()
 
-binmode (STDOUT, ':utf8');
-
 my @PARS= ();
 while (my $arg= shift (@ARGV))
 {
@@ -94,6 +98,8 @@ EOX
 
 analyze_wiktionary_dump ($fnm);
 
+my $ts_stop= localtime (time());
+
 exit(0);
 
 sub analyze_wiktionary_dump
@@ -112,6 +118,7 @@ sub analyze_wiktionary_dump
     print "mkdir $data_dir\n";
     mkdir ($data_dir);
   }
+
   unless (-d $out_dir)
   {
     print "mkdir $out_dir\n";
@@ -127,7 +134,7 @@ sub analyze_wiktionary_dump
   print FO_ITEMS join ($TSV_SEP, @cols1), "\n";
   autoflush FO_ITEMS 1;
 
-  my $fo_rec= new FDS('out_pattern' => "${out_dir}/wkt%05d");
+  my $fo_rec= new FDS('out_pattern' => "${out_dir}/wkt%05d", compress => $fo_compress);
   # $fo_rec->set (compress => 0, out_extension => '');
   my $fo_count= $fo_rec->open();
   my $fo_pos= 0;
@@ -140,7 +147,7 @@ sub analyze_wiktionary_dump
   my %frame;
   my @text;
   my $cnt_ATTN= 0;
-  my $debug_item= 0;
+  my @debug_item= ();
   LINE: while (1)
   {
     $pos= tell(FI);
@@ -203,6 +210,10 @@ sub analyze_wiktionary_dump
       {
         $state= 1;
       }
+      elsif ($l =~ m#^\s*<text xml:space="preserve" */>#) # NOTE: empty text
+      {
+        $state= 1;
+      }
       elsif ($l =~ m#^\s*<text xml:space="preserve">(.*)#) # TODO: check for other <text> tags
       {
         my $t= $1;
@@ -212,9 +223,10 @@ sub analyze_wiktionary_dump
       }
       elsif ($l =~ m#^\s*<text(.*)>#) # TODO: check for other <text> tags
       {
-        print "ATTN: strange text-tag: [$l] title=[$frame{title}]\n";
+        my $msg= "ATTN: strange text-tag: [$l] title=[$frame{title}]";
+        print $msg, "\n";
         $cnt_ATTN++;
-        $debug_item= 1;
+        push (@debug_item, $msg);
       }
       elsif ($l =~ m#^\s*<(id|sha1)>([^<]+)</.+>#)
       {
@@ -237,19 +249,37 @@ sub analyze_wiktionary_dump
 
     if ($flush)
     {
-      $fo_rec->print (join ("\n", @lines));
+      my $frame= join ("\n", @lines);
+      # utf8::encode($frame);
+      $fo_rec->print ($frame);
       $frame{fo_pos_end}= $fo_rec->tell();
 
-      if ($debug > 1 || $debug_item)
+      if ($debug > 1 || @debug_item)
      {
         print "="x72, "\n";
+
+        if (@debug_item)
+        {
+          print __LINE__, " debug_item reasons:\n";
+          foreach my $msg (@debug_item)
+          {
+            print __LINE__, " * reason=[", $msg, "]:\n";
+          }
+        }
+
         print __LINE__, " frame: ", Dumper(\%frame);
         print __LINE__, " text: ", Dumper(\@text);
         print __LINE__, " lines: ", Dumper (\@lines);
         print "="x72, "\n";
-        $debug_item= 0;
+        @debug_item= ();
+      }
+
+      # process wiki text
+      if ($seq eq 'b')
+      {
+        print __LINE__, " id=[", $frame{id}, "] title=[", $frame{title}, "]\n";
+        Wiktionary::Text::analyze_wiki_text(\@text);
       }
 
       print FO_ITEMS join ($TSV_SEP, map { $frame{$_} } @cols1), "\n";
@@ -277,3 +307,4 @@ sub analyze_wiktionary_dump
 
   1;
 }
+
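
Note: the new empty-element branch keeps pages whose body is just
<text xml:space="preserve" /> out of the ATTN case, and strange text-tags
are now pushed onto @debug_item so the flush block can print the reason next
to the frame dump.  A small illustration of which branch each <text> variant
hits (the sample lines are assumed, not taken from a real dump):

  foreach my $l ('  <text xml:space="preserve" />',
                 '  <text xml:space="preserve">== x ==',
                 '  <text bytes="123">')
  {
    if ($l =~ m#^\s*<text xml:space="preserve" */>#)
    { print "empty text\n"; }            # new branch: just sets $state= 1
    elsif ($l =~ m#^\s*<text xml:space="preserve">(.*)#)
    { print "text starts: [$1]\n"; }     # normal case: body begins after the tag
    elsif ($l =~ m#^\s*<text(.*)>#)
    { print "strange text-tag: [$1]\n"; } # counted and recorded in @debug_item
  }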