From 34eb8afc67a01c9a26beb98387f4d1344af7ec1d Mon Sep 17 00:00:00 2001
From: Gerhard Gonter <ggonter@gmail.com>
Date: Wed, 4 Apr 2018 10:19:27 +0200
Subject: [PATCH] testing

---
 .gitignore     |  2 ++
 scraper/htb.pl | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 scraper/htb.pl

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..40ea050
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.swp
+@*
diff --git a/scraper/htb.pl b/scraper/htb.pl
new file mode 100755
index 0000000..b1649f3
--- /dev/null
+++ b/scraper/htb.pl
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+#
+# Smoke test for HTML::TreeBuilder: parse a small inline HTML document,
+# dump the resulting tree, then dump every <p> element found in it.
+
+use strict;
+use warnings;
+
+use FileHandle;
+use HTML::TreeBuilder;
+use Data::Dumper;
+$Data::Dumper::Indent= 1;
+
+# The fixture contains a non-ASCII character; emit it as strict UTF-8.
+binmode STDOUT, ':encoding(UTF-8)';
+
+# Double-quoted heredoc, so \x{e4} is interpolated to U+00E4.
+my $t1= <<"EOX";
+<html>
+  <head>
+    <title>bla</title>
+  </head>
+  <body>
+    <p ID="par1">Pargraph1</p>
+    <p ID="par2">Pargraph2 Universit\x{e4}t Wien</p>
+    <p ID="par3">Pargraph3</p>
+    <p ID="par4">Pargraph4</p>
+  </body>
+</html>
+EOX
+
+my $htb1= HTML::TreeBuilder->new_from_content($t1);
+
+print "htb1: ", Dumper ($htb1);
+
+# In list context look_down returns every matching element.
+my @x= $htb1->look_down (_tag => 'p');
+print "x: ", Dumper (\@x);
+
+# Release the tree explicitly: older HTML::Element versions hold circular
+# parent/child references that are not freed automatically.
+$htb1->delete;
-- 
GitLab