2014年10月6日 星期一

Perl HTML::TokeParser example

#!/usr/bin/perl

#use strict;
use LWP::Simple;
use HTML::TokeParser;
use Encode;

#my $html   = get("https://www.iyp.com.tw/leisure/Hotels.html");
#my $html   = get("https://www.iyp.com.tw/showroom.php?cate_name_eng_lv1=leisure&cate_name_eng_lv3=Hotels&p=0");

use strict;
use warnings;

# Scrape the iyp.com.tw "Hotels" showroom listing (result pages p=0 .. p=60)
# and write the extracted fields to output.csv.
#
# Output format (unchanged from the original script):
#   * for every <a target="_blank"> start tag whose class is not "more-btn":
#         <first run of digits in href>#\t<title attribute>#\t
#   * for every <span> start tag whose title is the map-link label below:
#         <text after the last "=" in the "go-map" attribute>\n
# The newline is written only by the <span> branch, so where one output row
# ends depends on the order in which these tags appear in the page.
#
# NOTE(review): no encoding layer is set on the output handle and this file
# does not `use utf8`; whether the non-ASCII title literal matches, and how
# non-ASCII text is written, depends on whether get() hands back decoded
# characters or raw bytes in your LWP version -- confirm before relying on it.

my $output_path = 'output.csv';
my $last_page   = 60;
my $listing_url = 'https://www.iyp.com.tw/showroom.php'
                . '?cate_name_eng_lv1=leisure&cate_name_eng_lv3=Hotels&p=';

# Return the named attribute of a start-tag token, or '' when the tag does
# not carry it, so comparisons and prints never operate on undef.
sub attr_or_empty {
    my ($token, $name) = @_;
    my $value = $token->[2]{$name};
    return defined $value ? $value : '';
}

# Three-argument open with a lexical handle; fail loudly instead of silently
# printing to an unopened handle.
open my $out, '>', $output_path
    or die "Cannot open $output_path for writing: $!";

PAGE:
for my $page (0 .. $last_page) {
    my $url  = $listing_url . $page;
    my $html = get($url);

    # LWP::Simple::get returns undef when the fetch fails; skip that page
    # rather than handing undef to the parser.
    if (!defined $html) {
        warn "Failed to fetch $url; skipping this page\n";
        next PAGE;
    }

    my $stream = HTML::TokeParser->new(\$html);

    TOKEN:
    while (my $token = $stream->get_token) {
        # Only start tags ('S') carry the attribute hash in slot 2.
        next TOKEN if $token->[0] ne 'S';

        my $tag = $token->[1];

        if (   $tag eq 'a'
            && attr_or_empty($token, 'target') eq '_blank'
            && attr_or_empty($token, 'class')  ne 'more-btn')
        {
            # First run of digits in the link target (the original script
            # named this $tel -- presumably a phone number; confirm).
            my ($tel) = attr_or_empty($token, 'href') =~ m/(\d+)/;
            $tel = '' if !defined $tel;

            print {$out} $tel, "#\t", attr_or_empty($token, 'title'), "#\t";
        }
        elsif ($tag eq 'span' && attr_or_empty($token, 'title') eq "查看地圖") {
            # Greedy ".*=" runs to the LAST "=" after the "//", so the
            # capture is whatever follows the final "=" in the attribute.
            my ($addr) = attr_or_empty($token, 'go-map') =~ m{//.*=(.*)};
            $addr = '' if !defined $addr;

            print {$out} $addr, "\n";
        }
    }
}

# Buffered write errors only surface at close, so check it.
close $out
    or die "Cannot close $output_path: $!";

沒有留言:

張貼留言