####################### fedora linux:####################
$ sudo yum install python3-pip
$ sudo pip3 install selenium
$ export DISPLAY=:99
$ Xvfb :99 -screen 0 1024x768x16 &
$ cat test.pl
#!/usr/bin/perl
use Selenium::Remote::Driver;
#use Selenium::Firefox;
my $driver = Selenium::Remote::Driver->new(
'browser_name' => 'firefox',
'remote_server_addr' => '127.0.0.1',
'port' => '4444',
'platform' => 'linux',
'auto_close' => true,
'debug' => false
);
#my $driver = Selenium::Firefox->new;
$driver->set_timeout("implicit",10000);
$driver->set_implicit_wait_timeout(10000);
#$driver->get("https://www.google.com");
$driver->get("https://www.fedex.com/zh-tw/shipping/surcharges.html");
print $driver->get_title();
#############################################################
#兩種寫法都可以
#############################################################
#my @buttons = $driver->find_elements("//button[3]");
#print "@buttons\n";
#$buttons[0]->click;
#############################################################
my $button = $driver->find_element("//button[3]");
print "$button\n";
$button->click;
#############################################################
$driver->refresh;
my $data = $driver->find_element("//div[2]/div/table/tbody/tr/td");
#print "$data\n";
print "\n". $data->get_text(). "\n";
my $data = $driver->find_element("//div[2]/div/table/tbody/tr/td[2]");
#print "$data\n";
print "\n". $data->get_text(). "\n";
my $data = $driver->find_element("//div[2]/div/table/tbody/tr/td[3]");
#print "$data\n";
print "\n".$data->get_text() ."\n";
#$driver->close;
This man is too old to remember everything in his brain. Right now, he needs a place to write down what he has studied.
標籤
4GL
(1)
人才發展
(10)
人物
(3)
太陽能
(4)
心理
(3)
心靈
(10)
文學
(31)
生活常識
(14)
光學
(1)
名句
(10)
即時通訊軟體
(2)
奇狐
(2)
爬蟲
(1)
音樂
(2)
產業
(5)
郭語錄
(3)
無聊
(3)
統計
(4)
新聞
(1)
經濟學
(1)
經營管理
(42)
解析度
(1)
遊戲
(5)
電學
(1)
網管
(10)
廣告
(1)
數學
(1)
機率
(1)
雜趣
(1)
證券
(4)
證券期貨
(1)
ABAP
(15)
AD
(1)
agentflow
(4)
AJAX
(1)
Android
(1)
AnyChart
(1)
Apache
(14)
BASIS
(4)
BDL
(1)
C#
(1)
Church
(1)
CIE
(1)
CO
(38)
Converter
(1)
cron
(1)
CSS
(23)
DMS
(1)
DVD
(1)
Eclipse
(1)
English
(1)
excel
(5)
Exchange
(4)
Failover
(1)
Fedora
(1)
FI
(57)
File Transfer
(1)
Firefox
(3)
FM
(2)
fourjs
(1)
Genero
(1)
gladiatus
(1)
google
(1)
Google Maps API
(2)
grep
(1)
Grub
(1)
HR
(2)
html
(23)
HTS
(8)
IE
(1)
IE 8
(1)
IIS
(1)
IMAP
(3)
Internet Explorer
(1)
java
(4)
JavaScript
(22)
jQuery
(6)
JSON
(1)
K3b
(1)
ldd
(1)
LED
(3)
Linux
(117)
Linux Mint
(4)
Load Balance
(1)
Microsoft
(2)
MIS
(2)
MM
(51)
MSSQL
(1)
MySQL
(27)
Network
(1)
NFS
(1)
Office
(1)
OpenSSL
(1)
Oracle
(126)
Outlook
(3)
PDF
(6)
Perl
(60)
PHP
(33)
PL/SQL
(1)
PL/SQL Developer
(1)
PM
(3)
Postfix
(2)
postfwd
(1)
PostgreSQL
(1)
PP
(50)
python
(5)
QM
(1)
Red Hat
(4)
Reporting Service
(28)
ruby
(11)
SAP
(234)
scp
(1)
SD
(16)
sed
(1)
Selenium
(3)
Selenium-WebDriver
(5)
shell
(5)
SQL
(4)
SQL server
(8)
sqlplus
(1)
SQuirreL SQL Client
(1)
SSH
(2)
SWOT
(3)
Symantec
(2)
T-SQL
(7)
Tera Term
(2)
tip
(1)
tiptop
(24)
Tomcat
(6)
Trouble Shooting
(1)
Tuning
(5)
Ubuntu
(37)
ufw
(1)
utf-8
(1)
VIM
(11)
Virtual Machine
(2)
VirtualBox
(1)
vnc
(3)
Web Service
(2)
wget
(1)
Windows
(19)
Windows
(1)
WM
(6)
Xvfb
(2)
youtube
(1)
yum
(2)
2024年8月2日 星期五
使用Selenium 做爬蟲 Perl
2024年8月1日 星期四
使用Selenium 做爬蟲 by python
sudo apt install python3-pip
sudo apt install python3-selenium
sudo apt install python3-bs4
#!/usr/bin/python3
# 載入需要的套件
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from selenium.webdriver.firefox.service import Service
import requests
import time
# 開啟瀏覽器視窗(Chrome)# 方法一:執行前需開啟chromedriver.exe且與執行檔在同一個工作目錄
#driver = webdriver.Chrome()
#service = Service(executable_path='/usr/local/bin/geckodriver')
service = Service('/usr/local/bin/geckodriver')
driver = webdriver.Firefox(service=service)
#driver = webdriver.Firefox()
# 方法二:或是直接指定exe檔案路徑
#driver = webdriver.Firefox("/usr/local/bin")
driver.implicitly_wait(3) #等10秒,讓網頁資料load進來
driver.get("https://www.fedex.com/zh-tw/shipping/surcharges.html") # 更改網址以前往不同網頁
#time.sleep(100)
#driver.find_elements(By.CLASS_NAME,"fxg-gdpr__accept-all-btn cc-aem-c-button cc-aem-c-button--responsive cc-aem-c-button--primary")[0].click()
#buttons=driver.find_elements(By.CLASS_NAME,"fxg-gdpr__accept-all-btn cc-aem-c-button cc-aem-c-button--responsive cc-aem-c-button--primary")
button=driver.find_elements(By.CSS_SELECTOR,".fxg-gdpr__accept-all-btn")[0] #使用Selenium IDE找出來 "accept all cookie" 的按鈕
print(button)
button.click()
driver.refresh() #接受cookie後,要將網頁refresh, 重load資料
#使用Selenium IDE找出來要抓資料
data=driver.find_elements(By.CSS_SELECTOR,".fuelsurcharg-dynamic-datalookup .cc-aem-c-table__tbody:nth-child(2) .cc-aem-c-table__tbody__td:nth-child(1)")[0]
print(data.get_attribute("innerText"))
data=driver.find_elements(By.CSS_SELECTOR,".fuelsurcharg-dynamic-datalookup .cc-aem-c-table__tbody:nth-child(2) .cc-aem-c-table__tbody__td:nth-child(2)")[0]
print(data.get_attribute("innerText"))
data=driver.find_elements(By.CSS_SELECTOR,".fuelsurcharg-dynamic-datalookup .cc-aem-c-table__tbody:nth-child(2) .cc-aem-c-table__tbody__td:nth-child(3)")[0]
print(data.get_attribute("innerText"))
#print (driver.title)
#html=driver.page_source
#print (html)
#soup = BeautifulSoup(driver.page_source, 'lxml')
#print(soup.prettify())
#
#with open('index.html', 'w', encoding='utf-8',) as file:
# file.write(soup.prettify())
driver.close() # 關閉瀏覽器視窗
sudo apt install python3-selenium
sudo apt install python3-bs4
#!/usr/bin/python3
# 載入需要的套件
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from selenium.webdriver.firefox.service import Service
import requests
import time
# 開啟瀏覽器視窗(Chrome)# 方法一:執行前需開啟chromedriver.exe且與執行檔在同一個工作目錄
#driver = webdriver.Chrome()
#service = Service(executable_path='/usr/local/bin/geckodriver')
service = Service('/usr/local/bin/geckodriver')
driver = webdriver.Firefox(service=service)
#driver = webdriver.Firefox()
# 方法二:或是直接指定exe檔案路徑
#driver = webdriver.Firefox("/usr/local/bin")
driver.implicitly_wait(3) #等10秒,讓網頁資料load進來
driver.get("https://www.fedex.com/zh-tw/shipping/surcharges.html") # 更改網址以前往不同網頁
#time.sleep(100)
#driver.find_elements(By.CLASS_NAME,"fxg-gdpr__accept-all-btn cc-aem-c-button cc-aem-c-button--responsive cc-aem-c-button--primary")[0].click()
#buttons=driver.find_elements(By.CLASS_NAME,"fxg-gdpr__accept-all-btn cc-aem-c-button cc-aem-c-button--responsive cc-aem-c-button--primary")
button=driver.find_elements(By.CSS_SELECTOR,".fxg-gdpr__accept-all-btn")[0] #使用Selenium IDE找出來 "accept all cookie" 的按鈕
print(button)
button.click()
driver.refresh() #接受cookie後,要將網頁refresh, 重load資料
#使用Selenium IDE找出來要抓資料
data=driver.find_elements(By.CSS_SELECTOR,".fuelsurcharg-dynamic-datalookup .cc-aem-c-table__tbody:nth-child(2) .cc-aem-c-table__tbody__td:nth-child(1)")[0]
print(data.get_attribute("innerText"))
data=driver.find_elements(By.CSS_SELECTOR,".fuelsurcharg-dynamic-datalookup .cc-aem-c-table__tbody:nth-child(2) .cc-aem-c-table__tbody__td:nth-child(2)")[0]
print(data.get_attribute("innerText"))
data=driver.find_elements(By.CSS_SELECTOR,".fuelsurcharg-dynamic-datalookup .cc-aem-c-table__tbody:nth-child(2) .cc-aem-c-table__tbody__td:nth-child(3)")[0]
print(data.get_attribute("innerText"))
#print (driver.title)
#html=driver.page_source
#print (html)
#soup = BeautifulSoup(driver.page_source, 'lxml')
#print(soup.prettify())
#
#with open('index.html', 'w', encoding='utf-8',) as file:
# file.write(soup.prettify())
driver.close() # 關閉瀏覽器視窗
訂閱:
文章 (Atom)