今天我想介紹如何在 Python（Selenium）中使用一些 JavaScript 語法。
用按鍵（Keys）將網頁往下捲動
我們先宣告（匯入）一些 selenium 模組
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# Location of the ChromeDriver executable on the local machine.
path = 'D:\\chromedriver.exe'
# Start a Chrome session driven by that executable.
driver = webdriver.Chrome(path)
# Open the page that the scrolling examples below operate on.
driver.get("https://twitter.com/elonmusk?lang=en")
接著我們就可以把捲動的動作放進迴圈（scroll loop）
# Give the page a moment to finish its initial load before sending keys.
time.sleep(3)
# Key presses are sent to <body> so that the page itself scrolls.
# NOTE(review): find_element_by_tag_name was removed in Selenium 4.3; on
# newer versions use driver.find_element(By.TAG_NAME, 'body') -- confirm
# which Selenium version this tutorial targets.
element = driver.find_element_by_tag_name('body')
# Press PAGE_DOWN every 3 seconds. This loop has no exit condition: it keeps
# scrolling until the script is interrupted (e.g. Ctrl+C).
while True:
    element.send_keys(Keys.PAGE_DOWN)
    time.sleep(3)
Scrolling down（用 JavaScript）Example 1
# Scroll to the bottom repeatedly until the document height stops growing,
# i.e. two consecutive measurements are equal.
previous_height = driver.execute_script('return document.body.scrollHeight')
while True:
    driver.execute_script('window.scrollTo(0,document.body.scrollHeight);')
    # Wait for the page to load and render the next batch of content.
    time.sleep(3)
    new_height = driver.execute_script('return document.body.scrollHeight')
    if new_height == previous_height:
        break
    # Remember the latest height; without this update the comparison is always
    # against the initial height and the loop never ends once the page grows.
    previous_height = new_height
Scrolling down（用 JavaScript）Example 2
# NOTE(review): this snippet looks like an excerpt from a class method --
# `self.browser` and `scroll_box` are not defined anywhere in this file.
# Presumably `self.browser` is a WebDriver and `scroll_box` is the scrollable
# element; confirm before running it stand-alone.

# JavaScript that scrolls the element passed as arguments[0] to its bottom
# and returns the element's scroll height afterwards.
SCROLL_BOX_TO_BOTTOM_JS = """
arguments[0].scrollTo(0, arguments[0].scrollHeight);
return arguments[0].scrollHeight; """

# Start with two different values so the loop body runs at least once.
last_ht, ht = 0, 1
# Keep scrolling until the returned height stops changing.
while last_ht != ht:
    last_ht = ht
    # Only the `time` module is imported in this file, so call time.sleep();
    # a bare sleep() would raise NameError.
    time.sleep(2)
    # Scroll down and return the height of the scroll box.
    ht = self.browser.execute_script(SCROLL_BOX_TO_BOTTOM_JS, scroll_box)
Scroll down example from the web
reference: https://michaeljsanders.com/2017/05/12/scrapin-and-scrollin.html
import time
from selenium import webdriver
from bs4 import BeautifulSoup as bs
# The original write-up used Firefox; this snippet launches Chrome. You can use whichever browser you like.
browser = webdriver.Chrome()
# Tell Selenium to get the URL you're interested in.
browser.get("http://URLHERE.com")
# Scroll to the bottom, wait 3 seconds for the next batch of data to load,
# then scroll again. Repeat until the page height stops changing, which means
# the page has stopped loading new data.

# JavaScript that scrolls to the bottom of the document and returns the
# document height measured right after the scroll.
SCROLL_AND_MEASURE_JS = (
    "window.scrollTo(0, document.body.scrollHeight);"
    "var lenOfPage=document.body.scrollHeight;return lenOfPage;"
)

lenOfPage = browser.execute_script(SCROLL_AND_MEASURE_JS)
while True:
    lastCount = lenOfPage
    time.sleep(3)
    lenOfPage = browser.execute_script(SCROLL_AND_MEASURE_JS)
    # Same height before and after the wait: nothing new was loaded.
    if lastCount == lenOfPage:
        break
# Now that the page is fully scrolled, grab the source code.
source_data = browser.page_source
# Throw your source into BeautifulSoup and start parsing!
# The parser is named explicitly: without it BeautifulSoup emits a warning and
# picks whichever parser happens to be installed, so results can differ
# between machines. "html.parser" ships with Python's standard library.
bs_data = bs(source_data, "html.parser")