【发布时间】:2020-08-18 21:38:08
【问题描述】:
我无法以 CSV 格式保存所有数据,只能保存最后一条。抓取完成后,CSV 文件中只有最后一条抓取到的数据,但我想把所有页面的数据都保存下来。
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
# Scrape consecutive Craigslist car-listing pages and save EVERY page's data
# to CSV.  Root cause of "only the last record is saved": the 5-field record
# was appended to `records` only once (after the loop ran), so the DataFrame
# held a single row.  Fix: append one record per iteration, INSIDE the loop,
# and build/write the DataFrame exactly once AFTER the loop.
driver = webdriver.Chrome()
mainurl = 'https://austin.craigslist.org/search/cta?s=0'
driver.get(mainurl)

# Open the first listing on the results page.
driver.find_element_by_xpath('//*[@id="sortable-results"]/ul/li[1]/p/a').click()
time.sleep(4)

records = []
try:
    # Original code did `i = 1; while i < 3` -> exactly 2 iterations.
    for _ in range(2):
        url = driver.current_url
        print(url)

        # Re-parse the CURRENT page each iteration; the original parsed the
        # soup once (before the loop), so the <img> lookup never changed.
        page_soup = BeautifulSoup(
            driver.execute_script("return document.documentElement.outerHTML"),
            'html.parser')

        # Reset per-listing fields so a failed lookup yields None instead of
        # reusing the previous iteration's value (or raising NameError).
        title = price = phone = loc = img_src = None

        try:
            title = driver.find_element_by_xpath('//*[@id="titletextonly"]').text
            print(title)
        except NoSuchElementException:
            print('No Title Given')

        try:
            price = driver.find_element_by_xpath(
                '/html/body/section/section/h2/span/span[2]').text
            print(price)
        except NoSuchElementException:
            print('No Price Given')

        try:
            phone = driver.find_element_by_xpath(
                '//*[@id="postingbody"]/h2[1]/big').text
            print(phone)
        except NoSuchElementException:
            print('No Mobile number avalible')

        try:
            loc = driver.find_element_by_xpath(
                '/html/body/section/section/section/div[1]/div/div[2]').text
            print(loc)
        except NoSuchElementException:
            print('No Location Data Avalible')

        try:
            img = page_soup.find('img')
            # The original stored `print(...)`'s return value, which is always
            # None; keep the actual src string instead.
            img_src = img.get('src') if img is not None else None
            print(img_src)
        except Exception:
            print('No img Found')

        # Append THIS listing's row now — one 5-tuple per page, matching the
        # 5 DataFrame columns (the old `records.append((phone))` pushed a bare
        # string and broke the column layout).
        records.append((url, title, price, loc, img_src))

        # Advance to the next listing; if the "next" arrow is missing, go back
        # to the results list and page forward from there.  (`find_element`
        # raises rather than returning None, so the old `if url == None`
        # branch could never run.)
        try:
            driver.find_element_by_xpath(
                '/html/body/section/section/header/div[1]/div/a[3]').click()
            time.sleep(4)
        except NoSuchElementException:
            driver.find_element_by_class_name('backup').click()
            time.sleep(5)
            driver.find_element_by_xpath(
                '//*[@id="searchform"]/div[3]/div[3]/span[2]/a[3]').click()
            time.sleep(6)

    # Build and write the CSV once, after all pages were collected.
    df = pd.DataFrame(
        records,
        columns=['Product Url', 'Title/Model/Make', 'Price', 'GM Location',
                 'Image Link'])
    print(df)
    df.to_csv('zzz.csv', index=False)
finally:
    # Always release the browser, even if a locator raised mid-run.
    driver.quit()
【问题讨论】:
标签: python pandas selenium csv beautifulsoup