【发布时间】:2022-01-20 16:02:40
【问题描述】:
import csv
import pandas as pd
root_url = "https://hjrs.hec.gov.pk/index.php?r=site%2Franking&page=1"
html = requests.get(root_url,verify=False)
soup = BeautifulSoup(html.text, 'html.parser')
paging = soup.find("div",{"class":"grid-view"}).find("ul",{"class":"pagination"}).find_all("a")
start_page = paging[1].text
last_page = paging[len(paging)-2].text
outfile = csv.writer(open('Mona.csv', 'w', newline=''))
outfile .writerow(['Title','Links'])
pages = list(range(1,int(last_page)+1))
for page in pages:
url = 'https://hjrs.hec.gov.pk/index.php?r=site%2Franking&page=1' %(page)
html = requests.get(url)
soup = BeautifulSoup(html.text, 'html.parser')
#print(soup.prettify())
print ('Processing page: %s' %(page))
Title_list = soup.findAll("div",{"class":"table table-striped table-bordered table-responsive w-100 d-md-table"})
for element in Title_list:
Title1 =Title_list.find_all('a')
Title = Title1.contents[0]
Links ='https://web.archive.org8' + artist_name.get('href')
writer.writerow([Title,Links])
outfile.close()
print ('Done')
它给了我这个错误
ValueError Traceback (most recent call last)
<ipython-input-42-f28d946aff93> in <module>()
12 outfile .writerow(['Title','Links'])
13
---> 14 pages = list(range(1,int(last_page)+1))
15 for page in pages:
16 url = 'https://hjrs.hec.gov.pk/index.php?r=site%2Franking&page=1' %(page)
ValueError: invalid literal for int() with base 10: '»'
请解释一下我该如何解决。
【问题讨论】:
标签: python html beautifulsoup pagination