【问题标题】:unable to print data from multiple urls using Selenium Python(无法使用 Selenium Python 从多个 url 打印数据)
【发布时间】:2021-06-12 16:24:42
【问题描述】:

这段代码本身可以运行,但我遇到的问题是:它只抓取了一个 url 的数据,随后就抛出如下所示的错误,请帮我解决这个问题。它抛出的是 session not created 错误

只打印一个链接
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import pandas as pd 
import time 

# Product pages to scrape; the loop below visits them one at a time in list order.
url = ["https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
"https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
"https://www.skechers.com/women/shoes/ultra-flex/12843.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
"https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
"https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
"https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
"https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
"https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
"https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
"https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
"https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
"https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html"]
for endpoint in url:
    # A new ChromeOptions object and a new Chrome driver are created on every
    # iteration, i.e. one browser session per URL.
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ['enable-automation'])
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
    # NOTE(review): every iteration asks for the same fixed debugging port, and
    # the driver from the previous iteration is never quit, so that browser may
    # still be holding the port. This is presumably what triggers the reported
    # "session not created" error on the second URL -- confirm.
    options.add_argument("--remote-debugging-port=9222")

    driver = webdriver.Chrome('F:/chromedriver.exe',options=options)
    driver.get(endpoint)
    # Fixed 5-second pause before the page source is read.
    time.sleep(5)
    # `image` is assigned here but not used anywhere in the visible code.
    image= None
    page_Source = driver.page_source
    soup = BeautifulSoup(page_Source, 'html.parser')
    # `content` is collected but not used anywhere in the visible code.
    content= soup.find_all('div',class_='c-product-description-features s-product-description-features row') 

    # find() returns None when no matching <span> exists, in which case `.text`
    # raises AttributeError.
    pid=soup.find('span',class_='product-id').text
    links = soup.find_all('img', srcset=True)
    # Each pass overwrites img/alt, so after the loop they hold the values of
    # the last <img> only. link['alt'] raises KeyError for an image without alt.
    for link in links:
        img= link['srcset']
        alt=link['alt']

    # Printed once per URL (outside the inner loop). If `links` is empty, img/alt
    # are unbound on the first URL and stale from the previous URL afterwards.
    print(pid ,img,alt)

【问题讨论】:

  • 您为每个 url 都打开一个新的浏览器会话,有什么特别的原因吗?您可以只使用一个浏览器会话,依次访问这些网址。反正您并没有进行并行执行。如果仍要为每个 url 新建会话,为避免这个错误,请在每次循环结束时调用 driver.quit() 退出驱动程序
  • @KunduK 可以在同一个浏览器上打开多个网址吗??
  • 看看你是否使用了正确的chromedriver版本
  • 是的,我使用的是最新的 chrome 驱动程序,昨天刚刚更新过

标签: python selenium beautifulsoup webdriver


【解决方案1】:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

import time

# Product pages to visit; each page gets its own short-lived browser session.
url = [
    "https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html",
]

# The browser options are the same for every page, so build them once.
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
# Deliberately no "--remote-debugging-port=9222": a fixed port can be held by
# only one browser instance at a time, so a second instance requesting the
# same port fails with "session not created".

for count, endpoint in enumerate(url):
    print(count)  # progress indicator: index of the page being loaded
    driver = webdriver.Chrome('./chromedriver.exe', options=options)
    try:
        driver.get(endpoint)
        # Read driver.page_source here, while the session is still open.
    finally:
        # Always close the browser, even if loading fails; otherwise one
        # Chrome process per URL is left running.
        driver.quit()
    

问题:

问题在于远程调试端口:第一个浏览器实例已经占用了 9222 端口,之后的每个实例又请求同一个端口,因此无法创建新的会话。

解决办法:

删除 --remote-debugging-port=9222 这个参数。

【讨论】:

    【解决方案2】:

    在 for 循环之外定义 chrome 驱动程序实例。我没有测试,但这应该可以工作。

    # One browser session is created up front and reused for every URL, so the
    # fixed debugging port is only ever requested once.
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ['enable-automation'])
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
    options.add_argument("--remote-debugging-port=9222")
    #
    driver = webdriver.Chrome('F:/chromedriver.exe',options=options)
    #
    # Product pages to scrape, visited in list order with the shared driver.
    url = ["https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html"]
    try:
        for endpoint in url:
            driver.get(endpoint)
            # Fixed pause before the page source is read.
            time.sleep(5)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            # find() returns None when the span is absent; report None then
            # instead of crashing on `.text`.
            pid_tag = soup.find('span', class_='product-id')
            pid = pid_tag.text if pid_tag is not None else None
            # As before, only the last <img srcset=...> on the page is reported.
            # Reset per page so a page without images prints None rather than
            # raising NameError or repeating the previous page's values.
            img = alt = None
            links = soup.find_all('img', srcset=True)
            if links:
                last_link = links[-1]
                img = last_link['srcset']
                alt = last_link.get('alt')  # None when the image has no alt text
            print(pid, img, alt)
    finally:
        # Close the shared browser once all pages are done, or on failure.
        driver.quit()
    

    或者在每个 url 处理完成后调用 driver.quit() 退出会话

    # Product pages to scrape; each page gets its own browser session, which is
    # closed before the next one starts.
    url = ["https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html"]
    # The browser options are the same for every page, so build them once.
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ['enable-automation'])
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
    # The fixed port is only usable because each session is quit before the
    # next one is created.
    options.add_argument("--remote-debugging-port=9222")
    for endpoint in url:
        driver = webdriver.Chrome('F:/chromedriver.exe',options=options)
        try:
            driver.get(endpoint)
            # Fixed pause before the page source is read.
            time.sleep(5)
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            # find() returns None when the span is absent; report None then
            # instead of crashing on `.text`.
            pid_tag = soup.find('span', class_='product-id')
            pid = pid_tag.text if pid_tag is not None else None
            # As before, only the last <img srcset=...> on the page is reported.
            # Reset per page so a page without images prints None rather than
            # raising NameError or repeating the previous page's values.
            img = alt = None
            links = soup.find_all('img', srcset=True)
            if links:
                last_link = links[-1]
                img = last_link['srcset']
                alt = last_link.get('alt')  # None when the image has no alt text
            print(pid, img, alt)
        finally:
            # Quit even when scraping fails, so the browser does not leak and
            # the debugging port is released for the next session.
            driver.quit()
    

    【讨论】:

      猜你喜欢
      • 2016-09-26
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2021-06-26
      • 2021-07-06
      • 2020-01-16
      • 2021-06-12
      • 1970-01-01
      相关资源
      最近更新 更多