【问题标题】:How can I retrieve the phone number in selenium?如何在 Selenium 中检索电话号码?
【发布时间】:2021-08-28 13:13:03
【问题描述】:
import json
from io import StringIO
from bs4 import BeautifulSoup
from requests_html import HTMLSession
import time
from selenium import webdriver
import requests
import pandas as pd
import numpy as np
from selenium.webdriver.chrome.options import Options
import colorama
from colorama import Fore, Back, Style
colorama.init(autoreset = False)
import selenium
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

# Question script: for three result pages, print each provider name, click a
# provider link, try to print the phone number and navigate back.

# NOTE(review): this is not a raw string; `\P` and `\c` are not recognised
# escape sequences, so the backslashes are kept, but Python warns about
# invalid escapes here. A raw string (r"...") would avoid that.
PATH = "C:\Program Files (x86)\chromedriver.exe"
chrome_options = Options()
# Drop Chrome's 'enable-logging' switch.
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(PATH, options = chrome_options)
# driver.minimize_window()

# b runs 0..2; the URL's pageNum is b+1, i.e. pages 1 to 3.
for b in range(3):

    url = "https://www.healthgrades.com/usearch?what=Marriage%20%26%20Family%20Therapy&entityCode=PS303&where=CA&pageNum={}&sort.provider=bestmatch&state=CA".format(b+1)

    driver.implicitly_wait(50)
    driver.get(url)
    # `wait` is only referenced by the commented-out line further down.
    wait = WebDriverWait(driver, 500)
    # Located once per result page, before the inner loop; the same element
    # reference is clicked on every iteration below.
    # NOTE(review): presumably this reference no longer points at a live
    # element after the script navigates away and back - confirm.
    button = driver.find_element_by_xpath("//a[@class = 'provider-name__lnk']")
    # Number of provider-name links on the current result page.
    length =  len(driver.find_elements(By.CSS_SELECTOR, "a[data-qa-target='provider-details-provider-name']"))

    # j is incremented once per iteration, so it always equals i.
    j = 0
    for i in range(length):
        # Re-query the links on every iteration and print the j-th name.
        elements = driver.find_elements(By.XPATH, "//a[@data-qa-target='provider-details-provider-name']")
        print(elements[j].text)
        # ActionChains(driver).move_to_element(elements[j].find_element_by_xpath(".//ancestor::div[contains(@class,'card-summary')]/following-sibling::div[contains(@class,'card-cta')]/a")).click().perform()
        # Clicks `button`, not elements[j].
        button.click()

        time.sleep(2)
        # NOTE(review): this is the lookup the question reports as failing.
        phone = driver.find_element_by_xpath("//a[@class='toggle-phone-number-button']")
        print(phone.text)
        # print(wait.until(EC.visibility_of_element_located((By.XPATH, "//a[@class='click-to-call-button-secondary hg-track mobile-click-to-call']"))).text)
        # Go one step back in the browser history, then pause.
        driver.execute_script("window.history.go(-1)")
        time.sleep(2)
        j = j + 1

大家好,这个平台上已经有人帮我写过这段代码,我在此基础上做了扩展。它应该做的事情:抓取治疗师的名字,点击他们的链接,提取电话号码,回到第一个页面,然后重复。它实际做到的:抓取名字,点击链接,但找不到电话号码。我可能用错了 ID,因为它返回错误 "'NoneType' object has no attribute 'text'"。谁能帮我修复第 19 行的代码,以便打印电话号码?非常感谢任何帮助。

【问题讨论】:

  • 请贴出完整代码(包括 import 语句)。
  • @Ram 我发布了我使用的导入

标签: python selenium xpath


【解决方案1】:

我能够使用下面这行代码提取电话号码。如果这行代码没有返回电话号码,则需要先将焦点切换到另一个窗口,再尝试提取详细信息。

driver.find_element_by_xpath("//div[@class='summary-standard-button-row-mobile']/a").get_attribute("innerText")

这是我尝试过的完整代码:

# Walk the search results page by page; each provider link is clicked, the
# name (<h1>) and phone number are read from the window that opens, and that
# window is closed again.
driver.implicitly_wait(10)
driver.get("https://www.healthgrades.com/usearch?what=Marriage%20%26%20Family%20Therapy&entityCode=PS303&where=CA&pageNum=1&sort.provider=bestmatch&state=CA")
# Remember the results window so the newly opened window can be identified by
# exclusion; WebDriver does not guarantee the order of window handles.
results_window = driver.current_window_handle
# `page` is the number of the pagination link clicked after the current page
# has been processed (2, then 3).
# NOTE(review): find_element(s)_by_* were removed in newer Selenium 4
# releases; there, use driver.find_element(By.XPATH, ...) instead.
for page in range(2, 4):
    time.sleep(5)
    length = len(driver.find_elements_by_xpath("//a[@data-qa-target='provider-details-provider-name']"))
    for i in range(length):
        # Re-query the links on every iteration, then open the i-th provider.
        elements = driver.find_elements_by_xpath("//a[@data-qa-target='provider-details-provider-name']")
        elements[i].click()
        # Switch to the window that is not the results window.
        profile_windows = [h for h in driver.window_handles if h != results_window]
        driver.switch_to.window(profile_windows[0])
        time.sleep(1)
        print(driver.find_element_by_tag_name("h1").get_attribute("innerText"))
        try:
            print(driver.find_element_by_xpath("//div[@class='summary-standard-button-row-mobile']/a").get_attribute("innerText"))
        except Exception:
            # Best effort: any failure to read the phone link is reported as
            # missing contact details and the scrape continues.
            print("No Contact Details")
        driver.close()
        driver.switch_to.window(results_window)
        time.sleep(1)
    driver.find_element_by_xpath("//a[text()='{}']".format(page)).click()

输出:

Noe Gutierrez, MSW
(760) 385-3959
Richard Vogel, LMFT
(949) 690-6846
Rachel Sievers, LMFT
(559) 901-2836
...

【讨论】:

  • 我使用的网址:Link
  • 感谢您的回复,但我把您的那一行粘贴到我的 "phone" 变量中,它同样无法正常工作。有什么想法吗?
  • @AdrianReichert - 发生了什么,您遇到错误或没有得到任何输出。
  • 我收到一个错误,它找不到您的 xpath
  • @AdrianReichert - 请复制粘贴错误。你切换到其他窗口了吗?
【解决方案2】:

有几点:看起来你必须更改定位器;另外,我在自动化窗口中也没有看到返回按钮:

代码:

# Open result pages 1-3; for every provider link print the name, click it,
# read the phone number from the window that opens, close that window and
# print a running count.
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(5)
wait = WebDriverWait(driver, 10)

# Handle of the window that shows the search results. Detail windows are
# identified as "any handle that is not this one", because WebDriver does not
# guarantee the order of window handles.
current_windows_handle = driver.current_window_handle
for b in range(1, 4):
    url = "https://www.healthgrades.com/usearch?what=Marriage%20%26%20Family%20Therapy&entityCode=PS303&where=CA&pageNum={}&sort.provider=bestmatch&state=CA".format(b)
    driver.get(url)
    # Best effort: click the pop-up's accept button when it is present
    # (presumably the cookie-consent banner - confirm). A failure here must
    # not stop the scrape, but it is reported instead of silently dropped.
    try:
        if driver.find_elements(By.ID, "onetrust-accept-btn-handler"):
            wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
        else:
            print("pop up was not visible")
    except Exception as exc:
        print("Could not dismiss the pop up:", exc)

    length = len(driver.find_elements(By.CSS_SELECTOR, "a[data-qa-target='provider-details-provider-name']"))

    for i in range(length):
        # Re-query the links on every iteration before using the i-th one.
        elements = driver.find_elements(By.XPATH, "//a[@data-qa-target='provider-details-provider-name']")
        print(elements[i].text)
        ActionChains(driver).move_to_element(elements[i]).click().perform()
        time.sleep(2)
        detail_handles = [h for h in driver.window_handles if h != current_windows_handle]
        driver.switch_to.window(detail_handles[0])
        try:
            # find_elements returns an empty list when nothing matches, so a
            # missing phone number needs no exception handling.
            phones = driver.find_elements(By.XPATH, "//a[@class='toggle-phone-number-button']")
            if phones:
                print(phones[0].text)
            else:
                print("Phone number was not available")
        except Exception as exc:
            print("Could not read the phone number:", exc)
        # Close the detail window and return to the results window right
        # away so the driver never stays focused on a closed window.
        driver.close()
        driver.switch_to.window(current_windows_handle)
        time.sleep(2)
        # 1-based count of providers handled on this result page.
        print(i + 1)

输出:

"C:\Program Files\Python39\python.exe" C:/Users/panabh02/PycharmProjects/SeleniumSO/Chrome.py
Noe Gutierrez, MSW
(760) 385-3959
1
Richard Vogel, LMFT
(949) 690-6846
2
Rachel Sievers, LMFT
(559) 901-2836
3
Luisa Contreras, LMFT
(657) 888-3222
4
Lauren Reminger, MA
(951) 541-1898
5
Dr. Peter Robbins, PHD
(800) 998-6329
6
Ernesto Segismundo, LMFT
(909) 247-8820
7
Crystal Guzman, LMFT
(818) 927-0478
8
Claudia Franzosi, MFT
(619) 246-6700
9
Dr. Irving Croshier, PHD
(818) 710-1266
10
Dr. Richard Zahn, PSY.D
(916) 792-7229
11
Danielle Duran, LMFT
(559) 737-8317
12
Amie Lowery-Luyties, MFT
(310) 494-6587
13
Sherry Johnson, LMFT
(408) 510-4848
14
Rachel McDermott, LMFT
(559) 280-5756
15
Scott Barrella, MSW
(805) 390-6384
16
Ella Begelfor, MFT
(818) 207-1191
17
Dr. Gilbert Chalepas, PSY.D
(310) 500-8442
18
Katki Windsor, LMFT
(408) 471-8536
19
Tara Boerner, LMFT
(818) 924-3960
20
Chad Medlin, LMFT
(805) 746-3180
21
David Coolidge, MA
(916) 608-0714
22
Lissette Gomez, LMFT
(714) 290-1578
1
Wendy Talley, LCSW
(310) 242-6112
2
Lance Ferris, LCSW
(530) 592-7847
3
Jo Ellen Fletcher, LMFT
Phone number was not available
4
Dr. Judith Bucholtz, PHD
(310) 826-1141
5
Angie Nicholas, LMFT
(209) 224-4994
6
Maryam Jamili, LMFT
(949) 735-0500
7
Stephanie Carson, LMFT
(909) 364-1104
8
Betsy Sansby, LMFT
(952) 221-6752
9
Aida Vazin, MAMFT
(949) 872-3926
10
Dr. Wendy O'Connor, PSY.D
(310) 712-1230
11
Chelsea Crow-Fuentes, MA
(949) 484-5008
12
Marymargaret Parker, LMFT
(661) 259-4620
13
Kenton Lane, LMFT
(310) 968-1608
14
Catherine Chambliss, MFT
(310) 303-9132
15
DeAnn Richter, MFT
(714) 396-7776
16
Teje Aliberti, MA
(909) 752-0021
17
Katerra Davis, LMFT
(760) 705-3373
18
Dr. Robert Jaffe, PHD
(818) 906-7079
19
Monica Cervantes, LCSW
(949) 302-8952
20
Behnam Keyvan, MFT
(818) 448-2753
21
Hanouf Alahmari, LMFT
Phone number was not available
22

Process finished with exit code 0

【讨论】:

  • 感谢您的回复,但遗憾的是它返回了一个错误:"unknown error: web view not found"
  • 谢谢,但它只抓取前3个人的电话号码。
  • 奇怪,我能得到结果,而你却得不到。
  • 是的,我敢肯定哈哈,我可能使用过您的旧解决方案之一?
【解决方案3】:

数据是由后端通过 API 调用生成的。下面是我得到的可运行输出:

代码:

import requests
import pandas as pd

# Values that are sent under more than one query key.
_SPECIALTY = "Marriage & Family Therapy"
_SPECIALTY_CODE = "PS303"

# Query-string parameters for the search request; key order is preserved.
# NOTE(review): sessionId / requestId look like values captured from one
# browser session - confirm whether the endpoint needs them.
params = {
    "where": "CA",
    "sessionId": "S63e7",
    "requestId": "R1a5829bce167793b",
    "sort.provider": "bestmatch",
    "what": _SPECIALTY,
    "highlight": _SPECIALTY,
    "entityCode": _SPECIALTY_CODE,
    "code": _SPECIALTY_CODE,
    "category": "provider",
    "cid": "",
    "hgTrace": "false",
    "isPsr": "false",
    "isFsr": "false",
    "isFirstRequest": "true",
    "pageNum": "2",
}

# Request headers: only a browser-like User-Agent is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/92.0.4515.107 Safari/537.36"
    ),
}


def _first_phone_number(item):
    """Return the first office phone number of one search result, or None."""
    office = item.get('displayOffice') or {}
    numbers = office.get('phoneNumbers') or []
    return numbers[0] if numbers else None


def main(url):
    """Fetch one page of provider search results and print the phone numbers.

    Sends a GET request to ``url`` with the module-level ``params`` and
    ``headers``, reads ``search.searchResults.provider.results`` from the JSON
    response and collects the first office phone number of every result.

    Args:
        url: Address of the search API endpoint.

    Returns:
        The printed ``pandas.DataFrame`` with a single ``phoneNumber`` column.
        Results without a phone number yield ``None`` in that column instead
        of aborting the whole run.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    with requests.Session() as session:
        session.headers.update(headers)
        # Without a timeout a stalled connection would block forever.
        response = session.get(url, params=params, timeout=30)
        # Fail on the HTTP error itself rather than on a confusing JSON or
        # KeyError further down.
        response.raise_for_status()
        results = response.json()['search']['searchResults']['provider']['results']

    phone_numbers = [_first_phone_number(item) for item in results]
    df = pd.DataFrame(phone_numbers, columns=["phoneNumber"])
    print(df)
    return df


if __name__ == "__main__":
    main('https://www.healthgrades.com/api3/usearch')

输出:

  phoneNumber
0   (310) 303-9132
1   (714) 396-7776
2   (909) 752-0021
3   (760) 705-3373
4   (818) 906-7079
5   (949) 302-8952
6   (818) 448-2753
7   (424) 361-9895
8   (530) 541-6696
9   (530) 220-3433
10  (310) 713-6093
11  (909) 353-7080
12  (415) 686-9749
13  (626) 226-7326
14  (408) 981-4224
15  (415) 927-2108
16  (818) 600-2086
17  (818) 416-1110
18  (805) 242-4604
19  (949) 533-0665
20  (310) 709-1612
21  (415) 413-8086
22  (310) 458-3333
23  (951) 777-8176
24  (559) 432-5805
25  (619) 609-9863
26  (858) 717-4196
27  (415) 562-4156
28  (925) 399-1177
29  (818) 706-0040
30  (310) 795-8335
31  (213) 422-9929
32  (714) 446-5830
33  (661) 383-2252
34  (888) 787-1767
35  (510) 516-3188

【讨论】:

    猜你喜欢
    • 2012-05-03
    • 1970-01-01
    • 2011-05-14
    • 2010-12-23
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2013-06-18
    • 1970-01-01
    相关资源
    最近更新 更多