【问题标题】:How to find the nth child heading and print the text using beautifulsoup in python如何在python中使用beautifulsoup查找第n个子标题并打印文本
【发布时间】:2025-11-23 10:35:01
【问题描述】:

根据我的代码,我可以获得项目的第一个标题,现在希望打印副标题(FSI Details)。但用 beautifulsoup 取不到第二个标题;我也尝试过 nth-child 选择器,仍然不行。

 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support.select import Select
 import urllib.request
 from bs4 import BeautifulSoup
 import time
 import pandas as pd
 import os

 # --- drive the MahaRERA search form with Selenium --------------------------
 url = 'https://maharerait.mahaonline.gov.in'
 chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

 driver = webdriver.Chrome(executable_path=chrome_path)
 driver.get(url)

 # Click "Search Project Details" once it becomes clickable.
 # (The class name is 'search-pro-details' in one piece; the original paste
 # had the string literal broken across two lines.)
 WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
     (By.XPATH,
      "//div[@class='search-pro-details']"
      "//a[contains(.,'Search Project Details')]"))).click()

 # Select the "Registered Project" radio button; the JS click avoids
 # element-not-interactable problems with the styled radio.
 registered_project_radio = WebDriverWait(driver, 10).until(
     EC.element_to_be_clickable((By.ID, "Promoter")))
 driver.execute_script("arguments[0].click();", registered_project_radio)

 # Enter the certificate number and submit the search.
 application = driver.find_element_by_id("CertiNo")
 application.send_keys("P50500000005")
 search = WebDriverWait(driver, 10).until(
     EC.element_to_be_clickable((By.ID, "btnSearch")))
 driver.execute_script("arguments[0].click();", search)

 # Take the first hyperlink from the results table and open its detail page.
 view_links = [item.get_attribute('href')
               for item in driver.find_elements_by_tag_name("a")
               if item.get_attribute('href') is not None]
 view = view_links[0]
 driver.get(view)

 # NOTE(review): this re-fetches the page with urllib, WITHOUT the Selenium
 # session's cookies; if the server ever requires the session, parse
 # driver.page_source instead.
 request = urllib.request.Request(view)
 html = urllib.request.urlopen(request).read()
 soup = BeautifulSoup(html, 'html.parser')

 divPInfo2 = soup.find("div", {"id": "DivProject"})

 # BUG FIX: every section ("Project", "FSI Details", ...) sits in its OWN
 # div.x_panel, and each panel's div.x_title contains exactly ONE <h2>.
 # The original code asked for the second <h2> inside the FIRST x_title
 # (find_all("h2")[1]), which does not exist, so the "FSI Details" heading
 # could never be reached.  Collect one heading per x_title instead.
 headings = [title.find("h2").text.strip()
             for title in divPInfo2.find_all("div", {"class": "x_title"})]

 print(headings[0])  # first section heading, e.g. the project title
 print(headings[1])  # second section heading: "FSI Details"

【问题讨论】:

    标签: python selenium-webdriver beautifulsoup


    【解决方案1】:

    您可以尝试 CSS 选择器 :contains("FSI Details"),它会选中文本中包含字符串 "FSI Details"(即页面上的英文标题)的元素。此代码打印 "FSI Details" 部分的各个标签和对应的值:

    import requests
    from bs4 import BeautifulSoup

    url = 'https://maharerait.mahaonline.gov.in/PrintPreview/PrintPreview?q=BPUvrrjIzYs%2f2hwYj1YIOfflh9NisZW6zTns2KLjHBZn6cbQ008s91nzlFrDxVvLwR1vAeLID0%2bo%2bD0H0Z6o2t%2b5P%2b%2fbBOcHCbMQHU8gkwdNZJnbbfu6N7mWSpgKXt4AiQyzuEpoDE7FX6HZypqsGXz4ObYD4KpyRzCsFJaWTgA%3d'

    soup = BeautifulSoup(requests.get(url).text, 'lxml')

    # Select the .x_content panel that immediately follows the .x_title
    # containing "FSI Details".
    # FIX: ':-soup-contains()' is the supported Soup Sieve spelling of the
    # non-standard ':contains()' pseudo-class, which is deprecated and raises
    # a FutureWarning (and is removed) in newer soupsieve releases.
    fsi_content = soup.select_one(
        '.x_title:-soup-contains("FSI Details") + .x_content')

    # Print a fixed-width two-column Label/Value table.
    print('{: <160}{: <8}'.format('Label', 'Value'))
    print('-' * 168)
    # Each value lives in the <div> that directly follows the <div> holding
    # the <label>, hence the 'div:has(> label) + div' selector.
    for label, text in zip(fsi_content.select('label'),
                           fsi_content.select('div:has(> label) + div')):
        print('{: <160}{: <8}'.format(label.get_text(strip=True),
                                      text.get_text(strip=True)))
    

    打印:

    Label                                                                                                                                                           Value   
    ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    Built-up-Area as per Proposed FSI (In sqmts) ( Proposed but not sanctioned) ( As soon as approved, should be immediately updated in Approved FSI)               0       
    Built-up-Area as per Approved FSI (In sqmts)                                                                                                                    11566.50
    TotalFSI                                                                                                                                                        11566.50
    

    进一步阅读:

    CSS Selectors Reference

    【讨论】:

    最近更新 更多