【问题标题】:How To Extract Value From List In Python(如何在 Python 中从列表中提取值)
【发布时间】:2021-09-10 16:32:18
【问题描述】:

代码

from __future__ import unicode_literals
import youtube_dl
import pandas as pd
import csv
import re

# Load single.csv into a DataFrame; number_of_rows is not used by the code shown below.
number_of_rows = pd.read_csv('single.csv')

# Module-level state shared by run_scraper() and get_links().
ydl_opts = {}  # options passed to youtube_dl.YoutubeDL (none set)
all_scrapes = []  # one (channel, title, twitter_list) tuple per CSV data row
twitter_list = []  # single list that get_links() appends to; it is never cleared

# Scrape Online Product
def run_scraper():
    """Print channel/title for every video URL in single.csv and append one tuple per video to all_scrapes."""
    # Open the CSV and iterate over its rows
    with open("single.csv", "r") as f:
        csv_reader = csv.reader(f)
        next(csv_reader)  # skip the first row (the "Videos" header in the sample CSV)
        
        # Look up the metadata for the URL held in the first column of each row
        for csv_line_entry in csv_reader:
                        
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(csv_line_entry[0], download=False)
                channel = meta['channel']
                title = meta['title']
                description = meta['description']
                print('Channel        :', channel)
                print('Title          :', title)
                #print('description    :', description)

                get_links(description)  # appends matching URLs to the module-level twitter_list
                print("-" * 120)
                print()

                print('Demo:', twitter_list)

            # NOTE: the tuple stores a reference to the module-level twitter_list, not a copy, so every tuple shares the same growing list
            current_scrapes = (channel, title, twitter_list)
            all_scrapes.append(current_scrapes) 

        print('All Scrapes:', all_scrapes)
        print()


def get_links(description):
  """Print every URL in description that contains 'twitter.com' and append it to the module-level twitter_list."""
  # Collect every http(s) URL; each match runs up to the next whitespace character
  description_urls = re.findall(r'(https?://[^\s]+)', description)
  #print('List Before    :', description_urls, '\n')

  # Only scan the URLs when the description mentions twitter.com at all
  if 'twitter.com' in description:

    for item in description_urls:
      #print('Print All URLs:', item)
      if 'twitter.com' in item:
        print('- Twitter URL Found:', item)
        twitter_list.append(item)  # grows the shared list; nothing is returned


run_scraper()  # module-level call: the scrape runs as soon as the script is executed

CSV 文件

Videos
https://www.youtube.com/watch?v=kqtD5dpn9C8
https://www.youtube.com/watch?v=rfscVS0vtbw 

上述代码从 CSV 文件中提取 YouTube 网址,然后打印频道和标题信息。
此外,它通过 get_links 函数从 YouTube 描述中提取 Twitter URL。

问题

当我在 get_links 函数中打印捕获的 Twitter 网址时(第 61 行)

print('- Twitter URL Found:', item)  

结果正确地显示了每个用户各自的 Twitter 条目。

但我无法把这些信息正确地放入元组 current_scrapes:每个元组条目里都填入了所有已捕获的 Twitter 网址,而不只是该视频自己的网址。

任何帮助将不胜感激。

【问题讨论】:

    标签: python list function web-scraping tuples


    【解决方案1】:

    稍微重新组织你的代码:让 get_links 每次返回一个新的列表,而不是向全局的 twitter_list 追加(原代码中所有元组引用的是同一个列表,所以每个条目都包含全部网址):

    import csv
    import re

    import pandas as pd
    import youtube_dl
    
    # Scrape Online Product
    def run_scraper():
        """Scrape channel, title and Twitter URLs for each video listed in single.csv.

        The first CSV row is treated as a header and skipped; the video URL is
        read from the first column of every following row.  Progress is printed
        to stdout as each video is processed.

        Returns:
            A list of ``(channel, title, twitter_list)`` tuples, one per video,
            where ``twitter_list`` is a fresh list holding that video's URLs only.
        """
        ydl_opts = {}
        all_scrapes = []

        # newline="" is what the csv module asks for when it is handed a file object.
        with open("single.csv", "r", newline="") as f:
            csv_reader = csv.reader(f)
            # Skip the header row; the default avoids StopIteration on an empty file.
            next(csv_reader, None)

            # One YoutubeDL instance is reused for every row instead of one per row.
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                for csv_line_entry in csv_reader:
                    # Ignore blank lines and stray whitespace around the URL.
                    url = csv_line_entry[0].strip() if csv_line_entry else ""
                    if not url:
                        continue

                    meta = ydl.extract_info(url, download=False)
                    channel = meta["channel"]
                    title = meta["title"]
                    description = meta["description"]
                    twitter_list = get_links(description, "twitter.com")

                    print("Channel        :", channel)
                    print("Title          :", title)
                    print("Twitter URLs   :", twitter_list)
                    print("-" * 120)
                    print()

                    all_scrapes.append((channel, title, twitter_list))

        return all_scrapes
    
    
    def get_links(description, link):
        """Return the URLs in *description* that contain *link*, in order of appearance."""
        # A URL is "http://" or "https://" followed by everything up to the next whitespace.
        found_urls = re.findall(r"(https?://[^\s]+)", description)
        return [url for url in found_urls if link in url]
    
    
    # Build a DataFrame with one row per tuple returned by run_scraper(), then print it.
    df = pd.DataFrame(run_scraper(), columns=["channel", "title", "twitter URLs"])
    print(df)
    

    打印:

    [youtube] kqtD5dpn9C8: Downloading webpage
    Channel        : Programming with Mosh
    Title          : Python for Beginners - Learn Python in 1 Hour
    Twitter URLs   : ['https://twitter.com/moshhamedani']
    ------------------------------------------------------------------------------------------------------------------------
    
    [youtube] rfscVS0vtbw: Downloading webpage
    Channel        : freeCodeCamp.org
    Title          : Learn Python - Full Course for Beginners [Tutorial]
    Twitter URLs   : ['https://twitter.com/mike_dane']
    ------------------------------------------------------------------------------------------------------------------------
    
                     channel                                                title                        twitter URLs
    0  Programming with Mosh        Python for Beginners - Learn Python in 1 Hour  [https://twitter.com/moshhamedani]
    1       freeCodeCamp.org  Learn Python - Full Course for Beginners [Tutorial]     [https://twitter.com/mike_dane]
    

    【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2021-06-10
    • 1970-01-01
    • 1970-01-01
    • 2017-04-26
    相关资源
    最近更新 更多