如何从 Python 中的列表中提取值

Question

代码

from __future__ import unicode_literals
import youtube_dl
import pandas as pd
import csv
import re

# Load the CSV into a DataFrame (despite the name, this is the whole table); not referenced again in this snippet.
number_of_rows = pd.read_csv('single.csv')

# Module-level state shared by run_scraper() and get_links() below.
ydl_opts = {}  # options dict handed to youtube_dl.YoutubeDL; no options set
all_scrapes = []  # receives one (channel, title, twitter_list) tuple per CSV row
twitter_list = []  # appended to by get_links(); never cleared between videos

# Scrape Online Product
def run_scraper():
    """Print channel, title and Twitter links for each URL in single.csv and record them in all_scrapes."""
    # Open the CSV and iterate over its rows with csv.reader.
    with open("single.csv", "r") as f:
        csv_reader = csv.reader(f)
        next(csv_reader)  # skip the header row

        # Fetch metadata for the URL in the first column of each row.
        for csv_line_entry in csv_reader:

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(csv_line_entry[0], download=False)
                channel = meta['channel']
                title = meta['title']
                description = meta['description']
                print('Channel        :', channel)
                print('Title          :', title)
                # get_links() appends matching URLs to the module-level twitter_list.

                get_links(description)
                print("-" * 120)
                print()

                print('Demo:', twitter_list)
            # NOTE: twitter_list is one shared list that is never reset, so every
            # tuple references the same object holding all URLs found so far.
            current_scrapes = (channel, title, twitter_list)
            all_scrapes.append(current_scrapes)

        print('All Scrapes:', all_scrapes)
        print()


def get_links(description):
  """Print each URL in description that contains 'twitter.com' and append it to the global twitter_list."""
  # Find URLs in description
  description_urls = re.findall(r'(https?://[^\s]+)', description)
  # Each match runs from http:// or https:// up to the next whitespace character.

  # Substring pre-check on the whole text before looping over the matches.
  if 'twitter.com' in description:

    for item in description_urls:
      # Keep only the URLs that contain twitter.com.
      if 'twitter.com' in item:
        print('- Twitter URL Found:', item)
        twitter_list.append(item)


run_scraper()  # runs the scrape as soon as the script is executed

CSV 文件

Videos
https://www.youtube.com/watch?v=kqtD5dpn9C8
https://www.youtube.com/watch?v=rfscVS0vtbw

以上代码从 CSV 文件中提取 YouTube 网址，然后打印频道和标题信息。
此外，它通过 get_links 函数从 YouTube 描述中提取 Twitter URL。

问题

当我在 get_links 函数中（第 61 行）打印捕获到的 Twitter URL 时：

print('- Twitter URL Found:', item)

显示的结果正确显示了每个用户各自的 Twitter 条目。

但是，当我把这些信息放入元组 current_scrapes 时，每个元组条目中都会出现所有已捕获的 Twitter URL，而不是只包含该视频自己的 URL。

如能提供任何帮助，我将不胜感激。

Answer 1

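问题在于 twitter_list 是一个全局列表：每个视频的 URL 都被追加到同一个列表中，而每个元组保存的只是对这同一个列表的引用，因此所有元组显示的内容都相同。让 get_links 为每个视频返回一个新的列表即可。稍微重组您的代码：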

import csv
import re

import pandas as pd
import youtube_dl

# Scrape Online Product
def run_scraper(csv_path="single.csv", ydl_opts=None):
    """Collect channel, title and Twitter links for every video listed in a CSV file.

    The first row of the file is treated as a header and skipped; the video
    URL is read from the first column of each remaining row. Rows that are
    empty or whose first column is blank are ignored.

    Args:
        csv_path: Path of the CSV file to read. Defaults to ``"single.csv"``.
        ydl_opts: Options dict passed to ``youtube_dl.YoutubeDL``. ``None``
            (the default) means an empty options dict.

    Returns:
        A list of ``(channel, title, twitter_list)`` tuples, one per video.
        Each ``twitter_list`` is a separate list returned by ``get_links``.
    """
    if ydl_opts is None:
        ydl_opts = {}
    all_scrapes = []

    # newline="" is what the csv module documentation recommends for files
    # that are handed to csv.reader.
    with open(csv_path, "r", newline="") as f:
        csv_reader = csv.reader(f)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)

        # One YoutubeDL instance serves every row instead of building a new
        # one per URL.
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            for csv_line_entry in csv_reader:
                # Blank lines come back as [] (or a blank first cell) and
                # would otherwise fail on the index below.
                if not csv_line_entry or not csv_line_entry[0].strip():
                    continue

                meta = ydl.extract_info(csv_line_entry[0].strip(), download=False)
                channel = meta["channel"]
                title = meta["title"]
                # Guard against a missing or None description so get_links()
                # always receives a string.
                description = meta.get("description") or ""
                twitter_list = get_links(description, "twitter.com")

                print("Channel        :", channel)
                print("Title          :", title)
                print("Twitter URLs   :", twitter_list)
                print("-" * 120)
                print()

                all_scrapes.append((channel, title, twitter_list))

    return all_scrapes


def get_links(description, link):
    """Return the URLs found in *description* that contain *link*.

    Args:
        description: Text to search. ``None`` or an empty string yields an
            empty list instead of raising.
        link: Substring a URL must contain to be kept, e.g. ``"twitter.com"``.

    Returns:
        A new list of the matching URLs, in order of appearance.
    """
    if not description:
        return []

    # A URL is taken to run from http:// or https:// up to the next
    # whitespace character.
    description_urls = re.findall(r"(https?://[^\s]+)", description)

    return [item for item in description_urls if link in item]


# Turn the scraped tuples into a table (one row per video) and print it.
df = pd.DataFrame(
    data=run_scraper(),
    columns=["channel", "title", "twitter URLs"],
)
print(df)

打印：

[youtube] kqtD5dpn9C8: Downloading webpage
Channel        : Programming with Mosh
Title          : Python for Beginners - Learn Python in 1 Hour
Twitter URLs   : ['https://twitter.com/moshhamedani']
------------------------------------------------------------------------------------------------------------------------

[youtube] rfscVS0vtbw: Downloading webpage
Channel        : freeCodeCamp.org
Title          : Learn Python - Full Course for Beginners [Tutorial]
Twitter URLs   : ['https://twitter.com/mike_dane']
------------------------------------------------------------------------------------------------------------------------

                 channel                                                title                        twitter URLs
0  Programming with Mosh        Python for Beginners - Learn Python in 1 Hour  [https://twitter.com/moshhamedani]
1       freeCodeCamp.org  Learn Python - Full Course for Beginners [Tutorial]     [https://twitter.com/mike_dane]

如何从 Python 中的列表中提取值

How To Extract Value From List In Python

python

tuples

function

list

web-scraping

代码

CSV 文件

问题