如何从 Python 中的列表中提取值
How To Extract Value From List In Python
代码
from __future__ import unicode_literals
import youtube_dl
import pandas as pd
import csv
import re
# Load single.csv into a pandas DataFrame.
# NOTE(review): despite its name this holds the whole DataFrame, not a row
# count, and nothing in the visible code reads it - confirm it is needed.
number_of_rows = pd.read_csv('single.csv')
# Options dict handed to youtube_dl.YoutubeDL; left empty here.
ydl_opts = {}
# Module-level accumulators shared by run_scraper() and get_links():
# all_scrapes receives one (channel, title, twitter_list) tuple per video,
# twitter_list receives every Twitter URL that get_links() finds.
all_scrapes = []
twitter_list = []
# Scrape Online Product
def run_scraper():
    """Scrape channel, title and Twitter links for each URL in single.csv.

    Reads ``single.csv`` (the first row is skipped as a header), asks
    youtube_dl for the metadata of the URL in the first column of every
    remaining row, prints it, lets ``get_links`` collect Twitter URLs from
    the description, and appends a ``(channel, title, twitter_list)`` tuple
    to the module-level ``all_scrapes`` list.

    NOTE: ``twitter_list`` is the single module-level list that ``get_links``
    appends to. Every tuple stored here references that same list object, so
    each entry shows all Twitter URLs collected so far rather than only the
    ones belonging to its own video.
    """
    # Read CSV to List
    with open("single.csv", "r") as f:
        csv_reader = csv.reader(f)
        next(csv_reader)  # skip the header row

        # Scrape Data From Store
        for csv_line_entry in csv_reader:
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(csv_line_entry[0], download=False)

            channel = meta['channel']
            title = meta['title']
            description = meta['description']

            print('Channel :', channel)
            print('Title :', title)

            get_links(description)
            print("-" * 120)
            print()
            print('Demo:', twitter_list)

            # Make a tuple with the relevant info of the current YouTube Scrapes
            current_scrapes = (channel, title, twitter_list)
            all_scrapes.append(current_scrapes)

    print('All Scrapes:', all_scrapes)
    print()
def get_links(description):
    """Collect Twitter URLs found in *description* into ``twitter_list``.

    Every ``http://`` / ``https://`` URL in the text that contains
    ``twitter.com`` is printed and appended to the module-level
    ``twitter_list``.

    Args:
        description: Text to scan. ``None`` or an empty string is accepted
            and results in no work being done.

    Returns:
        None. Results are delivered through the ``twitter_list`` side effect.
    """
    if not description:
        return  # nothing to scan; avoids re.findall raising on None

    # Find URLs in description
    description_urls = re.findall(r'(https?://[^\s]+)', description)

    # Twitter Resources: checking each URL is sufficient, a separate
    # whole-description pre-check adds nothing.
    for item in description_urls:
        if 'twitter.com' in item:
            print('- Twitter URL Found:', item)
            twitter_list.append(item)
# Kick off the scrape when the script is executed.
run_scraper()
CSV 文件
Videos
https://www.youtube.com/watch?v=kqtD5dpn9C8
https://www.youtube.com/watch?v=rfscVS0vtbw
以上代码从 CSV 文件中提取 YouTube 网址,然后打印频道和标题信息。
此外,它通过 get_links 函数从 YouTube 描述中提取 Twitter URL。
问题
当我在 get_links 函数中打印捕获的 Twitter Url 时(第 61 行)
print('- Twitter URL Found:', item)
显示的结果正确显示了每个用户各自的 Twitter 条目。
但是,当我把这些信息放入元组 current_scrapes 时,每个元组条目里都会出现所有已捕获的 Twitter URL,而不是该视频各自对应的 URL。
如有任何帮助,我们将不胜感激。
稍微重组您的代码:
import re
import youtube_dl
import pandas as pd
# Scrape Online Product
def run_scraper():
    """Scrape channel, title and Twitter URLs for every video in single.csv.

    The first row of ``single.csv`` is skipped as a header; the first column
    of each remaining row is passed to youtube_dl as the video URL. The
    channel, title and Twitter URLs of every video are printed as they are
    found.

    Returns:
        A list with one ``(channel, title, twitter_urls)`` tuple per video.
        ``twitter_urls`` is the list returned by ``get_links`` for that
        video's description only.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import csv`` (the imports listed with this snippet omit it).
    import csv

    ydl_opts = {}
    all_scrapes = []

    # newline="" is the mode the csv module documents for files that are
    # handed to csv.reader.
    with open("single.csv", "r", newline="") as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)  # skip the header; tolerate an empty file

        # One YoutubeDL instance serves all rows.
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            for csv_line_entry in csv_reader:
                if not csv_line_entry:
                    continue  # csv.reader yields [] for blank lines

                meta = ydl.extract_info(csv_line_entry[0], download=False)
                channel = meta["channel"]
                title = meta["title"]
                # A missing or None description becomes "" so get_links
                # always receives a string.
                description = meta.get("description") or ""
                twitter_list = get_links(description, "twitter.com")

                print("Channel :", channel)
                print("Title :", title)
                print("Twitter URLs :", twitter_list)
                print("-" * 120)
                print()

                all_scrapes.append((channel, title, twitter_list))

    return all_scrapes
def get_links(description, link):
    """Return the URLs in *description* that contain *link*.

    Args:
        description: Text to scan for ``http://`` / ``https://`` URLs.
            ``None`` or an empty string yields an empty list.
        link: Substring a URL must contain to be kept, e.g. ``"twitter.com"``.

    Returns:
        A new list of the matching URLs, in the order they appear in the
        text.
    """
    if not description:
        return []  # avoids re.findall raising TypeError on None

    # Find URLs in description: scheme followed by a run of non-whitespace.
    description_urls = re.findall(r"(https?://[^\s]+)", description)
    return [item for item in description_urls if link in item]
# Run the scrape and show the result as a DataFrame, one row per tuple
# returned by run_scraper().
df = pd.DataFrame(run_scraper(), columns=["channel", "title", "twitter URLs"])
print(df)
打印:
[youtube] kqtD5dpn9C8: Downloading webpage
Channel : Programming with Mosh
Title : Python for Beginners - Learn Python in 1 Hour
Twitter URLs : ['https://twitter.com/moshhamedani']
------------------------------------------------------------------------------------------------------------------------
[youtube] rfscVS0vtbw: Downloading webpage
Channel : freeCodeCamp.org
Title : Learn Python - Full Course for Beginners [Tutorial]
Twitter URLs : ['https://twitter.com/mike_dane']
------------------------------------------------------------------------------------------------------------------------
channel title twitter URLs
0 Programming with Mosh Python for Beginners - Learn Python in 1 Hour [https://twitter.com/moshhamedani]
1 freeCodeCamp.org Learn Python - Full Course for Beginners [Tutorial] [https://twitter.com/mike_dane]
代码
from __future__ import unicode_literals
import youtube_dl
import pandas as pd
import csv
import re
# Load single.csv into a pandas DataFrame.
# NOTE(review): despite its name this holds the whole DataFrame, not a row
# count, and nothing in the visible code reads it - confirm it is needed.
number_of_rows = pd.read_csv('single.csv')
# Options dict handed to youtube_dl.YoutubeDL; left empty here.
ydl_opts = {}
# Module-level accumulators shared by run_scraper() and get_links():
# all_scrapes receives one (channel, title, twitter_list) tuple per video,
# twitter_list receives every Twitter URL that get_links() finds.
all_scrapes = []
twitter_list = []
# Scrape Online Product
def run_scraper():
    """Scrape channel, title and Twitter links for each URL in single.csv.

    Reads ``single.csv`` (the first row is skipped as a header), asks
    youtube_dl for the metadata of the URL in the first column of every
    remaining row, prints it, lets ``get_links`` collect Twitter URLs from
    the description, and appends a ``(channel, title, twitter_list)`` tuple
    to the module-level ``all_scrapes`` list.

    NOTE: ``twitter_list`` is the single module-level list that ``get_links``
    appends to. Every tuple stored here references that same list object, so
    each entry shows all Twitter URLs collected so far rather than only the
    ones belonging to its own video.
    """
    # Read CSV to List
    with open("single.csv", "r") as f:
        csv_reader = csv.reader(f)
        next(csv_reader)  # skip the header row

        # Scrape Data From Store
        for csv_line_entry in csv_reader:
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(csv_line_entry[0], download=False)

            channel = meta['channel']
            title = meta['title']
            description = meta['description']

            print('Channel :', channel)
            print('Title :', title)

            get_links(description)
            print("-" * 120)
            print()
            print('Demo:', twitter_list)

            # Make a tuple with the relevant info of the current YouTube Scrapes
            current_scrapes = (channel, title, twitter_list)
            all_scrapes.append(current_scrapes)

    print('All Scrapes:', all_scrapes)
    print()
def get_links(description):
    """Collect Twitter URLs found in *description* into ``twitter_list``.

    Every ``http://`` / ``https://`` URL in the text that contains
    ``twitter.com`` is printed and appended to the module-level
    ``twitter_list``.

    Args:
        description: Text to scan. ``None`` or an empty string is accepted
            and results in no work being done.

    Returns:
        None. Results are delivered through the ``twitter_list`` side effect.
    """
    if not description:
        return  # nothing to scan; avoids re.findall raising on None

    # Find URLs in description
    description_urls = re.findall(r'(https?://[^\s]+)', description)

    # Twitter Resources: checking each URL is sufficient, a separate
    # whole-description pre-check adds nothing.
    for item in description_urls:
        if 'twitter.com' in item:
            print('- Twitter URL Found:', item)
            twitter_list.append(item)
# Kick off the scrape when the script is executed.
run_scraper()
CSV 文件
Videos
https://www.youtube.com/watch?v=kqtD5dpn9C8
https://www.youtube.com/watch?v=rfscVS0vtbw
以上代码从 CSV 文件中提取 YouTube 网址,然后打印频道和标题信息。
此外,它通过 get_links 函数从 YouTube 描述中提取 Twitter URL。
问题
当我在 get_links 函数中打印捕获的 Twitter Url 时(第 61 行)
print('- Twitter URL Found:', item)
显示的结果正确显示了每个用户各自的 Twitter 条目。
但是,当我把这些信息放入元组 current_scrapes 时,每个元组条目里都会出现所有已捕获的 Twitter URL,而不是该视频各自对应的 URL。
如有任何帮助,我们将不胜感激。
稍微重组您的代码:
import re
import youtube_dl
import pandas as pd
# Scrape Online Product
def run_scraper():
    """Scrape channel, title and Twitter URLs for every video in single.csv.

    The first row of ``single.csv`` is skipped as a header; the first column
    of each remaining row is passed to youtube_dl as the video URL. The
    channel, title and Twitter URLs of every video are printed as they are
    found.

    Returns:
        A list with one ``(channel, title, twitter_urls)`` tuple per video.
        ``twitter_urls`` is the list returned by ``get_links`` for that
        video's description only.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import csv`` (the imports listed with this snippet omit it).
    import csv

    ydl_opts = {}
    all_scrapes = []

    # newline="" is the mode the csv module documents for files that are
    # handed to csv.reader.
    with open("single.csv", "r", newline="") as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)  # skip the header; tolerate an empty file

        # One YoutubeDL instance serves all rows.
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            for csv_line_entry in csv_reader:
                if not csv_line_entry:
                    continue  # csv.reader yields [] for blank lines

                meta = ydl.extract_info(csv_line_entry[0], download=False)
                channel = meta["channel"]
                title = meta["title"]
                # A missing or None description becomes "" so get_links
                # always receives a string.
                description = meta.get("description") or ""
                twitter_list = get_links(description, "twitter.com")

                print("Channel :", channel)
                print("Title :", title)
                print("Twitter URLs :", twitter_list)
                print("-" * 120)
                print()

                all_scrapes.append((channel, title, twitter_list))

    return all_scrapes
def get_links(description, link):
    """Return the URLs in *description* that contain *link*.

    Args:
        description: Text to scan for ``http://`` / ``https://`` URLs.
            ``None`` or an empty string yields an empty list.
        link: Substring a URL must contain to be kept, e.g. ``"twitter.com"``.

    Returns:
        A new list of the matching URLs, in the order they appear in the
        text.
    """
    if not description:
        return []  # avoids re.findall raising TypeError on None

    # Find URLs in description: scheme followed by a run of non-whitespace.
    description_urls = re.findall(r"(https?://[^\s]+)", description)
    return [item for item in description_urls if link in item]
# Run the scrape and show the result as a DataFrame, one row per tuple
# returned by run_scraper().
df = pd.DataFrame(run_scraper(), columns=["channel", "title", "twitter URLs"])
print(df)
打印:
[youtube] kqtD5dpn9C8: Downloading webpage
Channel : Programming with Mosh
Title : Python for Beginners - Learn Python in 1 Hour
Twitter URLs : ['https://twitter.com/moshhamedani']
------------------------------------------------------------------------------------------------------------------------
[youtube] rfscVS0vtbw: Downloading webpage
Channel : freeCodeCamp.org
Title : Learn Python - Full Course for Beginners [Tutorial]
Twitter URLs : ['https://twitter.com/mike_dane']
------------------------------------------------------------------------------------------------------------------------
channel title twitter URLs
0 Programming with Mosh Python for Beginners - Learn Python in 1 Hour [https://twitter.com/moshhamedani]
1 freeCodeCamp.org Learn Python - Full Course for Beginners [Tutorial] [https://twitter.com/mike_dane]