从 json.file 中提取信息,其中该字段位于各种字典的不同位置
Extracting information from a json.file where the field is in different places in various dicts
我正在 Python 3.8 中从一个嵌套的 JSON 文件里提取大量字典,并遇到了以下 KeyError:
extended_tweet = data[str(i)]['extended_tweet']['full_text']
KeyError: 'extended_tweet'
如何在嵌套的 json.file 中搜索一个字段,该字段隐藏在每个字典的不同结构中?我认为我定义字段的不灵活方式阻止了正确的输出,但我不知道如何解决它。
# Walk every top-level entry of `data` and print one summary line per tweet.
# `data` is expected to be defined before this snippet runs.
for i in data:
    entry = data[str(i)]
    user = entry['user']

    date = entry['created_at']
    account = user['name']
    location = user['location']
    truncated = entry['truncated']
    tweet = entry['text']

    # NOTE(review): both lookups below are unconditional, so an entry that
    # lacks 'extended_tweet' or 'retweeted_status' raises KeyError here.
    extended_tweet = entry['extended_tweet']['full_text']
    retweeted_status = entry['retweeted_status']['extended_tweet']['full_text']

    # NOTE(review): `truncated` is compared with the string 'True'; if the
    # value is a real boolean (as a JSON `true` would load), this branch is
    # never taken -- confirm what type the data actually holds.
    if truncated == 'True':
        shown = extended_tweet
    elif 'RT' in tweet:
        shown = retweeted_status
    else:
        shown = tweet
    print(truncated, date, account, location, shown)
这是我的 JSON 文件中的一个字典示例。键“3”对应这个字典,我需要从字段 extended_tweet.full_text 中获取数据。每个 JSON 路径查找工具显示的路径都是 x.extended_tweet.full_text。但是,当我使用这个路径时,就会收到上面显示的错误。
"3": {
"created_at": "time",
"id": id,
"id_str": "id",
"text": "text",
"display_text_range": [
0,
140
],
"source": "",
"truncated": true,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": ,
"id_str": "",
"name": "",
"screen_name": "name",
"location": "location",
"url": "url",
"description": "description",
"translator_type": "none",
"derived": {
"locations": [
{
"country": "country",
"country_code": "land",
"locality": "locality",
"region": "region",
"full_name": "full_name",
"geo": {
"coordinates": [
number,
number
],
"type": "point"
}
}
]
},
"protected": false,
"verified": true,
"followers_count": number,
"friends_count": number,
"listed_count": number,
"favourites_count": number,
"statuses_count": number,
"created_at": "time",
"utc_offset": null,
"time_zone": null,
"geo_enabled": false,
"lang": null,
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "number",
"profile_background_image_url": "gif",
"profile_background_image_url_https": "link",
"profile_background_tile": true,
"profile_link_color": "607696",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "link",
"profile_image_url_https": "link",
"profile_banner_url": "bannerurl",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"is_quote_status": false,
"extended_tweet": {
"full_text": "full_text",
测试人员您好 :) 我把您的 JSON 示例放进了一个文件,给各个字段填入了一些值,并添加了一个 retweeted_status 对象,然后基本上按原样运行了您的代码,如下所示:
import json
import os
# Load the sample JSON file (path is relative to the current working
# directory) and print one summary line per top-level entry.
sample_path = os.path.join(os.path.realpath('.'), 'src/test/x.json')
with open(sample_path) as file1:
    data = json.load(file1)

for i in data:
    entry = data[str(i)]
    user = entry['user']

    date = entry['created_at']
    account = user['name']
    location = user['location']
    truncated = entry['truncated']
    tweet = entry['text']

    # Unconditional lookups: every entry must contain both nested objects,
    # otherwise a KeyError is raised.
    extended_tweet = entry['extended_tweet']['full_text']
    retweeted_status = entry['retweeted_status']['extended_tweet']['full_text']

    # NOTE(review): json.load turns JSON `true` into the bool True, which is
    # not equal to the string 'True', so for such entries this first branch
    # is skipped and one of the later branches decides what is printed.
    if truncated == 'True':
        shown = extended_tweet
    elif 'RT' in tweet:
        shown = retweeted_status
    else:
        shown = tweet
    print(truncated, date, account, location, shown)
对我来说工作正常并打印:
True time location text
这是我放入文件的 JSON:
{"3": {
"created_at": "time",
"id": 1234,
"id_str": "id",
"text": "text",
"display_text_range": [
0,
140
],
"source": "",
"truncated": true,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 1234,
"id_str": "",
"name": "",
"screen_name": "name",
"location": "location",
"url": "url",
"description": "description",
"translator_type": "none",
"derived": {
"locations": [
{
"country": "country",
"country_code": "land",
"locality": "locality",
"region": "region",
"full_name": "full_name",
"geo": {
"coordinates": [
100,
100
],
"type": "point"
}
}
]
},
"protected": false,
"verified": true,
"followers_count": 100,
"friends_count": 100,
"listed_count": 100,
"favourites_count": 100,
"statuses_count": 100,
"created_at": "time",
"utc_offset": null,
"time_zone": null,
"geo_enabled": false,
"lang": null,
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "number",
"profile_background_image_url": "gif",
"profile_background_image_url_https": "link",
"profile_background_tile": true,
"profile_link_color": "607696",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "link",
"profile_image_url_https": "link",
"profile_banner_url": "bannerurl",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"is_quote_status": false,
"extended_tweet": {
"full_text": "full_text"
},
"retweeted_status": {
"extended_tweet": {
"full_text": "full_text"
}
}
}}
查看完整数据后可以明显看出,有些元素有时并不存在。不借助异常来处理缺失键的办法是使用 dict 的 get 方法:当键不存在时,该方法会返回一个默认值。下面的代码可以处理扩展推文和转发推文中缺失的元素,不会引发异常,并会打印出缺失的是哪一项。此代码可以处理您数据中的全部 499 条推文。
# Pull the full text of the extended tweet and of the retweeted tweet out of
# one entry without raising on absent keys. Each missing level is replaced by
# a descriptive placeholder string, so printing the variable shows exactly
# which element was missing. Expects `data` and the loop variable `i` to be
# in scope (i.e. this belongs inside the `for i in data:` loop).
full_tweet = data[str(i)]

# dict.get returns the supplied default instead of raising KeyError.
extended_tweet = full_tweet.get('extended_tweet', 'extended_tweet missing')
if extended_tweet != 'extended_tweet missing':
    extended_tweet = extended_tweet.get('full_text', 'full_text missing')

retweeted_status = full_tweet.get('retweeted_status', 'retweeted_status missing')
if retweeted_status != 'retweeted_status missing':
    retweeted_status = retweeted_status.get('extended_tweet', 'extended_tweet missing')
    # This check must stay nested: when 'retweeted_status' itself is absent,
    # the variable holds a placeholder string, which must not be indexed.
    if retweeted_status != 'extended_tweet missing':
        # Use .get here too, so a retweet without 'full_text' yields the
        # same placeholder as the extended_tweet branch instead of KeyError.
        retweeted_status = retweeted_status.get('full_text', 'full_text missing')
我正在 Python 3.8 中从一个嵌套的 JSON 文件里提取大量字典,并遇到了以下 KeyError:
extended_tweet = data[str(i)]['extended_tweet']['full_text']
KeyError: 'extended_tweet'
如何在嵌套的 json.file 中搜索一个字段,该字段隐藏在每个字典的不同结构中?我认为我定义字段的不灵活方式阻止了正确的输出,但我不知道如何解决它。
# Walk every top-level entry of `data` and print one summary line per tweet.
# `data` is expected to be defined before this snippet runs.
for i in data:
    entry = data[str(i)]
    user = entry['user']

    date = entry['created_at']
    account = user['name']
    location = user['location']
    truncated = entry['truncated']
    tweet = entry['text']

    # NOTE(review): both lookups below are unconditional, so an entry that
    # lacks 'extended_tweet' or 'retweeted_status' raises KeyError here.
    extended_tweet = entry['extended_tweet']['full_text']
    retweeted_status = entry['retweeted_status']['extended_tweet']['full_text']

    # NOTE(review): `truncated` is compared with the string 'True'; if the
    # value is a real boolean (as a JSON `true` would load), this branch is
    # never taken -- confirm what type the data actually holds.
    if truncated == 'True':
        shown = extended_tweet
    elif 'RT' in tweet:
        shown = retweeted_status
    else:
        shown = tweet
    print(truncated, date, account, location, shown)
这是我的 JSON 文件中的一个字典示例。键“3”对应这个字典,我需要从字段 extended_tweet.full_text 中获取数据。每个 JSON 路径查找工具显示的路径都是 x.extended_tweet.full_text。但是,当我使用这个路径时,就会收到上面显示的错误。
"3": {
"created_at": "time",
"id": id,
"id_str": "id",
"text": "text",
"display_text_range": [
0,
140
],
"source": "",
"truncated": true,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": ,
"id_str": "",
"name": "",
"screen_name": "name",
"location": "location",
"url": "url",
"description": "description",
"translator_type": "none",
"derived": {
"locations": [
{
"country": "country",
"country_code": "land",
"locality": "locality",
"region": "region",
"full_name": "full_name",
"geo": {
"coordinates": [
number,
number
],
"type": "point"
}
}
]
},
"protected": false,
"verified": true,
"followers_count": number,
"friends_count": number,
"listed_count": number,
"favourites_count": number,
"statuses_count": number,
"created_at": "time",
"utc_offset": null,
"time_zone": null,
"geo_enabled": false,
"lang": null,
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "number",
"profile_background_image_url": "gif",
"profile_background_image_url_https": "link",
"profile_background_tile": true,
"profile_link_color": "607696",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "link",
"profile_image_url_https": "link",
"profile_banner_url": "bannerurl",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"is_quote_status": false,
"extended_tweet": {
"full_text": "full_text",
测试人员您好 :) 我把您的 JSON 示例放进了一个文件,给各个字段填入了一些值,并添加了一个 retweeted_status 对象,然后基本上按原样运行了您的代码,如下所示:
import json
import os
# Load the sample JSON file (path is relative to the current working
# directory) and print one summary line per top-level entry.
sample_path = os.path.join(os.path.realpath('.'), 'src/test/x.json')
with open(sample_path) as file1:
    data = json.load(file1)

for i in data:
    entry = data[str(i)]
    user = entry['user']

    date = entry['created_at']
    account = user['name']
    location = user['location']
    truncated = entry['truncated']
    tweet = entry['text']

    # Unconditional lookups: every entry must contain both nested objects,
    # otherwise a KeyError is raised.
    extended_tweet = entry['extended_tweet']['full_text']
    retweeted_status = entry['retweeted_status']['extended_tweet']['full_text']

    # NOTE(review): json.load turns JSON `true` into the bool True, which is
    # not equal to the string 'True', so for such entries this first branch
    # is skipped and one of the later branches decides what is printed.
    if truncated == 'True':
        shown = extended_tweet
    elif 'RT' in tweet:
        shown = retweeted_status
    else:
        shown = tweet
    print(truncated, date, account, location, shown)
对我来说工作正常并打印:
True time location text
这是我放入文件的 JSON:
{"3": {
"created_at": "time",
"id": 1234,
"id_str": "id",
"text": "text",
"display_text_range": [
0,
140
],
"source": "",
"truncated": true,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 1234,
"id_str": "",
"name": "",
"screen_name": "name",
"location": "location",
"url": "url",
"description": "description",
"translator_type": "none",
"derived": {
"locations": [
{
"country": "country",
"country_code": "land",
"locality": "locality",
"region": "region",
"full_name": "full_name",
"geo": {
"coordinates": [
100,
100
],
"type": "point"
}
}
]
},
"protected": false,
"verified": true,
"followers_count": 100,
"friends_count": 100,
"listed_count": 100,
"favourites_count": 100,
"statuses_count": 100,
"created_at": "time",
"utc_offset": null,
"time_zone": null,
"geo_enabled": false,
"lang": null,
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "number",
"profile_background_image_url": "gif",
"profile_background_image_url_https": "link",
"profile_background_tile": true,
"profile_link_color": "607696",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "EFEFEF",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "link",
"profile_image_url_https": "link",
"profile_banner_url": "bannerurl",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"is_quote_status": false,
"extended_tweet": {
"full_text": "full_text"
},
"retweeted_status": {
"extended_tweet": {
"full_text": "full_text"
}
}
}}
查看完整数据后可以明显看出,有些元素有时并不存在。不借助异常来处理缺失键的办法是使用 dict 的 get 方法:当键不存在时,该方法会返回一个默认值。下面的代码可以处理扩展推文和转发推文中缺失的元素,不会引发异常,并会打印出缺失的是哪一项。此代码可以处理您数据中的全部 499 条推文。
# Pull the full text of the extended tweet and of the retweeted tweet out of
# one entry without raising on absent keys. Each missing level is replaced by
# a descriptive placeholder string, so printing the variable shows exactly
# which element was missing. Expects `data` and the loop variable `i` to be
# in scope (i.e. this belongs inside the `for i in data:` loop).
full_tweet = data[str(i)]

# dict.get returns the supplied default instead of raising KeyError.
extended_tweet = full_tweet.get('extended_tweet', 'extended_tweet missing')
if extended_tweet != 'extended_tweet missing':
    extended_tweet = extended_tweet.get('full_text', 'full_text missing')

retweeted_status = full_tweet.get('retweeted_status', 'retweeted_status missing')
if retweeted_status != 'retweeted_status missing':
    retweeted_status = retweeted_status.get('extended_tweet', 'extended_tweet missing')
    # This check must stay nested: when 'retweeted_status' itself is absent,
    # the variable holds a placeholder string, which must not be indexed.
    if retweeted_status != 'extended_tweet missing':
        # Use .get here too, so a retweet without 'full_text' yields the
        # same placeholder as the extended_tweet branch instead of KeyError.
        retweeted_status = retweeted_status.get('full_text', 'full_text missing')