对字符串替换方法的其他建议? Python
Other suggestion to string replace method? Python
首先,请看下面我的代码。
import string
# Entries 0-4 are the names currently present in the body; entries 5-9 are
# their replacements (DNA[k] is meant to become DNA[k + 5]).
DNA=["Alpha", "Bravo", "Charlie", "Delta", "Echo", "CharlieChoo", "DeltaAir", "Alpha bet", "ChooChoo", "Airline"]
# JSON request body written as one escaped string literal; the trailing
# backslashes continue the literal onto the next line without a newline.
body = "{\"startDate\":\"2016-01-01\"\
,\"endDate\":\"2017-10-30\"\
,\"timeUnit\":\"date\"\
,\"keywordGroups\":[{\"groupName\":\"Alpha\",\"keywords\":[\"Alpha\"]}\
,{\"groupName\":\"Bravo\",\"keywords\":[\"Bravo\"]}\
,{\"groupName\":\"Charlie\",\"keywords\":[\"Charlie\"]}\
,{\"groupName\":\"Delta\",\"keywords\":[\"Delta\"]}\
,{\"groupName\":\"Echo\",\"keywords\":[\"Echo\"]}]\
,\"device\":\"\",\"ages\":[\"1\",\"11\"],\"gender\":\"\"}"
# Each call replaces the first two occurrences of the old name anywhere in the
# string. NOTE: the calls run one after another on the already-modified text,
# so a later call can hit text inserted by an earlier one -- "Alpha" becomes
# "CharlieChoo", and the "Charlie" call then turns that into "Alpha betChoo"
# and uses up its two replacements before reaching the real "Charlie" entries.
body = body.replace(DNA[0],DNA[5],2)
body = body.replace(DNA[1],DNA[6],2)
body = body.replace(DNA[2],DNA[7],2)
body = body.replace(DNA[3],DNA[8],2)
body = body.replace(DNA[4],DNA[9],2)
# Bare expression: shows the resulting string when run interactively.
body
输出如下
'{"startDate":"2016-01-01","endDate":"2017-10-30","timeUnit":"date","keywordGroups":
[{"groupName":"Alpha betChoo","keywords":["Alpha betChoo"]},
{"groupName":"ChooChooAir","keywords":["ChooChooAir"]},
{"groupName":"Charlie","keywords":["Charlie"]},
{"groupName":"Delta","keywords":["Delta"]},
{"groupName":"Airline","keywords":["Airline"]}],"device":"","ages":
["1","11"],"gender":""}'
我的预期输出如下
#body = "{\"startDate\":\"2016-01-01\"\
#,\"endDate\":\"2017-10-30\"\
#,\"timeUnit\":\"date\"\
#,\"keywordGroups\":[{\"groupName\":\"CharlieChoo\",\"keywords\":[\"CharlieChoo\"]}\
#,{\"groupName\":\"DeltaAir\",\"keywords\":[\"DeltaAir\"]}\
#,{\"groupName\":\"Alpha bet\",\"keywords\":[\"Alpha bet\"]}\
#,{\"groupName\":\"ChooChoo\",\"keywords\":[\"ChooChoo\"]}\
#,{\"groupName\":\"Airline\",\"keywords\":[\"Airline\"]}]\
#,\"device\":\"\",\"ages\":[\"1\",\"11\"],\"gender\":\"\"}"
所以基本上我是在尝试从 DNA 列表中替换 groupName 和关键字。在这个例子中,我在 DNA 列表中只有 10 个对象,但我的真实项目包含几千个。
我个人的想法是,替换字符串是不合适的,因为字符串很可能会重叠。
还有另一种方法来完成我的任务吗?需要考虑的一件事是我需要将我的输出与第一主体字符串的类型相同(仅更改单词)。
提前致谢
------------------------------------编辑------ ---------------------------------------------- ------
发生了关于@AJAX1234 回答的新错误。
import pandas as pd
import json
# Read sheet 'Sheet1' of the workbook. The context manager closes the
# underlying file handle as soon as the sheet has been parsed.
with pd.ExcelFile('mat_hierarchy.xlsx') as workbook:
    ex = workbook.parse('Sheet1')
# Keep only the '4Level' column.
DNA = ex.loc[:, '4Level']
# Bare expression: displays the column when run interactively.
DNA
上面是我的DNA文件,下面是输出
0 Fruit
1 MixFruit
2 SuperFruit
3 PassionFruit
4 Orange
5 Lemon
6 Mango
................. it goes on forever :(
使用这些数据运行您的代码时,"name a is not defined" 错误不断出现。我只是初学者,我的猜测是我的 "DNA" 被定义成了带索引的对象(DNA.index[0] 之类......),我也试过把您代码中的 "a" 换成数字,但还是不行。
关于这个问题有什么建议吗?
感谢您的输入!!!
------------------------编辑 2-------------------- ------------
# Fixed fragments of the JSON request body. A body is assembled as
#   body_intro + <group> + body_groupName + <group> + ... + body_last
# where every <group> is  name + body_keywords + name.
body_intro = "{\"startDate\":\"2016-01-01\",\"endDate\":\"2017-10-30\",\"timeUnit\":\"date\",\"keywordGroups\":[{\"groupName\":\""
body_keywords = "\",\"keywords\":[\""
body_groupName = "\"]},{\"groupName\":\""
body_last = "\"]}],\"device\":\"\",\"ages\":[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\",\"10\",\"11\"],\"gender\":\"f\"}"

# Number of keyword groups packed into one body.
GROUPS_PER_BODY = 5

for i in range(0, len(DNA), GROUPS_PER_BODY):
    # Take up to GROUPS_PER_BODY names starting at i; the last chunk may be
    # shorter. Elements are read one by one with DNA[j], exactly as before,
    # so DNA can be anything that supports len() and integer indexing.
    chunk = [DNA[j] for j in range(i, min(i + GROUPS_PER_BODY, len(DNA)))]
    # One expression covers every chunk length (1 to 5 names), replacing the
    # former if/elif chain that spelled each length out by hand.
    # NOTE(review): body is rebound on every pass; presumably each body is
    # consumed inside the loop in the full script -- not visible here.
    body = body_intro + body_groupName.join(
        name + body_keywords + name for name in chunk
    ) + body_last
你可以试试这个:
import json
# Parse the request body so names are swapped on the decoded structure rather
# than by text replacement; one rename can then never hit the result of another.
new_body = json.loads(body)
DNA = ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "CharlieChoo", "DeltaAir", "Alpha bet", "ChooChoo", "Airline"]
# The first half of DNA holds the current names, the second half holds the
# replacement found at the same offset.
half = len(DNA) // 2
renames = dict(zip(DNA[:half], DNA[half:]))
# In each group a value is either a list of names ("keywords") or a single
# name ("groupName"); both are mapped through renames. The single-name case
# uses the value itself -- the earlier version referred to the inner
# comprehension's loop variable there, which raises NameError on Python 3.
# A name that is not a key of renames raises KeyError.
new_body['keywordGroups'] = [
    {
        key: [renames[name] for name in value] if isinstance(value, list) else renames[value]
        for key, value in group.items()
    }
    for group in new_body['keywordGroups']
]
final_data = json.dumps(new_body)
输出:
'{"startDate": "2016-01-01", "endDate": "2017-10-30", "gender": "",
"ages": ["1", "11"], "keywordGroups":
[{"keywords": ["CharlieChoo"], "groupName": "CharlieChoo"},
{"keywords": ["DeltaAir"], "groupName":"DeltaAir"},
{"keywords": ["Alpha bet"], "groupName": "Alpha bet"},
{"keywords": ["ChooChoo"], "groupName": "ChooChoo"}, {"keywords":["Airline"], "groupName": "Airline"}], "device": "", "timeUnit": "date"}'
只需使用正则表达式。我假设您的 DNA 列表由源名称和目标名称成对组成(前半部分是源名称,后半部分是对应的目标名称)。
import re
# The first half of DNA holds the names to look for, the second half holds the
# replacement found at the same offset.
length_of_DNA = len(DNA)
# Integer division: slice bounds and indices must be ints on Python 3.
half = length_of_DNA // 2
for i, t in enumerate(DNA[:half]):
    s = DNA[half + i]
    quoted = '"' + s + '"'
    # Match the name only as a complete quoted value, so "Charlie" no longer
    # matches inside "CharlieChoo". re.escape keeps regex metacharacters in the
    # name literal, and the callable inserts the replacement verbatim with its
    # surrounding quotes (a plain replacement string would drop the quotes and
    # interpret backslashes).
    # NOTE: a replacement that is identical to a later source name would still
    # be rewritten again by that later pass.
    body = re.sub('"' + re.escape(t) + '"', lambda _match: quoted, body, count=2)
希望有所帮助。
为了能够执行 "batch" 替换(假设您需要保持替换元素的数量),我将执行以下操作:
# Maps each current name to its replacement.
lookup = {
    "Alpha": "CharlieChoo",
    "Bravo": "DeltaAir",
    "Charlie": "Alpha bet",
    "Delta": "ChooChoo",
    "Echo": "Airline",
}
# Number of replacements still allowed per name; decremented on every
# replacement performed by replace_using_lookups.
lookup_count = {
    "Alpha": 2,
    "Bravo": 2,
    "Charlie": 2,
    "Delta": 2,
    "Echo": 2,
}


def replace_using_lookups(match):
    """Return the quoted replacement text for one regex match.

    Intended as the ``repl`` callable of ``re.sub`` with a pattern whose
    group 1 is the text between a pair of double quotes.

    Args:
        match: Match object; ``match.group(1)`` is the unquoted word.

    Returns:
        The replacement from ``lookup`` wrapped in double quotes while the
        word still has replacements left in ``lookup_count`` (which is
        decremented as a side effect); otherwise the original word wrapped
        in double quotes.
    """
    word = match.group(1)
    if word in lookup and lookup_count[word] > 0:
        lookup_count[word] -= 1
        return '"{}"'.format(lookup[word])
    # Unknown word, or its replacement budget is used up: leave it unchanged.
    return '"{}"'.format(word)
# Single pass over body: every double-quoted word is handed to
# replace_using_lookups. The pattern is a raw string so that \w reaches the
# regex engine instead of being an invalid string escape. re.sub returns a new
# string (shown when run interactively); body itself is not modified.
re.sub(r'"(\w+)"', replace_using_lookups, body)
如果不需要 lookup_count 字典,您可以使用更简单的 lambda 执行替换。
首先,请看下面我的代码。
import string
# Entries 0-4 are the names currently present in the body; entries 5-9 are
# their replacements (DNA[k] is meant to become DNA[k + 5]).
DNA=["Alpha", "Bravo", "Charlie", "Delta", "Echo", "CharlieChoo", "DeltaAir", "Alpha bet", "ChooChoo", "Airline"]
# JSON request body written as one escaped string literal; the trailing
# backslashes continue the literal onto the next line without a newline.
body = "{\"startDate\":\"2016-01-01\"\
,\"endDate\":\"2017-10-30\"\
,\"timeUnit\":\"date\"\
,\"keywordGroups\":[{\"groupName\":\"Alpha\",\"keywords\":[\"Alpha\"]}\
,{\"groupName\":\"Bravo\",\"keywords\":[\"Bravo\"]}\
,{\"groupName\":\"Charlie\",\"keywords\":[\"Charlie\"]}\
,{\"groupName\":\"Delta\",\"keywords\":[\"Delta\"]}\
,{\"groupName\":\"Echo\",\"keywords\":[\"Echo\"]}]\
,\"device\":\"\",\"ages\":[\"1\",\"11\"],\"gender\":\"\"}"
# Each call replaces the first two occurrences of the old name anywhere in the
# string. NOTE: the calls run one after another on the already-modified text,
# so a later call can hit text inserted by an earlier one -- "Alpha" becomes
# "CharlieChoo", and the "Charlie" call then turns that into "Alpha betChoo"
# and uses up its two replacements before reaching the real "Charlie" entries.
body = body.replace(DNA[0],DNA[5],2)
body = body.replace(DNA[1],DNA[6],2)
body = body.replace(DNA[2],DNA[7],2)
body = body.replace(DNA[3],DNA[8],2)
body = body.replace(DNA[4],DNA[9],2)
# Bare expression: shows the resulting string when run interactively.
body
输出如下
'{"startDate":"2016-01-01","endDate":"2017-10-30","timeUnit":"date","keywordGroups":
[{"groupName":"Alpha betChoo","keywords":["Alpha betChoo"]},
{"groupName":"ChooChooAir","keywords":["ChooChooAir"]},
{"groupName":"Charlie","keywords":["Charlie"]},
{"groupName":"Delta","keywords":["Delta"]},
{"groupName":"Airline","keywords":["Airline"]}],"device":"","ages":
["1","11"],"gender":""}'
我的预期输出如下
#body = "{\"startDate\":\"2016-01-01\"\
#,\"endDate\":\"2017-10-30\"\
#,\"timeUnit\":\"date\"\
#,\"keywordGroups\":[{\"groupName\":\"CharlieChoo\",\"keywords\":[\"CharlieChoo\"]}\
#,{\"groupName\":\"DeltaAir\",\"keywords\":[\"DeltaAir\"]}\
#,{\"groupName\":\"Alpha bet\",\"keywords\":[\"Alpha bet\"]}\
#,{\"groupName\":\"ChooChoo\",\"keywords\":[\"ChooChoo\"]}\
#,{\"groupName\":\"Airline\",\"keywords\":[\"Airline\"]}]\
#,\"device\":\"\",\"ages\":[\"1\",\"11\"],\"gender\":\"\"}"
所以基本上我是在尝试从 DNA 列表中替换 groupName 和关键字。在这个例子中,我在 DNA 列表中只有 10 个对象,但我的真实项目包含几千个。
我个人的想法是,替换字符串是不合适的,因为字符串很可能会重叠。 还有另一种方法来完成我的任务吗?需要考虑的一件事是我需要将我的输出与第一主体字符串的类型相同(仅更改单词)。 提前致谢
------------------------------------编辑------ ---------------------------------------------- ------
发生了关于@AJAX1234 回答的新错误。
import pandas as pd
import json
# Read sheet 'Sheet1' of the workbook. The context manager closes the
# underlying file handle as soon as the sheet has been parsed.
with pd.ExcelFile('mat_hierarchy.xlsx') as workbook:
    ex = workbook.parse('Sheet1')
# Keep only the '4Level' column.
DNA = ex.loc[:, '4Level']
# Bare expression: displays the column when run interactively.
DNA
上面是我的DNA文件,下面是输出
0 Fruit
1 MixFruit
2 SuperFruit
3 PassionFruit
4 Orange
5 Lemon
6 Mango
................. it goes on forever :(
使用这些数据运行您的代码时,"name a is not defined" 错误不断出现。我只是初学者,我的猜测是我的 "DNA" 被定义成了带索引的对象(DNA.index[0] 之类......),我也试过把您代码中的 "a" 换成数字,但还是不行。
关于这个问题有什么建议吗? 感谢您的输入!!!
------------------------编辑 2-------------------- ------------
# Fixed fragments of the JSON request body. A body is assembled as
#   body_intro + <group> + body_groupName + <group> + ... + body_last
# where every <group> is  name + body_keywords + name.
body_intro = "{\"startDate\":\"2016-01-01\",\"endDate\":\"2017-10-30\",\"timeUnit\":\"date\",\"keywordGroups\":[{\"groupName\":\""
body_keywords = "\",\"keywords\":[\""
body_groupName = "\"]},{\"groupName\":\""
body_last = "\"]}],\"device\":\"\",\"ages\":[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\",\"10\",\"11\"],\"gender\":\"f\"}"

# Number of keyword groups packed into one body.
GROUPS_PER_BODY = 5

for i in range(0, len(DNA), GROUPS_PER_BODY):
    # Take up to GROUPS_PER_BODY names starting at i; the last chunk may be
    # shorter. Elements are read one by one with DNA[j], exactly as before,
    # so DNA can be anything that supports len() and integer indexing.
    chunk = [DNA[j] for j in range(i, min(i + GROUPS_PER_BODY, len(DNA)))]
    # One expression covers every chunk length (1 to 5 names), replacing the
    # former if/elif chain that spelled each length out by hand.
    # NOTE(review): body is rebound on every pass; presumably each body is
    # consumed inside the loop in the full script -- not visible here.
    body = body_intro + body_groupName.join(
        name + body_keywords + name for name in chunk
    ) + body_last
你可以试试这个:
import json
# Parse the request body so names are swapped on the decoded structure rather
# than by text replacement; one rename can then never hit the result of another.
new_body = json.loads(body)
DNA = ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "CharlieChoo", "DeltaAir", "Alpha bet", "ChooChoo", "Airline"]
# The first half of DNA holds the current names, the second half holds the
# replacement found at the same offset.
half = len(DNA) // 2
renames = dict(zip(DNA[:half], DNA[half:]))
# In each group a value is either a list of names ("keywords") or a single
# name ("groupName"); both are mapped through renames. The single-name case
# uses the value itself -- the earlier version referred to the inner
# comprehension's loop variable there, which raises NameError on Python 3.
# A name that is not a key of renames raises KeyError.
new_body['keywordGroups'] = [
    {
        key: [renames[name] for name in value] if isinstance(value, list) else renames[value]
        for key, value in group.items()
    }
    for group in new_body['keywordGroups']
]
final_data = json.dumps(new_body)
输出:
'{"startDate": "2016-01-01", "endDate": "2017-10-30", "gender": "",
"ages": ["1", "11"], "keywordGroups":
[{"keywords": ["CharlieChoo"], "groupName": "CharlieChoo"},
{"keywords": ["DeltaAir"], "groupName":"DeltaAir"},
{"keywords": ["Alpha bet"], "groupName": "Alpha bet"},
{"keywords": ["ChooChoo"], "groupName": "ChooChoo"}, {"keywords":["Airline"], "groupName": "Airline"}], "device": "", "timeUnit": "date"}'
只需使用正则表达式。我假设您的 DNA 列表由源名称和目标名称成对组成(前半部分是源名称,后半部分是对应的目标名称)。
import re
# The first half of DNA holds the names to look for, the second half holds the
# replacement found at the same offset.
length_of_DNA = len(DNA)
# Integer division: slice bounds and indices must be ints on Python 3.
half = length_of_DNA // 2
for i, t in enumerate(DNA[:half]):
    s = DNA[half + i]
    quoted = '"' + s + '"'
    # Match the name only as a complete quoted value, so "Charlie" no longer
    # matches inside "CharlieChoo". re.escape keeps regex metacharacters in the
    # name literal, and the callable inserts the replacement verbatim with its
    # surrounding quotes (a plain replacement string would drop the quotes and
    # interpret backslashes).
    # NOTE: a replacement that is identical to a later source name would still
    # be rewritten again by that later pass.
    body = re.sub('"' + re.escape(t) + '"', lambda _match: quoted, body, count=2)
希望有所帮助。
为了能够执行 "batch" 替换(假设您需要保持替换元素的数量),我将执行以下操作:
# Maps each current name to its replacement.
lookup = {
    "Alpha": "CharlieChoo",
    "Bravo": "DeltaAir",
    "Charlie": "Alpha bet",
    "Delta": "ChooChoo",
    "Echo": "Airline",
}
# Number of replacements still allowed per name; decremented on every
# replacement performed by replace_using_lookups.
lookup_count = {
    "Alpha": 2,
    "Bravo": 2,
    "Charlie": 2,
    "Delta": 2,
    "Echo": 2,
}


def replace_using_lookups(match):
    """Return the quoted replacement text for one regex match.

    Intended as the ``repl`` callable of ``re.sub`` with a pattern whose
    group 1 is the text between a pair of double quotes.

    Args:
        match: Match object; ``match.group(1)`` is the unquoted word.

    Returns:
        The replacement from ``lookup`` wrapped in double quotes while the
        word still has replacements left in ``lookup_count`` (which is
        decremented as a side effect); otherwise the original word wrapped
        in double quotes.
    """
    word = match.group(1)
    if word in lookup and lookup_count[word] > 0:
        lookup_count[word] -= 1
        return '"{}"'.format(lookup[word])
    # Unknown word, or its replacement budget is used up: leave it unchanged.
    return '"{}"'.format(word)
# Single pass over body: every double-quoted word is handed to
# replace_using_lookups. The pattern is a raw string so that \w reaches the
# regex engine instead of being an invalid string escape. re.sub returns a new
# string (shown when run interactively); body itself is not modified.
re.sub(r'"(\w+)"', replace_using_lookups, body)
如果不需要 lookup_count 字典,您可以使用更简单的 lambda 执行替换。