将列表中的元素转换为字典
convert elements in a list to dictionary
我想将数据转换成字典以供使用。数据看起来像字典中的键和值,但它们组合成一个元素。
这是数据样本
['"acetic anydride": "[CX3](=[OX1])[OX2][CX3](=[OX1])",\n',
'"acetylenic carbon": "[$([CX2]#C)]",\n',
'"acyl bromide": "[CX3](=[OX1])[Br]",\n',
'"acyl chloride": "[CX3](=[OX1])[Cl]",\n',
'"acyl fluoride": "[CX3](=[OX1])[F]",\n',
'"acyl iodide": "[CX3](=[OX1])[I]",\n',
'"aldehyde": "[CX3H1](=O)[#6]",\n',
'"alkane": "[CX4]",\n',
'"allenic carbon": "[$([CX2](=C)=C)]",\n',
'"amide": "[NX3][CX3](=[OX1])[#6]",\n',
'"amidium": "[NX3][CX3]=[NX3+]",\n',
'"amino acid": "[$([NX3H2,NX4H3+]),$([NX3H](C)(C))][CX4H]([*])[CX3](=[OX1])[OX2H,OX1-,N]",\n',
'"azide": "[$(-[NX2-]-[NX2+]#[NX1]),$(-[NX2]=[NX2+]=[NX1-])]",\n',
'"azo nitrogen": "[NX2]=N",\n',
'"azole": "[$([nr5]:[nr5,or5,sr5]),$([nr5]:[cr5]:[nr5,or5,sr5])]",\n',
'"azoxy nitrogen": "[$([NX2]=[NX3+]([O-])[#6]),$([NX2]=[NX3+0](=[O])[#6])]",\n',
'"diazene": "[NX2]=[NX2]",\n',
'"diazo nitrogen": "[$([#6]=[N+]=[N-]),$([#6-]-[N+]#[N])]",\n',
'"bromine": "[Br]",\n']
我试过使用替换命令删除数据中的 :,但没有成功。
# Attempt to drop every ":" from each entry of `lines`, walking the list
# with a manual counter.
# NOTE: str.replace returns a new string; the result is discarded below, so
# each element is written back unchanged.
i = 0
while i < len(lines):
    a = lines[i]
    a.replace(":", "")
    lines[i] = a
    i += 1
# Build a dictionary from raw lines shaped like '"key": "value",\n'.
d = {}
for line in lines:
    # Split on the first ":" only. A value may itself contain ":" (the
    # "azole" pattern in the sample data does), and an unlimited split
    # would cut such a value short.
    s = line.split(":", 1)
    # Strip surrounding spaces and quotes from the key; additionally strip
    # the trailing comma and newline from the value.
    d[s[0].strip(' "')] = s[1].strip(' ",\n')
您可以使用 eval:
# Raw entries: each string is one '"key": "value",' fragment ending in "\n".
ll = [
    '"acetic anydride": "[CX3](=[OX1])[OX2][CX3](=[OX1])",\n',
    '"acetylenic carbon": "[$([CX2]#C)]",\n',
    '"acyl bromide": "[CX3](=[OX1])[Br]",\n',
    '"acyl chloride": "[CX3](=[OX1])[Cl]",\n',
    '"acyl fluoride": "[CX3](=[OX1])[F]",\n',
    '"acyl iodide": "[CX3](=[OX1])[I]",\n',
    '"aldehyde": "[CX3H1](=O)[#6]",\n',
    '"alkane": "[CX4]",\n',
    '"allenic carbon": "[$([CX2](=C)=C)]",\n',
    '"amide": "[NX3][CX3](=[OX1])[#6]",\n',
    '"amidium": "[NX3][CX3]=[NX3+]",\n',
    '"amino acid": "[$([NX3H2,NX4H3+]),$([NX3H](C)(C))][CX4H]([*])[CX3](=[OX1])[OX2H,OX1-,N]",\n',
    '"azide": "[$(-[NX2-]-[NX2+]#[NX1]),$(-[NX2]=[NX2+]=[NX1-])]",\n',
    '"azo nitrogen": "[NX2]=N",\n',
    '"azole": "[$([nr5]:[nr5,or5,sr5]),$([nr5]:[cr5]:[nr5,or5,sr5])]",\n',
    '"azoxy nitrogen": "[$([NX2]=[NX3+]([O-])[#6]),$([NX2]=[NX3+0](=[O])[#6])]",\n',
    '"diazene": "[NX2]=[NX2]",\n',
    '"diazo nitrogen": "[$([#6]=[N+]=[N-]),$([#6-]-[N+]#[N])]",\n',
    '"bromine": "[Br]",\n',
]

# Drop the newline from every fragment, join the fragments with spaces and
# wrap the result in braces so the text reads as a dict literal.
# NOTE(security): eval executes whatever expression it is handed; only use
# it on trusted data such as the literal list above.
dd = eval("{" + " ".join(entry.replace("\n", "") for entry in ll) + "}")
这会将您的列表连接成单个字符串,删除其中的 \n 并在两端加上花括号,这样就得到一个可以用 eval 求值的字符串,因为它是能构成字典的有效 Python 代码。
这只是格式化的问题,或者更准确地说是数据清理的问题。我不确定您为什么使用增量变量。最需要处理的是每个元素末尾的逗号和换行符,然后按“: ”(冒号加空格)拆分,并用得到的键和值创建字典。您可以试试下面的代码。
# Clean each raw '"key": "value",\n' entry and collect the pairs in a dict.
d = {}
for raw in lines:
    # Drop the trailing comma/newline, then split into the two quoted halves.
    name, pattern = raw.rstrip(",\n").split(": ")
    # Remove the double quotes wrapping each half.
    d[name.strip('"')] = pattern.strip('"')
# Bare expression: shows the dictionary when run in an interactive session.
d
我使用 strip('"') 来去掉键和值两端多余的引号。
列表中的每个元素都是一个以',\n'结尾的字符串。这些应该被删除。键和值有不必要的双引号。这些也应该被删除。我认为这应该可以满足您的需求:
# Raw entries: each string is one '"key": "value",' fragment ending in "\n".
mylist = [
    '"acetic anydride": "[CX3](=[OX1])[OX2][CX3](=[OX1])",\n',
    '"acetylenic carbon": "[$([CX2]#C)]",\n',
    '"acyl bromide": "[CX3](=[OX1])[Br]",\n',
    '"acyl chloride": "[CX3](=[OX1])[Cl]",\n',
    '"acyl fluoride": "[CX3](=[OX1])[F]",\n',
    '"acyl iodide": "[CX3](=[OX1])[I]",\n',
    '"aldehyde": "[CX3H1](=O)[#6]",\n',
    '"alkane": "[CX4]",\n',
    '"allenic carbon": "[$([CX2](=C)=C)]",\n',
    '"amide": "[NX3][CX3](=[OX1])[#6]",\n',
    '"amidium": "[NX3][CX3]=[NX3+]",\n',
    '"amino acid": "[$([NX3H2,NX4H3+]),$([NX3H](C)(C))][CX4H]([*])[CX3](=[OX1])[OX2H,OX1-,N]",\n',
    '"azide": "[$(-[NX2-]-[NX2+]#[NX1]),$(-[NX2]=[NX2+]=[NX1-])]",\n',
    '"azo nitrogen": "[NX2]=N",\n',
    '"azole": "[$([nr5]:[nr5,or5,sr5]),$([nr5]:[cr5]:[nr5,or5,sr5])]",\n',
    '"azoxy nitrogen": "[$([NX2]=[NX3+]([O-])[#6]),$([NX2]=[NX3+0](=[O])[#6])]",\n',
    '"diazene": "[NX2]=[NX2]",\n',
    '"diazo nitrogen": "[$([#6]=[N+]=[N-]),$([#6-]-[N+]#[N])]",\n',
    '"bromine": "[Br]",\n',
]

# Map each name to its pattern string.
mydict = {}
for e in mylist:
    # Remove the double quotes, then split on the FIRST ":" only: the
    # "azole" pattern contains ":" itself, and an unlimited split would
    # keep just the text up to that inner colon.
    t = e.replace('"', '').split(':', 1)
    # Strip the trailing ",\n" explicitly (rather than slicing off a fixed
    # two characters) and then any surrounding whitespace.
    mydict[t[0]] = t[1].rstrip(',\n').strip()
print(mydict)
我想将数据转换成字典以供使用。数据看起来像字典中的键和值,但它们组合成一个元素。
这是数据样本
['"acetic anydride": "[CX3](=[OX1])[OX2][CX3](=[OX1])",\n',
'"acetylenic carbon": "[$([CX2]#C)]",\n',
'"acyl bromide": "[CX3](=[OX1])[Br]",\n',
'"acyl chloride": "[CX3](=[OX1])[Cl]",\n',
'"acyl fluoride": "[CX3](=[OX1])[F]",\n',
'"acyl iodide": "[CX3](=[OX1])[I]",\n',
'"aldehyde": "[CX3H1](=O)[#6]",\n',
'"alkane": "[CX4]",\n',
'"allenic carbon": "[$([CX2](=C)=C)]",\n',
'"amide": "[NX3][CX3](=[OX1])[#6]",\n',
'"amidium": "[NX3][CX3]=[NX3+]",\n',
'"amino acid": "[$([NX3H2,NX4H3+]),$([NX3H](C)(C))][CX4H]([*])[CX3](=[OX1])[OX2H,OX1-,N]",\n',
'"azide": "[$(-[NX2-]-[NX2+]#[NX1]),$(-[NX2]=[NX2+]=[NX1-])]",\n',
'"azo nitrogen": "[NX2]=N",\n',
'"azole": "[$([nr5]:[nr5,or5,sr5]),$([nr5]:[cr5]:[nr5,or5,sr5])]",\n',
'"azoxy nitrogen": "[$([NX2]=[NX3+]([O-])[#6]),$([NX2]=[NX3+0](=[O])[#6])]",\n',
'"diazene": "[NX2]=[NX2]",\n',
'"diazo nitrogen": "[$([#6]=[N+]=[N-]),$([#6-]-[N+]#[N])]",\n',
'"bromine": "[Br]",\n']
我试过使用替换命令删除数据中的 :,但没有成功。
# Attempt to drop every ":" from each entry of `lines`, walking the list
# with a manual counter.
# NOTE: str.replace returns a new string; the result is discarded below, so
# each element is written back unchanged.
i = 0
while i < len(lines):
    a = lines[i]
    a.replace(":", "")
    lines[i] = a
    i += 1
# Build a dictionary from raw lines shaped like '"key": "value",\n'.
d = {}
for line in lines:
    # Split on the first ":" only. A value may itself contain ":" (the
    # "azole" pattern in the sample data does), and an unlimited split
    # would cut such a value short.
    s = line.split(":", 1)
    # Strip surrounding spaces and quotes from the key; additionally strip
    # the trailing comma and newline from the value.
    d[s[0].strip(' "')] = s[1].strip(' ",\n')
您可以使用 eval:
# Raw entries: each string is one '"key": "value",' fragment ending in "\n".
ll = [
    '"acetic anydride": "[CX3](=[OX1])[OX2][CX3](=[OX1])",\n',
    '"acetylenic carbon": "[$([CX2]#C)]",\n',
    '"acyl bromide": "[CX3](=[OX1])[Br]",\n',
    '"acyl chloride": "[CX3](=[OX1])[Cl]",\n',
    '"acyl fluoride": "[CX3](=[OX1])[F]",\n',
    '"acyl iodide": "[CX3](=[OX1])[I]",\n',
    '"aldehyde": "[CX3H1](=O)[#6]",\n',
    '"alkane": "[CX4]",\n',
    '"allenic carbon": "[$([CX2](=C)=C)]",\n',
    '"amide": "[NX3][CX3](=[OX1])[#6]",\n',
    '"amidium": "[NX3][CX3]=[NX3+]",\n',
    '"amino acid": "[$([NX3H2,NX4H3+]),$([NX3H](C)(C))][CX4H]([*])[CX3](=[OX1])[OX2H,OX1-,N]",\n',
    '"azide": "[$(-[NX2-]-[NX2+]#[NX1]),$(-[NX2]=[NX2+]=[NX1-])]",\n',
    '"azo nitrogen": "[NX2]=N",\n',
    '"azole": "[$([nr5]:[nr5,or5,sr5]),$([nr5]:[cr5]:[nr5,or5,sr5])]",\n',
    '"azoxy nitrogen": "[$([NX2]=[NX3+]([O-])[#6]),$([NX2]=[NX3+0](=[O])[#6])]",\n',
    '"diazene": "[NX2]=[NX2]",\n',
    '"diazo nitrogen": "[$([#6]=[N+]=[N-]),$([#6-]-[N+]#[N])]",\n',
    '"bromine": "[Br]",\n',
]

# Drop the newline from every fragment, join the fragments with spaces and
# wrap the result in braces so the text reads as a dict literal.
# NOTE(security): eval executes whatever expression it is handed; only use
# it on trusted data such as the literal list above.
dd = eval("{" + " ".join(entry.replace("\n", "") for entry in ll) + "}")
这会将您的列表连接成单个字符串,删除其中的 \n 并在两端加上花括号,这样就得到一个可以用 eval 求值的字符串,因为它是能构成字典的有效 Python 代码。
这只是格式化的问题,或者更准确地说是数据清理的问题。我不确定您为什么使用增量变量。最需要处理的是每个元素末尾的逗号和换行符,然后按“: ”(冒号加空格)拆分,并用得到的键和值创建字典。您可以试试下面的代码。
# Clean each raw '"key": "value",\n' entry and collect the pairs in a dict.
d = {}
for raw in lines:
    # Drop the trailing comma/newline, then split into the two quoted halves.
    name, pattern = raw.rstrip(",\n").split(": ")
    # Remove the double quotes wrapping each half.
    d[name.strip('"')] = pattern.strip('"')
# Bare expression: shows the dictionary when run in an interactive session.
d
我使用 strip('"') 来去掉键和值两端多余的引号。
列表中的每个元素都是一个以',\n'结尾的字符串。这些应该被删除。键和值有不必要的双引号。这些也应该被删除。我认为这应该可以满足您的需求:
# Raw entries: each string is one '"key": "value",' fragment ending in "\n".
mylist = [
    '"acetic anydride": "[CX3](=[OX1])[OX2][CX3](=[OX1])",\n',
    '"acetylenic carbon": "[$([CX2]#C)]",\n',
    '"acyl bromide": "[CX3](=[OX1])[Br]",\n',
    '"acyl chloride": "[CX3](=[OX1])[Cl]",\n',
    '"acyl fluoride": "[CX3](=[OX1])[F]",\n',
    '"acyl iodide": "[CX3](=[OX1])[I]",\n',
    '"aldehyde": "[CX3H1](=O)[#6]",\n',
    '"alkane": "[CX4]",\n',
    '"allenic carbon": "[$([CX2](=C)=C)]",\n',
    '"amide": "[NX3][CX3](=[OX1])[#6]",\n',
    '"amidium": "[NX3][CX3]=[NX3+]",\n',
    '"amino acid": "[$([NX3H2,NX4H3+]),$([NX3H](C)(C))][CX4H]([*])[CX3](=[OX1])[OX2H,OX1-,N]",\n',
    '"azide": "[$(-[NX2-]-[NX2+]#[NX1]),$(-[NX2]=[NX2+]=[NX1-])]",\n',
    '"azo nitrogen": "[NX2]=N",\n',
    '"azole": "[$([nr5]:[nr5,or5,sr5]),$([nr5]:[cr5]:[nr5,or5,sr5])]",\n',
    '"azoxy nitrogen": "[$([NX2]=[NX3+]([O-])[#6]),$([NX2]=[NX3+0](=[O])[#6])]",\n',
    '"diazene": "[NX2]=[NX2]",\n',
    '"diazo nitrogen": "[$([#6]=[N+]=[N-]),$([#6-]-[N+]#[N])]",\n',
    '"bromine": "[Br]",\n',
]

# Map each name to its pattern string.
mydict = {}
for e in mylist:
    # Remove the double quotes, then split on the FIRST ":" only: the
    # "azole" pattern contains ":" itself, and an unlimited split would
    # keep just the text up to that inner colon.
    t = e.replace('"', '').split(':', 1)
    # Strip the trailing ",\n" explicitly (rather than slicing off a fixed
    # two characters) and then any surrounding whitespace.
    mydict[t[0]] = t[1].rstrip(',\n').strip()
print(mydict)