如何将这些字符串解析为 Python 中前缀表示法的元组?
How can I parse these strings into tuples of prefix notation in Python?
我需要将这些输入重新格式化为更好的格式,但这样做的最佳方法让我感到困惑。
以下是一些可能的输入(字符串):
'[neg (p or q)]'
'[p imp q, (neg r) imp (neg q)]'
'[(p and q) and r]'
这里分别是需要的格式(字符串列表):
['neg(or(p,q))']
['imp(p,q)', 'imp(neg(r),neg(q))']
['and(and(p,q),r)']
基本上,这些是可以嵌套的命题公式,我正在寻找一种更好的方法来格式化输入,以便稍后在我的代码中更轻松地使用它们。
我尝试过使用一些正则表达式,但不太熟悉。
您可以创建一个简单的解析器,方法是首先使用 re 标记(tokenize)您的输入:
import re
class Token:
    """A single lexical token: a category tag plus the matched text."""

    def __init__(self, _t, val):
        # Token category ('func', 'oparen', 'cparen' or 'value').
        self._type = _t
        # The exact substring that was matched.
        self.val = val

    def __repr__(self):
        # Renders e.g. as: Token(func, neg)
        return f'{type(self).__name__}({self._type}, {self.val})'
class Tokenize:
    """Regex tokenizer for propositional-formula strings.

    Splits the input into whole words and parentheses, then tags each
    piece with its token category.
    """

    # Lexing pattern: a whole word or a single parenthesis. Using `\w+`
    # (rather than listing operators first) keeps identifiers that merely
    # contain an operator substring (e.g. 'oracle') in one piece.
    gram = r'\w+|\(|\)'
    # Classification table, checked in order; first full match wins.
    _t = [
        (r'neg|or|imp|and|iff', 'func'),
        (r'\(', 'oparen'),
        (r'\)', 'cparen'),
        (r'\w+', 'value'),
    ]

    @classmethod
    def tokenize(cls, _input):
        """Return the list of Token objects found in *_input*.

        Classifies with re.fullmatch (not re.findall) so that a value
        such as 'oracle' is not mistaken for the operator 'or'.
        """
        return [
            Token(next(tag for pat, tag in cls._t if re.fullmatch(pat, piece)), piece)
            for piece in re.findall(cls.gram, _input)
        ]
def parse(d, stop=None):
    """Recursively consume tokens from iterator *d*, emitting prefix notation.

    *stop* names the token type that terminates the current nesting level
    ('cparen' inside parentheses, None at top level). Returns a string
    such as 'imp(p, q)' or '' when the level is empty.
    """
    tok = next(d, None)
    # End of stream, or the delimiter that closes this level.
    if tok is None or tok._type == stop:
        return ''
    # Prefix operator: wrap whatever the rest of this level produces.
    if tok._type == 'func':
        return f'{tok.val}({parse(d, stop=stop)})'
    # Parenthesised group: replace it with its already-rendered string.
    if tok._type == 'oparen':
        tok = parse(d, stop='cparen')
    # tok is now either a value Token or a finished sub-expression string.
    operand = getattr(tok, 'val', tok)
    lookahead = next(d, None)
    if lookahead is None or lookahead._type == stop:
        return operand
    # Binary infix operator: rewrite as operator(lhs, rhs).
    return f'{lookahead.val}({operand}, {parse(d, stop=stop)})'
# Sample inputs: each string is a bracketed, comma-separated list of formulas.
n = ['[neg (p or q)]', '[p imp q, (neg r) imp (neg q)]', '[(p and q) and r]', '[neg (p iff (neg q))]']
# Strip the surrounding brackets, split on commas, parse each formula.
result = []
for raw in n:
    parsed = []
    for part in raw[1:-1].split(','):
        parsed.append(parse(iter(Tokenize.tokenize(part))))
    result.append(parsed)
输出:
[['neg(or(p, q))'], ['imp(p, q)', 'imp(neg(r), neg(q))'], ['and(and(p, q), r)'], ['neg(iff(p, neg(q)))']]
我需要将这些输入重新格式化为更好的格式,但这样做的最佳方法让我感到困惑。
以下是一些可能的输入(字符串):
'[neg (p or q)]'
'[p imp q, (neg r) imp (neg q)]'
'[(p and q) and r]'
这里分别是需要的格式(字符串列表):
['neg(or(p,q))']
['imp(p,q)', 'imp(neg(r),neg(q))']
['and(and(p,q),r)']
基本上,这些是可以嵌套的命题公式,我正在寻找一种更好的方法来格式化输入,以便稍后在我的代码中更轻松地使用它们。
我尝试过使用一些正则表达式,但不太熟悉。
您可以创建一个简单的解析器,方法是首先使用 re 标记(tokenize)您的输入:
import re
class Token:
    """Lexeme produced by the tokenizer: a type tag and the source text."""

    def __init__(self, _t, val):
        self._type, self.val = _t, val  # category tag, matched substring

    def __repr__(self):
        # Renders e.g. as: Token(value, p)
        name = self.__class__.__name__
        return f'{name}({self._type}, {self.val})'
class Tokenize:
    """Regex tokenizer for propositional-formula strings.

    Lexes the input into whole words and parentheses, then tags each
    piece with its token category.
    """

    # Lexing pattern: a whole word or a single parenthesis. `\w+` keeps
    # identifiers that merely contain an operator substring (e.g.
    # 'oracle' contains 'or') from being split or mis-lexed.
    gram = r'\w+|\(|\)'
    # Classification table, checked in order; first full match wins.
    _t = [
        (r'neg|or|imp|and|iff', 'func'),
        (r'\(', 'oparen'),
        (r'\)', 'cparen'),
        (r'\w+', 'value'),
    ]

    @classmethod
    def tokenize(cls, _input):
        """Return the list of Token objects found in *_input*.

        Uses re.fullmatch for classification so a value like 'oracle'
        is not misclassified as the operator 'or'.
        """
        tokens = []
        for piece in re.findall(cls.gram, _input):
            for pat, tag in cls._t:
                if re.fullmatch(pat, piece):
                    tokens.append(Token(tag, piece))
                    break
        return tokens
def parse(d, stop=None):
    """Turn a token stream into a prefix-notation string.

    Consumes tokens from iterator *d* until exhaustion or until a token
    of type *stop* is seen; returns the formula in operator(args) form,
    or '' when the current level is empty.
    """
    head = next(d, None)
    if head is None or head._type == stop:
        # Nothing (or only the closing delimiter) left at this level.
        return ''
    if head._type == 'func':
        # Unary prefix operator applied to the rest of this level.
        return f'{head.val}({parse(d, stop=stop)})'
    if head._type == 'oparen':
        # Parenthesised group becomes an already-rendered string.
        head = parse(d, stop='cparen')
    left = getattr(head, 'val', head)
    follow = next(d, None)
    if follow is not None and follow._type != stop:
        # Binary infix operator: emit op(lhs, rhs).
        return f'{follow.val}({left}, {parse(d, stop=stop)})'
    return left
# The bracketed sample inputs to convert.
n = ['[neg (p or q)]', '[p imp q, (neg r) imp (neg q)]', '[(p and q) and r]', '[neg (p iff (neg q))]']
# Drop '[' and ']', split each list on commas, and parse every formula.
result = [
    [parse(iter(Tokenize.tokenize(chunk))) for chunk in entry[1:-1].split(',')]
    for entry in n
]
输出:
[['neg(or(p, q))'], ['imp(p, q)', 'imp(neg(r), neg(q))'], ['and(and(p, q), r)'], ['neg(iff(p, neg(q)))']]