向列表追加元素后输出为空
Empty output after appending a list
# Question listing (Python 2): scan a text file and collect the lines that
# contain a given keyword.
# NOTE(review): block indentation was lost in this listing, so the nesting
# below -- in particular whether `return output` sits inside the `for` loop --
# cannot be determined from the text; confirm against the original post.

# Separator: every stripped character is replaced by it and lines are split on it.
r = ","
# Not referenced again in this listing.
x = ""
# Module-level list that find_word() appends to and returns; it is not reset
# between calls. Appending mutates the list in place and does not rebind the name.
output = list()
# Not used in this listing.
import string
# find_word(filepath, keyword): read `filepath` line by line, normalise each
# line into comma-separated lowercase tokens, and append the normalised line
# to `output` when `keyword` (compared in lowercase) is one of the tokens.
def find_word(filepath,keyword):
# The file handle is never closed explicitly.
doc = open(filepath, 'r')
for line in doc:
# Replace each unwanted character with the separator
line = line.replace("'", r)
line = line.replace('`', r)
line = line.replace('[', r)
line = line.replace(']', r)
line = line.replace('{', r)
line = line.replace('}', r)
line = line.replace('(', r)
line = line.replace(')', r)
line = line.replace(':', r)
line = line.replace('.', r)
line = line.replace('!', r)
line = line.replace('?', r)
line = line.replace('"', r)
line = line.replace(';', r)
line = line.replace(' ', r)
# Attempt to collapse runs of separators. Each call replaces non-overlapping
# matches in one pass, so a run of three commas becomes two after the first
# call and is not matched by the later, longer patterns.
line = line.replace(',,', r)
line = line.replace(',,,', r)
line = line.replace(',,,,', r)
line = line.replace(',,,,,', r)
line = line.replace(',,,,,,', r)
line = line.replace(',,,,,,,', r)
# These run after the collapse above, so they can introduce new comma runs.
line = line.replace('#', r)
line = line.replace('*', r)
# No '*' is left after the previous call, so these two never match.
line = line.replace('**', r)
line = line.replace('***', r)
# Make the line lowercase
line = line.lower()
# Split the line on every r (comma) and name the result "words".
# None of the replacements removes a trailing newline, so the last token of a
# line read from the file can still carry it.
words = line.split(r)
# If the keyword (also lowercased) is one of the tokens in `words`, append the
# normalised line (not the original text) to `output`.
if keyword.lower() in words:
output.append(line)
return output
# Python 2 print statement; runs the search when the script is executed.
print find_word("pg844.txt","and")
这段代码的目标是在文本文件中搜索某个关键字,比如 "and",然后将包含该关键字的整行放入一个元素类型为 (int, string) 的列表中。其中 int 是行号,string 是整行内容。
我仍在研究行编号 - 所以目前还没有问题。但问题是:输出为空。即使我附加一个随机字符串而不是行,我也没有得到任何结果。
如果我用
if keyword.lower() in words:
print line
我得到了所有需要的行,其中出现了关键字。但是我就是不能把它放到输出列表中。
我正在尝试搜索的文本文件:http://www.gutenberg.org/cache/epub/844/pg844.txt
请使用正则表达式,并参阅 Python 正则表达式(re 模块)的相关文档。逐个替换每个字符/字符集会让代码难以阅读。列表和 .append()
的使用看起来是正确的,但或许可以考虑在 for 循环中调试您的 line
变量,偶尔打印它以确保它的值是您想要的。
pyInProgress 的回答对全局变量提出了一个很好的观点。不过,虽然我没有测试过,但我认为如果使用函数返回的 output
(即 return 的变量)而不是全局 output
变量,就不需要这样做。如果您需要有关全局变量的更多信息,请参阅这篇 Stack Overflow 帖子。
由于 output = list()
位于代码的顶层并且不在函数内部,因此它被视为全局变量。
要在函数内编辑全局变量,必须先使用 global
关键字。
示例:
gVar = 10


def editVar():
    """Add 5 to the module-level ``gVar``."""
    # ``gVar += 5`` rebinds the name. Without this declaration Python would
    # treat ``gVar`` as a local variable and raise UnboundLocalError.
    global gVar
    gVar += 5
因此,要在您的函数 find_word()
中编辑变量 output
,您必须在为其赋值之前键入 global output
。
它应该是这样的:
r = ","
x = ""
output = list()
import string


def find_word(filepath, keyword):
    """Collect the lines of a text file that contain *keyword* as a whole word.

    Each line is normalised first: the trailing newline is dropped, the
    characters ``' ` [ ] { } ( ) : . ! ? " ; # *`` and spaces are replaced by
    the separator ``r``, the line is lowercased, and empty tokens produced by
    consecutive separators are discarded.

    Args:
        filepath: Path of the text file to scan.
        keyword: Word to look for; compared case-insensitively against the
            tokens of each line.

    Returns:
        The module-level ``output`` list. Every matching line is appended to
        it in normalised form (lowercase tokens joined by ``r``). Because the
        list is global, results of earlier calls are still in it.
    """
    # Declared once at the top of the function. ``append`` only mutates the
    # list, so the declaration is not strictly required here; it would be as
    # soon as ``output`` were rebound, and it makes the dependence on module
    # state explicit.
    global output

    strip_chars = "'`[]{}():.!?\";#* "
    needle = keyword.lower()

    # ``with`` closes the file even if reading raises.
    with open(filepath, 'r') as doc:
        for line in doc:
            # Drop the line ending so the last word of a line can match.
            line = line.rstrip('\r\n')
            for mark in strip_chars:
                line = line.replace(mark, r)
            # Discarding empty tokens collapses separator runs of any length.
            words = [word for word in line.lower().split(r) if word]
            if needle in words:
                output.append(r.join(words))
    return output
以后除非绝对需要,否则尽量远离全局变量。它们会变得凌乱!
遍历 string.punctuation,
在逐行处理之前先删除所有标点符号
import string, re
r = ','


def find_word(filepath, keyword):
    """Return the lines of a text file that contain *keyword* as a whole word.

    All characters in ``string.punctuation`` except the separator ``r`` are
    removed from the text and runs of the separator are collapsed to one
    before the text is split into lines.

    Args:
        filepath: Path of the text file to scan.
        keyword: Word to look for; matched case-insensitively against the
            words of each line, where words are delimited by whitespace or
            by the separator ``r``.

    Returns:
        A list of ``(index, line)`` tuples, where ``index`` is the zero-based
        position of the line in the file and ``line`` is the line with
        punctuation stripped (original letter case kept).
    """
    unwanted = set(string.punctuation) - set(r)
    needle = keyword.lower()

    # Text mode, so the data is ``str`` and can be combined with ``str``
    # replacements on Python 3 as well as Python 2.
    with open(filepath, 'r') as f:
        data = f.read()

    data = ''.join(ch for ch in data if ch not in unwanted)
    # No flags are needed because the pattern has no anchors. The fourth
    # positional argument of re.sub is ``count``, so passing re.M there would
    # cap the number of substitutions at 8.
    data = re.sub(re.escape(r) + '{2,}', r, data)

    output = []
    for i, line in enumerate(data.splitlines()):
        # Split on whitespace as well as the separator; splitting on the
        # separator alone would leave whole phrases as single tokens.
        words = line.lower().replace(r, ' ').split()
        if needle in words:
            output.append((i, line))
    return output


if __name__ == '__main__':
    print(find_word('pg844.txt', 'and'))
# Question listing (Python 2): scan a text file and collect the lines that
# contain a given keyword.
# NOTE(review): block indentation was lost in this listing, so the nesting
# below -- in particular whether `return output` sits inside the `for` loop --
# cannot be determined from the text; confirm against the original post.

# Separator: every stripped character is replaced by it and lines are split on it.
r = ","
# Not referenced again in this listing.
x = ""
# Module-level list that find_word() appends to and returns; it is not reset
# between calls. Appending mutates the list in place and does not rebind the name.
output = list()
# Not used in this listing.
import string
# find_word(filepath, keyword): read `filepath` line by line, normalise each
# line into comma-separated lowercase tokens, and append the normalised line
# to `output` when `keyword` (compared in lowercase) is one of the tokens.
def find_word(filepath,keyword):
# The file handle is never closed explicitly.
doc = open(filepath, 'r')
for line in doc:
# Replace each unwanted character with the separator
line = line.replace("'", r)
line = line.replace('`', r)
line = line.replace('[', r)
line = line.replace(']', r)
line = line.replace('{', r)
line = line.replace('}', r)
line = line.replace('(', r)
line = line.replace(')', r)
line = line.replace(':', r)
line = line.replace('.', r)
line = line.replace('!', r)
line = line.replace('?', r)
line = line.replace('"', r)
line = line.replace(';', r)
line = line.replace(' ', r)
# Attempt to collapse runs of separators. Each call replaces non-overlapping
# matches in one pass, so a run of three commas becomes two after the first
# call and is not matched by the later, longer patterns.
line = line.replace(',,', r)
line = line.replace(',,,', r)
line = line.replace(',,,,', r)
line = line.replace(',,,,,', r)
line = line.replace(',,,,,,', r)
line = line.replace(',,,,,,,', r)
# These run after the collapse above, so they can introduce new comma runs.
line = line.replace('#', r)
line = line.replace('*', r)
# No '*' is left after the previous call, so these two never match.
line = line.replace('**', r)
line = line.replace('***', r)
# Make the line lowercase
line = line.lower()
# Split the line on every r (comma) and name the result "words".
# None of the replacements removes a trailing newline, so the last token of a
# line read from the file can still carry it.
words = line.split(r)
# If the keyword (also lowercased) is one of the tokens in `words`, append the
# normalised line (not the original text) to `output`.
if keyword.lower() in words:
output.append(line)
return output
# Python 2 print statement; runs the search when the script is executed.
print find_word("pg844.txt","and")
这段代码的目标是在文本文件中搜索某个关键字,比如 "and",然后将包含该关键字的整行放入一个元素类型为 (int, string) 的列表中。其中 int 是行号,string 是整行内容。
我仍在研究行编号 - 所以目前还没有问题。但问题是:输出为空。即使我附加一个随机字符串而不是行,我也没有得到任何结果。
如果我用
if keyword.lower() in words:
print line
我得到了所有需要的行,其中出现了关键字。但是我就是不能把它放到输出列表中。
我正在尝试搜索的文本文件:http://www.gutenberg.org/cache/epub/844/pg844.txt
请使用正则表达式,并参阅 Python 正则表达式(re 模块)的相关文档。逐个替换每个字符/字符集会让代码难以阅读。列表和 .append()
的使用看起来是正确的,但或许可以考虑在 for 循环中调试您的 line
变量,偶尔打印它以确保它的值是您想要的。
pyInProgress 的回答对全局变量提出了一个很好的观点。不过,虽然我没有测试过,但我认为如果使用函数返回的 output
(即 return 的变量)而不是全局 output
变量,就不需要这样做。如果您需要有关全局变量的更多信息,请参阅这篇 Stack Overflow 帖子。
由于 output = list()
位于代码的顶层并且不在函数内部,因此它被视为全局变量。
要在函数内编辑全局变量,必须先使用 global
关键字。
示例:
gVar = 10


def editVar():
    """Add 5 to the module-level ``gVar``."""
    # ``gVar += 5`` rebinds the name. Without this declaration Python would
    # treat ``gVar`` as a local variable and raise UnboundLocalError.
    global gVar
    gVar += 5
因此,要在您的函数 find_word()
中编辑变量 output
,您必须在为其赋值之前键入 global output
。
它应该是这样的:
r = ","
x = ""
output = list()
import string


def find_word(filepath, keyword):
    """Collect the lines of a text file that contain *keyword* as a whole word.

    Each line is normalised first: the trailing newline is dropped, the
    characters ``' ` [ ] { } ( ) : . ! ? " ; # *`` and spaces are replaced by
    the separator ``r``, the line is lowercased, and empty tokens produced by
    consecutive separators are discarded.

    Args:
        filepath: Path of the text file to scan.
        keyword: Word to look for; compared case-insensitively against the
            tokens of each line.

    Returns:
        The module-level ``output`` list. Every matching line is appended to
        it in normalised form (lowercase tokens joined by ``r``). Because the
        list is global, results of earlier calls are still in it.
    """
    # Declared once at the top of the function. ``append`` only mutates the
    # list, so the declaration is not strictly required here; it would be as
    # soon as ``output`` were rebound, and it makes the dependence on module
    # state explicit.
    global output

    strip_chars = "'`[]{}():.!?\";#* "
    needle = keyword.lower()

    # ``with`` closes the file even if reading raises.
    with open(filepath, 'r') as doc:
        for line in doc:
            # Drop the line ending so the last word of a line can match.
            line = line.rstrip('\r\n')
            for mark in strip_chars:
                line = line.replace(mark, r)
            # Discarding empty tokens collapses separator runs of any length.
            words = [word for word in line.lower().split(r) if word]
            if needle in words:
                output.append(r.join(words))
    return output
以后除非绝对需要,否则尽量远离全局变量。它们会变得凌乱!
遍历 string.punctuation,
在逐行处理之前先删除所有标点符号
import string, re
r = ','


def find_word(filepath, keyword):
    """Return the lines of a text file that contain *keyword* as a whole word.

    All characters in ``string.punctuation`` except the separator ``r`` are
    removed from the text and runs of the separator are collapsed to one
    before the text is split into lines.

    Args:
        filepath: Path of the text file to scan.
        keyword: Word to look for; matched case-insensitively against the
            words of each line, where words are delimited by whitespace or
            by the separator ``r``.

    Returns:
        A list of ``(index, line)`` tuples, where ``index`` is the zero-based
        position of the line in the file and ``line`` is the line with
        punctuation stripped (original letter case kept).
    """
    unwanted = set(string.punctuation) - set(r)
    needle = keyword.lower()

    # Text mode, so the data is ``str`` and can be combined with ``str``
    # replacements on Python 3 as well as Python 2.
    with open(filepath, 'r') as f:
        data = f.read()

    data = ''.join(ch for ch in data if ch not in unwanted)
    # No flags are needed because the pattern has no anchors. The fourth
    # positional argument of re.sub is ``count``, so passing re.M there would
    # cap the number of substitutions at 8.
    data = re.sub(re.escape(r) + '{2,}', r, data)

    output = []
    for i, line in enumerate(data.splitlines()):
        # Split on whitespace as well as the separator; splitting on the
        # separator alone would leave whole phrases as single tokens.
        words = line.lower().replace(r, ' ').split()
        if needle in words:
            output.append((i, line))
    return output


if __name__ == '__main__':
    print(find_word('pg844.txt', 'and'))