处理特殊字符
Processing special characters
假设我在 Lua 中收到以下字符串:mÜ⌠⌠í∩
并想用我当前的处理代码来处理它,代码如下:
--- Report whether a value is stored anywhere in a table.
-- Every entry visited by `pairs` is compared against `item` with `==`;
-- keys are ignored.
-- @param tbl  table whose values are searched
-- @param item value to look for
-- @return true if some value equals `item`, false otherwise
function inTable(tbl, item)
  local found = false
  for _, candidate in pairs(tbl) do
    if candidate == item then
      found = true
      break
    end
  end
  return found
end
do
  -- Single-byte codes of accented / look-alike glyphs, grouped by the plain
  -- letter each one is folded to. The values are extended-ASCII codes
  -- (they line up with code page 437, e.g. 129 = u-umlaut, 154 = U-umlaut).
  local lookalike_codes = {
    A = {131,132,133,134,142,143,145,146,160,166,181,182,183,198,199,224},
    E = {130,137,138,144,228},
    I = {139,140,141,161,173,179,244},
    O = {147,148,149,153,162,167,229,233,234,248},
    U = {129,150,151,154,163},
    N = {164,165,227,252},
  }

  -- byte value -> replacement letter, seeded with the digit look-alikes
  -- "4" -> A, "3" -> E, "1" -> I, "0" -> O.
  local fold = { [52] = "A", [51] = "E", [49] = "I", [48] = "O" }
  for letter, codes in pairs(lookalike_codes) do
    for i = 1, #codes do
      fold[codes[i]] = letter
    end
  end

  --- Fold a message to plain upper-case letters and digits.
  -- The input is upper-cased, then examined one byte at a time:
  --   * bytes listed in the look-alike table become their base letter,
  --   * the digits 4, 3, 1, 0 become A, E, I, O,
  --   * remaining ASCII letters and digits are kept,
  --   * every other byte (punctuation, spaces, control and unknown high
  --     bytes) is dropped.
  -- The look-alike lookup happens BEFORE the "drop everything else" rule;
  -- previously the drop rule matched every byte >= 123 first, so the accent
  -- tables could never be reached.
  -- NOTE: this works on single bytes. Multi-byte (UTF-8) input is not
  -- decoded; each byte of a multi-byte character is looked up on its own.
  -- @param instr string to fold
  -- @return the folded string
  function processstring(instr)
    local upper = string.upper(instr)
    local out = {}
    for i = 1, #upper do
      local bc = string.byte(upper, i)
      local letter = fold[bc]
      if letter then
        out[#out + 1] = letter
      elseif (bc >= 48 and bc <= 57) or (bc >= 65 and bc <= 90)
          or (bc >= 97 and bc <= 122) then
        -- plain ASCII digit or letter: keep as is
        out[#out + 1] = string.char(bc)
      end
      -- anything else is intentionally dropped
    end
    -- Single concat instead of growing a string inside the loop.
    return table.concat(out)
  end
end
do
  -- Placeholder pattern for the disabled raw-message check kept below.
  local specword = [[]]
  -- Words searched for in the folded message. Built once; the original list
  -- named "FIN" and "PHIN" twice, which had no effect on the result.
  local wordlist = {"FIN", "FFI", "PHIN", "PHEN", "IFFUM", "MUF", "MEUFEEN", "FEN", "FEEN"}

  --- Check whether a message contains any listed word after folding.
  -- The message is normalised with `processstring` and each list entry is
  -- then searched for as a literal substring.
  -- @param instr string: raw message to inspect
  -- @return 1 as soon as a listed word is found; when nothing matches the
  --   function returns no value at all (a caller testing the result sees nil).
  function checkword (instr)
    -- The folded text does not depend on the word, so compute it once.
    local folded = processstring(instr)
    for _, word in ipairs(wordlist) do
      -- Plain find: the entries are literal words, not Lua patterns.
      if string.find(folded, word, 1, true) then
        return 1
      end
    end
    --if (string.match(instr,specword) ~= nil)then
    -- return 1
    --end
  end
end
-- Demo call from the question: run the word check on the sample string and
-- print whatever it returns.
print (checkword("mÜ⌠⌠í∩"))
到目前为止,我还没有找到任何方法来可靠地处理这样的字符串。即使使用 string.byte() 将其转换为 ASCII 码,我也无法可靠地处理这些特殊字符。更奇怪的是,如果我在 processstring 中执行 print(bc),会得到以下输出:
160 226 140 160 195 173 226 136 169
一个只有 6 个字母的单词却输出了 9 个 ASCII 码,这怎么可能?我编写代码时参考的是 http://www.asciitable.com/,是它有误吗?我应该如何处理这种情况?
-- Look-alike glyphs (as UTF-8 text), keyed by the plain upper-case letter
-- that replaces them. Accented letters appear in both cases where relevant;
-- presumably because string.upper only changes ASCII letters in the default
-- locale, so lower-case accented input arrives unchanged.
local subst = {
  U = "üûùÜú",
  N = "ñÑπⁿ∩",
  O = "ôöòÖóºσΘΩ°",
  I = "ïîìí¡│",
  F = "⌠",
  A = "âäàåÄÅæÆáª╡╢╖╞╟α",
  E = "éëèÉΣ",
}

-- Matches one UTF-8 encoded character: a lead byte (NUL, 0x01-0x7F or
-- 0xC0-0xFF) followed by any number of continuation bytes (0x80-0xBF).
-- NUL is written as the class %z and kept separate from the 0x01-0x7F range:
-- a range that starts at a class (the former "%z-\x7F") is undefined by the
-- Lua manual and in practice skipped bytes 0x01-0x2C. Decimal escapes keep
-- the pattern usable on Lua 5.1 as well.
local UTF8_CHAR_PATTERN = "[%z\1-\127\192-\255][\128-\191]*"

-- Reverse index built from `subst`: UTF-8 character -> replacement letter.
local subst_utf8 = {}
for base_letter, list_of_letters in pairs(subst) do
  for utf8letter in list_of_letters:gmatch(UTF8_CHAR_PATTERN) do
    subst_utf8[utf8letter] = base_letter
  end
end

--- Upper-case a UTF-8 string and replace known look-alike glyphs.
-- Each UTF-8 character is looked up in `subst_utf8`; characters without an
-- entry (including all punctuation and digits) are left unchanged.
-- @param instr string: UTF-8 encoded text
-- @return the converted string (a single value)
function processstring(instr)
  -- The outer parentheses discard gsub's second result (the match count).
  return (instr:upper():gsub(UTF8_CHAR_PATTERN, subst_utf8))
end
-- Demo from the answer: convert the sample string; the expected output is
-- shown after the arrow.
print(processstring("mÜ⌠⌠í∩")) --> MUFFIN
假设我在 Lua 中收到以下字符串:mÜ⌠⌠í∩
并想用我当前的处理代码来处理它,代码如下:
--- Report whether a value is stored anywhere in a table.
-- Every entry visited by `pairs` is compared against `item` with `==`;
-- keys are ignored.
-- @param tbl  table whose values are searched
-- @param item value to look for
-- @return true if some value equals `item`, false otherwise
function inTable(tbl, item)
  local found = false
  for _, candidate in pairs(tbl) do
    if candidate == item then
      found = true
      break
    end
  end
  return found
end
do
  -- Single-byte codes of accented / look-alike glyphs, grouped by the plain
  -- letter each one is folded to. The values are extended-ASCII codes
  -- (they line up with code page 437, e.g. 129 = u-umlaut, 154 = U-umlaut).
  local lookalike_codes = {
    A = {131,132,133,134,142,143,145,146,160,166,181,182,183,198,199,224},
    E = {130,137,138,144,228},
    I = {139,140,141,161,173,179,244},
    O = {147,148,149,153,162,167,229,233,234,248},
    U = {129,150,151,154,163},
    N = {164,165,227,252},
  }

  -- byte value -> replacement letter, seeded with the digit look-alikes
  -- "4" -> A, "3" -> E, "1" -> I, "0" -> O.
  local fold = { [52] = "A", [51] = "E", [49] = "I", [48] = "O" }
  for letter, codes in pairs(lookalike_codes) do
    for i = 1, #codes do
      fold[codes[i]] = letter
    end
  end

  --- Fold a message to plain upper-case letters and digits.
  -- The input is upper-cased, then examined one byte at a time:
  --   * bytes listed in the look-alike table become their base letter,
  --   * the digits 4, 3, 1, 0 become A, E, I, O,
  --   * remaining ASCII letters and digits are kept,
  --   * every other byte (punctuation, spaces, control and unknown high
  --     bytes) is dropped.
  -- The look-alike lookup happens BEFORE the "drop everything else" rule;
  -- previously the drop rule matched every byte >= 123 first, so the accent
  -- tables could never be reached.
  -- NOTE: this works on single bytes. Multi-byte (UTF-8) input is not
  -- decoded; each byte of a multi-byte character is looked up on its own.
  -- @param instr string to fold
  -- @return the folded string
  function processstring(instr)
    local upper = string.upper(instr)
    local out = {}
    for i = 1, #upper do
      local bc = string.byte(upper, i)
      local letter = fold[bc]
      if letter then
        out[#out + 1] = letter
      elseif (bc >= 48 and bc <= 57) or (bc >= 65 and bc <= 90)
          or (bc >= 97 and bc <= 122) then
        -- plain ASCII digit or letter: keep as is
        out[#out + 1] = string.char(bc)
      end
      -- anything else is intentionally dropped
    end
    -- Single concat instead of growing a string inside the loop.
    return table.concat(out)
  end
end
do
  -- Placeholder pattern for the disabled raw-message check kept below.
  local specword = [[]]
  -- Words searched for in the folded message. Built once; the original list
  -- named "FIN" and "PHIN" twice, which had no effect on the result.
  local wordlist = {"FIN", "FFI", "PHIN", "PHEN", "IFFUM", "MUF", "MEUFEEN", "FEN", "FEEN"}

  --- Check whether a message contains any listed word after folding.
  -- The message is normalised with `processstring` and each list entry is
  -- then searched for as a literal substring.
  -- @param instr string: raw message to inspect
  -- @return 1 as soon as a listed word is found; when nothing matches the
  --   function returns no value at all (a caller testing the result sees nil).
  function checkword (instr)
    -- The folded text does not depend on the word, so compute it once.
    local folded = processstring(instr)
    for _, word in ipairs(wordlist) do
      -- Plain find: the entries are literal words, not Lua patterns.
      if string.find(folded, word, 1, true) then
        return 1
      end
    end
    --if (string.match(instr,specword) ~= nil)then
    -- return 1
    --end
  end
end
-- Demo call from the question: run the word check on the sample string and
-- print whatever it returns.
print (checkword("mÜ⌠⌠í∩"))
到目前为止,我还没有找到任何方法来可靠地处理这样的字符串。即使使用 string.byte() 将其转换为 ASCII 码,我也无法可靠地处理这些特殊字符。更奇怪的是,如果我在 processstring 中执行 print(bc),会得到以下输出:
160 226 140 160 195 173 226 136 169
一个只有 6 个字母的单词却输出了 9 个 ASCII 码,这怎么可能?我编写代码时参考的是 http://www.asciitable.com/,是它有误吗?我应该如何处理这种情况?
-- Look-alike glyphs (as UTF-8 text), keyed by the plain upper-case letter
-- that replaces them. Accented letters appear in both cases where relevant;
-- presumably because string.upper only changes ASCII letters in the default
-- locale, so lower-case accented input arrives unchanged.
local subst = {
  U = "üûùÜú",
  N = "ñÑπⁿ∩",
  O = "ôöòÖóºσΘΩ°",
  I = "ïîìí¡│",
  F = "⌠",
  A = "âäàåÄÅæÆáª╡╢╖╞╟α",
  E = "éëèÉΣ",
}

-- Matches one UTF-8 encoded character: a lead byte (NUL, 0x01-0x7F or
-- 0xC0-0xFF) followed by any number of continuation bytes (0x80-0xBF).
-- NUL is written as the class %z and kept separate from the 0x01-0x7F range:
-- a range that starts at a class (the former "%z-\x7F") is undefined by the
-- Lua manual and in practice skipped bytes 0x01-0x2C. Decimal escapes keep
-- the pattern usable on Lua 5.1 as well.
local UTF8_CHAR_PATTERN = "[%z\1-\127\192-\255][\128-\191]*"

-- Reverse index built from `subst`: UTF-8 character -> replacement letter.
local subst_utf8 = {}
for base_letter, list_of_letters in pairs(subst) do
  for utf8letter in list_of_letters:gmatch(UTF8_CHAR_PATTERN) do
    subst_utf8[utf8letter] = base_letter
  end
end

--- Upper-case a UTF-8 string and replace known look-alike glyphs.
-- Each UTF-8 character is looked up in `subst_utf8`; characters without an
-- entry (including all punctuation and digits) are left unchanged.
-- @param instr string: UTF-8 encoded text
-- @return the converted string (a single value)
function processstring(instr)
  -- The outer parentheses discard gsub's second result (the match count).
  return (instr:upper():gsub(UTF8_CHAR_PATTERN, subst_utf8))
end
-- Demo from the answer: convert the sample string; the expected output is
-- shown after the arrow.
print(processstring("mÜ⌠⌠í∩")) --> MUFFIN