根据另一个数组中的条目拆分数组
Splitting an array based on entries in another array
我正在编写一段 JavaScript 代码,它将 RegExp 分解成基本组件,并对其作用给出简短的解释。
我的总体想法是根据另一个数组中的条目来拆分输入字符串(即 RegExp)。
我当前的代码:
/**
 * Breaks a regular expression down into its basic components so that each
 * one can be explained.
 *
 * Only the lookup tables are prepared so far; the step that actually splits
 * the pattern is still to be written (see the placeholder at the end).
 *
 * @param {RegExp} regex - The regular expression to interpret.
 */
function interpret(regex){
    var r = regex + "";
    // Cut at the LAST slash: splitting on every "/" would truncate patterns
    // that contain an escaped slash, such as /a\/b/.
    var lastSlash = r.lastIndexOf("/");
    var body = r.slice(1, lastSlash);
    var flags = r.slice(lastSlash + 1);

    // The tokens are compared against the pattern's source text, so each
    // backslash has to be doubled inside a string literal ("\w" is just "w").
    var classes = [".","\\w","\\d","\\s","\\W","\\D","\\S","[","]"];
    // One description per entry of `classes`, in the same order.
    var classdefs = ["any non-newline character","any word (digit or letter)","any digit (characters 0-9)","any whitespace character","any non-word (non-digit and non-letter)","any non-digit (not characters 0-9)","any non-whitespace character","open matchset","close matchset"];
    var quantifiers = ["*","+","?",
        /{(\d+)}/g,       // a{n}
        /{(\d+),}/g,      // a{n,}
        /{(\d+),(\d+)}/g, // a{n,m}
        /[+*?]\?/g        // a<quant>? - lazy quantification
    ];
    // One description per entry of `quantifiers`, in the same order.
    var quantDefs = ["repeated 0 or more times","repeated 1 or more times","repeated once or not at all","repeated exactly time","repeated or more times","repeated between and times","lazy quantification"];
    var escaped = ["\\t","\\n","\\r","\\.","\\*","\\\\","\\^","\\?","\\|"];
    // One description per entry of `escaped`, in the same order.
    var escapedDefs = ["a tab","a linefeed","a carriage return","a period","an asterisk","a backslash","a caret","a question mark","a vertical bar"];
    // code to split r based on entries in classes, quantifiers, and escaped.
}
理想情况下,这个函数(我们称之为 splitR)应返回如下结果:
> splitR("hello",["he","l"]);
["he", "l", "l", "o"]
> splitR("hello",["he"]);
["he", "llo"]
> splitR("hello",["he","o"]);
["he", "ll", "o"];
> splitR("5 is the square root of 25",[/\d+/g,/\w{3,}/g,"of"]);
["5", " is ", "the", " ", "square", " ", "root", " ", "of", " ", "25"]
明确地说,在 interpret 函数的上下文中,splitR 函数应该接收一个 RegExp 并将其拆分为基本组件;例如 \d+[0-9]\w*?
应该拆分成 ["\d", "+", "[", "0-9", "]", "\w", "*", "?"]
。这些组件在其他数组中单独定义,使用各种正则表达式(例如 /{(\d+)}/g
来查找 a{n}
)和字符串(例如 "."
)。
说实话,我不知道该如何定义 splitR。感谢您的帮助!
下面的代码会将正则表达式拆分成多个部分,并用各部分的描述填充第二个数组。它会跳过无法识别的字符,但不做真正的正则表达式语法检查;也就是说,如果你开始了一个范围却没有结束它,脚本不会报错。我还冒昧地补充了一些你的列表中遗漏的内容,比如分组括号、开始和结束锚点……
/**
 * Splits the source of a regular expression into its basic components and
 * pairs every component with a short description.
 *
 * Components are recognised by trying an ordered list of patterns, each
 * anchored at the start of the not-yet-consumed source. A character that no
 * pattern recognises is skipped. No syntax validation is performed, so an
 * unterminated set or group is not reported.
 *
 * The pattern source, the components and the descriptions are written to the
 * console; when an `alert` function exists it is called to point there.
 *
 * @param {RegExp} regex - The regular expression to break down.
 * @returns {{flags: string, parts: string[], descriptions: string[]}}
 *     The flag letters (g, i, m) that are set, the recognised components in
 *     source order, and the description of each component at the same index.
 */
function interpret(regex)
{
    var body = regex.source;
    var flags = (regex.global ? "g" : "") + (regex.ignoreCase ? "i" : "") + (regex.multiline ? "m" : "");

    // [pattern, description] pairs, tried from top to bottom. The patterns
    // run against the regex SOURCE TEXT, so the token \w must be written
    // /^\\w/ (a backslash followed by "w"); /^\w/ would match any letter.
    var tokens = [
        // character classes, sets, groups and anchors
        [/^\w\-\w/, "character range"],
        [/^\./, "any non-newline character"],
        [/^\\w/, "any word (digit or letter)"],
        [/^\\d/, "any digit (characters 0-9)"],
        [/^\\s/, "any whitespace character"],
        [/^\\W/, "any non-word (non-digit and non-letter)"],
        [/^\\D/, "any non-digit (not characters 0-9)"],
        [/^\\S/, "any non-whitespace character"],
        [/^\[/, "open matchset"],
        [/^\]/, "close matchset"],
        [/^\(/, "open group"],
        [/^\)/, "close group"],
        [/^\^/, "start anchor or negation"],
        [/^\$/, "end anchor"],
        [/^\|/, "alternative"],
        // quantifiers; the two-character lazy form must be tried before
        // the single-character quantifiers
        [/^[+*?]\?/, "lazy quantification"],
        [/^\*/, "repeated 0 or more times"],
        [/^\+/, "repeated 1 or more times"],
        [/^\?/, "repeated once or not at all"],
        [/^\{(\d+)\}/, "repeated exactly time"],
        [/^\{(\d+),\}/, "repeated or more times"],
        [/^\{(\d+),(\d+)\}/, "repeated between and times"],
        // escape sequences: a backslash followed by the escaped character
        [/^\\t/, "a tab"],
        [/^\\n/, "a linefeed"],
        [/^\\r/, "a carriage return"],
        [/^\\\./, "a period"],
        [/^\\\*/, "an asterisk"],
        [/^\\\+/, "a plus"],
        [/^\\\-/, "a minus"],
        [/^\\\\/, "a backslash"],
        [/^\\\^/, "a caret"],
        [/^\\\$/, "a dollar sign"],
        [/^\\\?/, "a question mark"],
        [/^\\\|/, "a vertical bar"],
        [/^\\\[/, "a square bracket"],
        [/^\\\]/, "a square bracket"],
        [/^\\\(/, "a bracket"],
        [/^\\\)/, "a bracket"],
        [/^\\\{/, "a curly bracket"],
        [/^\\\}/, "a curly bracket"],
        // anything else that is not a metacharacter
        [/^[^\.\\[\]\(\)\^$\|\*\+\-\?\{\}]+/, "literal text"]
    ];

    var parts = [];
    var descriptions = [];
    while (body.length)
    {
        var matched = null;
        for (var i = 0; i < tokens.length && matched === null; i++)
        {
            var chunk = tokens[i][0].exec(body);
            // Ignore empty matches: consuming nothing would loop forever.
            if (chunk && chunk[0].length > 0)
            {
                matched = chunk[0];
                parts.push(matched);
                descriptions.push(tokens[i][1]);
            }
        }
        // Consume the recognised component, or skip one unexpected character.
        body = body.slice(matched === null ? 1 : matched.length);
    }

    console.log(regex.source);
    console.log(parts);
    console.log(descriptions);
    // alert only exists in browsers; do not crash in other environments.
    if (typeof alert === "function")
    {
        alert("see console for output");
    }
    return { flags: flags, parts: parts, descriptions: descriptions };
}
// Demo: run interpret on a pattern that exercises most of the token kinds.
// The pattern is passed as a string, so every backslash meant for the regex
// is doubled; with single backslashes the string collapses to
// "...(te||st)**..." and the RegExp constructor throws "nothing to repeat".
var x = new RegExp("^[a-z0-9]\\^.\\.\\w\\d\\s\\W\\D\\S+(te|\\|st)*\\*\\n+\\+\\-\\}(\\W?\\?\\s{1,3})\\\\*?a{3}b{4,}c{}\\r\\t\\$$", "ig");
interpret(x);
我正在编写一段 JavaScript 代码,它将 RegExp 分解成基本组件,并对其作用给出简短的解释。
我的总体想法是根据另一个数组中的条目来拆分输入字符串(即 RegExp)。
我当前的代码:
/**
 * Breaks a regular expression down into its basic components so that each
 * one can be explained.
 *
 * Only the lookup tables are prepared so far; the step that actually splits
 * the pattern is still to be written (see the placeholder at the end).
 *
 * @param {RegExp} regex - The regular expression to interpret.
 */
function interpret(regex){
    var r = regex + "";
    // Cut at the LAST slash: splitting on every "/" would truncate patterns
    // that contain an escaped slash, such as /a\/b/.
    var lastSlash = r.lastIndexOf("/");
    var body = r.slice(1, lastSlash);
    var flags = r.slice(lastSlash + 1);

    // The tokens are compared against the pattern's source text, so each
    // backslash has to be doubled inside a string literal ("\w" is just "w").
    var classes = [".","\\w","\\d","\\s","\\W","\\D","\\S","[","]"];
    // One description per entry of `classes`, in the same order.
    var classdefs = ["any non-newline character","any word (digit or letter)","any digit (characters 0-9)","any whitespace character","any non-word (non-digit and non-letter)","any non-digit (not characters 0-9)","any non-whitespace character","open matchset","close matchset"];
    var quantifiers = ["*","+","?",
        /{(\d+)}/g,       // a{n}
        /{(\d+),}/g,      // a{n,}
        /{(\d+),(\d+)}/g, // a{n,m}
        /[+*?]\?/g        // a<quant>? - lazy quantification
    ];
    // One description per entry of `quantifiers`, in the same order.
    var quantDefs = ["repeated 0 or more times","repeated 1 or more times","repeated once or not at all","repeated exactly time","repeated or more times","repeated between and times","lazy quantification"];
    var escaped = ["\\t","\\n","\\r","\\.","\\*","\\\\","\\^","\\?","\\|"];
    // One description per entry of `escaped`, in the same order.
    var escapedDefs = ["a tab","a linefeed","a carriage return","a period","an asterisk","a backslash","a caret","a question mark","a vertical bar"];
    // code to split r based on entries in classes, quantifiers, and escaped.
}
理想情况下,这个函数(我们称之为 splitR)应返回如下结果:
> splitR("hello",["he","l"]);
["he", "l", "l", "o"]
> splitR("hello",["he"]);
["he", "llo"]
> splitR("hello",["he","o"]);
["he", "ll", "o"];
> splitR("5 is the square root of 25",[/\d+/g,/\w{3,}/g,"of"]);
["5", " is ", "the", " ", "square", " ", "root", " ", "of", " ", "25"]
明确地说,在 interpret 函数的上下文中,splitR 函数应该接收一个 RegExp 并将其拆分为基本组件;例如 \d+[0-9]\w*?
应该拆分成 ["\d", "+", "[", "0-9", "]", "\w", "*", "?"]
。这些组件在其他数组中单独定义,使用各种正则表达式(例如 /{(\d+)}/g
来查找 a{n}
)和字符串(例如 "."
)。
说实话,我不知道该如何定义 splitR。感谢您的帮助!
下面的代码会将正则表达式拆分成多个部分,并用各部分的描述填充第二个数组。它会跳过无法识别的字符,但不做真正的正则表达式语法检查;也就是说,如果你开始了一个范围却没有结束它,脚本不会报错。我还冒昧地补充了一些你的列表中遗漏的内容,比如分组括号、开始和结束锚点……
/**
 * Splits the source of a regular expression into its basic components and
 * pairs every component with a short description.
 *
 * Components are recognised by trying an ordered list of patterns, each
 * anchored at the start of the not-yet-consumed source. A character that no
 * pattern recognises is skipped. No syntax validation is performed, so an
 * unterminated set or group is not reported.
 *
 * The pattern source, the components and the descriptions are written to the
 * console; when an `alert` function exists it is called to point there.
 *
 * @param {RegExp} regex - The regular expression to break down.
 * @returns {{flags: string, parts: string[], descriptions: string[]}}
 *     The flag letters (g, i, m) that are set, the recognised components in
 *     source order, and the description of each component at the same index.
 */
function interpret(regex)
{
    var body = regex.source;
    var flags = (regex.global ? "g" : "") + (regex.ignoreCase ? "i" : "") + (regex.multiline ? "m" : "");

    // [pattern, description] pairs, tried from top to bottom. The patterns
    // run against the regex SOURCE TEXT, so the token \w must be written
    // /^\\w/ (a backslash followed by "w"); /^\w/ would match any letter.
    var tokens = [
        // character classes, sets, groups and anchors
        [/^\w\-\w/, "character range"],
        [/^\./, "any non-newline character"],
        [/^\\w/, "any word (digit or letter)"],
        [/^\\d/, "any digit (characters 0-9)"],
        [/^\\s/, "any whitespace character"],
        [/^\\W/, "any non-word (non-digit and non-letter)"],
        [/^\\D/, "any non-digit (not characters 0-9)"],
        [/^\\S/, "any non-whitespace character"],
        [/^\[/, "open matchset"],
        [/^\]/, "close matchset"],
        [/^\(/, "open group"],
        [/^\)/, "close group"],
        [/^\^/, "start anchor or negation"],
        [/^\$/, "end anchor"],
        [/^\|/, "alternative"],
        // quantifiers; the two-character lazy form must be tried before
        // the single-character quantifiers
        [/^[+*?]\?/, "lazy quantification"],
        [/^\*/, "repeated 0 or more times"],
        [/^\+/, "repeated 1 or more times"],
        [/^\?/, "repeated once or not at all"],
        [/^\{(\d+)\}/, "repeated exactly time"],
        [/^\{(\d+),\}/, "repeated or more times"],
        [/^\{(\d+),(\d+)\}/, "repeated between and times"],
        // escape sequences: a backslash followed by the escaped character
        [/^\\t/, "a tab"],
        [/^\\n/, "a linefeed"],
        [/^\\r/, "a carriage return"],
        [/^\\\./, "a period"],
        [/^\\\*/, "an asterisk"],
        [/^\\\+/, "a plus"],
        [/^\\\-/, "a minus"],
        [/^\\\\/, "a backslash"],
        [/^\\\^/, "a caret"],
        [/^\\\$/, "a dollar sign"],
        [/^\\\?/, "a question mark"],
        [/^\\\|/, "a vertical bar"],
        [/^\\\[/, "a square bracket"],
        [/^\\\]/, "a square bracket"],
        [/^\\\(/, "a bracket"],
        [/^\\\)/, "a bracket"],
        [/^\\\{/, "a curly bracket"],
        [/^\\\}/, "a curly bracket"],
        // anything else that is not a metacharacter
        [/^[^\.\\[\]\(\)\^$\|\*\+\-\?\{\}]+/, "literal text"]
    ];

    var parts = [];
    var descriptions = [];
    while (body.length)
    {
        var matched = null;
        for (var i = 0; i < tokens.length && matched === null; i++)
        {
            var chunk = tokens[i][0].exec(body);
            // Ignore empty matches: consuming nothing would loop forever.
            if (chunk && chunk[0].length > 0)
            {
                matched = chunk[0];
                parts.push(matched);
                descriptions.push(tokens[i][1]);
            }
        }
        // Consume the recognised component, or skip one unexpected character.
        body = body.slice(matched === null ? 1 : matched.length);
    }

    console.log(regex.source);
    console.log(parts);
    console.log(descriptions);
    // alert only exists in browsers; do not crash in other environments.
    if (typeof alert === "function")
    {
        alert("see console for output");
    }
    return { flags: flags, parts: parts, descriptions: descriptions };
}
// Demo: run interpret on a pattern that exercises most of the token kinds.
// The pattern is passed as a string, so every backslash meant for the regex
// is doubled; with single backslashes the string collapses to
// "...(te||st)**..." and the RegExp constructor throws "nothing to repeat".
var x = new RegExp("^[a-z0-9]\\^.\\.\\w\\d\\s\\W\\D\\S+(te|\\|st)*\\*\\n+\\+\\-\\}(\\W?\\?\\s{1,3})\\\\*?a{3}b{4,}c{}\\r\\t\\$$", "ig");
interpret(x);