C 中的正则表达式自定义替换函数
Regex custom replace function in C
我一直在修改此代码 https://benchmarksgame.alioth.debian.org/u64q/program.php?test=regexredux&lang=gcc&id=3 以实现 pcre 替换功能。
目前我正在尝试为其提供选项,以用第一个替换多次连续出现并使用此代码(请参阅中间的评论)
static char * fb_subst_updating(fbuf_t * dst, char * src,
const char * p,
const char * r) {
pcre * re;
pcre_extra * re_ex;
const char * re_e;
char * dp;
int index_last_coincidence = -1;
int re_eo, m[3], pos, rlen, clen, coincidence_length;
if (!(re = pcre_compile(p, 0, & re_e, & re_eo, NULL)))
exit(1);
re_ex = pcre_study(re, PCRE_STUDY_JIT_COMPILE, & re_e);
// The for loop iterates though all the coincidences matches.
for (dst - > len = 0, rlen = strlen(r), pos = 0; pcre_exec(re, re_ex, src, strlen(src), pos, 0, m, 3) >= 0; pos = m[1]) {
//m[0] and m[1] are the first and second index position
//of the coincidence in the iteration
clen = m[0] - pos;
if (r[0] == '$') {
coincidence_length = m[1] - m[0];
dp = fb_need(dst, clen + coincidence_length);
dst - > len += clen + coincidence_length;
char coincidence_value[coincidence_length];
memcpy(coincidence_value, & src[m[0]], coincidence_length);
memcpy(dp, src + pos, clen);
if (index_last_coincidence == m[0]) {
index_last_coincidence = m[1];
// I'm sure that my problem is here
// This line could be ignored according to my logic,
// but the result is the same:
memcpy(dp + clen, coincidence_value, 0);
} else {
index_last_coincidence = m[1];
memcpy(dp + clen, coincidence_value, coincidence_length);
}
} else {
dp = fb_need(dst, clen + rlen);
dst - > len += clen + rlen;
memcpy(dp, src + pos, clen);
memcpy(dp + clen, r, rlen);
}
}
clen = strlen(src) - pos;
dp = fb_need(dst, clen);
dst - > len += clen;
memcpy(dp, src + pos, clen);
return dst - > buf;
}
我有这个文本输入:
RT @pacobazan_: Me encanta como se revientan cañones entre ellos. (@LibreriaSur)Muy eeeeeeee de República Burgués con statichipocresía social, donde todo funciona por autobombo, palmadita al amigo y argolla. (@LibreriaSur)(@LibreriaSur)(@LibreriaSur)Argolla en el cine, argolla en el periodismo, argolla en la tv. Grupitos de amigos (@LibreriaSur)jugando a ser talentosos. RT @MijaelGLP : Unas chelas para celebrar la inauguración de la (@LibreriaSur)esquina de libros de (@LibreriaSur) en Café Julieta con dos maestros:…
模式是这样的:
((@[A-Za-z0-9_]+))
替换为:</code></p>
<p>我得到 atm 的结果是:</p>
<blockquote>
<p>RT @pacobazan_: Me encanta como se revientan cañones entre ellos. (@LibreriaSur)Muy eeeeeeee de República Burgués con statichipocresía social, donde todo funciona por autobombo, palmadita al amigo y argolla. (@LibreriaSur)</p>
</blockquote>
<p>它应该用 <code>(@LibreriaSur)
替换 (@LibreriaSur)(@LibreriaSur)(@LibreriaSur)
它在第一次连续巧合后就不再继续复制了,还没有弄清楚是什么原因。
按照我的逻辑,如果上一次重合的初始位置与实际重合的最终位置相同,则不应该复制,并在下一次迭代中继续其余部分。
如果没有连续的巧合或者我使用正常的替换,它工作正常。
我几乎可以肯定这是一个愚蠢的错误,但我才使用 C 语言几天,无法弄明白。希望有人能帮助我。
好吧,我终于想出了一个让它工作的方法,不得不使用时间变量、strcat、strcpy 和与“\0”连接来重构所有函数。这是结果:
static char * replace2(char * text,
const char * pattern,
const char * replace_value) {
char * result;
result = (char * ) malloc(8192);
pcre * re;
pcre_extra * re_ex;
const char * re_e;
int len_text = strnlen(text, strlen(text)), index_last_coincidencia = -1;
char * dp;
int re_eo, m[3], pos, rlen, clen;
if (!(re = pcre_compile(pattern, 0, & re_e, & re_eo, NULL))) exit(1);
re_ex = pcre_study(re, PCRE_STUDY_JIT_COMPILE, & re_e);
for (rlen = strnlen(replace_value, strlen(replace_value)), pos = 0; pcre_exec(re, re_ex, text, len_text, pos, 0, m, 3) >= 0; pos = m[1]) {
if (replace_value[0] == '$') {
if (index_last_coincidencia == m[0]) {
index_last_coincidencia = m[1];
} else {
index_last_coincidencia = m[1];
char * tmp;
tmp = (char * ) malloc(m[0] - pos);
strncpy(tmp, & text[pos], m[0] - pos);
tmp[m[0] - pos] = '[=10=]';
strcat(result, tmp);
free(tmp);
tmp = (char * ) malloc(m[1] - m[0]);
strncpy(tmp, & text[m[0]], m[1] - m[0]);
tmp[m[1] - m[0]] = '[=10=]';
strcat(result, tmp);
free(tmp);
}
} else {
char * tmp;
tmp = (char * ) malloc(m[0] - pos);
strncpy(tmp, & text[pos], m[0] - pos);
tmp[m[0] - pos] = '[=10=]';
strcat(result, tmp);
free(tmp);
tmp = (char * ) malloc(rlen);
strncpy(tmp, replace_value, rlen);
tmp[rlen] = '[=10=]';
strcat(result, tmp);
free(tmp);
}
}
char * tmp;
int size = (strnlen(text, strlen(text)) - pos);
tmp = (char * ) malloc(size);
strncpy(tmp, & text[pos], size);
tmp[size] = '[=10=]';
strcat(result, tmp);
free(tmp);
return result;}
我一直在修改此代码 https://benchmarksgame.alioth.debian.org/u64q/program.php?test=regexredux&lang=gcc&id=3 以实现 pcre 替换功能。
目前我正在尝试为其提供选项,以用第一个替换多次连续出现并使用此代码(请参阅中间的评论)
static char * fb_subst_updating(fbuf_t * dst, char * src,
const char * p,
const char * r) {
pcre * re;
pcre_extra * re_ex;
const char * re_e;
char * dp;
int index_last_coincidence = -1;
int re_eo, m[3], pos, rlen, clen, coincidence_length;
if (!(re = pcre_compile(p, 0, & re_e, & re_eo, NULL)))
exit(1);
re_ex = pcre_study(re, PCRE_STUDY_JIT_COMPILE, & re_e);
// The for loop iterates though all the coincidences matches.
for (dst - > len = 0, rlen = strlen(r), pos = 0; pcre_exec(re, re_ex, src, strlen(src), pos, 0, m, 3) >= 0; pos = m[1]) {
//m[0] and m[1] are the first and second index position
//of the coincidence in the iteration
clen = m[0] - pos;
if (r[0] == '$') {
coincidence_length = m[1] - m[0];
dp = fb_need(dst, clen + coincidence_length);
dst - > len += clen + coincidence_length;
char coincidence_value[coincidence_length];
memcpy(coincidence_value, & src[m[0]], coincidence_length);
memcpy(dp, src + pos, clen);
if (index_last_coincidence == m[0]) {
index_last_coincidence = m[1];
// I'm sure that my problem is here
// This line could be ignored according to my logic,
// but the result is the same:
memcpy(dp + clen, coincidence_value, 0);
} else {
index_last_coincidence = m[1];
memcpy(dp + clen, coincidence_value, coincidence_length);
}
} else {
dp = fb_need(dst, clen + rlen);
dst - > len += clen + rlen;
memcpy(dp, src + pos, clen);
memcpy(dp + clen, r, rlen);
}
}
clen = strlen(src) - pos;
dp = fb_need(dst, clen);
dst - > len += clen;
memcpy(dp, src + pos, clen);
return dst - > buf;
}
我有这个文本输入:
RT @pacobazan_: Me encanta como se revientan cañones entre ellos. (@LibreriaSur)Muy eeeeeeee de República Burgués con statichipocresía social, donde todo funciona por autobombo, palmadita al amigo y argolla. (@LibreriaSur)(@LibreriaSur)(@LibreriaSur)Argolla en el cine, argolla en el periodismo, argolla en la tv. Grupitos de amigos (@LibreriaSur)jugando a ser talentosos. RT @MijaelGLP : Unas chelas para celebrar la inauguración de la (@LibreriaSur)esquina de libros de (@LibreriaSur) en Café Julieta con dos maestros:…
模式是这样的:
((@[A-Za-z0-9_]+))
替换为:</code></p>
<p>我得到 atm 的结果是:</p>
<blockquote>
<p>RT @pacobazan_: Me encanta como se revientan cañones entre ellos. (@LibreriaSur)Muy eeeeeeee de República Burgués con statichipocresía social, donde todo funciona por autobombo, palmadita al amigo y argolla. (@LibreriaSur)</p>
</blockquote>
<p>它应该用 <code>(@LibreriaSur)
(@LibreriaSur)(@LibreriaSur)(@LibreriaSur)
它在第一次连续巧合后就不再继续复制了,还没有弄清楚是什么原因。
按照我的逻辑,如果上一次重合的初始位置与实际重合的最终位置相同,则不应该复制,并在下一次迭代中继续其余部分。
如果没有连续的巧合或者我使用正常的替换,它工作正常。
我几乎可以肯定这是一个愚蠢的错误,但我才使用 C 语言几天,无法弄明白。希望有人能帮助我。
好吧,我终于想出了一个让它工作的方法,不得不使用时间变量、strcat、strcpy 和与“\0”连接来重构所有函数。这是结果:
static char * replace2(char * text,
const char * pattern,
const char * replace_value) {
char * result;
result = (char * ) malloc(8192);
pcre * re;
pcre_extra * re_ex;
const char * re_e;
int len_text = strnlen(text, strlen(text)), index_last_coincidencia = -1;
char * dp;
int re_eo, m[3], pos, rlen, clen;
if (!(re = pcre_compile(pattern, 0, & re_e, & re_eo, NULL))) exit(1);
re_ex = pcre_study(re, PCRE_STUDY_JIT_COMPILE, & re_e);
for (rlen = strnlen(replace_value, strlen(replace_value)), pos = 0; pcre_exec(re, re_ex, text, len_text, pos, 0, m, 3) >= 0; pos = m[1]) {
if (replace_value[0] == '$') {
if (index_last_coincidencia == m[0]) {
index_last_coincidencia = m[1];
} else {
index_last_coincidencia = m[1];
char * tmp;
tmp = (char * ) malloc(m[0] - pos);
strncpy(tmp, & text[pos], m[0] - pos);
tmp[m[0] - pos] = '[=10=]';
strcat(result, tmp);
free(tmp);
tmp = (char * ) malloc(m[1] - m[0]);
strncpy(tmp, & text[m[0]], m[1] - m[0]);
tmp[m[1] - m[0]] = '[=10=]';
strcat(result, tmp);
free(tmp);
}
} else {
char * tmp;
tmp = (char * ) malloc(m[0] - pos);
strncpy(tmp, & text[pos], m[0] - pos);
tmp[m[0] - pos] = '[=10=]';
strcat(result, tmp);
free(tmp);
tmp = (char * ) malloc(rlen);
strncpy(tmp, replace_value, rlen);
tmp[rlen] = '[=10=]';
strcat(result, tmp);
free(tmp);
}
}
char * tmp;
int size = (strnlen(text, strlen(text)) - pos);
tmp = (char * ) malloc(size);
strncpy(tmp, & text[pos], size);
tmp[size] = '[=10=]';
strcat(result, tmp);
free(tmp);
return result;}