如何根据 HTML 标签更新文本
How to update text based on HTML tags
我有一个非常基本的例子:
<div><span>Lorem ipsum dolor sit amet, elit</span>consectetur adipiscing</div>
我想将标签 <div>...</div> 中的单词 "dolor" 替换为 "some_another_word"。单词 "dolor" 可能出现在 div 中 span 的内部或外部。
我现在的代码是:
$html = '<div><span>Lorem ipsum dolor sit amet, elit</span>consectetur adipiscing</div>';
$docs = new \DOMDocument();
$docs->loadHTML( $html );
// Visit every <div> in the parsed document.
// Note: assigning to textContent replaces ALL children of the element with a
// single text node, which is why the nested <span> disappears from the output.
foreach ( $docs->getElementsByTagName('div') as $div ) {
    $div->textContent = str_replace('dolor', 'some_another_word', $div->textContent);
}
var_dump( $docs->saveHTML() );
我的代码的结果是:
<html><body><div>Lorem ipsum some_another_word sit amet, elit consectetur adipiscing</div></body></html>
我丢失了我需要的 span 标签。我该如何避免这种情况?
您可以使用 XPath 表达式来非常精确地定位您希望操作的内容(前提是查询构造正确)。以下示例演示了如何应用这一思路。
$html = '<div><span style="color:red">Lorem ipsum dolor sit amet, elit</span>consectetur adipiscing</div>';
$word = 'dolor';
$replace = '#### banana ####';

/*
 * Replace every occurrence of $word with $replace in the text found anywhere
 * inside a <div> - inside or outside nested tags - while leaving the markup
 * itself (e.g. the <span> and its attributes) untouched.
 */

// Collect libxml parse problems ourselves instead of letting them surface as PHP warnings;
// the previous setting is restored in the finally block below.
$previousLibxmlSetting = libxml_use_internal_errors( true );
try {
    $dom = new DOMDocument;
    $dom->preserveWhiteSpace = false;
    $dom->validateOnParse = false;
    $dom->standalone = true;
    $dom->strictErrorChecking = true;
    $dom->substituteEntities = true;
    $dom->recover = true;
    $dom->formatOutput = false;
    $dom->loadHTML( $html );

    $errors = libxml_get_errors();
    libxml_clear_errors();
    if ( !empty( $errors ) ) {
        // libxml_get_errors() returns LibXMLError objects; they have no string
        // conversion, so build the message from their public fields.
        $messages = array_map(
            function ( $error ) {
                return sprintf( 'line %d: %s', $error->line, trim( $error->message ) );
            },
            $errors
        );
        throw new Exception( implode( PHP_EOL, $messages ) );
    }

    $xp = new DOMXPath( $dom );

    /*
     * The XPath expression: select the text nodes below any <div>, not the elements.
     * Editing a text node cannot discard sibling or child tags, and because $word is
     * not interpolated into the expression, quotes inside $word cannot break the query.
     */
    $query = '//div//text()';
    $col = $xp->query( $query );
    if ( $col === false ) {
        // query() returns false (not an empty list) when the expression cannot be evaluated.
        throw new Exception( sprintf( 'XPath query %s failed', $query ) );
    }

    foreach ( $col as $textNode ) {
        $updated = str_replace( $word, $replace, $textNode->nodeValue, $count );
        // Only write back the text nodes that actually contained $word.
        if ( $count > 0 ) {
            $textNode->nodeValue = $updated;
        }
    }

    /* output to browser or save to file */
    echo $dom->saveHTML();
} catch ( Exception $e ) {
    printf( 'Caught Exception -> Trace:%s Message:%s Code:%d', $e->getTraceAsString(), $e->getMessage(), $e->getCode() );
} finally {
    libxml_use_internal_errors( $previousLibxmlSetting );
}
我有一个非常基本的例子:
<div><span>Lorem ipsum dolor sit amet, elit</span>consectetur adipiscing</div>
我想将标签 <div>...</div> 中的单词 "dolor" 替换为 "some_another_word"。单词 "dolor" 可能出现在 div 中 span 的内部或外部。
我现在的代码是:
$html = '<div><span>Lorem ipsum dolor sit amet, elit</span>consectetur adipiscing</div>';
$docs = new \DOMDocument();
$docs->loadHTML( $html );
// Visit every <div> in the parsed document.
// Note: assigning to textContent replaces ALL children of the element with a
// single text node, which is why the nested <span> disappears from the output.
foreach ( $docs->getElementsByTagName('div') as $div ) {
    $div->textContent = str_replace('dolor', 'some_another_word', $div->textContent);
}
var_dump( $docs->saveHTML() );
我的代码的结果是:
<html><body><div>Lorem ipsum some_another_word sit amet, elit consectetur adipiscing</div></body></html>
我丢失了我需要的 span 标签。我该如何避免这种情况?
您可以使用 XPath 表达式来非常精确地定位您希望操作的内容(前提是查询构造正确)。以下示例演示了如何应用这一思路。
$html = '<div><span style="color:red">Lorem ipsum dolor sit amet, elit</span>consectetur adipiscing</div>';
$word = 'dolor';
$replace = '#### banana ####';

/*
 * Replace every occurrence of $word with $replace in the text found anywhere
 * inside a <div> - inside or outside nested tags - while leaving the markup
 * itself (e.g. the <span> and its attributes) untouched.
 */

// Collect libxml parse problems ourselves instead of letting them surface as PHP warnings;
// the previous setting is restored in the finally block below.
$previousLibxmlSetting = libxml_use_internal_errors( true );
try {
    $dom = new DOMDocument;
    $dom->preserveWhiteSpace = false;
    $dom->validateOnParse = false;
    $dom->standalone = true;
    $dom->strictErrorChecking = true;
    $dom->substituteEntities = true;
    $dom->recover = true;
    $dom->formatOutput = false;
    $dom->loadHTML( $html );

    $errors = libxml_get_errors();
    libxml_clear_errors();
    if ( !empty( $errors ) ) {
        // libxml_get_errors() returns LibXMLError objects; they have no string
        // conversion, so build the message from their public fields.
        $messages = array_map(
            function ( $error ) {
                return sprintf( 'line %d: %s', $error->line, trim( $error->message ) );
            },
            $errors
        );
        throw new Exception( implode( PHP_EOL, $messages ) );
    }

    $xp = new DOMXPath( $dom );

    /*
     * The XPath expression: select the text nodes below any <div>, not the elements.
     * Editing a text node cannot discard sibling or child tags, and because $word is
     * not interpolated into the expression, quotes inside $word cannot break the query.
     */
    $query = '//div//text()';
    $col = $xp->query( $query );
    if ( $col === false ) {
        // query() returns false (not an empty list) when the expression cannot be evaluated.
        throw new Exception( sprintf( 'XPath query %s failed', $query ) );
    }

    foreach ( $col as $textNode ) {
        $updated = str_replace( $word, $replace, $textNode->nodeValue, $count );
        // Only write back the text nodes that actually contained $word.
        if ( $count > 0 ) {
            $textNode->nodeValue = $updated;
        }
    }

    /* output to browser or save to file */
    echo $dom->saveHTML();
} catch ( Exception $e ) {
    printf( 'Caught Exception -> Trace:%s Message:%s Code:%d', $e->getTraceAsString(), $e->getMessage(), $e->getCode() );
} finally {
    libxml_use_internal_errors( $previousLibxmlSetting );
}