使用 Simple HTML DOM Parser 生成 JSON?
Use Simple HTML DOM Parser to JSON?
我正在尝试对抓取网站的每个元素进行分组,将其转换为 json 元素,但它似乎不起作用。
<?php
// Question code, kept as posted: scrape product markup and print it as JSON.
// Include the php dom parser
include_once 'simple_html_dom.php';
// Declare the response body as JSON.
header('Content-type: application/json');
// Create DOM from URL or file
$html = file_get_html('urlhere');
// Outer loop: every element matched by the 'hr ul' selector.
foreach($html->find('hr ul') as $ul)
{
// The inner foreach has no braces, so only the single assignment below is its body.
foreach($ul->find('div.product') as $li)
// Stores each product's raw inner HTML under ['products'][]['li'].
$data[$count]['products'][]['li']= $li->innertext;
// NOTE: $count is never initialized in this script before being used as an array key above;
// an unset (null) key is cast to "" by PHP, which matches the "" key in the quoted output.
// This increment runs once per outer iteration, not once per product.
$count++;
}
echo json_encode($data);
?>
这段代码返回:
{"":{"products":[{"li":" <a class=\"th\" href=\"\/products\/56942-haters-crewneck-sweatshirt\"> <div style=\"background-image:url('http:\/\/s0.merchdirect.com\/images\/15814\/v600_B_AltApparel_Crew.png');\"> <img src=\"http:\/\/s0.com\/images\/6398\/product-image-placeholder-600.png\"> <\/div> <\/a> <div class=\"panel panel-info\" style=\"display: none;\"> <div class=\"name\"> <a href=\"\/products\/56942-haters-crewneck-sweatshirt\"> Haters Crewneck Sweatshirt <\/a> <\/div> <div class=\"subtitle\"> 60.00 <\/div> <\/div> "}
而我真正希望得到的是:
{"products":[{
"link":"/products/56942-haters-crewneck-sweatshirt",
"image":"http://s0.com/images/15814/v600_B_AltApparel_Crew.png",
"name":"Haters Crewneck Sweatshirt",
"subtitle":"60.00"}
]}
如何去除所有冗余信息,并为重新格式化后的 JSON 中的每个字段命名?
谢谢!
您只需要在内部循环中扩展您的逻辑:
// Build the result under a single top-level "products" key so the encoded
// output has the shape {"products":[{...}, ...]}. Initializing $data up front
// also means an empty page still yields valid JSON ({"products":[]}) and no
// undefined-variable notice is printed into the JSON response body.
$data = ['products' => []];

foreach ($html->find('hr ul') as $ul) {
    foreach ($ul->find('div.product') as $li) {
        // find($selector, 0) returns the first match, or null when nothing
        // matches, so a product with missing markup does not abort the script.
        $link     = $li->find('a.th', 0);
        $name     = $li->find('div.name a', 0);
        $subtitle = $li->find('div.subtitle', 0);
        $imageDiv = $li->find('div', 0);

        // The image URL sits between the single quotes of the inline style,
        // e.g. background-image:url('http://...png'); so it is piece [1].
        $styleParts = $imageDiv ? explode("'", (string) $imageDiv->style) : [];

        $data['products'][] = [
            'link'     => $link ? $link->href : null,
            'image'    => isset($styleParts[1]) ? $styleParts[1] : null,
            'name'     => $name ? trim($name->innertext) : null,
            'subtitle' => $subtitle ? trim($subtitle->innertext) : null,
        ];
    }
}

// JSON_UNESCAPED_SLASHES keeps URLs readable ("/products/..." instead of
// "\/products\/..."); both forms are valid JSON.
echo json_encode($data, JSON_UNESCAPED_SLASHES);
我正在尝试对抓取网站的每个元素进行分组,将其转换为 json 元素,但它似乎不起作用。
<?php
// Question code, kept as posted: scrape product markup and print it as JSON.
// Include the php dom parser
include_once 'simple_html_dom.php';
// Declare the response body as JSON.
header('Content-type: application/json');
// Create DOM from URL or file
$html = file_get_html('urlhere');
// Outer loop: every element matched by the 'hr ul' selector.
foreach($html->find('hr ul') as $ul)
{
// The inner foreach has no braces, so only the single assignment below is its body.
foreach($ul->find('div.product') as $li)
// Stores each product's raw inner HTML under ['products'][]['li'].
$data[$count]['products'][]['li']= $li->innertext;
// NOTE: $count is never initialized in this script before being used as an array key above;
// an unset (null) key is cast to "" by PHP, which matches the "" key in the quoted output.
// This increment runs once per outer iteration, not once per product.
$count++;
}
echo json_encode($data);
?>
这段代码返回:
{"":{"products":[{"li":" <a class=\"th\" href=\"\/products\/56942-haters-crewneck-sweatshirt\"> <div style=\"background-image:url('http:\/\/s0.merchdirect.com\/images\/15814\/v600_B_AltApparel_Crew.png');\"> <img src=\"http:\/\/s0.com\/images\/6398\/product-image-placeholder-600.png\"> <\/div> <\/a> <div class=\"panel panel-info\" style=\"display: none;\"> <div class=\"name\"> <a href=\"\/products\/56942-haters-crewneck-sweatshirt\"> Haters Crewneck Sweatshirt <\/a> <\/div> <div class=\"subtitle\"> 60.00 <\/div> <\/div> "}
而我真正希望得到的是:
{"products":[{
"link":"/products/56942-haters-crewneck-sweatshirt",
"image":"http://s0.com/images/15814/v600_B_AltApparel_Crew.png",
"name":"Haters Crewneck Sweatshirt",
"subtitle":"60.00"}
]}
如何去除所有冗余信息,并为重新格式化后的 JSON 中的每个字段命名?
谢谢!
您只需要在内部循环中扩展您的逻辑:
// Build the result under a single top-level "products" key so the encoded
// output has the shape {"products":[{...}, ...]}. Initializing $data up front
// also means an empty page still yields valid JSON ({"products":[]}) and no
// undefined-variable notice is printed into the JSON response body.
$data = ['products' => []];

foreach ($html->find('hr ul') as $ul) {
    foreach ($ul->find('div.product') as $li) {
        // find($selector, 0) returns the first match, or null when nothing
        // matches, so a product with missing markup does not abort the script.
        $link     = $li->find('a.th', 0);
        $name     = $li->find('div.name a', 0);
        $subtitle = $li->find('div.subtitle', 0);
        $imageDiv = $li->find('div', 0);

        // The image URL sits between the single quotes of the inline style,
        // e.g. background-image:url('http://...png'); so it is piece [1].
        $styleParts = $imageDiv ? explode("'", (string) $imageDiv->style) : [];

        $data['products'][] = [
            'link'     => $link ? $link->href : null,
            'image'    => isset($styleParts[1]) ? $styleParts[1] : null,
            'name'     => $name ? trim($name->innertext) : null,
            'subtitle' => $subtitle ? trim($subtitle->innertext) : null,
        ];
    }
}

// JSON_UNESCAPED_SLASHES keeps URLs readable ("/products/..." instead of
// "\/products\/..."); both forms are valid JSON.
echo json_encode($data, JSON_UNESCAPED_SLASHES);