如何在另一个脚本中使用来自脚本的数据
How to use data from one script in another
我想稍微自动化我的工作。
我有两个文件。我想把第一个文件的输出(链接列表)用在第二个文件中,遍历其中的每个链接并提取一些数据。
此脚本创建了我们词汇表页面上列出的 URL 列表:
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Destination file for the scraped links, one href per line.
const writeStream = fs.createWriteStream('links.csv');

// Fetch the glossary index page and dump the href of every listed entry.
request('https://www.myurl.com/glossary', (error, response, html) => {
  // Report failures instead of finishing silently with an empty links.csv.
  if (error) {
    console.error('Request failed:', error);
    writeStream.end();
    return;
  }
  if (response.statusCode !== 200) {
    console.error(`Unexpected status code: ${response.statusCode}`);
    writeStream.end();
    return;
  }

  const $ = cheerio.load(html);
  $('div.letter > ul > li').each((i, el) => {
    const link = $(el).find('a').attr('href');
    // A list item without an anchor/href yields undefined; skip it rather
    // than writing the literal text "undefined" into the file.
    if (!link) {
      return;
    }
    writeStream.write(`${link} \n`);
  });

  // Every link has been queued; close the file so it is flushed and released.
  writeStream.end();
  console.log('Done...');
});
第二个文件应该访问文件中的每个 URL,在页面里查找 h1 和 .class 元素,并将结果写入 .html 文件:
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Output file that collects one <h1>/<p> pair per glossary entry.
const writeStream = fs.createWriteStream('data.html');

/**
 * Response handler: pulls the term and its short definition out of every
 * `div.content` element and appends them to data.html.
 */
const handlePage = (error, response, html) => {
  // Nothing is written unless the request succeeded with HTTP 200.
  if (error || response.statusCode != 200) {
    return;
  }

  const $ = cheerio.load(html);
  $('div.content').each((index, node) => {
    const $entry = $(node);
    const term = $entry.find('h1').text();
    const definition = $entry.find('.glossary_short_definition').text();
    writeStream.write(`<h1>${term}</h1> <p>${definition}</p> \n`);
  });
  console.log('Done...');
};

// The URL below is still the placeholder from the question.
request('what to put here?', handlePage);
我添加了导入/导出文件。
如何在第二个文件中使用这些 URL?
您不必将 url 列表保存为 csv。您可以将它传递给包装在函数中的第二个请求。你可以这样做:
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Output file shared with followUrl(), which appends one entry per page.
const writeStream = fs.createWriteStream('data.html');

// Index page listing every glossary entry; also the base for relative links.
const glossaryUrl = 'https://www.myurl.com/glossary';

// Fetch the index page and hand every entry link to followUrl().
request(glossaryUrl, (error, response, html) => {
  // Report failures instead of finishing silently with an empty data.html.
  if (error) {
    console.error('Request failed:', error);
    return;
  }
  if (response.statusCode !== 200) {
    console.error(`Unexpected status code: ${response.statusCode}`);
    return;
  }

  const $ = cheerio.load(html);
  $('div.letter > ul > li').each((i, el) => {
    const href = $(el).find('a').attr('href');
    // A list item without an anchor/href yields undefined; nothing to follow.
    if (!href) {
      return;
    }

    let link;
    try {
      // hrefs may be relative (e.g. "/glossary/term"); resolve them against
      // the index page so the follow-up request gets an absolute URL.
      link = new URL(href, glossaryUrl).href;
    } catch (urlError) {
      console.error(`Skipping invalid link: ${href}`);
      return;
    }
    followUrl(link); // pass the link value to the next request...
  });

  // Only the link discovery is finished here; the followed requests complete
  // asynchronously and log their own "Done for url" lines.
  console.log('Done...');
});
/**
 * Fetches one glossary entry page and appends the term and short definition
 * of every `div.content` element to the shared `writeStream`.
 *
 * Failures are reported on stderr and leave the output untouched.
 *
 * @param {string} link - URL of the entry page to request.
 */
function followUrl(link) {
  // An anchor without an href gives undefined here; skip it instead of
  // handing request() an invalid target.
  if (!link) {
    console.error('Skipping entry without a url');
    return;
  }

  request(link, (error, response, html) => {
    if (error) {
      console.error(`Request failed for url: ${link}`, error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error(`Unexpected status code ${response.statusCode} for url: ${link}`);
      return;
    }

    const $ = cheerio.load(html);
    $('div.content').each((i, el) => {
      const term = $(el).find('h1').text();
      const definition = $(el).find('.glossary_short_definition').text();
      writeStream.write(`<h1>${term}</h1> <p>${definition}</p> \n`);
    });
    console.log(`Done for url: ${link}`);
  });
}
但是您知道吗,您也可以使用 osmosis 用更少的代码来做同样的事情?
const osmosis = require('osmosis');
const fs = require('fs');
// File that receives one <h1>/<p> pair per scraped entry.
const writeStream = fs.createWriteStream('data.html');

// Selectors to extract from each followed page, keyed by result field name.
const entryFields = {
  term: 'h1',
  definition: '.glossary_short_definition',
};

// Append one extracted entry to data.html.
const saveEntry = (entry) => {
  const { term, definition } = entry;
  writeStream.write(`<h1>${term}</h1> <p>${definition}</p> \n`);
};

// Logged once osmosis reports that the run is complete.
const announceDone = () => console.log('finished!');

osmosis
  .get('https://www.myurl.com/glossary')
  .follow('@href') // browse each href in the url
  .set(entryFields)
  .data(saveEntry)
  .done(announceDone)
  .log(console.log)
  .error(console.log)
  .debug(console.log);
我想稍微自动化我的工作。
我有两个文件。我想把第一个文件的输出(链接列表)用在第二个文件中,遍历其中的每个链接并提取一些数据。
此脚本创建了我们词汇表页面上列出的 URL 列表:
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Destination file for the scraped links, one href per line.
const writeStream = fs.createWriteStream('links.csv');

// Fetch the glossary index page and dump the href of every listed entry.
request('https://www.myurl.com/glossary', (error, response, html) => {
  // Report failures instead of finishing silently with an empty links.csv.
  if (error) {
    console.error('Request failed:', error);
    writeStream.end();
    return;
  }
  if (response.statusCode !== 200) {
    console.error(`Unexpected status code: ${response.statusCode}`);
    writeStream.end();
    return;
  }

  const $ = cheerio.load(html);
  $('div.letter > ul > li').each((i, el) => {
    const link = $(el).find('a').attr('href');
    // A list item without an anchor/href yields undefined; skip it rather
    // than writing the literal text "undefined" into the file.
    if (!link) {
      return;
    }
    writeStream.write(`${link} \n`);
  });

  // Every link has been queued; close the file so it is flushed and released.
  writeStream.end();
  console.log('Done...');
});
第二个文件应该访问文件中的每个 URL,在页面里查找 h1 和 .class 元素,并将结果写入 .html 文件:
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Output file that collects one <h1>/<p> pair per glossary entry.
const writeStream = fs.createWriteStream('data.html');

/**
 * Response handler: pulls the term and its short definition out of every
 * `div.content` element and appends them to data.html.
 */
const handlePage = (error, response, html) => {
  // Nothing is written unless the request succeeded with HTTP 200.
  if (error || response.statusCode != 200) {
    return;
  }

  const $ = cheerio.load(html);
  $('div.content').each((index, node) => {
    const $entry = $(node);
    const term = $entry.find('h1').text();
    const definition = $entry.find('.glossary_short_definition').text();
    writeStream.write(`<h1>${term}</h1> <p>${definition}</p> \n`);
  });
  console.log('Done...');
};

// The URL below is still the placeholder from the question.
request('what to put here?', handlePage);
我添加了导入/导出文件。
如何在第二个文件中使用这些 URL?
您不必将 url 列表保存为 csv。您可以将它传递给包装在函数中的第二个请求。你可以这样做:
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Output file shared with followUrl(), which appends one entry per page.
const writeStream = fs.createWriteStream('data.html');

// Index page listing every glossary entry; also the base for relative links.
const glossaryUrl = 'https://www.myurl.com/glossary';

// Fetch the index page and hand every entry link to followUrl().
request(glossaryUrl, (error, response, html) => {
  // Report failures instead of finishing silently with an empty data.html.
  if (error) {
    console.error('Request failed:', error);
    return;
  }
  if (response.statusCode !== 200) {
    console.error(`Unexpected status code: ${response.statusCode}`);
    return;
  }

  const $ = cheerio.load(html);
  $('div.letter > ul > li').each((i, el) => {
    const href = $(el).find('a').attr('href');
    // A list item without an anchor/href yields undefined; nothing to follow.
    if (!href) {
      return;
    }

    let link;
    try {
      // hrefs may be relative (e.g. "/glossary/term"); resolve them against
      // the index page so the follow-up request gets an absolute URL.
      link = new URL(href, glossaryUrl).href;
    } catch (urlError) {
      console.error(`Skipping invalid link: ${href}`);
      return;
    }
    followUrl(link); // pass the link value to the next request...
  });

  // Only the link discovery is finished here; the followed requests complete
  // asynchronously and log their own "Done for url" lines.
  console.log('Done...');
});
/**
 * Fetches one glossary entry page and appends the term and short definition
 * of every `div.content` element to the shared `writeStream`.
 *
 * Failures are reported on stderr and leave the output untouched.
 *
 * @param {string} link - URL of the entry page to request.
 */
function followUrl(link) {
  // An anchor without an href gives undefined here; skip it instead of
  // handing request() an invalid target.
  if (!link) {
    console.error('Skipping entry without a url');
    return;
  }

  request(link, (error, response, html) => {
    if (error) {
      console.error(`Request failed for url: ${link}`, error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error(`Unexpected status code ${response.statusCode} for url: ${link}`);
      return;
    }

    const $ = cheerio.load(html);
    $('div.content').each((i, el) => {
      const term = $(el).find('h1').text();
      const definition = $(el).find('.glossary_short_definition').text();
      writeStream.write(`<h1>${term}</h1> <p>${definition}</p> \n`);
    });
    console.log(`Done for url: ${link}`);
  });
}
但是您知道吗,您也可以使用 osmosis 用更少的代码来做同样的事情?
const osmosis = require('osmosis');
const fs = require('fs');
// File that receives one <h1>/<p> pair per scraped entry.
const writeStream = fs.createWriteStream('data.html');

// Selectors to extract from each followed page, keyed by result field name.
const entryFields = {
  term: 'h1',
  definition: '.glossary_short_definition',
};

// Append one extracted entry to data.html.
const saveEntry = (entry) => {
  const { term, definition } = entry;
  writeStream.write(`<h1>${term}</h1> <p>${definition}</p> \n`);
};

// Logged once osmosis reports that the run is complete.
const announceDone = () => console.log('finished!');

osmosis
  .get('https://www.myurl.com/glossary')
  .follow('@href') // browse each href in the url
  .set(entryFields)
  .data(saveEntry)
  .done(announceDone)
  .log(console.log)
  .error(console.log)
  .debug(console.log);