.push 不是网络爬虫中的函数
.push is not a function in web crawler
我正在写一个节点JS网络爬虫class,我遇到了以下错误,this.textInvertedIndex[word].push is not a function
。经过进一步检查,我意识到出于某种原因 this.textInvertedIndex[word]
被写为本机对象 function Object({ [native code] })
。对于最初的几次迭代,通过控制台日志记录 this.textInvertedIndex 一切似乎都很好,因为它是一个数组对象。但是突然出现了这个错误。是否有我隐式重写 textInvertedIndex 的代码部分?
这里是相关的class:
function Crawler(queue, maxIndexSize) {
this.queue = queue;
this.maxIndexSize = maxIndexSize;
this.findChunks = () => {
let currentChunk;
let minimumDistance = Infinity;
for (i = 1; i <= this.maxIndexSize; i++) {
if (this.maxIndexSize % i === 0) {
const newDistance = Math.abs(i - 30);
if (newDistance < minimumDistance) {
minimumDistance = newDistance;
currentChunk = i;
} else {
return currentChunk
};
};
};
};
this.chunks = this.findChunks();
this.chunkSize = this.maxIndexSize / this.chunks;
this.totalWordOccurances = {};
this.imageInvertedIndex = {};
this.textInvertedIndex = {};
this.images = [];
this.sites = [];
this.seen = {};
this.write = (url, html) => {
const documentId = this.sites.length;
const website = new Website(url, html);
const title = website.title();
const content = website.content(title);
const words = content.filter(item => typeof item !== "object");
const wordsLength = words.length;
const query = new Query(words);
const individualWords = query.individualize(words);
this.seen[url] = true;
this.sites.push({
url,
title,
description: website.description()
});
for (word of individualWords) {
const normalizedTf = query.count(word) / wordsLength;
const textInvertedIndexEntry = {
documentId,
normalizedTf
};
if (this.textInvertedIndex[word]) {
this.textInvertedIndex[word].push(textInvertedIndexEntry);
} else {
this.textInvertedIndex[word] = [textInvertedIndexEntry];
};
if (this.totalWordOccurances[word]) {
this.totalWordOccurances[word] += 1;
} else {
this.totalWordOccurances[word] = 1;
};
};
for (i = 0; i < content.length; i++) {
const item = content[i];
if (typeof item === "object") {
const imageId = this.images.length;
this.images.push(item);
for (word of individualWords) {
const imageScore = getImageScore(i, word, content);
const imageInvertedIndexEntry = {
imageId,
imageScore
};
if (this.imageInvertedIndex[word]) {
this.imageInvertedIndex[word].push(imageInvertedIndexEntry);
} else {
this.imageInvertedIndex[word] = [imageInvertedIndexEntry];
};
};
};
};
};
this.crawl = async () => {
while (this.sites.length !== this.maxIndexSize) {
let nextQueue = [];
const websitesUnfiltered = await Promise.all(this.queue.map((url) => {
const website = new Website(url);
return website.request();
}));
const websitesToAdd = this.maxIndexSize - this.sites.length;
let websites = websitesUnfiltered.filter(message => message !== "Failure")
.slice(0, websitesToAdd);
for (site of websites) {
const url = site.url;
const htmlCode = site.htmlCode;
const website = new Website(url, htmlCode);
this.write(url, htmlCode);
nextQueue = nextQueue.concat(website.urls());
};
nextQueue = new Query(nextQueue.filter(url => !this.seen[url]))
.individualize();
this.queue = nextQueue;
};
};
};
这样调用
const crawler = new Crawler(["https://stanford.edu/"], 25000000);
crawler.crawl();
this.textInvertedIndex = {};
正在定义一个 push 不是有效函数的对象。您可以通过将其定义为 this.textInvertedIndex = [];
将其更改为数组,否则您可以将 key/value 条目添加到对象中,因为它的定义如下: this.textInvertedIndex[key] = value;
原来,我的密钥正在访问 this.textInvertedIndex[word]
。单词是 constructor
。 constructor
已经是内置对象 属性,因此永远不能将其重写为定义了 .push
的数组。要解决此问题,请将所有对象键设为大写,这样 constructor
将变为 CONSTRUCTOR
,从而确保永远不会调用已经存在的对象属性。
我正在写一个节点JS网络爬虫class,我遇到了以下错误,this.textInvertedIndex[word].push is not a function
。经过进一步检查,我意识到出于某种原因 this.textInvertedIndex[word]
被写为本机对象 function Object({ [native code] })
。对于最初的几次迭代,通过控制台日志记录 this.textInvertedIndex 一切似乎都很好,因为它是一个数组对象。但是突然出现了这个错误。是否有我隐式重写 textInvertedIndex 的代码部分?
这里是相关的class:
function Crawler(queue, maxIndexSize) {
this.queue = queue;
this.maxIndexSize = maxIndexSize;
this.findChunks = () => {
let currentChunk;
let minimumDistance = Infinity;
for (i = 1; i <= this.maxIndexSize; i++) {
if (this.maxIndexSize % i === 0) {
const newDistance = Math.abs(i - 30);
if (newDistance < minimumDistance) {
minimumDistance = newDistance;
currentChunk = i;
} else {
return currentChunk
};
};
};
};
this.chunks = this.findChunks();
this.chunkSize = this.maxIndexSize / this.chunks;
this.totalWordOccurances = {};
this.imageInvertedIndex = {};
this.textInvertedIndex = {};
this.images = [];
this.sites = [];
this.seen = {};
this.write = (url, html) => {
const documentId = this.sites.length;
const website = new Website(url, html);
const title = website.title();
const content = website.content(title);
const words = content.filter(item => typeof item !== "object");
const wordsLength = words.length;
const query = new Query(words);
const individualWords = query.individualize(words);
this.seen[url] = true;
this.sites.push({
url,
title,
description: website.description()
});
for (word of individualWords) {
const normalizedTf = query.count(word) / wordsLength;
const textInvertedIndexEntry = {
documentId,
normalizedTf
};
if (this.textInvertedIndex[word]) {
this.textInvertedIndex[word].push(textInvertedIndexEntry);
} else {
this.textInvertedIndex[word] = [textInvertedIndexEntry];
};
if (this.totalWordOccurances[word]) {
this.totalWordOccurances[word] += 1;
} else {
this.totalWordOccurances[word] = 1;
};
};
for (i = 0; i < content.length; i++) {
const item = content[i];
if (typeof item === "object") {
const imageId = this.images.length;
this.images.push(item);
for (word of individualWords) {
const imageScore = getImageScore(i, word, content);
const imageInvertedIndexEntry = {
imageId,
imageScore
};
if (this.imageInvertedIndex[word]) {
this.imageInvertedIndex[word].push(imageInvertedIndexEntry);
} else {
this.imageInvertedIndex[word] = [imageInvertedIndexEntry];
};
};
};
};
};
this.crawl = async () => {
while (this.sites.length !== this.maxIndexSize) {
let nextQueue = [];
const websitesUnfiltered = await Promise.all(this.queue.map((url) => {
const website = new Website(url);
return website.request();
}));
const websitesToAdd = this.maxIndexSize - this.sites.length;
let websites = websitesUnfiltered.filter(message => message !== "Failure")
.slice(0, websitesToAdd);
for (site of websites) {
const url = site.url;
const htmlCode = site.htmlCode;
const website = new Website(url, htmlCode);
this.write(url, htmlCode);
nextQueue = nextQueue.concat(website.urls());
};
nextQueue = new Query(nextQueue.filter(url => !this.seen[url]))
.individualize();
this.queue = nextQueue;
};
};
};
这样调用
const crawler = new Crawler(["https://stanford.edu/"], 25000000);
crawler.crawl();
this.textInvertedIndex = {};
正在定义一个 push 不是有效函数的对象。您可以通过将其定义为 this.textInvertedIndex = [];
将其更改为数组,否则您可以将 key/value 条目添加到对象中,因为它的定义如下: this.textInvertedIndex[key] = value;
原来,我的密钥正在访问 this.textInvertedIndex[word]
。单词是 constructor
。 constructor
已经是内置对象 属性,因此永远不能将其重写为定义了 .push
的数组。要解决此问题,请将所有对象键设为大写,这样 constructor
将变为 CONSTRUCTOR
,从而确保永远不会调用已经存在的对象属性。