在 JavaScript 中实现 One Rule 算法
Implementing the OneR (One Rule) algorithm in JavaScript
OneR,"One Rule" 的缩写,是一种简单而准确的分类算法,它为数据中的每个预测变量生成一个规则,然后选择总误差最小的规则作为其 "one rule"。
我试图在 GitHub 上找到代码示例,但只找到一个,用 R 语言开发的。我怎样才能在 Javascript 中实现这个算法?
我试过什么?
我正在尝试按照以下文章中的示例来实现:
https://www.saedsayad.com/oner.htm
class OneR {
  /**
   * Stores the dataset that `predict()` will walk over.
   *
   * Every row is an array: the last element is the target (class) value,
   * all elements before it are predictor values.
   *
   * Column meaning in the example dataset:
   *   | Outlook | Temp | Humidity | Windy | Play Golf |
   *
   * Example row:
   *   ['rainy', 'hot', 'high', 0, 0]
   *
   * Boolean columns are encoded as 0 (false) or 1 (true).
   *
   * @param {Array<Array>} [data=[]] - Rows of the dataset.
   */
  constructor(data = []) {
    this.data = data;
    // Placeholder for frequency tables; nothing in this class fills it yet.
    this.frequences = {};
  }

  /**
   * Skeleton of the OneR procedure.
   *
   * For now it only walks every (predictor, row) pair and logs the row's
   * target value; the remaining OneR steps are listed as comments below.
   * Logs "Cannot predict!" when there is no data.
   *
   * @returns {undefined}
   */
  predict() {
    const rows = this.data;

    // Guard clause: nothing to do without at least one row.
    if (!rows || !(rows.length > 0)) {
      console.log("Cannot predict!");
      return;
    }

    // The target value sits in the last column, so its index equals the
    // number of predictor columns.
    const targetColumn = rows[0].length - 1;

    // Step 1: visit each predictor column.
    for (let predictorIndex = 0; predictorIndex < targetColumn; predictorIndex += 1) {
      // Step 2: for each value of that predictor, a rule has to be built.
      for (let rowIndex = 0; rowIndex < rows.length; rowIndex += 1) {
        // Step 2a: count how often each target (class) value appears.
        const targetValue = rows[rowIndex][targetColumn];
        console.log(targetValue);
        // Step 2b (not implemented): find the most frequent class.
        // Step 2c (not implemented): make the rule assign that class to
        // this value of the predictor.
      }
      // Step 3 (not implemented): compute the total error of this
      // predictor's rules.
    }
    // Step 4 (not implemented): choose the predictor with the smallest
    // total error.
  }
}
// CommonJS export: expose the OneR class as a named property of this module.
module.exports = { OneR: OneR };
我已经从 csv 加载数据
rainy,hot,high,0,0
rainy,hot,high,1,0
overcast,hot,high,0,1
sunny,mild,high,0,1
sunny,cool,normal,0,1
sunny,cool,normal,1,0
overcast,cool,normal,1,1
rainy,mild,high,0,0
rainy,cool,normal,0,1
sunny,mild,normal,0,1
rainy,mild,normal,1,1
overcast,mild,high,1,1
overcast,hot,normal,0,1
sunny,mild,high,1,0
如果我对频率表 (frequency table) 的比较方式理解正确(即选择错误率最低、也就是准确率最高的那一个),那么您可以使用 Map,以便在必要时处理非字符串类型的值。
虽然您示例中的目标值是布尔值(0 或 1),但一般来说它们可能来自更大的取值范围,例如 "call"、"fold"、"raise"、"check"。
您的模板代码创建了一个 class,但老实说,我看不出这样做有什么好处,因为您实际上只能对它执行一种操作。当然,如果除了单规则预测之外您还有其他打算,那么使用 class 可能是有意义的。在这里,我只提供一个函数:它接收数据,并返回所选预测变量的编号(从零开始的列号)以及相应的规则表:
/**
 * Builds a OneR ("One Rule") classifier from a table of rows.
 *
 * Every row is an array whose last element is the target (class) value and
 * whose other elements are predictor values. The number of predictor columns
 * is taken from the first row. For each predictor column, each distinct
 * predictor value is mapped to the target value it co-occurs with most often;
 * the column whose rule classifies the most rows correctly wins. When two
 * columns tie, the one with the lower index is kept. When two target values
 * tie for a predictor value, the one that appears first in the dataset wins.
 *
 * @param {Array<Array>} data - Dataset rows; predictor and target values may
 *   be of any type (they are used as Map keys).
 * @returns {{predictor: number, rule: Array<Array>}|undefined} The zero-based
 *   index of the chosen predictor column and its rule as a list of
 *   [predictorValue, predictedTargetValue] pairs in order of first appearance.
 *   Returns undefined (after logging "Cannot predict!") when the dataset is
 *   missing, empty, or has no predictor columns.
 */
function oneR(data) {
  // `||` is required here: with `&&` a missing dataset throws on `.length`
  // and an empty array slips through to `data[0].length`.
  if (!data || !data.length) return console.log("Cannot predict!");

  const predictorCount = data[0].length - 1;
  // Without at least one predictor column there is no rule to choose from.
  if (predictorCount < 1) return console.log("Cannot predict!");

  // Unique target values, in order of first appearance. This order also
  // decides ties between equally frequent target values below.
  const classes = [...new Set(data.map((row) => row[predictorCount]))];

  let bestAccuracy = -1;
  let bestPredictor;
  let bestRule;

  for (let i = 0; i < predictorCount; i++) {
    // Frequency table for this predictor: predictor value -> (target -> count).
    // An inner table is allocated only once per distinct predictor value.
    const freq = new Map();
    for (const row of data) {
      let counts = freq.get(row[i]);
      if (counts === undefined) {
        counts = new Map(classes.map((targetValue) => [targetValue, 0]));
        freq.set(row[i], counts);
      }
      const targetValue = row[predictorCount];
      counts.set(targetValue, counts.get(targetValue) + 1);
    }

    // Turn the frequency table into a rule and count the rows it gets right.
    const rule = new Map();
    let accuracy = 0;
    for (const [predictorValue, counts] of freq) {
      let maxCount = 0;
      let chosenTargetValue;
      for (const [targetValue, count] of counts) {
        // Strict `>` keeps the earliest target value on ties.
        if (count > maxCount) {
          maxCount = count;
          chosenTargetValue = targetValue;
        }
      }
      rule.set(predictorValue, chosenTargetValue);
      accuracy += maxCount;
    }

    // Strict `>` keeps the lowest-indexed predictor on ties.
    if (accuracy > bestAccuracy) {
      bestAccuracy = accuracy;
      bestPredictor = i;
      bestRule = rule;
    }
  }

  return {
    predictor: bestPredictor, // zero-based column number
    rule: [...bestRule.entries()],
  };
}
// Sample dataset: four predictor columns followed by the target column.
// Boolean-like columns are encoded as 0 / 1.
const data = [
  ['rainy', 'hot', 'high', 0, 0],
  ['rainy', 'hot', 'high', 1, 0],
  ['overcast', 'hot', 'high', 0, 1],
  ['sunny', 'mild', 'high', 0, 1],
  ['sunny', 'cool', 'normal', 0, 1],
  ['sunny', 'cool', 'normal', 1, 0],
  ['overcast', 'cool', 'normal', 1, 1],
  ['rainy', 'mild', 'high', 0, 0],
  ['rainy', 'cool', 'normal', 0, 1],
  ['sunny', 'mild', 'normal', 0, 1],
  ['rainy', 'mild', 'normal', 1, 1],
  ['overcast', 'mild', 'high', 1, 1],
  ['overcast', 'hot', 'normal', 0, 1],
  ['sunny', 'mild', 'high', 1, 0],
];

// Run OneR on the sample and print the chosen predictor with its rule.
const result = oneR(data);
console.log(result);
OneR,"One Rule" 的缩写,是一种简单而准确的分类算法,它为数据中的每个预测变量生成一个规则,然后选择总误差最小的规则作为其 "one rule"。
我试图在 GitHub 上找到代码示例,但只找到一个,用 R 语言开发的。我怎样才能在 Javascript 中实现这个算法?
我试过什么? 我正在尝试按照以下文章中的示例来实现: https://www.saedsayad.com/oner.htm
class OneR {
  /**
   * Stores the dataset that `predict()` will walk over.
   *
   * Every row is an array: the last element is the target (class) value,
   * all elements before it are predictor values.
   *
   * Column meaning in the example dataset:
   *   | Outlook | Temp | Humidity | Windy | Play Golf |
   *
   * Example row:
   *   ['rainy', 'hot', 'high', 0, 0]
   *
   * Boolean columns are encoded as 0 (false) or 1 (true).
   *
   * @param {Array<Array>} [data=[]] - Rows of the dataset.
   */
  constructor(data = []) {
    this.data = data;
    // Placeholder for frequency tables; nothing in this class fills it yet.
    this.frequences = {};
  }

  /**
   * Skeleton of the OneR procedure.
   *
   * For now it only walks every (predictor, row) pair and logs the row's
   * target value; the remaining OneR steps are listed as comments below.
   * Logs "Cannot predict!" when there is no data.
   *
   * @returns {undefined}
   */
  predict() {
    const rows = this.data;

    // Guard clause: nothing to do without at least one row.
    if (!rows || !(rows.length > 0)) {
      console.log("Cannot predict!");
      return;
    }

    // The target value sits in the last column, so its index equals the
    // number of predictor columns.
    const targetColumn = rows[0].length - 1;

    // Step 1: visit each predictor column.
    for (let predictorIndex = 0; predictorIndex < targetColumn; predictorIndex += 1) {
      // Step 2: for each value of that predictor, a rule has to be built.
      for (let rowIndex = 0; rowIndex < rows.length; rowIndex += 1) {
        // Step 2a: count how often each target (class) value appears.
        const targetValue = rows[rowIndex][targetColumn];
        console.log(targetValue);
        // Step 2b (not implemented): find the most frequent class.
        // Step 2c (not implemented): make the rule assign that class to
        // this value of the predictor.
      }
      // Step 3 (not implemented): compute the total error of this
      // predictor's rules.
    }
    // Step 4 (not implemented): choose the predictor with the smallest
    // total error.
  }
}
// CommonJS export: expose the OneR class as a named property of this module.
module.exports = { OneR: OneR };
我已经从 csv 加载数据
rainy,hot,high,0,0
rainy,hot,high,1,0
overcast,hot,high,0,1
sunny,mild,high,0,1
sunny,cool,normal,0,1
sunny,cool,normal,1,0
overcast,cool,normal,1,1
rainy,mild,high,0,0
rainy,cool,normal,0,1
sunny,mild,normal,0,1
rainy,mild,normal,1,1
overcast,mild,high,1,1
overcast,hot,normal,0,1
sunny,mild,high,1,0
如果我对频率表 (frequency table) 的比较方式理解正确(即选择错误率最低、也就是准确率最高的那一个),那么您可以使用 Map,以便在必要时处理非字符串类型的值。
虽然您示例中的目标值是布尔值(0 或 1),但一般来说它们可能来自更大的取值范围,例如 "call"、"fold"、"raise"、"check"。
您的模板代码创建了一个 class,但老实说,我看不出这样做有什么好处,因为您实际上只能对它执行一种操作。当然,如果除了单规则预测之外您还有其他打算,那么使用 class 可能是有意义的。在这里,我只提供一个函数:它接收数据,并返回所选预测变量的编号(从零开始的列号)以及相应的规则表:
/**
 * Builds a OneR ("One Rule") classifier from a table of rows.
 *
 * Every row is an array whose last element is the target (class) value and
 * whose other elements are predictor values. The number of predictor columns
 * is taken from the first row. For each predictor column, each distinct
 * predictor value is mapped to the target value it co-occurs with most often;
 * the column whose rule classifies the most rows correctly wins. When two
 * columns tie, the one with the lower index is kept. When two target values
 * tie for a predictor value, the one that appears first in the dataset wins.
 *
 * @param {Array<Array>} data - Dataset rows; predictor and target values may
 *   be of any type (they are used as Map keys).
 * @returns {{predictor: number, rule: Array<Array>}|undefined} The zero-based
 *   index of the chosen predictor column and its rule as a list of
 *   [predictorValue, predictedTargetValue] pairs in order of first appearance.
 *   Returns undefined (after logging "Cannot predict!") when the dataset is
 *   missing, empty, or has no predictor columns.
 */
function oneR(data) {
  // `||` is required here: with `&&` a missing dataset throws on `.length`
  // and an empty array slips through to `data[0].length`.
  if (!data || !data.length) return console.log("Cannot predict!");

  const predictorCount = data[0].length - 1;
  // Without at least one predictor column there is no rule to choose from.
  if (predictorCount < 1) return console.log("Cannot predict!");

  // Unique target values, in order of first appearance. This order also
  // decides ties between equally frequent target values below.
  const classes = [...new Set(data.map((row) => row[predictorCount]))];

  let bestAccuracy = -1;
  let bestPredictor;
  let bestRule;

  for (let i = 0; i < predictorCount; i++) {
    // Frequency table for this predictor: predictor value -> (target -> count).
    // An inner table is allocated only once per distinct predictor value.
    const freq = new Map();
    for (const row of data) {
      let counts = freq.get(row[i]);
      if (counts === undefined) {
        counts = new Map(classes.map((targetValue) => [targetValue, 0]));
        freq.set(row[i], counts);
      }
      const targetValue = row[predictorCount];
      counts.set(targetValue, counts.get(targetValue) + 1);
    }

    // Turn the frequency table into a rule and count the rows it gets right.
    const rule = new Map();
    let accuracy = 0;
    for (const [predictorValue, counts] of freq) {
      let maxCount = 0;
      let chosenTargetValue;
      for (const [targetValue, count] of counts) {
        // Strict `>` keeps the earliest target value on ties.
        if (count > maxCount) {
          maxCount = count;
          chosenTargetValue = targetValue;
        }
      }
      rule.set(predictorValue, chosenTargetValue);
      accuracy += maxCount;
    }

    // Strict `>` keeps the lowest-indexed predictor on ties.
    if (accuracy > bestAccuracy) {
      bestAccuracy = accuracy;
      bestPredictor = i;
      bestRule = rule;
    }
  }

  return {
    predictor: bestPredictor, // zero-based column number
    rule: [...bestRule.entries()],
  };
}
// Sample dataset: four predictor columns followed by the target column.
// Boolean-like columns are encoded as 0 / 1.
const data = [
  ['rainy', 'hot', 'high', 0, 0],
  ['rainy', 'hot', 'high', 1, 0],
  ['overcast', 'hot', 'high', 0, 1],
  ['sunny', 'mild', 'high', 0, 1],
  ['sunny', 'cool', 'normal', 0, 1],
  ['sunny', 'cool', 'normal', 1, 0],
  ['overcast', 'cool', 'normal', 1, 1],
  ['rainy', 'mild', 'high', 0, 0],
  ['rainy', 'cool', 'normal', 0, 1],
  ['sunny', 'mild', 'normal', 0, 1],
  ['rainy', 'mild', 'normal', 1, 1],
  ['overcast', 'mild', 'high', 1, 1],
  ['overcast', 'hot', 'normal', 0, 1],
  ['sunny', 'mild', 'high', 1, 0],
];

// Run OneR on the sample and print the chosen predictor with its rule.
const result = oneR(data);
console.log(result);