在 Snowflake 中实现 JavaScript UDF
UDF JavaScript implementation into Snowflake
我找到了一个有用的 JS 函数,想把它实现为 Snowflake 的 UDF,以便把复杂的计算转移到数据仓库(DWH)中完成。
http://trentrichardson.com/2010/04/06/compute-linear-regressions-in-javascript/
我尝试将上述函数改写为 SQL 存储过程:
/*
 * Question's attempt: registers JS_LNR_REG as a JavaScript stored procedure
 * that takes two ARRAY arguments and is declared to return ARRAY.
 *
 * The body only defines linearRegression(y, x), which derives a least-squares
 * slope and intercept from running sums over the paired samples. Nothing in
 * the body calls that function and there is no top-level return, so the
 * computed object never reaches the caller. sum_yy is accumulated but never
 * read.
 *
 * NOTE(review): this is created as a PROCEDURE, while the call shown with it
 * uses SELECT - presumably the source of the reported "Invalid identifier"
 * error; confirm against the Snowflake documentation.
 */
CREATE OR REPLACE PROCEDURE JS_LNR_REG(a array, b array)
RETURNS ARRAY
LANGUAGE javascript
AS
$$
function linearRegression(y,x){
var lr = {};
var n = y.length;
var sum_x = 0;
var sum_y = 0;
var sum_xy = 0;
var sum_xx = 0;
var sum_yy = 0;
for (var i = 0; i < y.length; i++) {
sum_x += x[i];
sum_y += y[i];
sum_xy += (x[i]*y[i]);
sum_xx += (x[i]*x[i]);
sum_yy += (y[i]*y[i]);
}
lr['slope'] = (n * sum_xy - sum_x * sum_y) / (n*sum_xx - sum_x * sum_x);
lr['intercept'] = (sum_y - lr.slope * sum_x)/n;
return lr;} ; $$;
执行时失败并报错:SQL compilation error: Invalid identifier
这很奇怪,因为该存储过程已经创建成功。我执行的语句如下:
/* Failing call from the question: both arguments are quoted string literals rather than ARRAY values, and the two lists hold 5 and 4 numbers respectively. */
SELECT JS_LNR_REG('[2,4,5,3,1]', '[5.2, 5.7, 5.0, 4.2]');
你已经很接近了。要传递数组,请尝试使用 ARRAY_CONSTRUCT 或 PARSE_JSON,如下所示:
/* Option 1: build each array argument from individual values. */
SELECT JS_LNR_REG(array_construct(1,2,3,4), array_construct(5,4,3,2));
/* Option 2: parse JSON array text and pass the parsed value as the array argument. */
SELECT JS_LNR_REG(parse_json('[1,2,3,4]'), parse_json('[5,4,3,2]'));
要使代码正常工作,请将其定义为返回 OBJECT 的函数(FUNCTION),并确保在 UDF 中实际调用 linearRegression():
CREATE OR REPLACE FUNCTION JS_LNR_REG(A array, B array)
RETURNS OBJECT
LANGUAGE JAVASCRIPT
STRICT
AS
$$
/**
 * Fits a least-squares line y = slope * x + intercept through paired samples.
 *
 * @param {number[]} y - Dependent-variable samples.
 * @param {number[]} x - Independent-variable samples, paired with y by index.
 * @returns {{slope: number, intercept: number}} Fitted coefficients. The
 *   values are not meaningful (typically NaN) when the fit is undefined,
 *   i.e. there are no samples or every x value is identical.
 * @throws {RangeError} If x and y do not have the same number of elements.
 */
function linearRegression(y, x) {
  var n = y.length;

  // The loop pairs x[i] with y[i]. A shorter x would read undefined and turn
  // every sum into NaN, and a longer x would be silently truncated, so
  // reject mismatched inputs up front instead of returning a bogus fit.
  if (x.length !== n) {
    throw new RangeError(
      'linearRegression: x and y must have the same length (got ' +
        x.length + ' and ' + n + ')'
    );
  }

  var sum_x = 0;
  var sum_y = 0;
  var sum_xy = 0;
  var sum_xx = 0;
  for (var i = 0; i < n; i++) {
    sum_x += x[i];
    sum_y += y[i];
    sum_xy += x[i] * y[i];
    sum_xx += x[i] * x[i];
  }

  var lr = {};
  // Closed-form ordinary least squares; the denominator is zero when all x
  // values coincide (or n is 0), which leaves the slope undefined.
  lr['slope'] = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x);
  lr['intercept'] = (sum_y - lr.slope * sum_x) / n;
  return lr;
}
return linearRegression(A,B)
; $$;
希望对您有所帮助。
我找到了一个有用的 JS 函数,想把它实现为 Snowflake 的 UDF,以便把复杂的计算转移到数据仓库(DWH)中完成。
http://trentrichardson.com/2010/04/06/compute-linear-regressions-in-javascript/
我尝试将上述函数改写为 SQL 存储过程:
/*
 * Question's attempt: registers JS_LNR_REG as a JavaScript stored procedure
 * that takes two ARRAY arguments and is declared to return ARRAY.
 *
 * The body only defines linearRegression(y, x), which derives a least-squares
 * slope and intercept from running sums over the paired samples. Nothing in
 * the body calls that function and there is no top-level return, so the
 * computed object never reaches the caller. sum_yy is accumulated but never
 * read.
 *
 * NOTE(review): this is created as a PROCEDURE, while the call shown with it
 * uses SELECT - presumably the source of the reported "Invalid identifier"
 * error; confirm against the Snowflake documentation.
 */
CREATE OR REPLACE PROCEDURE JS_LNR_REG(a array, b array)
RETURNS ARRAY
LANGUAGE javascript
AS
$$
function linearRegression(y,x){
var lr = {};
var n = y.length;
var sum_x = 0;
var sum_y = 0;
var sum_xy = 0;
var sum_xx = 0;
var sum_yy = 0;
for (var i = 0; i < y.length; i++) {
sum_x += x[i];
sum_y += y[i];
sum_xy += (x[i]*y[i]);
sum_xx += (x[i]*x[i]);
sum_yy += (y[i]*y[i]);
}
lr['slope'] = (n * sum_xy - sum_x * sum_y) / (n*sum_xx - sum_x * sum_x);
lr['intercept'] = (sum_y - lr.slope * sum_x)/n;
return lr;} ; $$;
执行时失败并报错:SQL compilation error: Invalid identifier
这很奇怪,因为该存储过程已经创建成功。我执行的语句如下:
/* Failing call from the question: both arguments are quoted string literals rather than ARRAY values, and the two lists hold 5 and 4 numbers respectively. */
SELECT JS_LNR_REG('[2,4,5,3,1]', '[5.2, 5.7, 5.0, 4.2]');
你已经很接近了。要传递数组,请尝试使用 ARRAY_CONSTRUCT 或 PARSE_JSON,如下所示:
/* Option 1: build each array argument from individual values. */
SELECT JS_LNR_REG(array_construct(1,2,3,4), array_construct(5,4,3,2));
/* Option 2: parse JSON array text and pass the parsed value as the array argument. */
SELECT JS_LNR_REG(parse_json('[1,2,3,4]'), parse_json('[5,4,3,2]'));
要使代码正常工作,请将其定义为返回 OBJECT 的函数(FUNCTION),并确保在 UDF 中实际调用 linearRegression():
CREATE OR REPLACE FUNCTION JS_LNR_REG(A array, B array)
RETURNS OBJECT
LANGUAGE JAVASCRIPT
STRICT
AS
$$
/**
 * Fits a least-squares line y = slope * x + intercept through paired samples.
 *
 * @param {number[]} y - Dependent-variable samples.
 * @param {number[]} x - Independent-variable samples, paired with y by index.
 * @returns {{slope: number, intercept: number}} Fitted coefficients. The
 *   values are not meaningful (typically NaN) when the fit is undefined,
 *   i.e. there are no samples or every x value is identical.
 * @throws {RangeError} If x and y do not have the same number of elements.
 */
function linearRegression(y, x) {
  var n = y.length;

  // The loop pairs x[i] with y[i]. A shorter x would read undefined and turn
  // every sum into NaN, and a longer x would be silently truncated, so
  // reject mismatched inputs up front instead of returning a bogus fit.
  if (x.length !== n) {
    throw new RangeError(
      'linearRegression: x and y must have the same length (got ' +
        x.length + ' and ' + n + ')'
    );
  }

  var sum_x = 0;
  var sum_y = 0;
  var sum_xy = 0;
  var sum_xx = 0;
  for (var i = 0; i < n; i++) {
    sum_x += x[i];
    sum_y += y[i];
    sum_xy += x[i] * y[i];
    sum_xx += x[i] * x[i];
  }

  var lr = {};
  // Closed-form ordinary least squares; the denominator is zero when all x
  // values coincide (or n is 0), which leaves the slope undefined.
  lr['slope'] = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x);
  lr['intercept'] = (sum_y - lr.slope * sum_x) / n;
  return lr;
}
return linearRegression(A,B)
; $$;
希望对您有所帮助。