使用 bigquery 创建身份验证路径
Create an authentication path with bigquery
我有一个关于 BigQuery SQL 的问题,希望您能帮助我。我已经苦苦搜索了一个星期,但一直没有成功。我有一个约 10 亿条记录的登录日志文件。该文件包含如下数据:
Time. User. Source computer. Destination computer.
1. U12. C04. C11
2. U14. C09. C14
3. U11. C04. C23
4. U12. C11. C14
5. U12. C23. C24
6. U14. C09. C14
我需要确定所有可能的身份验证路径如下:
User. Authentication Path.
U12. C04,C11
U12. C04,C11,C14
U12. C23,C24
U14. C09,C14
U11. C04,C23
它看起来是递归的,我尝试遵循一些页面路径示例,但无法使其工作。提前致谢!
以下内容适用于 BigQuery Standard SQL,对您来说应该是一个好的开始
It looks recursive
- 确实如此 - 借助 UDF(用户自定义函数)即可实现
#standardSQL
-- path(arr): JavaScript UDF that chains one user's logins into authentication paths.
--   arr     : the user's (s = source, d = destination) pairs, in the order supplied
--             by the caller (the query below aggregates them ORDER BY time).
--   returns : one comma-separated string per path, e.g. 'C04,C11,C14'.
-- Each login is consumed by at most one path. A path starts at the first login not
-- yet consumed and is extended greedily with the first unconsumed login whose
-- source equals the current destination.
CREATE TEMPORARY FUNCTION path(arr ARRAY<STRUCT<s STRING, d STRING>>)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  // used[k] becomes true once login k has been placed on a path
  // (unset entries are undefined, i.e. falsy).
  var used = [];
  var result = [];

  // Returns the remainder of a chain starting at computer `from`, each hop
  // prefixed with ','; returns '' when no unconsumed login starts at `from`.
  function extend(from) {
    // `var` keeps the counter local to each recursion level.
    for (var j = 0; j < arr.length; j++) {
      if (!used[j] && arr[j].s === from) {
        used[j] = true;
        return ',' + arr[j].d + extend(arr[j].d);
      }
    }
    return '';
  }

  for (var i = 0; i < arr.length; i++) {
    if (!used[i]) {
      used[i] = true;
      result.push(arr[i].s + ',' + arr[i].d + extend(arr[i].d));
    }
  }
  return result;
""";
-- One output row per user; `path` is the array of that user's path strings.
SELECT user,
  path(ARRAY_AGG(STRUCT<s STRING, d STRING>(source, destination) ORDER BY time)) AS path
FROM `yourproject.yourdataset.yourtable` t
GROUP BY user
您可以使用下面这些取自问题的虚拟数据来测试上述查询(注意:第 6 行在此处改为 C10 → C15,与问题中的 C09 → C14 不同):
#standardSQL
-- path(arr): JavaScript UDF that chains one user's logins into authentication paths.
--   arr     : the user's (s = source, d = destination) pairs, in the order supplied
--             by the caller (the query below aggregates them ORDER BY time).
--   returns : one comma-separated string per path, e.g. 'C04,C11,C14'.
-- Each login is consumed by at most one path. A path starts at the first login not
-- yet consumed and is extended greedily with the first unconsumed login whose
-- source equals the current destination.
CREATE TEMPORARY FUNCTION path(arr ARRAY<STRUCT<s STRING, d STRING>>)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  // used[k] becomes true once login k has been placed on a path
  // (unset entries are undefined, i.e. falsy).
  var used = [];
  var result = [];

  // Returns the remainder of a chain starting at computer `from`, each hop
  // prefixed with ','; returns '' when no unconsumed login starts at `from`.
  function extend(from) {
    // `var` keeps the counter local to each recursion level.
    for (var j = 0; j < arr.length; j++) {
      if (!used[j] && arr[j].s === from) {
        used[j] = true;
        return ',' + arr[j].d + extend(arr[j].d);
      }
    }
    return '';
  }

  for (var i = 0; i < arr.length; i++) {
    if (!used[i]) {
      used[i] = true;
      result.push(arr[i].s + ',' + arr[i].d + extend(arr[i].d));
    }
  }
  return result;
""";
-- Inline sample rows that shadow the real table name, so the query runs as-is.
WITH `yourproject.yourdataset.yourtable` AS (
  SELECT 1 time, 'U12' user, 'C04' source, 'C11' destination UNION ALL
  SELECT 2, 'U14', 'C09', 'C14' UNION ALL
  SELECT 3, 'U11', 'C04', 'C23' UNION ALL
  SELECT 4, 'U12', 'C11', 'C14' UNION ALL
  SELECT 5, 'U12', 'C23', 'C24' UNION ALL
  SELECT 6, 'U14', 'C10', 'C15'
)
-- One output row per user, listed by each user's earliest login time.
SELECT user,
  path(ARRAY_AGG(STRUCT<s STRING, d STRING>(source, destination) ORDER BY time)) AS path
FROM `yourproject.yourdataset.yourtable` t
GROUP BY user
ORDER BY MIN(time)
结果如下
Row user path
1 U12 C04,C11,C14
C23,C24
2 U14 C09,C14
C10,C15
3 U11 C04,C23
我有一个关于 BigQuery SQL 的问题,希望您能帮助我。我已经苦苦搜索了一个星期,但一直没有成功。我有一个约 10 亿条记录的登录日志文件。该文件包含如下数据:
Time. User. Source computer. Destination computer.
1. U12. C04. C11
2. U14. C09. C14
3. U11. C04. C23
4. U12. C11. C14
5. U12. C23. C24
6. U14. C09. C14
我需要确定所有可能的身份验证路径如下:
User. Authentication Path.
U12. C04,C11
U12. C04,C11,C14
U12. C23,C24
U14. C09,C14
U11. C04,C23
它看起来是递归的,我尝试遵循一些页面路径示例,但无法使其工作。提前致谢!
以下内容适用于 BigQuery Standard SQL,对您来说应该是一个好的开始
It looks recursive
- 确实如此 - 借助 UDF(用户自定义函数)即可实现
#standardSQL
-- path(arr): JavaScript UDF that chains one user's logins into authentication paths.
--   arr     : the user's (s = source, d = destination) pairs, in the order supplied
--             by the caller (the query below aggregates them ORDER BY time).
--   returns : one comma-separated string per path, e.g. 'C04,C11,C14'.
-- Each login is consumed by at most one path. A path starts at the first login not
-- yet consumed and is extended greedily with the first unconsumed login whose
-- source equals the current destination.
CREATE TEMPORARY FUNCTION path(arr ARRAY<STRUCT<s STRING, d STRING>>)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  // used[k] becomes true once login k has been placed on a path
  // (unset entries are undefined, i.e. falsy).
  var used = [];
  var result = [];

  // Returns the remainder of a chain starting at computer `from`, each hop
  // prefixed with ','; returns '' when no unconsumed login starts at `from`.
  function extend(from) {
    // `var` keeps the counter local to each recursion level.
    for (var j = 0; j < arr.length; j++) {
      if (!used[j] && arr[j].s === from) {
        used[j] = true;
        return ',' + arr[j].d + extend(arr[j].d);
      }
    }
    return '';
  }

  for (var i = 0; i < arr.length; i++) {
    if (!used[i]) {
      used[i] = true;
      result.push(arr[i].s + ',' + arr[i].d + extend(arr[i].d));
    }
  }
  return result;
""";
-- One output row per user; `path` is the array of that user's path strings.
SELECT user,
  path(ARRAY_AGG(STRUCT<s STRING, d STRING>(source, destination) ORDER BY time)) AS path
FROM `yourproject.yourdataset.yourtable` t
GROUP BY user
您可以使用下面这些取自问题的虚拟数据来测试上述查询(注意:第 6 行在此处改为 C10 → C15,与问题中的 C09 → C14 不同):
#standardSQL
-- path(arr): JavaScript UDF that chains one user's logins into authentication paths.
--   arr     : the user's (s = source, d = destination) pairs, in the order supplied
--             by the caller (the query below aggregates them ORDER BY time).
--   returns : one comma-separated string per path, e.g. 'C04,C11,C14'.
-- Each login is consumed by at most one path. A path starts at the first login not
-- yet consumed and is extended greedily with the first unconsumed login whose
-- source equals the current destination.
CREATE TEMPORARY FUNCTION path(arr ARRAY<STRUCT<s STRING, d STRING>>)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  // used[k] becomes true once login k has been placed on a path
  // (unset entries are undefined, i.e. falsy).
  var used = [];
  var result = [];

  // Returns the remainder of a chain starting at computer `from`, each hop
  // prefixed with ','; returns '' when no unconsumed login starts at `from`.
  function extend(from) {
    // `var` keeps the counter local to each recursion level.
    for (var j = 0; j < arr.length; j++) {
      if (!used[j] && arr[j].s === from) {
        used[j] = true;
        return ',' + arr[j].d + extend(arr[j].d);
      }
    }
    return '';
  }

  for (var i = 0; i < arr.length; i++) {
    if (!used[i]) {
      used[i] = true;
      result.push(arr[i].s + ',' + arr[i].d + extend(arr[i].d));
    }
  }
  return result;
""";
-- Inline sample rows that shadow the real table name, so the query runs as-is.
WITH `yourproject.yourdataset.yourtable` AS (
  SELECT 1 time, 'U12' user, 'C04' source, 'C11' destination UNION ALL
  SELECT 2, 'U14', 'C09', 'C14' UNION ALL
  SELECT 3, 'U11', 'C04', 'C23' UNION ALL
  SELECT 4, 'U12', 'C11', 'C14' UNION ALL
  SELECT 5, 'U12', 'C23', 'C24' UNION ALL
  SELECT 6, 'U14', 'C10', 'C15'
)
-- One output row per user, listed by each user's earliest login time.
SELECT user,
  path(ARRAY_AGG(STRUCT<s STRING, d STRING>(source, destination) ORDER BY time)) AS path
FROM `yourproject.yourdataset.yourtable` t
GROUP BY user
ORDER BY MIN(time)
结果如下
Row user path
1 U12 C04,C11,C14
C23,C24
2 U14 C09,C14
C10,C15
3 U11 C04,C23