如何将 IP 地址与子网匹配并获得 SUM
How to match IP addresses to subnets and get the SUM
你能帮我在 Clickhouse 中解决这个 SELECT 吗?
我想测量一些网络的流量统计。我正在使用一个包含两张表的 ClickHouse 数据库:
select * from network_account_db
┌─network───────────┬─source─┬─category─┐
│ 192.168.200.0/29  │ server │ general  │
│ 192.168.200.11/30 │ server │ general  │
│ 192.168.200.22/32 │ server │ general  │
└───────────────────┴────────┴──────────┘
select packetDate,packetDateTime,sampleRatio,srcIp,dstIp,length from traffic
┌─packetDate─┬──────packetDateTime─┬─sampleRatio─┬─────srcIp─┬──────dstIp─┬─length─┐
│ 2021-02-04 │ 2021-02-04 22:15:20 │ 1 │ 232998210 │ 767413237 │ 1280 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 918211986 │ 40 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1150185814 │ 30088 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1168387235 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1169107244 │ 104 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1169107244 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1224157376 │ 617 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1476066034 │ 1425 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1600411769 │ 4656 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1743465996 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1746016762 │ 108 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1746284673 │ 901 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 3194642526 │ 1976 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 2315259109 │ 2403 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 2540034693 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 2540034693 │ 52 │
我想像这样测量流量
select sum(length * sampleRatio ) AS total, category
from ( select network as net from network_account_db where source='server' )
where srcIp=IPv4StringToNum(net)
我需要获取,例如:
category=general
total=242422
你能帮我写出正确的 SELECT 吗?我有一张存放网络(子网)的表,另一张存放 IP 的表。
更新:
你好。我需要在 select 中添加其他字段,例如:
SELECT dictGet('network_account_db.reputation_rbl_db', 'category', tuple(srcIp)) AS category, dictGet('network_account_db.reputation_rbl_db', 'source', tuple(srcIp)) AS source, sum(length * sampleRatio) AS total_bytes, sum(numberOfPackets * sampleRatio) AS total_pps, IPv4StringToNum(srcIp), IPv4StringToNum(dstIp) FROM traffic WHERE dictHas('network_account_db.reputation_rbl_db', tuple(srcIp)) GROUP BY category order by packetDateTime desc```
但出现了类似这样的错误:
列 srcIp 不在聚合函数下,也不在 GROUP BY 中```
如果我将它添加到 GROUP BY,我会在响应中丢失一些行。
有没有办法用 UNION 或类似的方法添加这个字段?
尝试这种直接方式:
/*
 * Total traffic per category: each traffic row is attributed to the first
 * subnet of network_account_db whose address range contains srcIp.
 */
SELECT
    category,
    sum(length * sampleRatio) AS total
FROM
(
    WITH
        (
            /* collapse all subnets into one array of (category, first address, last address) tuples */
            SELECT groupArray((category, ip_num_from, ip_num_to))
            FROM
            (
                /* get the lower range and the higher range of the subnet */
                SELECT
                    category,
                    splitByChar('/', network) AS ip_parts,
                    /* IPv4CIDRToRange is documented to take an IPv4 value, so parse the address part with toIPv4 */
                    IPv4CIDRToRange(toIPv4(ip_parts[1]), toUInt8(ip_parts[2])) AS ip_ranges,
                    /* numeric bounds, so they can be compared with the numeric srcIp column */
                    toUInt32(ip_ranges.1) AS ip_num_from,
                    toUInt32(ip_ranges.2) AS ip_num_to
                FROM network_account_db
            )
        ) AS networks
    SELECT
        /* find the first subnet which covers IP */
        arrayFirst(n -> srcIp BETWEEN n.2 AND n.3, networks) AS network,
        network.1 AS category,
        length,
        sampleRatio
    FROM traffic
    /* exclude orphan IPs: rows with no covering subnet are expected to end up with an empty category */
    WHERE category != ''
)
GROUP BY category
考虑使用 ip_trie 字典来简化查找包含某个 IP 的子网:
/*
 * Dictionary over network_account_db keyed by the CIDR string in `network`,
 * using the ip_trie layout so an IP address can be looked up by the subnet containing it.
 * Created explicitly in database `test` because the lookups address it as 'test.networks_dict'.
 */
CREATE DICTIONARY test.networks_dict (
    network String,
    source String,
    category String
)
PRIMARY KEY network
SOURCE(CLICKHOUSE(host 'localhost' port 9000 db 'test' table 'network_account_db' user 'default'))
LAYOUT(IP_TRIE())
LIFETIME(3600);
/* Per-category traffic total, resolving each srcIp through the networks_dict dictionary. */
WITH tuple(srcIp) AS ip_key
SELECT
    dictGet('test.networks_dict', 'category', ip_key) AS category,
    sum(length * sampleRatio) AS total
FROM traffic
/* keep only rows whose source address has an entry in the dictionary */
WHERE dictHas('test.networks_dict', ip_key)
GROUP BY category
这里的思路借鉴自 “Add function to check if an IPv4/6 is in a list of subnets”(#6808)。
你能帮我在 Clickhouse 中解决这个 SELECT 吗?
我想测量一些网络的流量统计。我正在使用一个包含两张表的 ClickHouse 数据库:
select * from network_account_db
┌─network───────────┬─source─┬─category─┐
│ 192.168.200.0/29  │ server │ general  │
│ 192.168.200.11/30 │ server │ general  │
│ 192.168.200.22/32 │ server │ general  │
└───────────────────┴────────┴──────────┘
select packetDate,packetDateTime,sampleRatio,srcIp,dstIp,length from traffic
┌─packetDate─┬──────packetDateTime─┬─sampleRatio─┬─────srcIp─┬──────dstIp─┬─length─┐
│ 2021-02-04 │ 2021-02-04 22:15:20 │ 1 │ 232998210 │ 767413237 │ 1280 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 918211986 │ 40 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1150185814 │ 30088 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1168387235 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1169107244 │ 104 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1169107244 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1224157376 │ 617 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1476066034 │ 1425 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1600411769 │ 4656 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1743465996 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1746016762 │ 108 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 1746284673 │ 901 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 3194642526 │ 1976 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 2315259109 │ 2403 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 2540034693 │ 52 │
│ 2021-02-04 │ 2021-02-04 22:15:19 │ 1 │ 767413237 │ 2540034693 │ 52 │
我想像这样测量流量
select sum(length * sampleRatio ) AS total, category
from ( select network as net from network_account_db where source='server' )
where srcIp=IPv4StringToNum(net)
我需要获取,例如:
category=general
total=242422
你能帮我写出正确的 SELECT 吗?我有一张存放网络(子网)的表,另一张存放 IP 的表。
更新:
你好。我需要在 select 中添加其他字段,例如:
SELECT dictGet('network_account_db.reputation_rbl_db', 'category', tuple(srcIp)) AS category, dictGet('network_account_db.reputation_rbl_db', 'source', tuple(srcIp)) AS source, sum(length * sampleRatio) AS total_bytes, sum(numberOfPackets * sampleRatio) AS total_pps, IPv4StringToNum(srcIp), IPv4StringToNum(dstIp) FROM traffic WHERE dictHas('network_account_db.reputation_rbl_db', tuple(srcIp)) GROUP BY category order by packetDateTime desc```
但出现了类似这样的错误:
列 srcIp 不在聚合函数下,也不在 GROUP BY 中```
如果我将它添加到 GROUP BY,我会在响应中丢失一些行。
有没有办法用 UNION 或类似的方法添加这个字段?
尝试这种直接方式:
/*
 * Total traffic per category: each traffic row is attributed to the first
 * subnet of network_account_db whose address range contains srcIp.
 */
SELECT
    category,
    sum(length * sampleRatio) AS total
FROM
(
    WITH
        (
            /* collapse all subnets into one array of (category, first address, last address) tuples */
            SELECT groupArray((category, ip_num_from, ip_num_to))
            FROM
            (
                /* get the lower range and the higher range of the subnet */
                SELECT
                    category,
                    splitByChar('/', network) AS ip_parts,
                    /* IPv4CIDRToRange is documented to take an IPv4 value, so parse the address part with toIPv4 */
                    IPv4CIDRToRange(toIPv4(ip_parts[1]), toUInt8(ip_parts[2])) AS ip_ranges,
                    /* numeric bounds, so they can be compared with the numeric srcIp column */
                    toUInt32(ip_ranges.1) AS ip_num_from,
                    toUInt32(ip_ranges.2) AS ip_num_to
                FROM network_account_db
            )
        ) AS networks
    SELECT
        /* find the first subnet which covers IP */
        arrayFirst(n -> srcIp BETWEEN n.2 AND n.3, networks) AS network,
        network.1 AS category,
        length,
        sampleRatio
    FROM traffic
    /* exclude orphan IPs: rows with no covering subnet are expected to end up with an empty category */
    WHERE category != ''
)
GROUP BY category
考虑使用 ip_trie 字典来简化查找包含某个 IP 的子网:
/*
 * Dictionary over network_account_db keyed by the CIDR string in `network`,
 * using the ip_trie layout so an IP address can be looked up by the subnet containing it.
 * Created explicitly in database `test` because the lookups address it as 'test.networks_dict'.
 */
CREATE DICTIONARY test.networks_dict (
    network String,
    source String,
    category String
)
PRIMARY KEY network
SOURCE(CLICKHOUSE(host 'localhost' port 9000 db 'test' table 'network_account_db' user 'default'))
LAYOUT(IP_TRIE())
LIFETIME(3600);
/* Per-category traffic total, resolving each srcIp through the networks_dict dictionary. */
WITH tuple(srcIp) AS ip_key
SELECT
    dictGet('test.networks_dict', 'category', ip_key) AS category,
    sum(length * sampleRatio) AS total
FROM traffic
/* keep only rows whose source address has an entry in the dictionary */
WHERE dictHas('test.networks_dict', ip_key)
GROUP BY category
这里的思路借鉴自 “Add function to check if an IPv4/6 is in a list of subnets”(#6808)。