带聚合的 Elasticsearch 交叉索引查询
Elasticsearch cross-index query with aggregations
我使用:Elasticsearch 7.7,Kibana 7.7
例如,我们取两个索引:
具有简单映射的用户索引:
PUT /user_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"user_phone": { "type": "text" },
"name": { "type": "text" }
}
}
}
具有简单映射的 check 索引:
PUT /check_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"price": { "type": "integer" },
"goods_count": {"type": "integer"}
}
}
}
我想像这样构建 table 可视化:
________________________________________________________________________
user_id | user_phone | average_price | sum_goods_count |
___________|_______________|_____________________|______________________
1 | 123 | 512 | 64 |
___________|_______________|_____________________|______________________
2 | 456 | 256 | 16 |
___________|_______________|_____________________|______________________
所以我的问题是:
这可以实现吗?
我是否理解正确,我需要查询这两个索引,获取用户列表,然后循环创建带有支票的购物车?
首先,你应该尽可能在 ES 中对数据进行反规范化(de-normalize),以获得它所提供的最佳性能和功能。
我仔细阅读了你提供的示例以及问题下的评论:将 user 和 check 索引合并成单个索引,似乎就可以轻松满足你的用例(use-case),如下面的示例所示。
索引映射
{
"mappings": {
"properties": {
"user_id": {
"type": "text",
"fielddata": "true"
},
"price": {
"type": "integer"
},
"goods_count": {
"type": "integer"
}
}
}
}
索引数据:
使用上面定义的索引映射,索引这三个文档,其中一个文档具有 "user_id":"1"
,两个文档具有 "user_id":"2"
{
"user_id":"1",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":100,
"goods_count":200
}
搜索查询:
详见 ES 官方文档中的 Terms Aggregation、Top Hits Aggregation、Sum Aggregation 和 Avg Aggregation。
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "user_id"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"user_id"
]
}
}
},
"avg_price": {
"avg": {
"field": "price"
}
},
"goods_count": {
"sum": {
"field": "goods_count"
}
}
}
}
}
}
搜索结果:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
]
},
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"user_id": "2"
}
},
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"user_id": "2"
}
}
]
}
},
"avg_price": {
"value": 300.0
},
"goods_count": {
"value": 300.0
}
},
{
"key": "1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"user_id": "1"
}
}
]
}
},
"avg_price": {
"value": 500.0
},
"goods_count": {
"value": 100.0
}
}
]
}
}
}
正如您在上面的搜索结果中看到的,"user_id":"2" 的平均价格是 (500+100)/2 = 300,goods_count 的总和是 100+200 = 300。
同样,"user_id":"1" 的平均价格是 500/1 = 500,并且 goods_count 的总和是 100。
我使用:Elasticsearch 7.7,Kibana 7.7
例如,我们取两个索引:
具有简单映射的用户索引:
PUT /user_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"user_phone": { "type": "text" },
"name": { "type": "text" }
}
}
}
具有简单映射的 check 索引:
PUT /check_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"price": { "type": "integer" },
"goods_count": {"type": "integer"}
}
}
}
我想像这样构建 table 可视化:
________________________________________________________________________
user_id | user_phone | average_price | sum_goods_count |
___________|_______________|_____________________|______________________
1 | 123 | 512 | 64 |
___________|_______________|_____________________|______________________
2 | 456 | 256 | 16 |
___________|_______________|_____________________|______________________
所以我的问题是:
这可以实现吗?
我是否理解正确,我需要查询这两个索引,获取用户列表,然后循环创建带有支票的购物车?
首先,你应该尽可能在 ES 中对数据进行反规范化(de-normalize),以获得它所提供的最佳性能和功能。
我仔细阅读了你提供的示例以及问题下的评论:将 user 和 check 索引合并成单个索引,似乎就可以轻松满足你的用例(use-case),如下面的示例所示。
索引映射
{
"mappings": {
"properties": {
"user_id": {
"type": "text",
"fielddata": "true"
},
"price": {
"type": "integer"
},
"goods_count": {
"type": "integer"
}
}
}
}
索引数据:
使用上面定义的索引映射,索引这三个文档,其中一个文档具有 "user_id":"1"
,两个文档具有 "user_id":"2"
{
"user_id":"1",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":100,
"goods_count":200
}
搜索查询:
详见 ES 官方文档中的 Terms Aggregation、Top Hits Aggregation、Sum Aggregation 和 Avg Aggregation。
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "user_id"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"user_id"
]
}
}
},
"avg_price": {
"avg": {
"field": "price"
}
},
"goods_count": {
"sum": {
"field": "goods_count"
}
}
}
}
}
}
搜索结果:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
]
},
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"user_id": "2"
}
},
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"user_id": "2"
}
}
]
}
},
"avg_price": {
"value": 300.0
},
"goods_count": {
"value": 300.0
}
},
{
"key": "1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"user_id": "1"
}
}
]
}
},
"avg_price": {
"value": 500.0
},
"goods_count": {
"value": 100.0
}
}
]
}
}
}
正如您在上面的搜索结果中看到的,"user_id":"2" 的平均价格是 (500+100)/2 = 300,goods_count 的总和是 100+200 = 300。
同样,"user_id":"1" 的平均价格是 500/1 = 500,并且 goods_count 的总和是 100。