Elasticsearch 上的桶计数
Bucket count on Elasticsearch
我正在尝试利用可穿戴设备使用情况的统计信息来提取忠实用户。忠实用户是指最近 30 天内使用可穿戴设备超过 20 天、且平均每天使用可穿戴设备的时间大于 4 小时的用户。
因此,简而言之,忠实用户 =(最少使用 20 天 + 平均每天使用 > 4 小时)
在 Elasticsearch 中,使用记录文档按日期和使用时长(usage_duration)进行索引。
{
id:"AL-2930",
"usage_duration":4.5,
"sessionDate":"2020-05-01"
},
{
id:"AL-2930",
"usage_duration":5.5,
"sessionDate":"2020-05-02"
},
{
id:"AL-2931",
"usage_duration":3.5,
"sessionDate":"2020-05-01"
},
{
id:"AL-2931",
"usage_duration":3.0,
"sessionDate":"2020-05-02"
},
我正在尝试运行的查询给出了正确的结果。
{
"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" }
},
"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}
},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}
}
}
}
}
}
我得到的结果是这样的:
{
"took": 15,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2139,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"patients": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1926,
"buckets": [
{
"key": "BG-P-A100CR",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A102XF",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
},
{
"key": "BG-P-A103ZU",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A104IA",
"doc_count": 24,
"avg_usage": {
"value": 6.5
}
},
{
"key": "BG-P-A104ZL",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A106BT",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A110VY",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
}
]
}
}
我真正需要的是让查询返回所找到的桶的总数。我尝试了类似问题的回答,但没有帮助。
以下是否有帮助:
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" },
"size": 100, <----- Added this
"show_term_doc_count_error": true <----- Added this
},
"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}
},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}
},
"bucket_count":{
"bucket_script": {
"buckets_path": {
"count": "_count"
},
"script": "return params.count"
}
}
}
},
"mybucketcount":{
"stats_bucket": {
"buckets_path":"users._count"
}
}
}
}
我将 "script": "params.avgUsage > 4.0"
替换为 "script": "params.avgUsage > 3.0",
并针对您提到的文档集把 min_doc_count 设为 2,
然后运行上面的查询,得到了如下响应:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "AL-2930",
"doc_count" : 2,
"avg_usage" : {
"value" : 5.0
},
"bucket_count" : {
"value" : 2.0
}
},
{
"key" : "AL-2931",
"doc_count" : 2,
"avg_usage" : {
"value" : 3.25
},
"bucket_count" : {
"value" : 2.0
}
}
]
},
"mybucketcount" : {
"count" : 2, <---- Note this.
"min" : 2.0,
"max" : 2.0,
"avg" : 2.0,
"sum" : 4.0
}
}
}
我假设您需要的是 Terms Aggregation(即 users 聚合)返回的桶总数,
因此我只是额外添加了 Stats Bucket Aggregation(stats_bucket)来统计这些桶的数量。
如果有帮助请告诉我!
我正在尝试利用可穿戴设备使用情况的统计信息来提取忠实用户。忠实用户是指最近 30 天内使用可穿戴设备超过 20 天、且平均每天使用可穿戴设备的时间大于 4 小时的用户。因此,简而言之,忠实用户 =(最少使用 20 天 + 平均每天使用 > 4 小时)
在 Elasticsearch 中,使用记录文档按日期和使用时长(usage_duration)进行索引。
{
id:"AL-2930",
"usage_duration":4.5,
"sessionDate":"2020-05-01"
},
{
id:"AL-2930",
"usage_duration":5.5,
"sessionDate":"2020-05-02"
},
{
id:"AL-2931",
"usage_duration":3.5,
"sessionDate":"2020-05-01"
},
{
id:"AL-2931",
"usage_duration":3.0,
"sessionDate":"2020-05-02"
},
我正在尝试运行的查询给出了正确的结果。
{
"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" }
},
"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}
},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}
}
}
}
}
}
我得到的结果是这样的:
{
"took": 15,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2139,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"patients": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1926,
"buckets": [
{
"key": "BG-P-A100CR",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A102XF",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
},
{
"key": "BG-P-A103ZU",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A104IA",
"doc_count": 24,
"avg_usage": {
"value": 6.5
}
},
{
"key": "BG-P-A104ZL",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A106BT",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A110VY",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
}
]
}
}
我真正需要的是让查询返回所找到的桶的总数。我尝试了类似问题的回答,但没有帮助。
以下是否有帮助:
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" },
"size": 100, <----- Added this
"show_term_doc_count_error": true <----- Added this
},
"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}
},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}
},
"bucket_count":{
"bucket_script": {
"buckets_path": {
"count": "_count"
},
"script": "return params.count"
}
}
}
},
"mybucketcount":{
"stats_bucket": {
"buckets_path":"users._count"
}
}
}
}
我将 "script": "params.avgUsage > 4.0"
替换为 "script": "params.avgUsage > 3.0",
并针对您提到的文档集把 min_doc_count 设为 2,
然后运行上面的查询,得到了如下响应:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "AL-2930",
"doc_count" : 2,
"avg_usage" : {
"value" : 5.0
},
"bucket_count" : {
"value" : 2.0
}
},
{
"key" : "AL-2931",
"doc_count" : 2,
"avg_usage" : {
"value" : 3.25
},
"bucket_count" : {
"value" : 2.0
}
}
]
},
"mybucketcount" : {
"count" : 2, <---- Note this.
"min" : 2.0,
"max" : 2.0,
"avg" : 2.0,
"sum" : 4.0
}
}
}
我假设您需要的是 Terms Aggregation(即 users 聚合)返回的桶总数,
因此我只是额外添加了 Stats Bucket Aggregation(stats_bucket)来统计这些桶的数量。
如果有帮助请告诉我!