Elasticsearch 上的桶计数

Bucket count on Elasticsearch

我正在尝试利用可穿戴设备的使用统计数据来筛选出忠实用户。忠实用户是指在最近 30 天内使用可穿戴设备超过 20 天,且平均每天使用时长大于 4 小时的用户。简而言之:忠实用户 =(至少使用 20 天)且(平均每天使用时长 > 4 小时)。

在 Elasticsearch 中,使用记录以文档形式按日期和使用时长建立索引,示例如下:

{
id:"AL-2930",
"usage_duration":4.5,
"sessionDate":"2020-05-01" 
},
{
id:"AL-2930",
"usage_duration":5.5,
"sessionDate":"2020-05-02" 
},
{
id:"AL-2931",
"usage_duration":3.5,
"sessionDate":"2020-05-01" 
},
{
id:"AL-2931",
"usage_duration":3.0,
"sessionDate":"2020-05-02" 
},

我运行的查询如下,它给出了正确的结果。

{

  "aggs": {
    "users": {
     "terms": {
        "field": "id",
        "min_doc_count": 20,
        "order" : { "_key" : "asc" }
      },
   
      "aggs": {
        "avg_usage": {
          "avg": {
            "field": "usage_duration"
          }
           
        },
        "usage_filter": {
          "bucket_selector": {
            "buckets_path": {
              "avgUsage": "avg_usage"
            },
            "script": "params.avgUsage > 4.0"
          }
        
        }
        
      }
    }

  }


}

我得到的结果是这样的:

{
    "took": 15,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2139,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "patients": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 1926,
            "buckets": [
                {
                    "key": "BG-P-A100CR",
                    "doc_count": 24,
                    "avg_usage": {
                        "value": 4.5
                    }
                },
                {
                    "key": "BG-P-A102XF",
                    "doc_count": 24,
                    "avg_usage": {
                        "value": 5.5
                    }
                },
                {
                    "key": "BG-P-A103ZU",
                    "doc_count": 24,
                    "avg_usage": {
                        "value": 5.0
                    }
                },
                {
                    "key": "BG-P-A104IA",
                    "doc_count": 24,
                    "avg_usage": {
                        "value": 6.5
                    }
                },
                {
                    "key": "BG-P-A104ZL",
                    "doc_count": 24,
                    "avg_usage": {
                        "value": 4.5
                    }
                },
                {
                    "key": "BG-P-A106BT",
                    "doc_count": 24,
                    "avg_usage": {
                        "value": 5.0
                    }
                },
                {
                    "key": "BG-P-A110VY",
                    "doc_count": 24,
                    "avg_usage": {
                        "value": 5.5
                    }
                }
            ]
        }
    }

我真正需要的是让查询返回筛选后所得桶的总数。我尝试过一个类似问题的回答,但没有帮助。

请看下面的查询是否有帮助:

POST <your_index_name>/_search
{
  "size": 0,
  "aggs": {
    "users": {
     "terms": {
        "field": "id",
        "min_doc_count": 20,
        "order" : { "_key" : "asc" },
        "size": 100,                       <----- Added this
        "show_term_doc_count_error": true  <----- Added this 
      },
      "aggs": {
        "avg_usage": {
          "avg": {
            "field": "usage_duration"
          }
        },
        "usage_filter": {
          "bucket_selector": {
            "buckets_path": {
              "avgUsage": "avg_usage"
            },
            "script": "params.avgUsage > 4.0"
          }
        },
        "bucket_count":{
          "bucket_script": {
            "buckets_path": {
              "count": "_count"
            },
            "script": "return params.count"
          }
        }
      }
    },
    "mybucketcount":{
      "stats_bucket": {
        "buckets_path":"users._count"
      }
    }
  }
}

针对您提到的文档集,我把 "script": "params.avgUsage > 4.0" 替换为 "script": "params.avgUsage > 3.0",并将 min_doc_count 设为 2,然后运行了上面的查询,得到如下响应:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "users" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "AL-2930",
          "doc_count" : 2,
          "avg_usage" : {
            "value" : 5.0
          },
          "bucket_count" : {
            "value" : 2.0
          }
        },
        {
          "key" : "AL-2931",
          "doc_count" : 2,
          "avg_usage" : {
            "value" : 3.25
          },
          "bucket_count" : {
            "value" : 2.0
          }
        }
      ]
    },
    "mybucketcount" : {
      "count" : 2,             <---- Note this.
      "min" : 2.0,
      "max" : 2.0,
      "avg" : 2.0,
      "sum" : 4.0
    }
  }
}

我假设您需要的是 Terms Aggregation(即 users 聚合)返回的桶总数,因此我只是额外添加了一个 Stats Bucket Aggregation(stats_bucket),其结果中的 count 字段就是桶的总数。

如果有帮助请告诉我!