在 ElasticSearch 中如何聚合直到达到某个值?
How to aggregate until a certain value is reached in ElasticSearch?
我想通过 "amount" 字段聚合文档列表(每个文档都有两个字段——时间戳和金额),直到累计值达到某个数值。例如,我想获取按时间戳排序的文档列表,使其总金额等于 100。是否可以在一个查询中完成?
这是我的查询,它返回总金额——我想在这里添加一个条件,当达到某个值时停止聚合。
{
"query": {
"bool": {
"filter": [
{
"range": {
"timestamp": {
"gte": 1525168583
}
}
}
]
}
},
"aggs": {
"total_amount": {
"sum": {
"field": "amount"
}
}
},
"sort": [
"timestamp"
],
"size": 10000
}
谢谢
这完全可以做到:组合使用 function_score 脚本(用于模拟排序)、filter 聚合(用于 range gte 查询)以及若干 scripted_metric 聚合,即可将求和限制在一定数额之内:
PUT summation
{
"mappings": {
"properties": {
"timestamp": {
"type": "date",
"format": "epoch_second"
}
}
}
}
POST summation/_doc
{
"context": "newest",
"timestamp": 1587049128,
"amount": 20
}
POST summation/_doc
{
"context": "2nd newest",
"timestamp": 1586049128,
"amount": 30
}
POST summation/_doc
{
"context": "3rd newest",
"timestamp": 1585049128,
"amount": 40
}
POST summation/_doc
{
"context": "4th newest",
"timestamp": 1585049128,
"amount": 30
}
GET summation/_search
{
"size": 0,
"aggs": {
"filtered_agg": {
"filter": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1585049128
}
}
},
{
"function_score": {
"query": {
"match_all": {}
},
"script_score": {
"script": {
"source": "return (params['now'] - doc['timestamp'].date.toMillis())",
"params": {
"now": 1587049676
}
}
}
}
}
]
}
},
"aggs": {
"limited_sum": {
"scripted_metric": {
"init_script": """
state['my_hash'] = new HashMap();
state['my_hash'].put('sum', 0);
state['my_hash'].put('docs', new ArrayList());
""",
"map_script": """
if (state['my_hash']['sum'] <= 100) {
state['my_hash']['sum'] += doc['amount'].value;
state['my_hash']['docs'].add(doc['context.keyword'].value);
}
""",
"combine_script": "return state['my_hash']",
"reduce_script": "return states[0]"
}
}
}
}
}
}
返回结果如下:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"filtered_agg" : {
"meta" : { },
"doc_count" : 4,
"limited_sum" : {
"value" : {
"docs" : [
"newest",
"2nd newest",
"3rd newest",
"4th newest"
],
"sum" : 120
}
}
}
}
}
我在此处选择仅返回 doc.context,但您可以对其进行调整以检索您喜欢的任何内容——ID、金额等。注意:map_script 是在累加之前检查 sum <= 100 的,因此最终的总和可能会超过 100(本例中为 120)。
我想通过 "amount" 字段聚合文档列表(每个文档都有两个字段——时间戳和金额),直到累计值达到某个数值。例如,我想获取按时间戳排序的文档列表,使其总金额等于 100。是否可以在一个查询中完成?
这是我的查询,它返回总金额——我想在这里添加一个条件,当达到某个值时停止聚合。
{
"query": {
"bool": {
"filter": [
{
"range": {
"timestamp": {
"gte": 1525168583
}
}
}
]
}
},
"aggs": {
"total_amount": {
"sum": {
"field": "amount"
}
}
},
"sort": [
"timestamp"
],
"size": 10000
}
谢谢
这完全可以做到:组合使用 function_score 脚本(用于模拟排序)、filter 聚合(用于 range gte 查询)以及若干 scripted_metric 聚合,即可将求和限制在一定数额之内:
PUT summation
{
"mappings": {
"properties": {
"timestamp": {
"type": "date",
"format": "epoch_second"
}
}
}
}
POST summation/_doc
{
"context": "newest",
"timestamp": 1587049128,
"amount": 20
}
POST summation/_doc
{
"context": "2nd newest",
"timestamp": 1586049128,
"amount": 30
}
POST summation/_doc
{
"context": "3rd newest",
"timestamp": 1585049128,
"amount": 40
}
POST summation/_doc
{
"context": "4th newest",
"timestamp": 1585049128,
"amount": 30
}
GET summation/_search
{
"size": 0,
"aggs": {
"filtered_agg": {
"filter": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1585049128
}
}
},
{
"function_score": {
"query": {
"match_all": {}
},
"script_score": {
"script": {
"source": "return (params['now'] - doc['timestamp'].date.toMillis())",
"params": {
"now": 1587049676
}
}
}
}
}
]
}
},
"aggs": {
"limited_sum": {
"scripted_metric": {
"init_script": """
state['my_hash'] = new HashMap();
state['my_hash'].put('sum', 0);
state['my_hash'].put('docs', new ArrayList());
""",
"map_script": """
if (state['my_hash']['sum'] <= 100) {
state['my_hash']['sum'] += doc['amount'].value;
state['my_hash']['docs'].add(doc['context.keyword'].value);
}
""",
"combine_script": "return state['my_hash']",
"reduce_script": "return states[0]"
}
}
}
}
}
}
返回结果如下:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"filtered_agg" : {
"meta" : { },
"doc_count" : 4,
"limited_sum" : {
"value" : {
"docs" : [
"newest",
"2nd newest",
"3rd newest",
"4th newest"
],
"sum" : 120
}
}
}
}
}
我在此处选择仅返回 doc.context,但您可以对其进行调整以检索您喜欢的任何内容——ID、金额等。注意:map_script 是在累加之前检查 sum <= 100 的,因此最终的总和可能会超过 100(本例中为 120)。