Elasticsearch 获取每个用户的最新徽章值
Elasticsearch get latest badge value for each user
我有一个索引“candidate_ranking”,其中包含具有以下文档结构的文档。索引的每个文档有 1 个 candidate_id,并且可以有多个具有相同 candidate_id 的文档,但是 created_at 字段对于它们来说是不同的。这是我的 candidate_ranking 索引中的文档示例-
"hits" : [
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "SCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 118558,
"candidate_id" : 29492,
"created_at" : "2021-03-27T01:34:29.628550+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "SSa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 133354,
"candidate_id" : 29492,
"created_at" : "2021-03-27T02:11:35.811420+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Sia26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 148136,
"candidate_id" : 29492,
"created_at" : "2021-03-29T20:20:36.482066+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Sya26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 162916,
"candidate_id" : 29492,
"created_at" : "2021-03-29T21:05:03.985032+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "TCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 177712,
"candidate_id" : 29492,
"created_at" : "2021-03-29T21:33:32.596613+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "TSa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 192999,
"candidate_id" : 29492,
"created_at" : "2021-03-29T22:20:24.942116+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Tia26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 225434,
"candidate_id" : 29492,
"created_at" : "2021-03-29T23:13:59.266074+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Tya26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 247169,
"candidate_id" : 29492,
"created_at" : "2021-03-30T00:16:04.077245+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "UCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 271179,
"candidate_id" : 29492,
"created_at" : "2021-03-30T01:19:59.803999+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "USa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 295537,
"candidate_id" : 29492,
"created_at" : "2021-03-30T02:23:42.077149+00:00",
"badge" : null
}
}
]
}
此徽章值可以是空字符串或“1”或“2”。
我目前使用下面的聚合,分别统计徽章值为 1 的用户数和徽章值为 2 的用户数
GET /candidate_ranking/_search
{
"aggs": {
"mega_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "2"],
"size": 500000
}
},
"rising_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "1"],
"size": 500000
}
}
}
}
我的索引包含每个“candidate_id”的多个文档。
我只想对每个 candidate_id 的最新文档进行徽章聚合。
也就是类似于按 created_at 字段降序排序,然后对每个 candidate_id 只取排在最前面的那条文档。最终要统计的是:最新徽章值为 1 或 2 的候选人各有多少。
我试过这样做,但没有用
GET /candidate_ranking/_search
{
"aggs": {
"mega_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "2"],
"size": 500000,
"order": {"created_at": "desc"},
"top_hits": {"size":1}
}
},
"rising_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "1"],
"size": 500000
}
}
}
}
您可以将 terms 聚合与 max 聚合结合使用,来实现您需要的用例。
要获取桶的数量,您需要使用 stats_bucket 聚合。
下面是一个包含索引数据、映射、搜索查询和搜索结果的可运行示例:
索引映射:
{
"mappings": {
"properties": {
"created_at": {
"type": "date",
"format": "yyyy-MM-dd'T'HH:mm:ss.SSSSSSz"
}
}
}
}
索引数据:
{
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
{
"id": 271179,
"candidate_id": 29492,
"created_at": "2021-03-30T01:19:59.803999+00:00",
"badge": "1"
}
{
"id": 247169,
"candidate_id": 29492,
"created_at": "2021-03-30T00:16:04.077245+00:00",
"badge": "1"
}
{
"id": 225434,
"candidate_id": 29492,
"created_at": "2021-03-29T23:13:59.266074+00:00",
"badge": null
}
{
"id": 192999,
"candidate_id": 29492,
"created_at": "2021-03-29T22:20:24.942116+00:00",
"badge": null
}
{
"id": 177712,
"candidate_id": 29492,
"created_at": "2021-03-29T21:33:32.596613+00:00",
"badge": null
}
{
"id": 162916,
"candidate_id": 29492,
"created_at": "2021-03-29T21:05:03.985032+00:00",
"badge": null
}
{
"id": 148136,
"candidate_id": 29492,
"created_at": "2021-03-29T20:20:36.482066+00:00",
"badge": "2"
}
{
"id": 118558,
"candidate_id": 29492,
"created_at": "2021-03-27T01:34:29.628550+00:00",
"badge": "2"
}
{
"id": 133354,
"candidate_id": 29492,
"created_at": "2021-03-27T02:11:35.811420+00:00",
"badge": "2"
}
搜索查询:
{
"size": 0,
"aggs": {
"badge_1": {
"terms": {
"field": "badge.keyword",
"include": [
"1"
],
"size": 500000
},
"aggs": {
"unique_id": {
"terms": {
"field": "candidate_id",
"size": 10,
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 1
}
},
"latestOrder": {
"max": {
"field": "created_at"
}
}
}
},
"stats_1": {
"stats_bucket": {
"buckets_path": "unique_id._count"
}
}
}
},
"badge_2": {
"terms": {
"field": "badge.keyword",
"include": [
"2"
],
"size": 500000
},
"aggs": {
"unique_id": {
"terms": {
"field": "candidate_id",
"size": 10,
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 1
}
},
"latestOrder": {
"max": {
"field": "created_at"
}
}
}
},
"stats_2": {
"stats_bucket": {
"buckets_path": "unique_id._count"
}
}
}
}
}
}
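注意:上面查询中的 top_hits 没有指定 sort,默认按 _score 排序,因此 top_doc 返回的不一定是 created_at 最新的文档(例如下方 badge_2 的 top_doc 是 2021-03-27 的文档,而 latestOrder 为 2021-03-29);如需返回最新文档,可在 top_hits 中加入 "sort": [{"created_at": {"order": "desc"}}]。
搜索结果: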
"aggregations": {
"badge_2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 3,
"unique_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 29492,
"doc_count": 3,
"latestOrder": {
"value": 1.617049236482E12,
"value_as_string": "2021-03-29T20:20:36.482000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67157371",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"id": 133354,
"candidate_id": 29492,
"created_at": "2021-03-27T02:11:35.811420+00:00",
"badge": "2"
}
}
]
}
}
}
]
},
"stats_2": {
"count": 1, // note this
"min": 3.0,
"max": 3.0,
"avg": 3.0,
"sum": 3.0
}
}
]
},
"badge_1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 3,
"unique_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 29492,
"doc_count": 3,
"latestOrder": {
"value": 1.617071022077E12,
"value_as_string": "2021-03-30T02:23:42.077000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67157371",
"_type": "_doc",
"_id": "10",
"_score": 1.0,
"_source": {
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
}
]
}
}
}
]
},
"stats_1": {
"count": 1, // note this
"min": 3.0,
"max": 3.0,
"avg": 3.0,
"sum": 3.0
}
}
]
}
}
我有一个索引“candidate_ranking”,其中包含具有以下文档结构的文档。索引的每个文档有 1 个 candidate_id,并且可以有多个具有相同 candidate_id 的文档,但是 created_at 字段对于它们来说是不同的。这是我的 candidate_ranking 索引中的文档示例-
"hits" : [
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "SCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 118558,
"candidate_id" : 29492,
"created_at" : "2021-03-27T01:34:29.628550+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "SSa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 133354,
"candidate_id" : 29492,
"created_at" : "2021-03-27T02:11:35.811420+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Sia26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 148136,
"candidate_id" : 29492,
"created_at" : "2021-03-29T20:20:36.482066+00:00",
"badge" : "2"
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Sya26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 162916,
"candidate_id" : 29492,
"created_at" : "2021-03-29T21:05:03.985032+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "TCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 177712,
"candidate_id" : 29492,
"created_at" : "2021-03-29T21:33:32.596613+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "TSa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 192999,
"candidate_id" : 29492,
"created_at" : "2021-03-29T22:20:24.942116+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Tia26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 225434,
"candidate_id" : 29492,
"created_at" : "2021-03-29T23:13:59.266074+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "Tya26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 247169,
"candidate_id" : 29492,
"created_at" : "2021-03-30T00:16:04.077245+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "UCa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 271179,
"candidate_id" : 29492,
"created_at" : "2021-03-30T01:19:59.803999+00:00",
"badge" : null
}
},
{
"_index" : "candidate_ranking",
"_type" : "_doc",
"_id" : "USa26HgB0zUr7edEvDul",
"_score" : 1.0,
"_source" : {
"id" : 295537,
"candidate_id" : 29492,
"created_at" : "2021-03-30T02:23:42.077149+00:00",
"badge" : null
}
}
]
}
此徽章值可以是空字符串或“1”或“2”。
我目前使用下面的聚合,分别统计徽章值为 1 的用户数和徽章值为 2 的用户数
GET /candidate_ranking/_search
{
"aggs": {
"mega_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "2"],
"size": 500000
}
},
"rising_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "1"],
"size": 500000
}
}
}
}
我的索引包含每个“candidate_id”的多个文档。我只想对每个 candidate_id 的最新文档进行徽章聚合。也就是类似于按 created_at 字段降序排序,然后对每个 candidate_id 只取排在最前面的那条文档。最终要统计的是:最新徽章值为 1 或 2 的候选人各有多少。
我试过这样做,但没有用
GET /candidate_ranking/_search
{
"aggs": {
"mega_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "2"],
"size": 500000,
"order": {"created_at": "desc"},
"top_hits": {"size":1}
}
},
"rising_mogul": {
"terms": {
"field": "badge.keyword",
"exclude": ["", "1"],
"size": 500000
}
}
}
}
您可以将 terms 聚合与 max 聚合结合使用,来实现您需要的用例。
要获取桶的数量,您需要使用 stats_bucket 聚合。
下面是一个包含索引数据、映射、搜索查询和搜索结果的可运行示例:
索引映射:
{
"mappings": {
"properties": {
"created_at": {
"type": "date",
"format": "yyyy-MM-dd'T'HH:mm:ss.SSSSSSz"
}
}
}
}
索引数据:
{
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
{
"id": 271179,
"candidate_id": 29492,
"created_at": "2021-03-30T01:19:59.803999+00:00",
"badge": "1"
}
{
"id": 247169,
"candidate_id": 29492,
"created_at": "2021-03-30T00:16:04.077245+00:00",
"badge": "1"
}
{
"id": 225434,
"candidate_id": 29492,
"created_at": "2021-03-29T23:13:59.266074+00:00",
"badge": null
}
{
"id": 192999,
"candidate_id": 29492,
"created_at": "2021-03-29T22:20:24.942116+00:00",
"badge": null
}
{
"id": 177712,
"candidate_id": 29492,
"created_at": "2021-03-29T21:33:32.596613+00:00",
"badge": null
}
{
"id": 162916,
"candidate_id": 29492,
"created_at": "2021-03-29T21:05:03.985032+00:00",
"badge": null
}
{
"id": 148136,
"candidate_id": 29492,
"created_at": "2021-03-29T20:20:36.482066+00:00",
"badge": "2"
}
{
"id": 118558,
"candidate_id": 29492,
"created_at": "2021-03-27T01:34:29.628550+00:00",
"badge": "2"
}
{
"id": 133354,
"candidate_id": 29492,
"created_at": "2021-03-27T02:11:35.811420+00:00",
"badge": "2"
}
搜索查询:
{
"size": 0,
"aggs": {
"badge_1": {
"terms": {
"field": "badge.keyword",
"include": [
"1"
],
"size": 500000
},
"aggs": {
"unique_id": {
"terms": {
"field": "candidate_id",
"size": 10,
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 1
}
},
"latestOrder": {
"max": {
"field": "created_at"
}
}
}
},
"stats_1": {
"stats_bucket": {
"buckets_path": "unique_id._count"
}
}
}
},
"badge_2": {
"terms": {
"field": "badge.keyword",
"include": [
"2"
],
"size": 500000
},
"aggs": {
"unique_id": {
"terms": {
"field": "candidate_id",
"size": 10,
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"top_doc": {
"top_hits": {
"size": 1
}
},
"latestOrder": {
"max": {
"field": "created_at"
}
}
}
},
"stats_2": {
"stats_bucket": {
"buckets_path": "unique_id._count"
}
}
}
}
}
}
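注意:上面查询中的 top_hits 没有指定 sort,默认按 _score 排序,因此 top_doc 返回的不一定是 created_at 最新的文档(例如下方 badge_2 的 top_doc 是 2021-03-27 的文档,而 latestOrder 为 2021-03-29);如需返回最新文档,可在 top_hits 中加入 "sort": [{"created_at": {"order": "desc"}}]。
搜索结果: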
"aggregations": {
"badge_2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 3,
"unique_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 29492,
"doc_count": 3,
"latestOrder": {
"value": 1.617049236482E12,
"value_as_string": "2021-03-29T20:20:36.482000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67157371",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"id": 133354,
"candidate_id": 29492,
"created_at": "2021-03-27T02:11:35.811420+00:00",
"badge": "2"
}
}
]
}
}
}
]
},
"stats_2": {
"count": 1, // note this
"min": 3.0,
"max": 3.0,
"avg": 3.0,
"sum": 3.0
}
}
]
},
"badge_1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 3,
"unique_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 29492,
"doc_count": 3,
"latestOrder": {
"value": 1.617071022077E12,
"value_as_string": "2021-03-30T02:23:42.077000Z"
},
"top_doc": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67157371",
"_type": "_doc",
"_id": "10",
"_score": 1.0,
"_source": {
"id": 295537,
"candidate_id": 29492,
"created_at": "2021-03-30T02:23:42.077149+00:00",
"badge": "1"
}
}
]
}
}
}
]
},
"stats_1": {
"count": 1, // note this
"min": 3.0,
"max": 3.0,
"avg": 3.0,
"sum": 3.0
}
}
]
}
}