Elasticsearch: 如何让每个 userid 只返回一条记录?
Elasticsearch only one record based on userid?
在post索引中,postid是主键,userid是外键。
我想查询 post,但每个用户 ID 只返回一条 post,也就是说每个用户在结果中只对应一条 post,并按 post 日期排序(可选: 最新的排在最前)
//Actual Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u1",
postid: "p2"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
},
{
userid: "u3",
postid: "p5"
},
{
userid: "u3",
postid: "p6"
}
]
需要如下
//Expecting Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
}
]
假设索引映射的形式为:
PUT user_posts
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
你可以:
- 聚合
userid
并按字母顺序排列 ID
- 在
postid
上进行 terms 子聚合,并借助 max
聚合(对 postdate 字段取最大值,即查询中的 latest_posttime)
按发布时间降序对 post 进行排序
- 通过
filter_path
选项过滤响应以仅检索您需要的内容
POST user_posts/_search?filter_path=aggregations.*.buckets.key,aggregations.*.buckets.*.buckets.key
{
"size": 0,
"aggs": {
"by_userid": {
"terms": {
"field": "userid",
"order": {
"_key": "asc"
},
"size": 100
},
"aggs": {
"by_latest_postid": {
"terms": {
"field": "postid",
"size": 1,
"order": {
"latest_posttime": "desc"
}
},
"aggs": {
"latest_posttime": {
"max": {
"field": "postdate"
}
}
}
}
}
}
}
}
返回结果:
{
"aggregations" : {
"by_userid" : {
"buckets" : [
{
"key" : "u1",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p1"
}
]
}
},
{
"key" : "u2",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p3"
}
]
}
},
{
"key" : "u3",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p4"
}
]
}
}
]
}
}
}
然后您可以像往常一样对响应进行后处理:
...
const response = await ...; // transform the above request for use in the ES JS lib of your choice
// Flatten the aggregation response into one { userid, postid } record per user.
// `postid` is the key of the first (and, with "size": 1, only) post bucket.
// Optional chaining is used because `filter_path` drops empty objects/arrays from
// the response, so `by_latest_postid` or its `buckets` may be missing or empty;
// in that case `postid` is `undefined` instead of the map throwing a TypeError.
const result = response.aggregations.by_userid.buckets.map((bucket) => ({
  userid: bucket.key,
  postid: bucket.by_latest_postid?.buckets?.[0]?.key,
}));
您可以使用 top hits 子聚合。也就是说,首先按 userId
进行 terms
聚合,然后使用按 post-date
排序的 top hits 来获得每个用户的最新 post。
需要说明的是,如果 userId
的数量很多,并且你希望为每个 userId 都取到 top hits,就应该使用 composite aggregation 而不是 terms 作为顶层聚合。
我想你可以为此使用 top hits 聚合。示例如下:
DELETE my-index-000001
PUT my-index-000001
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
PUT my-index-000001/_doc/1
{"userid": "u1", "postid": "p1", "postdate": "2021-03-01"}
PUT my-index-000001/_doc/2
{"userid": "u1", "postid": "p2", "postdate": "2021-03-02"}
PUT my-index-000001/_doc/3
{"userid": "u2", "postid": "p3", "postdate": "2021-03-03"}
PUT my-index-000001/_doc/4
{"userid": "u3", "postid": "p4", "postdate": "2021-03-04"}
PUT my-index-000001/_doc/5
{"userid": "u3", "postid": "p5", "postdate": "2021-03-05"}
PUT my-index-000001/_doc/6
{"userid": "u3", "postid": "p6", "postdate": "2021-03-06"}
以上是创建示例索引并写入数据的步骤。查询如下:
GET my-index-000001/_search
{
"size": 0,
"aggs": {
"top_users": {
"terms": {
"field": "userid",
"size": 100
},
"aggs": {
"top": {
"top_hits": {
"sort": [
{
"postdate": {
"order": "desc"
}
}
],
"_source": {
"includes": [ "postdate", "postid" ]
},
"size": 1
}
}
}
}
}
}
在结果集中,您可以在聚合里看到每个用户最新的一条 post:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"top_users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "u3",
"doc_count" : 3,
"top" : {
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "6",
"_score" : null,
"_source" : {
"postdate" : "2021-03-06",
"postid" : "p6"
},
"sort" : [
1614988800000
]
}
]
}
}
},
{
"key" : "u1",
"doc_count" : 2,
"top" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"postdate" : "2021-03-02",
"postid" : "p2"
},
"sort" : [
1614643200000
]
}
]
}
}
},
{
"key" : "u2",
"doc_count" : 1,
"top" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"postdate" : "2021-03-03",
"postid" : "p3"
},
"sort" : [
1614729600000
]
}
]
}
}
}
]
}
}
}
在post索引中,postid是主键,userid是外键。
我想查询 post,但每个用户 ID 只返回一条 post,也就是说每个用户在结果中只对应一条 post,并按 post 日期排序(可选: 最新的排在最前)
//Actual Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u1",
postid: "p2"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
},
{
userid: "u3",
postid: "p5"
},
{
userid: "u3",
postid: "p6"
}
]
需要如下
//Expecting Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
}
]
假设索引映射的形式为:
PUT user_posts
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
你可以:
- 聚合
userid
并按字母顺序排列 ID
- 在
postid
上进行 terms 子聚合,并借助 max
聚合(对 postdate 字段取最大值,即查询中的 latest_posttime)
按发布时间降序对 post 进行排序
- 通过
filter_path
选项过滤响应以仅检索您需要的内容
POST user_posts/_search?filter_path=aggregations.*.buckets.key,aggregations.*.buckets.*.buckets.key
{
"size": 0,
"aggs": {
"by_userid": {
"terms": {
"field": "userid",
"order": {
"_key": "asc"
},
"size": 100
},
"aggs": {
"by_latest_postid": {
"terms": {
"field": "postid",
"size": 1,
"order": {
"latest_posttime": "desc"
}
},
"aggs": {
"latest_posttime": {
"max": {
"field": "postdate"
}
}
}
}
}
}
}
}
返回结果:
{
"aggregations" : {
"by_userid" : {
"buckets" : [
{
"key" : "u1",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p1"
}
]
}
},
{
"key" : "u2",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p3"
}
]
}
},
{
"key" : "u3",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p4"
}
]
}
}
]
}
}
}
然后您可以像往常一样对响应进行后处理:
...
const response = await ...; // transform the above request for use in the ES JS lib of your choice
// Flatten the aggregation response into one { userid, postid } record per user.
// `postid` is the key of the first (and, with "size": 1, only) post bucket.
// Optional chaining is used because `filter_path` drops empty objects/arrays from
// the response, so `by_latest_postid` or its `buckets` may be missing or empty;
// in that case `postid` is `undefined` instead of the map throwing a TypeError.
const result = response.aggregations.by_userid.buckets.map((bucket) => ({
  userid: bucket.key,
  postid: bucket.by_latest_postid?.buckets?.[0]?.key,
}));
您可以使用 top hits 子聚合。也就是说,首先按 userId
进行 terms
聚合,然后使用按 post-date
排序的 top hits 来获得每个用户的最新 post。
需要说明的是,如果 userId
的数量很多,并且你希望为每个 userId 都取到 top hits,就应该使用 composite aggregation 而不是 terms 作为顶层聚合。
我想你可以为此使用 top hits 聚合。示例如下:
DELETE my-index-000001
PUT my-index-000001
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
PUT my-index-000001/_doc/1
{"userid": "u1", "postid": "p1", "postdate": "2021-03-01"}
PUT my-index-000001/_doc/2
{"userid": "u1", "postid": "p2", "postdate": "2021-03-02"}
PUT my-index-000001/_doc/3
{"userid": "u2", "postid": "p3", "postdate": "2021-03-03"}
PUT my-index-000001/_doc/4
{"userid": "u3", "postid": "p4", "postdate": "2021-03-04"}
PUT my-index-000001/_doc/5
{"userid": "u3", "postid": "p5", "postdate": "2021-03-05"}
PUT my-index-000001/_doc/6
{"userid": "u3", "postid": "p6", "postdate": "2021-03-06"}
以上是创建示例索引并写入数据的步骤。查询如下:
GET my-index-000001/_search
{
"size": 0,
"aggs": {
"top_users": {
"terms": {
"field": "userid",
"size": 100
},
"aggs": {
"top": {
"top_hits": {
"sort": [
{
"postdate": {
"order": "desc"
}
}
],
"_source": {
"includes": [ "postdate", "postid" ]
},
"size": 1
}
}
}
}
}
}
在结果集中,您可以在聚合里看到每个用户最新的一条 post:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"top_users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "u3",
"doc_count" : 3,
"top" : {
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "6",
"_score" : null,
"_source" : {
"postdate" : "2021-03-06",
"postid" : "p6"
},
"sort" : [
1614988800000
]
}
]
}
}
},
{
"key" : "u1",
"doc_count" : 2,
"top" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"postdate" : "2021-03-02",
"postid" : "p2"
},
"sort" : [
1614643200000
]
}
]
}
}
},
{
"key" : "u2",
"doc_count" : 1,
"top" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"postdate" : "2021-03-03",
"postid" : "p3"
},
"sort" : [
1614729600000
]
}
]
}
}
}
]
}
}
}