跨多个 Elasticsearch 类型查询
Querying across multiple elasticsearch types
我想在 Elasticsearch 5.0 中获取同时存在于多种类型(type1 AND type2 AND type3...)中的文档。我知道可以通过在 URL 中指定 type1、type2 等多个类型,或者对 _type 字段进行过滤,来跨多种类型搜索。但这些条件都是 OR 关系(type1 OR type2)。如何实现 AND 条件?
这是我的ES中的两个文档,
{
"_index":"cust_58e8700034fa4e368590fb1396e2641c",
"_type":"unique-fp-domains",
"_id":"n_d4dbba7309a94503b25eca735078f17c_258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"_version":2,
"_score":1,
"_source":{
"mg_timestamp":1579866709096,
"violated-directive":"connect-src",
"fp-hash":"258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"time":1579866709096,
"scan-id":"n_d4dbba7309a94503b25eca735078f17c",
"blocked-uri":"play.sundaysky.com"
}
}
{
"_index":"cust_58e8700034fa4e368590fb1396e2641c",
"_type":"tag-alexa-top1k-using-csp-tld-domain",
"_id":"AW_XY4P4kmprPQ28bTUb",
"_version":1,
"_score":1,
"_source":{
"tagged-domain":"sundaysky.com",
"tag-guidance":"FP",
"additional-tag-metadata-isbase64-encoded":"eyJ0b3RhbC1hbGV4YS1tYXRjaGVzIjoyMzh9",
"project-id":2,
"fp-hash":"258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"scan-id":"n_d4dbba7309a94503b25eca735078f17c",
}
}
我想使用 "scan-id":"n_d4dbba7309a94503b25eca735078f17c"
从同一索引的上述 2 种类型中获取文档
我试过了,
{
"size": 100,
"query": {
"bool": {
"must": [
{
"bool": {
"filter": [
{
"term": {
"_type": {
"value": "tag-alexa-top1k-using-csp-tld-domain"
}
}
},
{
"term": {
"scan-id": {
"value": "n_d4dbba7309a94503b25eca735078f17c"
}
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"_type": {
"value": "unique-fp-domains"
}
}
},
{
"term": {
"scan-id": {
"value": "n_d4dbba7309a94503b25eca735078f17c"
}
}
}
]
}
}
]
}
}
}
但是不起作用。
您可以使用多重搜索(Multi Search API,即 _msearch)。它可以把多个搜索组合到一次请求中。您可以在官方文档中找到更多相关信息。 https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html
我认为这个查询会解决您的问题;
"query": {
"bool": {
"must": [
{
"terms": {
"_type": "tag-alexa-top1k-using-csp-tld-domain"
}
},
{
"terms": {
"_type": "unique-fp-domains"
}
}
],
"filter": [
{
"scan-id": {
"_type": "n_d4dbba7309a94503b25eca735078f17c"
}
}
]
}
}
"query": {
"query_string":{
"query" : "(_type : unique-fp-domains OR tag-alexa-top1k-using-csp-tld-domain) AND (scan-id : n_d4dbba7309a94503b25eca735078f17c)"
}
}
Elasticsearch 不擅长对不同的文档集合做连接(join),但在您的情况下,可以通过 parent-child
关系解决您的问题。
如何以 AND 方式同时查询多种索引类型?
如果是一对多(one-to-many)关系,可以用 parent-child
建模。假设类型 unique-fp-domains
是 "parent" 类型,并且 scan-id
字段是其唯一标识符。我们还假设 tag-alexa-top1k-using-csp-tld-domain
是 "child" 类型,并且 tag-alexa-top1k-using-csp-tld-domain
类型的每个文档恰好引用 unique-fp-domains
中的 1 个文档。
然后我们应该按以下方式创建 Elasticsearch 映射:
PUT /cust_58
{
"mappings": {
"unique-fp-domains": {},
"tag-alexa-top1k-using-csp-tld-domain": {
"_parent": {
"type": "unique-fp-domains"
}
}
}
}
然后像这样插入文档:
# "parent"
PUT /cust_58/unique-fp-domains/n_d4dbba7309a94503b25eca735078f17c
{
"mg_timestamp": 1579866709096,
"violated-directive": "connect-src",
"fp-hash": "258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"time": 1579866709096,
"scan-id": "n_d4dbba7309a94503b25eca735078f17c",
"blocked-uri": "play.sundaysky.com"
}
# "child"
POST /cust_58/tag-alexa-top1k-using-csp-tld-domain?parent=n_d4dbba7309a94503b25eca735078f17c
{
"tagged-domain": "sundaysky.com",
"tag-guidance": "FP",
"additional-tag-metadata-isbase64-encoded": "eyJ0b3RhbC1hbGV4YS1tYXRjaGVzIjoyMzh9",
"project-id": 2,
"fp-hash": "258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"scan-id": "n_d4dbba7309a94503b25eca735078f17c"
}
现在我们就能查询关联了任意 child 的 parent 对象,也就是按 parent ID 做连接(join)。这里我们通过手动指定文档的 _id
,强制让 parent ID 等于 scan-id
。
查询将使用 has_child
并且如下所示:
POST /cust_58/unique-fp-domains/_search
{
"query": {
"has_child": {
"type": "tag-alexa-top1k-using-csp-tld-domain",
"query": {
"match_all": {}
},
"inner_hits": {}
}
}
}
请注意,我们使用 inner_hits
告诉 Elasticsearch 检索匹配的 "child" 文档。
输出看起来像:
"hits": [
{
"_index": "cust_58",
"_type": "unique-fp-domains",
"_id": "n_d4dbba7309a94503b25eca735078f17c",
"_score": 1.0,
"_source": {
"mg_timestamp": 1579866709096,
"violated-directive": "connect-src",
...
},
"inner_hits": {
"tag-alexa-top1k-using-csp-tld-domain": {
"hits": {
"total": 1,
"max_score": 1.0,
"hits": [
{
"_type": "tag-alexa-top1k-using-csp-tld-domain",
"_id": "AW_xhfnnIzWDkoWd1czA",
"_score": 1.0,
"_routing": "n_d4dbba7309a94503b25eca735078f17c",
"_parent": "n_d4dbba7309a94503b25eca735078f17c",
"_source": {
"tagged-domain": "sundaysky.com",
...
}
使用 parent-child
的缺点是什么?
- parent ID 应该是唯一的
- 只能按 parent ID 进行连接(join)
- 存在一定的性能开销(performance overhead),官方文档中写道:
If you care about query performance you should not use this query.
- 要启用 parent-child 必须更改映射并对现有数据重新编制索引
需要考虑的其他重要事项
在 Elasticsearch 6 中,类型(types)已被移除。好消息是,从 Elasticsearch 5 开始就已经可以使用 join
数据类型(datatype)。
总的来说,Elasticsearch 不太擅长管理对象之间的关系,但仍有几种方法可以处理它们。
希望对您有所帮助!
我想在 Elasticsearch 5.0 中获取同时存在于多种类型(type1 AND type2 AND type3...)中的文档。我知道可以通过在 URL 中指定 type1、type2 等多个类型,或者对 _type 字段进行过滤,来跨多种类型搜索。但这些条件都是 OR 关系(type1 OR type2)。如何实现 AND 条件?
这是我的ES中的两个文档,
{
"_index":"cust_58e8700034fa4e368590fb1396e2641c",
"_type":"unique-fp-domains",
"_id":"n_d4dbba7309a94503b25eca735078f17c_258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"_version":2,
"_score":1,
"_source":{
"mg_timestamp":1579866709096,
"violated-directive":"connect-src",
"fp-hash":"258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"time":1579866709096,
"scan-id":"n_d4dbba7309a94503b25eca735078f17c",
"blocked-uri":"play.sundaysky.com"
}
}
{
"_index":"cust_58e8700034fa4e368590fb1396e2641c",
"_type":"tag-alexa-top1k-using-csp-tld-domain",
"_id":"AW_XY4P4kmprPQ28bTUb",
"_version":1,
"_score":1,
"_source":{
"tagged-domain":"sundaysky.com",
"tag-guidance":"FP",
"additional-tag-metadata-isbase64-encoded":"eyJ0b3RhbC1hbGV4YS1tYXRjaGVzIjoyMzh9",
"project-id":2,
"fp-hash":"258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"scan-id":"n_d4dbba7309a94503b25eca735078f17c",
}
}
我想使用 "scan-id":"n_d4dbba7309a94503b25eca735078f17c"
从同一索引的上述 2 种类型中获取文档
我试过了,
{
"size": 100,
"query": {
"bool": {
"must": [
{
"bool": {
"filter": [
{
"term": {
"_type": {
"value": "tag-alexa-top1k-using-csp-tld-domain"
}
}
},
{
"term": {
"scan-id": {
"value": "n_d4dbba7309a94503b25eca735078f17c"
}
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"_type": {
"value": "unique-fp-domains"
}
}
},
{
"term": {
"scan-id": {
"value": "n_d4dbba7309a94503b25eca735078f17c"
}
}
}
]
}
}
]
}
}
}
但是不起作用。
您可以使用多重搜索(Multi Search API,即 _msearch)。它可以把多个搜索组合到一次请求中。您可以在官方文档中找到更多相关信息。 https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html
我认为这个查询会解决您的问题;
"query": {
"bool": {
"must": [
{
"terms": {
"_type": "tag-alexa-top1k-using-csp-tld-domain"
}
},
{
"terms": {
"_type": "unique-fp-domains"
}
}
],
"filter": [
{
"scan-id": {
"_type": "n_d4dbba7309a94503b25eca735078f17c"
}
}
]
}
}
"query": {
"query_string":{
"query" : "(_type : unique-fp-domains OR tag-alexa-top1k-using-csp-tld-domain) AND (scan-id : n_d4dbba7309a94503b25eca735078f17c)"
}
}
Elasticsearch 不擅长对不同的文档集合做连接(join),但在您的情况下,可以通过 parent-child
关系解决您的问题。
如何以 AND 方式同时查询多种索引类型?
如果是一对多(one-to-many)关系,可以用 parent-child
建模。假设类型 unique-fp-domains
是 "parent" 类型,并且 scan-id
字段是其唯一标识符。我们还假设 tag-alexa-top1k-using-csp-tld-domain
是 "child" 类型,并且 tag-alexa-top1k-using-csp-tld-domain
类型的每个文档恰好引用 unique-fp-domains
中的 1 个文档。
然后我们应该按以下方式创建 Elasticsearch 映射:
PUT /cust_58
{
"mappings": {
"unique-fp-domains": {},
"tag-alexa-top1k-using-csp-tld-domain": {
"_parent": {
"type": "unique-fp-domains"
}
}
}
}
然后像这样插入文档:
# "parent"
PUT /cust_58/unique-fp-domains/n_d4dbba7309a94503b25eca735078f17c
{
"mg_timestamp": 1579866709096,
"violated-directive": "connect-src",
"fp-hash": "258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"time": 1579866709096,
"scan-id": "n_d4dbba7309a94503b25eca735078f17c",
"blocked-uri": "play.sundaysky.com"
}
# "child"
POST /cust_58/tag-alexa-top1k-using-csp-tld-domain?parent=n_d4dbba7309a94503b25eca735078f17c
{
"tagged-domain": "sundaysky.com",
"tag-guidance": "FP",
"additional-tag-metadata-isbase64-encoded": "eyJ0b3RhbC1hbGV4YS1tYXRjaGVzIjoyMzh9",
"project-id": 2,
"fp-hash": "258b3ad1a11aba282f35908662bdc5432d68fd96bf3ca90013dcdd5764331399",
"scan-id": "n_d4dbba7309a94503b25eca735078f17c"
}
现在我们就能查询关联了任意 child 的 parent 对象,也就是按 parent ID 做连接(join)。这里我们通过手动指定文档的 _id
,强制让 parent ID 等于 scan-id
。
查询将使用 has_child
并且如下所示:
POST /cust_58/unique-fp-domains/_search
{
"query": {
"has_child": {
"type": "tag-alexa-top1k-using-csp-tld-domain",
"query": {
"match_all": {}
},
"inner_hits": {}
}
}
}
请注意,我们使用 inner_hits
告诉 Elasticsearch 检索匹配的 "child" 文档。
输出看起来像:
"hits": [
{
"_index": "cust_58",
"_type": "unique-fp-domains",
"_id": "n_d4dbba7309a94503b25eca735078f17c",
"_score": 1.0,
"_source": {
"mg_timestamp": 1579866709096,
"violated-directive": "connect-src",
...
},
"inner_hits": {
"tag-alexa-top1k-using-csp-tld-domain": {
"hits": {
"total": 1,
"max_score": 1.0,
"hits": [
{
"_type": "tag-alexa-top1k-using-csp-tld-domain",
"_id": "AW_xhfnnIzWDkoWd1czA",
"_score": 1.0,
"_routing": "n_d4dbba7309a94503b25eca735078f17c",
"_parent": "n_d4dbba7309a94503b25eca735078f17c",
"_source": {
"tagged-domain": "sundaysky.com",
...
}
使用 parent-child
的缺点是什么?
- parent ID 应该是唯一的
- 只能按 parent ID 进行连接(join)
- 存在一定的性能开销(performance overhead),官方文档中写道:
If you care about query performance you should not use this query.
- 要启用 parent-child 必须更改映射并对现有数据重新编制索引
需要考虑的其他重要事项
在 Elasticsearch 6 中,类型(types)已被移除。好消息是,从 Elasticsearch 5 开始就已经可以使用 join
数据类型(datatype)。
总的来说,Elasticsearch 不太擅长管理对象之间的关系,但仍有几种方法可以处理它们。
希望对您有所帮助!