为什么当查询包含 "IN" 前缀时 Elasticsearch 不返回结果?
Why is Elasticsearch not returning results when the query contains the "IN" prefix?
下面的 Elasticsearch 查询在我的应用程序中没有返回任何结果:
{
"query" : {
"bool" : {
"must" : [
{
"simple_query_string" : {
"query" : "IN-123456",
"fields" : [
"field1.auto^1.0",
"field2.auto^1.0"
],
"flags" : -1,
"default_operator" : "AND",
"analyze_wildcard" : false,
"auto_generate_synonyms_phrase_query" : true,
"fuzzy_prefix_length" : 0,
"fuzzy_max_expansions" : 50,
"fuzzy_transpositions" : true,
"boost" : 1.0
}
}],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
请注意,我的 Elasticsearch 数据源中确实存在 field2 的值为 "IN-123456" 的文档。
如果把查询文本改为 "123456",就可以搜索到同一个文档。
下面是使用的索引
{
"document_****": {
"aliases": {
"document": {}
},
"mappings": {
"_doc": {
"dynamic": "strict",
"date_detection": false,
"properties": {
"@timestamp": {
"type": "date"
},
"field2": {
"type": "keyword",
"fields": {
"auto": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "document_***",
"creation_date": "1****",
"analysis": {
"filter": {
"autocomplete_filter_30": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "30"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_30": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter_30"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_nonstop": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1",
"uuid": "***",
"version": {
"created": "6020499"
}
}
}
}
}
注意:出于保密原因,部分值已被替换为 *。
请检查您的映射。以下查询在我这里可以正常工作。
POST v_upload_branch/_doc
{
"branch_name":"IN-123456",
"branch_head":"Chennai"
}
GET v_upload_branch/_search
{
"query" : {
"bool" : {
"must" : [
{
"simple_query_string" : {
"query" : "IN-123456",
"fields" : [
"branch_head^1.0",
"branch_name^1.0"
],
"flags" : -1,
"default_operator" : "AND",
"analyze_wildcard" : false,
"auto_generate_synonyms_phrase_query" : true,
"fuzzy_prefix_length" : 0,
"fuzzy_max_expansions" : 50,
"fuzzy_transpositions" : true,
"boost" : 1.0
}
}],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
下面是使用的索引
{
"document_****": {
"aliases": {
"document": {}
},
"mappings": {
"_doc": {
"dynamic": "strict",
"date_detection": false,
"properties": {
"@timestamp": {
"type": "date"
},
"field2": {
"type": "keyword",
"fields": {
"auto": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "document_***",
"creation_date": "1****",
"analysis": {
"filter": {
"autocomplete_filter_30": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "30"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_30": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter_30"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_nonstop": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1",
"uuid": "***",
"version": {
"created": "6020499"
}
}
}
}
}
注意:出于保密原因,部分值已被替换为 *。
分析我的索引映射后发现,stop 词元过滤器(token filter)正在从词元流中删除前缀 IN,因为它属于默认的停用词列表(_english_ 停用词):
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stop-tokenfilter.html
因此,前缀 IN 在建立索引时被丢弃,而搜索时使用的 standard 分析器仍会保留词元 in;由于 default_operator 为 AND,该词元无法匹配,Elasticsearch 就不会返回任何结果。
下面的 Elasticsearch 查询在我的应用程序中没有返回任何结果:
{
"query" : {
"bool" : {
"must" : [
{
"simple_query_string" : {
"query" : "IN-123456",
"fields" : [
"field1.auto^1.0",
"field2.auto^1.0"
],
"flags" : -1,
"default_operator" : "AND",
"analyze_wildcard" : false,
"auto_generate_synonyms_phrase_query" : true,
"fuzzy_prefix_length" : 0,
"fuzzy_max_expansions" : 50,
"fuzzy_transpositions" : true,
"boost" : 1.0
}
}],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
请注意,我的 Elasticsearch 数据源中确实存在 field2 的值为 "IN-123456" 的文档。 如果把查询文本改为 "123456",就可以搜索到同一个文档。
下面是使用的索引
{
"document_****": {
"aliases": {
"document": {}
},
"mappings": {
"_doc": {
"dynamic": "strict",
"date_detection": false,
"properties": {
"@timestamp": {
"type": "date"
},
"field2": {
"type": "keyword",
"fields": {
"auto": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "document_***",
"creation_date": "1****",
"analysis": {
"filter": {
"autocomplete_filter_30": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "30"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_30": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter_30"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_nonstop": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1",
"uuid": "***",
"version": {
"created": "6020499"
}
}
}
}
}
注意:出于保密原因,部分值已被替换为 *。
请检查您的映射。以下查询在我这里可以正常工作。
POST v_upload_branch/_doc
{
"branch_name":"IN-123456",
"branch_head":"Chennai"
}
GET v_upload_branch/_search
{
"query" : {
"bool" : {
"must" : [
{
"simple_query_string" : {
"query" : "IN-123456",
"fields" : [
"branch_head^1.0",
"branch_name^1.0"
],
"flags" : -1,
"default_operator" : "AND",
"analyze_wildcard" : false,
"auto_generate_synonyms_phrase_query" : true,
"fuzzy_prefix_length" : 0,
"fuzzy_max_expansions" : 50,
"fuzzy_transpositions" : true,
"boost" : 1.0
}
}],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
下面是使用的索引
{
"document_****": {
"aliases": {
"document": {}
},
"mappings": {
"_doc": {
"dynamic": "strict",
"date_detection": false,
"properties": {
"@timestamp": {
"type": "date"
},
"field2": {
"type": "keyword",
"fields": {
"auto": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "document_***",
"creation_date": "1****",
"analysis": {
"filter": {
"autocomplete_filter_30": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "30"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_30": {
"filter": [
"lowercase",
"stop",
"autocomplete_filter_30"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_nonstop": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1",
"uuid": "***",
"version": {
"created": "6020499"
}
}
}
}
}
注意:出于保密原因,部分值已被替换为 *。
分析我的索引映射后发现,stop 词元过滤器(token filter)正在从词元流中删除前缀 IN,因为它属于默认的停用词列表(_english_ 停用词):
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-stop-tokenfilter.html
因此,前缀 IN 在建立索引时被丢弃,而搜索时使用的 standard 分析器仍会保留词元 in;由于 default_operator 为 AND,该词元无法匹配,Elasticsearch 就不会返回任何结果。