ES 建议,搜索索引项中的所有单词(不仅是第一个单词)
ES suggest, search all words in index item (not only the first word)
基于这个答案(第一个选项),我创建了这个索引:
'settings' => array(
'analysis' => array(
'analyzer' => array(
'stop_analyzer' => array(
'type' => 'custom',
'tokenizer' => 'standard',
'filter' => array(
'lowercase',
'english_stop'
)
)
),
"filter" => array(
"english_stop" => array(
"type" => "stop",
"stopwords" => "_english_"
)
)
)
),
'mappings' => array(
'properties' => array(
'texts' => array(
'type' => 'completion',
"analyzer" => "stop_analyzer",
"search_analyzer" => "stop_analyzer",
'preserve_position_increments' => false
),
),
)
无论搜索词是否以停用词开头,建议搜索都能很好地工作。但是,例如,当我的索引中有 "This is the text",而我搜索 "text" 时,却得不到任何结果。那么实现这一点的正确方法是什么?我希望不使用 N-gram。
我的搜索查询:
'suggest' => array(
'suggestion' => array(
'prefix'=> 'text',
'completion' => array(
'field' => 'texts'
)
)
)
The best way to make the completion suggester match in the middle of a field is to use an n-gram filter.
但由于您不想使用 n-gram,您可以尝试以下方法:
您可以使用多个建议,其中一个建议基于前缀,对于字段中间的匹配,您可以使用正则表达式。
下面是一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例:
索引映射:
{
"settings": {
"analysis": {
"filter": {
"my_custom_stop_words_filter": {
"type": "stop",
"ignore_case": true,
"stopwords": [ "and", "is", "the" ]
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"my_custom_stop_words_filter"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "keyword"
},
"suggest": {
"type": "completion",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
索引数据:
{
"suggest": [
{
"input": "This is the text"
}
]
}
{
"suggest": [
{
"input": "Software Manager"
}
]
}
搜索查询:
{
"suggest": {
"suggest-exact": {
"prefix": "text",
"completion": {
"field": "suggest",
"skip_duplicates": true
}
},
"suggest-regex": {
"regex": ".*text.*",
"completion": {
"field": "suggest",
"skip_duplicates": true
}
}
}
}
搜索结果:
"suggest": {
"suggest-exact": [
{
"text": "text",
"offset": 0,
"length": 4,
"options": []
}
],
"suggest-regex": [
{
"text": ".*text.*",
"offset": 0,
"length": 8,
"options": [
{
"text": "This is the text",
"_index": "test",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"suggest": [
{
"input": "This is the text"
}
]
}
}
]
}
]
}
Based on the user's comment, I am adding another answer that searches all the words using n-grams. The previous method works perfectly, but regex queries are quite expensive.
下面是一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例:
索引映射:
{
"settings": {
"analysis": {
"filter": {
"my_custom_stop_words_filter": {
"type": "stop",
"ignore_case": true,
"stopwords": [
"and",
"is",
"the"
]
},
"ngram_filter": {
"type": "ngram",
"min_gram": 4,
"max_gram": 20
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"ngram_filter",
"my_custom_stop_words_filter"
]
}
}
},
"max_ngram_diff": 50
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "ngram_analyzer",
"search_analyzer": "standard"
}
}
}
}
分析 API(ngram_analyzer 是在索引设置中定义的自定义分析器,因此请求需要指向该索引):
POST /<index>/_analyze
{
"analyzer" : "ngram_analyzer",
"text" : "This is the text"
}
生成了以下词元(token):
{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "text",
"start_offset": 12,
"end_offset": 16,
"type": "<ALPHANUM>",
"position": 3
}
]
}
索引数据:
{
"title": [
"This is the text"
]
}
搜索查询:
{
"query": {
"match": {
"title": "text"
}
}
}
搜索结果:
"hits": [
{
"_index": "stof_29753971",
"_type": "_doc",
"_id": "1",
"_score": 0.41978103,
"_source": {
"title": [
"This is the text"
]
}
}
]
基于这个答案(第一个选项),我创建了这个索引:
'settings' => array(
'analysis' => array(
'analyzer' => array(
'stop_analyzer' => array(
'type' => 'custom',
'tokenizer' => 'standard',
'filter' => array(
'lowercase',
'english_stop'
)
)
),
"filter" => array(
"english_stop" => array(
"type" => "stop",
"stopwords" => "_english_"
)
)
)
),
'mappings' => array(
'properties' => array(
'texts' => array(
'type' => 'completion',
"analyzer" => "stop_analyzer",
"search_analyzer" => "stop_analyzer",
'preserve_position_increments' => false
),
),
)
无论搜索词是否以停用词开头,建议搜索都能很好地工作。但是,例如,当我的索引中有 "This is the text",而我搜索 "text" 时,却得不到任何结果。那么实现这一点的正确方法是什么?我希望不使用 N-gram。
我的搜索查询:
'suggest' => array(
'suggestion' => array(
'prefix'=> 'text',
'completion' => array(
'field' => 'texts'
)
)
)
The best way to make the completion suggester match in the middle of a field is to use an n-gram filter.
但由于您不想使用 n-gram,您可以尝试以下方法:
您可以使用多个建议,其中一个建议基于前缀,对于字段中间的匹配,您可以使用正则表达式。
下面是一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例:
索引映射:
{
"settings": {
"analysis": {
"filter": {
"my_custom_stop_words_filter": {
"type": "stop",
"ignore_case": true,
"stopwords": [ "and", "is", "the" ]
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"my_custom_stop_words_filter"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "keyword"
},
"suggest": {
"type": "completion",
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
}
}
}
索引数据:
{
"suggest": [
{
"input": "This is the text"
}
]
}
{
"suggest": [
{
"input": "Software Manager"
}
]
}
搜索查询:
{
"suggest": {
"suggest-exact": {
"prefix": "text",
"completion": {
"field": "suggest",
"skip_duplicates": true
}
},
"suggest-regex": {
"regex": ".*text.*",
"completion": {
"field": "suggest",
"skip_duplicates": true
}
}
}
}
搜索结果:
"suggest": {
"suggest-exact": [
{
"text": "text",
"offset": 0,
"length": 4,
"options": []
}
],
"suggest-regex": [
{
"text": ".*text.*",
"offset": 0,
"length": 8,
"options": [
{
"text": "This is the text",
"_index": "test",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"suggest": [
{
"input": "This is the text"
}
]
}
}
]
}
]
}
Based on the user's comment, I am adding another answer that searches all the words using n-grams. The previous method works perfectly, but regex queries are quite expensive.
下面是一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例:
索引映射:
{
"settings": {
"analysis": {
"filter": {
"my_custom_stop_words_filter": {
"type": "stop",
"ignore_case": true,
"stopwords": [
"and",
"is",
"the"
]
},
"ngram_filter": {
"type": "ngram",
"min_gram": 4,
"max_gram": 20
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"ngram_filter",
"my_custom_stop_words_filter"
]
}
}
},
"max_ngram_diff": 50
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "ngram_analyzer",
"search_analyzer": "standard"
}
}
}
}
分析 API(ngram_analyzer 是在索引设置中定义的自定义分析器,因此请求需要指向该索引):
POST /<index>/_analyze
{
"analyzer" : "ngram_analyzer",
"text" : "This is the text"
}
生成了以下词元(token):
{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "text",
"start_offset": 12,
"end_offset": 16,
"type": "<ALPHANUM>",
"position": 3
}
]
}
索引数据:
{
"title": [
"This is the text"
]
}
搜索查询:
{
"query": {
"match": {
"title": "text"
}
}
}
搜索结果:
"hits": [
{
"_index": "stof_29753971",
"_type": "_doc",
"_id": "1",
"_score": 0.41978103,
"_source": {
"title": [
"This is the text"
]
}
}
]