嵌套字段类型的 Elasticsearch 查询
Elastic Search Query for nested field type
我正在尝试为一个业务场景编写查询:我们有一个名为 "types" 的嵌套(nested)类型字段(类似于字符串的 ArrayList)。以下是包含 "types" 字段的示例索引文档。
文档 1:
{ "types": [
{
"Label": "Dialog"
},
{
"Label": "Violence"
},
{
"Label": "Language"
}
]
}
文档 2:
{ "types": [
{
"Label": "Dialog"
}
]
}
现在的要求是:搜索词最多只能匹配该字段中的一个值,并且该字段中不能再有其他值。也就是说,如果用户搜索 "Dialog",则应该只返回文档 2,而不返回文档 1,因为文档 1 的该字段中还包含其他值。基本上,只应获取该字段的值与单个搜索词完全匹配、且该字段中不存在任何其他值的记录。
下面是映射:
{
"media-hub-asset-metadata": {
"mappings": {
"dynamic": "true",
"properties": {
"Metadata": {
"properties": {
"Actors": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
},
"Types": {
"type": "nested",
"properties": {
"Acronym": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Display": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Label": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
},
"TVLabel": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
}
}
}
}
},
"MetadataType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
},
"Network": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
示例索引文档:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 9139,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "media-hub-asset-metadata",
"_type" : "_doc",
"_id" : "1640655|VOD",
"_score" : 1.0,
"_source" : {
"AssetId" : 1640655,
"MaterialId" : "XMX1311",
"Metadata" : {
"Actors" : [
"Owen, Clive",
"Mueller-Stahl, Armin",
"Watts, Naomi"
],
"AirDate" : "2013-05-01T00:00:00Z",
"ClosedCaption" : true,
"Code" : "",
"Types" : [
{
"Label" : "Dialog",
"TVLabel" : "D"
},
{
"Label" : "Violence",
"TVLabel" : "V"
},
{
"Label" : "Language",
"TVLabel" : "L"
}
]
},
"MetadataType" : "VOD"
}
}
]
}
}
非常感谢任何帮助!提前致谢
您需要将 script_score 与 function_score 查询配合使用。
试试下面的查询(该查询是针对一个简化的测试索引编写的,其中嵌套字段位于顶层并命名为 types;对于问题中的映射,需要将 path、字段名以及脚本中的路径相应改为 Metadata.Types):
{
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"nested": {
"path": "types",
"query": {
"bool": {
"must": [
{
"match": {
"types.Label": "Dialog"
}
}
]
}
}
}
}
]
}
},
"functions": [
{
"script_score": {
"script": {
"source": "params._source.containsKey('types') && params._source['types'] != null && params._source.types.size() == 1 ? 1 : 0"
}
}
}
],
"min_score": 0.5 // note this
}
}
}
搜索结果将是
"hits": [
{
"_index": "67594441",
"_type": "_doc",
"_id": "2",
"_score": 0.53899646,
"_source": {
"types": [
{
"Label": "Dialog"
}
]
}
}
]
我正在尝试为一个业务场景编写查询:我们有一个名为 "types" 的嵌套(nested)类型字段(类似于字符串的 ArrayList)。以下是包含 "types" 字段的示例索引文档。
文档 1: { "types": [ { "Label": "Dialog" }, { "Label": "Violence" }, { "Label": "Language" } ] }
文档 2: { "types": [ { "Label": "Dialog" } ] }
现在的要求是:搜索词最多只能匹配该字段中的一个值,并且该字段中不能再有其他值。也就是说,如果用户搜索 "Dialog",则应该只返回文档 2,而不返回文档 1,因为文档 1 的该字段中还包含其他值。基本上,只应获取该字段的值与单个搜索词完全匹配、且该字段中不存在任何其他值的记录。
下面是映射:
{
"media-hub-asset-metadata": {
"mappings": {
"dynamic": "true",
"properties": {
"Metadata": {
"properties": {
"Actors": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
},
"Types": {
"type": "nested",
"properties": {
"Acronym": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Display": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Label": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
},
"TVLabel": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
}
}
}
}
},
"MetadataType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
},
"ngram": {
"type": "text",
"analyzer": "ngram_tokenizer_analyzer"
}
}
},
"Network": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
示例索引文档:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 9139,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "media-hub-asset-metadata",
"_type" : "_doc",
"_id" : "1640655|VOD",
"_score" : 1.0,
"_source" : {
"AssetId" : 1640655,
"MaterialId" : "XMX1311",
"Metadata" : {
"Actors" : [
"Owen, Clive",
"Mueller-Stahl, Armin",
"Watts, Naomi"
],
"AirDate" : "2013-05-01T00:00:00Z",
"ClosedCaption" : true,
"Code" : "",
"Types" : [
{
"Label" : "Dialog",
"TVLabel" : "D"
},
{
"Label" : "Violence",
"TVLabel" : "V"
},
{
"Label" : "Language",
"TVLabel" : "L"
}
]
},
"MetadataType" : "VOD"
}
}
]
}
}
非常感谢任何帮助!提前致谢
您需要将 script_score 与 function_score 查询配合使用。
试试下面的查询(该查询是针对一个简化的测试索引编写的,其中嵌套字段位于顶层并命名为 types;对于问题中的映射,需要将 path、字段名以及脚本中的路径相应改为 Metadata.Types):
{
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"nested": {
"path": "types",
"query": {
"bool": {
"must": [
{
"match": {
"types.Label": "Dialog"
}
}
]
}
}
}
}
]
}
},
"functions": [
{
"script_score": {
"script": {
"source": "params._source.containsKey('types') && params._source['types'] != null && params._source.types.size() == 1 ? 1 : 0"
}
}
}
],
"min_score": 0.5 // note this
}
}
}
搜索结果将是
"hits": [
{
"_index": "67594441",
"_type": "_doc",
"_id": "2",
"_score": 0.53899646,
"_source": {
"types": [
{
"Label": "Dialog"
}
]
}
}
]