使用 Ngram 的 Elasticsearch 搜索查询始终返回 0 个结果

Search query Elasticsearch with Ngram returns 0 results always

我使用 NEST 操作 Elasticsearch。我想把所有字符串字段拆分成词元(token),并使用 ngram 进行分词(tokenize)。但是,在执行查询时,我总是得到 0 个结果。

下面是我用来调用 API 的类。

/// <summary>
/// Small wrapper around a NEST <see cref="ElasticClient"/> that (re)creates the
/// "video-materials" index with a trigram analyzer on the <c>OriginalTitle.ngram</c>
/// sub-field, indexes <see cref="ElasticVideoMaterial"/> documents and searches them.
/// </summary>
public class Elasticsearch
{
    private readonly string index = "video-materials";
    private readonly ElasticClient client;

    /// <summary>
    /// Connects to the node at http://localhost:9200, drops the index if it already
    /// exists and creates it again with the ngram analysis settings and mapping.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when Elasticsearch rejects the create-index request.
    /// </exception>
    public Elasticsearch()
    {
        var settings = new ConnectionSettings(new Uri("http://localhost:9200"));
        client = new ElasticClient(settings);

        if (client.IndexExists(index).Exists)
        {
            client.DeleteIndex(index);
        }

        // Only built-in token filters are listed. The previous list also named
        // "nGram_filter", which is not defined anywhere in the analysis settings,
        // so the create-index request could not succeed. The trigrams themselves
        // are produced by "ngram_tokenizer" below.
        var tokenFilters = new List<string> { "lowercase", "asciifolding" };

        var resp = client.CreateIndex(index, c => c
            .Settings(s => s
                .Analysis(a => a
                    .Tokenizers(tz => tz
                        .NGram("ngram_tokenizer", td => td
                            .MinGram(3)
                            .MaxGram(3)
                            .TokenChars(TokenChar.Letter, TokenChar.Digit)
                        )
                    )
                    .Analyzers(anz => anz
                        .Custom("ngram_analyzer", cc => cc
                            .Tokenizer("ngram_tokenizer")
                            .Filters(tokenFilters)
                        )
                    )
                )
            )
            .Mappings(m => m
                .Map<ElasticVideoMaterial>(mm => mm
                    .AutoMap()
                    .Properties(p => p
                        .Text(t => t
                            .Name(n => n.OriginalTitle)
                            .Fields(f => f
                                .Keyword(k => k
                                    .Name("keyword")
                                    .IgnoreAbove(256)
                                )
                                .Text(tt => tt
                                    .Name("ngram")
                                    .Analyzer("ngram_analyzer")
                                )
                            )
                        )
                    )
                )
            )
        );

        // Fail loudly: if the index is not created with this mapping, the
        // "OriginalTitle.ngram" sub-field does not exist and searches on it
        // silently return nothing.
        if (!resp.IsValid)
        {
            throw new InvalidOperationException(
                "Failed to create index '" + index + "': " + resp.DebugInformation);
        }
    }

    /// <summary>
    /// Indexes every document of <paramref name="models"/> into the index, one request per document.
    /// </summary>
    /// <param name="models">Documents to index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="models"/> is null.</exception>
    public void Index(IEnumerable<ElasticVideoMaterial> models)
    {
        if (models == null)
        {
            throw new ArgumentNullException(nameof(models));
        }

        foreach (var model in models)
        {
            client.Index(model, i => i.Index(index));
        }
    }

    /// <summary>
    /// Runs a match query for <paramref name="query"/> against the
    /// <c>OriginalTitle.ngram</c> sub-field of the index.
    /// </summary>
    /// <param name="query">Text to look for (previously ignored in favour of a hard-coded "Hob").</param>
    /// <returns>The documents returned by Elasticsearch for the query.</returns>
    public List<ElasticVideoMaterial> Search(string query)
    {
        return client.Search<ElasticVideoMaterial>(s => s
            .Index(index)
            .Query(q => q
                .Match(m => m
                    .Field(f => f.OriginalTitle.Suffix("ngram"))
                    .Query(query)
                )
            )
        ).Documents.ToList();
    }
}

我每次都会重新创建索引,然后使用 Index() 方法索引对象列表。下面是我要索引的类。

/// <summary>
/// Document type that is indexed into and read back from Elasticsearch.
/// Property names are part of the stored document shape, so they are kept as-is.
/// </summary>
public class ElasticVideoMaterial
{
    // --- Identity and titles ---

    public int ID { get; set; }

    public string Title { get; set; }

    /// <summary>
    /// Title that the index mapping declares as a text field with
    /// "keyword" and "ngram" sub-fields.
    /// </summary>
    public string OriginalTitle { get; set; }

    // --- Numeric facts (nullable ones may be absent) ---

    public float? KinopoiskRating { get; set; }

    public float? Imdb { get; set; }

    public int Duration { get; set; }

    // --- Related names and dates ---

    public List<string> GenreTitles { get; set; }

    public List<string> CountryNames { get; set; }

    public DateTime? ReleaseDate { get; set; }

    public List<string> TranslationTitles { get; set; }

    public List<string> FilmMakerNames { get; set; }

    public List<string> ActorNames { get; set; }

    public List<string> ThemeNames { get; set; }

    /// <summary>NEST completion field carried with the document.</summary>
    public CompletionField Suggest { get; set; }
}

但是当我尝试使用 Search() 方法获取结果时,我得到了 0 个结果。(我输入的是 "Hob",希望能得到标题中包含 "Hobbit"(霍比特人)的影片。)

ngram_analyzer 被用来分析搜索请求中的查询输入,但在索引请求中并没有用这个分析器来分析 OriginalTitle 的输入。

您只需要配置索引文档时 OriginalTitle 字段所使用的分析器,可以通过特性映射(attribute mapping)或流式映射(fluent mapping)来指定。例如,使用流式映射:

// Recreates the index with a trigram analyzer on the "OriginalTitle.ngram"
// sub-field and then queries that sub-field.
var client = new ElasticClient();

if (client.IndexExists(defaultIndex).Exists)
{
    client.DeleteIndex(defaultIndex);
}

// Only built-in token filters are listed. "nGram_filter" was removed because no
// token filter with that name is defined in the analysis settings below; the
// trigrams are produced by "ngram_tokenizer" itself.
var nGramFilters = new List<string> { "lowercase", "asciifolding" };

var resp = client.CreateIndex(defaultIndex, c => c
     .Mappings(m => m
        .Map<ElasticVideoMaterial>(mm => mm
            .AutoMap()
            .Properties(p => p
                .Text(t => t
                    .Name(n => n.OriginalTitle)
                    .Fields(f => f
                        .Keyword(k => k
                            .Name("keyword")
                            .IgnoreAbove(256)
                        )
                        .Text(tt => tt
                            .Name("ngram")
                            .Analyzer("ngram_analyzer")
                        )
                    )
                )
            )
        )
    )
    .Settings(s => s
        .Analysis(a => a
            .Analyzers(anz => anz
                .Custom("ngram_analyzer", cc => cc
                    .Filters(nGramFilters)
                    .Tokenizer("ngram_tokenizer")))
            .Tokenizers(tz => tz
                .NGram("ngram_tokenizer", td => td
                    .MinGram(3)
                    .MaxGram(3)
                    .TokenChars(TokenChar.Letter, TokenChar.Digit)
                )
            )
        )
    )
);

// Surface a rejected create-index request instead of continuing without the
// "OriginalTitle.ngram" sub-field.
if (!resp.IsValid)
{
    throw new System.InvalidOperationException(
        "Failed to create index: " + resp.DebugInformation);
}

// The client above is created without a default index, so the search names
// the index explicitly.
var searchResponse = client.Search<ElasticVideoMaterial>(i => i
    .Index(defaultIndex)
    .Query(q => q
        .Match(m => m
            .Field(f => f.OriginalTitle.Suffix("ngram"))
            .Query("Hob")
        )
    )
);

这会将 OriginalTitle 设置为多字段(multi-field),并在 OriginalTitle 下创建一个名为 ngram 的子字段;该子字段在索引时和搜索时都会使用 ngram_analyzer。