RavenDB 数组搜索返回随机结果
RavenDB array search returns random results
我正在尝试使用 RavenDB 4 中的 Search 方法在字典之上执行搜索。奇怪的是,如果搜索词是 in 或 it,我会得到随机结果。我绝对确定没有任何记录包含这些词。在 Studio 上执行等效的 Lucene 查询时也会发生这种情况。当我输入有效的搜索词(如员工姓名、编号等)时,它会按预期工作。
我已经根据真实场景创建了这个简单的场景。
这是索引:
/// <summary>
/// Map/reduce index over <see cref="Employee"/> documents. The map projects
/// each employee's Id and Details, the reduce groups by that same pair, and
/// the "Details" field is configured with <c>FieldIndexing.Search</c>.
/// </summary>
public class Search : AbstractIndexCreationTask<Employee, Page>
{
    public Search()
    {
        // Map: project the two members of every employee document.
        Map = employees =>
            from employee in employees
            select new
            {
                employee.Id,
                employee.Details
            };

        // Reduce: the grouping key is the (Id, Details) pair itself, and the
        // output re-emits that key unchanged.
        Reduce = results =>
            from result in results
            group result by new { result.Id, result.Details } into g
            select new
            {
                g.Key.Id,
                g.Key.Details
            };

        // Configure the "Details" field with FieldIndexing.Search.
        Index("Details", FieldIndexing.Search);
    }
}
Employee 类:
/// <summary>
/// Employee document: an identifier plus a free-form set of named details.
/// </summary>
public class Employee
{
    /// <summary>Identifier of the employee.</summary>
    public string Id { get; set; }

    /// <summary>Detail values keyed by detail name.</summary>
    public Dictionary<string, object> Details { get; set; }
}
添加员工:
// Build the details for one employee; the collection initializer adds each
// pair through Dictionary.Add.
details = new Dictionary<string, object>
{
    { "EmployeeNo", 25 },
    { "FirstNames", "Yuri" },
    { "Surname", "Cardoso" },
    { "PositionCode", "XYZ" },
    { "PositionTitle", "Developer" }
};

// Only Details is assigned; Id is left unset here.
employee = new Employee { Details = details };

// Register the document with the session, then save the changes.
session.Store(employee);
session.SaveChanges();
搜索方式:
// Term that is looked up under each dictionary key below.
var searchTerm = "in";

// One Search clause per key against the Details field; every clause after
// the first is combined using SearchOptions.Or.
var query = session
    .Query<Page, Search>()
    .Search(i => i.Details, $"EmployeeNo:({searchTerm})")
    .Search(i => i.Details, $"FirstNames:({searchTerm})", options: SearchOptions.Or)
    .Search(i => i.Details, $"Surname:({searchTerm})", options: SearchOptions.Or)
    .Search(i => i.Details, $"PositionCode:({searchTerm})", options: SearchOptions.Or)
    .Search(i => i.Details, $"PositionTitle:({searchTerm})", options: SearchOptions.Or);

// Execute the query and materialize the matches.
var result = query.ToList();
Lucene 查询输出:
from index 'Search' where search(Details, "EmployeeNo:(it)")
or search(Details, "FirstNames:(it)")
or search(Details, "Surname:(it)")
or search(Details, "PositionCode:(it)")
or search(Details, "PositionTitle:(it)")
知道为什么在输入这些特定单词时会返回随机结果吗?
问题是停用词。某些术语非常常见,以至于它们对于使用全文搜索进行搜索毫无意义。
例如 is、it、they、are 等等。
它们被查询分析器删除。
请参阅此处的讨论:https://ravendb.net/docs/article-page/4.2/Csharp/indexes/using-analyzers
您可以使用空白分析器代替标准分析器,因为前者不会消除停用词。
在获得 RavenDB 讨论组成员的帮助后,我们设法找到了适合我的场景的解决方案。
员工:
/// <summary>
/// Employee document: an identifier, the identifier of its department, and a
/// free-form set of named details.
/// </summary>
public class Employee
{
    /// <summary>Identifier of the employee.</summary>
    public string Id { get; set; }

    /// <summary>Identifier of the department this employee refers to.</summary>
    public string DepartmentId { get; set; }

    /// <summary>Detail values keyed by detail name.</summary>
    public Dictionary<string, object> Details { get; set; }
}
部门:
/// <summary>
/// Department document: an identifier and a name.
/// </summary>
public class Department
{
    /// <summary>Identifier of the department.</summary>
    public string Id { get; set; }

    /// <summary>Name of the department.</summary>
    public string Name { get; set; }
}
Page 类:
/// <summary>
/// Result shape used with the Search index: an identifier, a department
/// value, and a free-form set of named details.
/// </summary>
public class Page
{
    /// <summary>Identifier of the entry.</summary>
    public string Id { get; set; }

    /// <summary>Department value of the entry.</summary>
    public string Department { get; set; }

    /// <summary>Detail values keyed by detail name.</summary>
    public Dictionary<string, object> Details { get; set; }
}
索引(dynamic fields):
/// <summary>
/// Map-only index over <see cref="Employee"/> documents. Each entry carries
/// the employee Id, the name of the loaded <see cref="Department"/>, and one
/// <c>CreateField</c> result per entry of the employee's Details dictionary.
/// </summary>
public class Search : AbstractIndexCreationTask<Employee, Page>
{
    public Search()
    {
        Map = employees =>
            from employee in employees
            // Load the department document referenced by the employee.
            let dept = LoadDocument<Department>(employee.DepartmentId)
            select new
            {
                employee.Id,
                Department = dept.Name,
                // One CreateField call per Details entry, named by the entry's key.
                _ = employee.Details.Select(x => CreateField(x.Key, x.Value))
            };

        // Store the Department value in the index.
        Store(x => x.Department, FieldStorage.Yes);

        // Apply FieldIndexing.Search to the AllFields constant
        // (presumably every field of this index — confirm against RavenDB docs).
        Index(Constants.Documents.Indexing.Fields.AllFields, FieldIndexing.Search);
    }
}
查询:
using (var session = DocumentStoreHolder.Store.OpenAsyncSession())
{
    // Two terms, each wrapped in '*' wildcards.
    var searchTerm = "*yu* *dev*";

    // Apply the same terms to each named field of the Search index and
    // project the results as Page.
    var query = session.Advanced
        .AsyncDocumentQuery<Page, Search>()
        .Search("Department", searchTerm)
        .Search("EmployeeNo", searchTerm)
        .Search("FirstNames", searchTerm)
        .Search("Surname", searchTerm)
        .Search("PositionCode", searchTerm)
        .Search("PositionTitle", searchTerm)
        .SelectFields<Page>();

    // Execute the query asynchronously and materialize the matches.
    var result = await query.ToListAsync();
}
这样看来一切正常,不再有随机结果。
非常感谢 Ayende 和 Egor。
我正在尝试使用 RavenDB 4 中的 Search 方法在字典之上执行搜索。奇怪的是,如果搜索词是 in 或 it,我会得到随机结果。我绝对确定没有任何记录包含这些词。在 Studio 上执行等效的 Lucene 查询时也会发生这种情况。当我输入有效的搜索词(如员工姓名、编号等)时,它会按预期工作。
我已经根据真实场景创建了这个简单的场景。
这是索引:
/// <summary>
/// Map/reduce index over <see cref="Employee"/> documents. The map projects
/// each employee's Id and Details, the reduce groups by that same pair, and
/// the "Details" field is configured with <c>FieldIndexing.Search</c>.
/// </summary>
public class Search : AbstractIndexCreationTask<Employee, Page>
{
    public Search()
    {
        // Map: project the two members of every employee document.
        Map = employees =>
            from employee in employees
            select new
            {
                employee.Id,
                employee.Details
            };

        // Reduce: the grouping key is the (Id, Details) pair itself, and the
        // output re-emits that key unchanged.
        Reduce = results =>
            from result in results
            group result by new { result.Id, result.Details } into g
            select new
            {
                g.Key.Id,
                g.Key.Details
            };

        // Configure the "Details" field with FieldIndexing.Search.
        Index("Details", FieldIndexing.Search);
    }
}
Employee 类:
/// <summary>
/// Employee document: an identifier plus a free-form set of named details.
/// </summary>
public class Employee
{
    /// <summary>Identifier of the employee.</summary>
    public string Id { get; set; }

    /// <summary>Detail values keyed by detail name.</summary>
    public Dictionary<string, object> Details { get; set; }
}
添加员工:
// Build the details for one employee; the collection initializer adds each
// pair through Dictionary.Add.
details = new Dictionary<string, object>
{
    { "EmployeeNo", 25 },
    { "FirstNames", "Yuri" },
    { "Surname", "Cardoso" },
    { "PositionCode", "XYZ" },
    { "PositionTitle", "Developer" }
};

// Only Details is assigned; Id is left unset here.
employee = new Employee { Details = details };

// Register the document with the session, then save the changes.
session.Store(employee);
session.SaveChanges();
搜索方式:
// Term that is looked up under each dictionary key below.
var searchTerm = "in";

// One Search clause per key against the Details field; every clause after
// the first is combined using SearchOptions.Or.
var query = session
    .Query<Page, Search>()
    .Search(i => i.Details, $"EmployeeNo:({searchTerm})")
    .Search(i => i.Details, $"FirstNames:({searchTerm})", options: SearchOptions.Or)
    .Search(i => i.Details, $"Surname:({searchTerm})", options: SearchOptions.Or)
    .Search(i => i.Details, $"PositionCode:({searchTerm})", options: SearchOptions.Or)
    .Search(i => i.Details, $"PositionTitle:({searchTerm})", options: SearchOptions.Or);

// Execute the query and materialize the matches.
var result = query.ToList();
Lucene 查询输出:
from index 'Search' where search(Details, "EmployeeNo:(it)")
or search(Details, "FirstNames:(it)")
or search(Details, "Surname:(it)")
or search(Details, "PositionCode:(it)")
or search(Details, "PositionTitle:(it)")
知道为什么在输入这些特定单词时会返回随机结果吗?
问题是停用词。某些术语非常常见,以至于它们对于使用全文搜索进行搜索毫无意义。 例如 is、it、they、are 等等。 它们被查询分析器删除。 请参阅此处的讨论:https://ravendb.net/docs/article-page/4.2/Csharp/indexes/using-analyzers
您可以使用空白分析器代替标准分析器,因为前者不会消除停用词。
在获得 RavenDB 讨论组成员的帮助后,我们设法找到了适合我的场景的解决方案。
员工:
/// <summary>
/// Employee document: an identifier, the identifier of its department, and a
/// free-form set of named details.
/// </summary>
public class Employee
{
    /// <summary>Identifier of the employee.</summary>
    public string Id { get; set; }

    /// <summary>Identifier of the department this employee refers to.</summary>
    public string DepartmentId { get; set; }

    /// <summary>Detail values keyed by detail name.</summary>
    public Dictionary<string, object> Details { get; set; }
}
部门:
/// <summary>
/// Department document: an identifier and a name.
/// </summary>
public class Department
{
    /// <summary>Identifier of the department.</summary>
    public string Id { get; set; }

    /// <summary>Name of the department.</summary>
    public string Name { get; set; }
}
Page 类:
/// <summary>
/// Result shape used with the Search index: an identifier, a department
/// value, and a free-form set of named details.
/// </summary>
public class Page
{
    /// <summary>Identifier of the entry.</summary>
    public string Id { get; set; }

    /// <summary>Department value of the entry.</summary>
    public string Department { get; set; }

    /// <summary>Detail values keyed by detail name.</summary>
    public Dictionary<string, object> Details { get; set; }
}
索引(dynamic fields):
/// <summary>
/// Map-only index over <see cref="Employee"/> documents. Each entry carries
/// the employee Id, the name of the loaded <see cref="Department"/>, and one
/// <c>CreateField</c> result per entry of the employee's Details dictionary.
/// </summary>
public class Search : AbstractIndexCreationTask<Employee, Page>
{
    public Search()
    {
        Map = employees =>
            from employee in employees
            // Load the department document referenced by the employee.
            let dept = LoadDocument<Department>(employee.DepartmentId)
            select new
            {
                employee.Id,
                Department = dept.Name,
                // One CreateField call per Details entry, named by the entry's key.
                _ = employee.Details.Select(x => CreateField(x.Key, x.Value))
            };

        // Store the Department value in the index.
        Store(x => x.Department, FieldStorage.Yes);

        // Apply FieldIndexing.Search to the AllFields constant
        // (presumably every field of this index — confirm against RavenDB docs).
        Index(Constants.Documents.Indexing.Fields.AllFields, FieldIndexing.Search);
    }
}
查询:
using (var session = DocumentStoreHolder.Store.OpenAsyncSession())
{
    // Two terms, each wrapped in '*' wildcards.
    var searchTerm = "*yu* *dev*";

    // Apply the same terms to each named field of the Search index and
    // project the results as Page.
    var query = session.Advanced
        .AsyncDocumentQuery<Page, Search>()
        .Search("Department", searchTerm)
        .Search("EmployeeNo", searchTerm)
        .Search("FirstNames", searchTerm)
        .Search("Surname", searchTerm)
        .Search("PositionCode", searchTerm)
        .Search("PositionTitle", searchTerm)
        .SelectFields<Page>();

    // Execute the query asynchronously and materialize the matches.
    var result = await query.ToListAsync();
}
这样看来一切正常,不再有随机结果。 非常感谢 Ayende 和 Egor。