ElasticSearch NEST:获取某一时间段内、但排除另一时间段的聚合结果
ElasticSearch Nest. Get aggregation result within timespan excluding another timespan
我想筛选出只在给定时间段内出现的用户。
例如,获取仅在 2016 年访问过、而在 2017 年没有访问过的用户列表。仅按 2016 年的时间范围过滤并不能得到这个结果,因为这些用户也可能在 2017 年出现。因此,一种可行的做法是计算 [2016..2017 users] - [2017 users] 的差集。
我的尝试是向 ES 发送 2 个查询([2016..2017 users]
和 [2017 users]
)并在我的应用程序中使用 userList_20162017.Except(userList_2017)
过滤掉。
但我觉得这种做法效率很低。能否只用一次 ElasticSearch NEST 查询来实现?
// Finds the users that have documents in the 2016..now range but none in the
// 2017..now range by running two terms aggregations and subtracting the
// resulting id lists on the client side.
void Main()
{
    // NOTE: connectionSettings is not defined in this snippet; it must be supplied by the caller.
    var client = new ElasticClient(connectionSettings);
    var twoYearsAgo = new DateTime(2016, 1, 1);
    var yearAgo = new DateTime(2017, 1, 1);
    // Capture "now" once so both queries share exactly the same upper bound.
    var now = DateTime.UtcNow;

    // get 2016..2017 users
    var searchResponse20162017 = client.Search<Visitor>(s => s
        .Size(0)
        .Query(q => q
            .DateRange(c => c.Field(p => p.CreationDate)
                .GreaterThan(twoYearsAgo)
                .LessThan(now)
            )
        )
        .Aggregations(a => a
            .Terms("unique_users", c => c
                .Field(f => f.OwnerUserId)
                .Size(int.MaxValue)
            )
        )
    );

    // get 2017 users
    var searchResponse2017 = client.Search<Visitor>(s => s
        .Size(0)
        .Query(q => q
            .DateRange(c => c.Field(p => p.CreationDate)
                .GreaterThan(yearAgo)
                .LessThan(now)
            )
        )
        .Aggregations(a => a
            .Terms("unique_users", c => c
                .Field(f => f.OwnerUserId)
                .Size(int.MaxValue)
            )
        )
    );

    var uniqueUser20162017 = searchResponse20162017.Aggs.Terms("unique_users").Buckets.Select(b => b.KeyAsString).ToList();
    var uniqueUser2017 = searchResponse2017.Aggs.Terms("unique_users").Buckets.Select(b => b.KeyAsString).ToList();

    // Final result: the set difference must be taken over the extracted id
    // lists, not over the search response objects themselves.
    var uniqueUser2016Only = uniqueUser20162017.Except(uniqueUser2017).ToList();
}
可以用 filter 子聚合(sub aggregation)做到这一点:首先,用 terms 聚合获取 2016 和 2017 两年范围内的唯一 ID,然后在其下执行 filter 子聚合,统计其中不属于 2017 年范围的文档。如果某个 ID 在 terms 聚合中的文档数等于其 filter 子聚合的文档数,那么该 ID 只出现在 2016 年,而没有出现在 2017 年。
这是一个例子
// Example: recreates the "examples" index, indexes four sample documents and
// runs a single search whose terms aggregation carries a filter sub-aggregation
// counting, per id, the documents that are NOT in the 2017 date range.
void Main()
{
    var defaultIndex = "examples";
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
    var connectionSettings = new ConnectionSettings(pool).DefaultIndex(defaultIndex);
    var client = new ElasticClient(connectionSettings);

    // Start from a clean index on every run.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    // id 1 appears in both years, id 2 only in 2016, id 3 only in 2017.
    var examples = new[]
    {
        new Example(1, new DateTime(2016, 01, 01)),
        new Example(1, new DateTime(2017, 01, 01)),
        new Example(2, new DateTime(2016, 01, 01)),
        new Example(3, new DateTime(2017, 01, 01)),
    };

    client.Bulk(bulk => bulk
        .IndexMany(examples)
        .Refresh(Refresh.WaitFor));

    // Range boundaries shared by the query and the sub-aggregation.
    var startOf2016 = new DateTime(2016, 01, 01);
    var startOf2017 = new DateTime(2017, 01, 01);
    var startOf2018 = new DateTime(2018, 01, 01);

    client.Search<Example>(search => search
        .Size(0)
        // NEST operator overloads: unary '+' places the query in a bool filter clause.
        .Query(query => +query
            .DateRange(range => range
                .Field(doc => doc.Date)
                .GreaterThanOrEquals(startOf2016)
                .LessThan(startOf2018)
            )
        )
        .Aggregations(aggs => aggs
            .Terms("ids_in_2016_and_2017", terms => terms
                .Field(doc => doc.ExampleId)
                .Size(int.MaxValue)
                .Aggregations(subAggs => subAggs
                    .Filter("ids_only_in_2016", filterAgg => filterAgg
                        // '!' negates the range (must_not), '+' keeps it in filter context.
                        .Filter(filterQuery => +!filterQuery
                            .DateRange(range => range
                                .Field(doc => doc.Date)
                                .GreaterThanOrEquals(startOf2017)
                                .LessThan(startOf2018)
                            )
                        )
                    )
                )
            )
        )
    );
}
/// <summary>
/// Simple document pairing an integer id with a date.
/// </summary>
public class Example
{
    /// <summary>Identifier of the example.</summary>
    public int ExampleId { get; set; }

    /// <summary>Date associated with the example.</summary>
    public DateTime Date { get; set; }

    /// <summary>
    /// Creates an example with the given id and date.
    /// </summary>
    /// <param name="exampleId">Value stored in <see cref="ExampleId"/>.</param>
    /// <param name="date">Value stored in <see cref="Date"/>.</param>
    public Example(int exampleId, DateTime date)
    {
        this.ExampleId = exampleId;
        this.Date = date;
    }
}
ExampleId
2 只出现在 2016 年而不出现在 2017 年,因为它在 2016 年和 2017 年的文档数仅等于 2016 年的文档数
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"ids_in_2016_and_2017" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1,
"doc_count" : 2,
"ids_only_in_2016" : {
"doc_count" : 1
}
},
{
"key" : 2,
"doc_count" : 1,
"ids_only_in_2016" : {
"doc_count" : 1
}
},
{
"key" : 3,
"doc_count" : 1,
"ids_only_in_2016" : {
"doc_count" : 0
}
}
]
}
}
}
*OP appended: result to get a list of userId.
// Collects the ids that occur only outside the 2017 range: a bucket qualifies
// when its total doc count equals the doc count of its "ids_only_in_2016"
// filter sub-aggregation.
// The terms aggregation name must match the one used in the query
// ("ids_in_2016_and_2017"); a different name finds no aggregation.
// NOTE(review): assumes searchResponse1 holds the response of that search - confirm.
var list = searchResponse1.Aggs.Terms("ids_in_2016_and_2017").Buckets
    .Where(o => o.DocCount == ((Nest.SingleBucketAggregate)o.Aggregations["ids_only_in_2016"]).DocCount)
    .Select(o => o.Key)
    .ToList();
我想筛选出只在给定时间段内出现的用户。
例如,获取仅在 2016 年访问过、而在 2017 年没有访问过的用户列表。仅按 2016 年的时间范围过滤并不能得到这个结果,因为这些用户也可能在 2017 年出现。因此,一种可行的做法是计算 [2016..2017 users] - [2017 users] 的差集。
我的尝试是向 ES 发送 2 个查询([2016..2017 users]
和 [2017 users]
)并在我的应用程序中使用 userList_20162017.Except(userList_2017)
过滤掉。
但我觉得这种做法效率很低。能否只用一次 ElasticSearch NEST 查询来实现?
// Finds the users that have documents in the 2016..now range but none in the
// 2017..now range by running two terms aggregations and subtracting the
// resulting id lists on the client side.
void Main()
{
    // NOTE: connectionSettings is not defined in this snippet; it must be supplied by the caller.
    var client = new ElasticClient(connectionSettings);
    var twoYearsAgo = new DateTime(2016, 1, 1);
    var yearAgo = new DateTime(2017, 1, 1);
    // Capture "now" once so both queries share exactly the same upper bound.
    var now = DateTime.UtcNow;

    // get 2016..2017 users
    var searchResponse20162017 = client.Search<Visitor>(s => s
        .Size(0)
        .Query(q => q
            .DateRange(c => c.Field(p => p.CreationDate)
                .GreaterThan(twoYearsAgo)
                .LessThan(now)
            )
        )
        .Aggregations(a => a
            .Terms("unique_users", c => c
                .Field(f => f.OwnerUserId)
                .Size(int.MaxValue)
            )
        )
    );

    // get 2017 users
    var searchResponse2017 = client.Search<Visitor>(s => s
        .Size(0)
        .Query(q => q
            .DateRange(c => c.Field(p => p.CreationDate)
                .GreaterThan(yearAgo)
                .LessThan(now)
            )
        )
        .Aggregations(a => a
            .Terms("unique_users", c => c
                .Field(f => f.OwnerUserId)
                .Size(int.MaxValue)
            )
        )
    );

    var uniqueUser20162017 = searchResponse20162017.Aggs.Terms("unique_users").Buckets.Select(b => b.KeyAsString).ToList();
    var uniqueUser2017 = searchResponse2017.Aggs.Terms("unique_users").Buckets.Select(b => b.KeyAsString).ToList();

    // Final result: the set difference must be taken over the extracted id
    // lists, not over the search response objects themselves.
    var uniqueUser2016Only = uniqueUser20162017.Except(uniqueUser2017).ToList();
}
可以用 filter 子聚合(sub aggregation)做到这一点:首先,用 terms 聚合获取 2016 和 2017 两年范围内的唯一 ID,然后在其下执行 filter 子聚合,统计其中不属于 2017 年范围的文档。如果某个 ID 在 terms 聚合中的文档数等于其 filter 子聚合的文档数,那么该 ID 只出现在 2016 年,而没有出现在 2017 年。
这是一个例子
// Example: recreates the "examples" index, indexes four sample documents and
// runs a single search whose terms aggregation carries a filter sub-aggregation
// counting, per id, the documents that are NOT in the 2017 date range.
void Main()
{
    var defaultIndex = "examples";
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
    var connectionSettings = new ConnectionSettings(pool).DefaultIndex(defaultIndex);
    var client = new ElasticClient(connectionSettings);

    // Start from a clean index on every run.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    // id 1 appears in both years, id 2 only in 2016, id 3 only in 2017.
    var examples = new[]
    {
        new Example(1, new DateTime(2016, 01, 01)),
        new Example(1, new DateTime(2017, 01, 01)),
        new Example(2, new DateTime(2016, 01, 01)),
        new Example(3, new DateTime(2017, 01, 01)),
    };

    client.Bulk(bulk => bulk
        .IndexMany(examples)
        .Refresh(Refresh.WaitFor));

    // Range boundaries shared by the query and the sub-aggregation.
    var startOf2016 = new DateTime(2016, 01, 01);
    var startOf2017 = new DateTime(2017, 01, 01);
    var startOf2018 = new DateTime(2018, 01, 01);

    client.Search<Example>(search => search
        .Size(0)
        // NEST operator overloads: unary '+' places the query in a bool filter clause.
        .Query(query => +query
            .DateRange(range => range
                .Field(doc => doc.Date)
                .GreaterThanOrEquals(startOf2016)
                .LessThan(startOf2018)
            )
        )
        .Aggregations(aggs => aggs
            .Terms("ids_in_2016_and_2017", terms => terms
                .Field(doc => doc.ExampleId)
                .Size(int.MaxValue)
                .Aggregations(subAggs => subAggs
                    .Filter("ids_only_in_2016", filterAgg => filterAgg
                        // '!' negates the range (must_not), '+' keeps it in filter context.
                        .Filter(filterQuery => +!filterQuery
                            .DateRange(range => range
                                .Field(doc => doc.Date)
                                .GreaterThanOrEquals(startOf2017)
                                .LessThan(startOf2018)
                            )
                        )
                    )
                )
            )
        )
    );
}
/// <summary>
/// Simple document pairing an integer id with a date.
/// </summary>
public class Example
{
    /// <summary>Identifier of the example.</summary>
    public int ExampleId { get; set; }

    /// <summary>Date associated with the example.</summary>
    public DateTime Date { get; set; }

    /// <summary>
    /// Creates an example with the given id and date.
    /// </summary>
    /// <param name="exampleId">Value stored in <see cref="ExampleId"/>.</param>
    /// <param name="date">Value stored in <see cref="Date"/>.</param>
    public Example(int exampleId, DateTime date)
    {
        this.ExampleId = exampleId;
        this.Date = date;
    }
}
ExampleId
2 只出现在 2016 年而不出现在 2017 年,因为它在 2016 年和 2017 年的文档数仅等于 2016 年的文档数
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"ids_in_2016_and_2017" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1,
"doc_count" : 2,
"ids_only_in_2016" : {
"doc_count" : 1
}
},
{
"key" : 2,
"doc_count" : 1,
"ids_only_in_2016" : {
"doc_count" : 1
}
},
{
"key" : 3,
"doc_count" : 1,
"ids_only_in_2016" : {
"doc_count" : 0
}
}
]
}
}
}
*OP appended: result to get a list of userId.
// Collects the ids that occur only outside the 2017 range: a bucket qualifies
// when its total doc count equals the doc count of its "ids_only_in_2016"
// filter sub-aggregation.
// The terms aggregation name must match the one used in the query
// ("ids_in_2016_and_2017"); a different name finds no aggregation.
// NOTE(review): assumes searchResponse1 holds the response of that search - confirm.
var list = searchResponse1.Aggs.Terms("ids_in_2016_and_2017").Buckets
    .Where(o => o.DocCount == ((Nest.SingleBucketAggregate)o.Aggregations["ids_only_in_2016"]).DocCount)
    .Select(o => o.Key)
    .ToList();