如何使用 Hypothesis 生成具有指定大小、指定总和且元素不低于指定最小值的正浮点数列表
How to generate lists of positive floats with a specified size, sum, and a minimum value using Hypothesis
这个问题是关于 Hypothesis 的——一个基于属性的测试(property-based testing)库。
我想要一种策略,该策略会为我提供具有固定大小、指定总和的正浮点数列表,并且任何元素都不会低于某个指定的最小阈值。
例如:
# Desired parameters of the generated lists.
size = 5
sum_ = 1
threshold = 0.1
# Placeholder: `magic_strategy` stands for the strategy being asked about;
# it is not defined in this snippet.
...
# Print three sample lists drawn from the strategy.
for _ in range(3):
    print(magic_strategy.example())
可以给出这样的东西:
[0.4, 0.2, 0.1, 0.1, 0.2]
[0.15, 0.25, 0.25, 0.2, 0.15]
[0.2, 0.2, 0.2, 0.2, 0.2]
如何编写这样的策略?
尝试 filtering 失败:
from hypothesis import strategies as st
# Parameters of the desired lists.
size = 5
sum_ = 1
threshold = 0.1


def _rescale_to_sum(values):
    """Scale ``values`` by ``sum_ / sum(values)``."""
    total = sum(values)
    return [value * sum_ / total for value in values]


def _all_above_threshold(values):
    """Tell whether every element is strictly greater than ``threshold``."""
    # NOTE: the comparison is strict, so an element equal to ``threshold``
    # makes the whole list be rejected.
    for value in values:
        if not value > threshold:
            return False
    return True


# Strictly positive floats without infinity.
domain_values = st.floats(min_value=0, allow_infinity=False, exclude_min=True)
# Lists of exactly ``size`` such floats.
domain_values_lists = st.lists(domain_values, min_size=size, max_size=size)
# The same lists rescaled towards the requested total.
normalized_lists = domain_values_lists.map(_rescale_to_sum)
# Only the rescaled lists whose elements all exceed the threshold.
lists_with_threshold = normalized_lists.filter(_all_above_threshold)
这种方法的问题在于:出于某种原因,每当我对这个策略调用 example() 时,无论给定的输入参数如何,它给出的列表中所有元素总是相同的值:
[0.2, 0.2, 0.2, ..., 0.2]
[0.01, 0.01, 0.01, ..., 0.01]
[4.0, 4.0, 4.0, ..., 4.0]
主要思想是:先构造一些值,把它们映射到区间 [0, 1],再映射到 [0, sum_],使其成为 sum_ 的一个分区(即各元素之和等于 sum_);最后把下界平移,使元素落在 [min_value, sum_] 内,同时保持分区性质:
from math import floor
from numbers import Real
from typing import List
from hypothesis import strategies
from hypothesis.strategies import SearchStrategy as Strategy
MIN_PARTITION_SIZE = 1
def to_partitions(sum_: Real,
                  *,
                  min_value: Real = 0,
                  size: int = MIN_PARTITION_SIZE,
                  base: Strategy[Real] = strategies.integers()
                  ) -> Strategy[List[Real]]:
    """Build a strategy of ``size``-element lists that partition ``sum_``.

    Every generated list has exactly ``size`` elements, each element is
    not less than ``min_value`` and ``sum(partition) == sum_`` holds
    exactly.  Candidates that still miss one of these properties after
    the floating-point corrections are filtered out.

    :param sum_: target total of every generated list.
    :param min_value: lower bound of every element, within ``[0, sum_]``.
    :param size: number of elements of every generated list.
    :param base: strategy of raw numbers the partitions are derived from.
    :returns: strategy producing the partitions.
    :raises ValueError: if ``size`` is less than ``MIN_PARTITION_SIZE``,
        if ``min_value`` lies outside ``[0, sum_]``, or if ``size``
        elements of at least ``min_value`` do not fit into ``sum_``.
    """
    if size < MIN_PARTITION_SIZE:
        raise ValueError('`size` should not be less '
                         f'than {MIN_PARTITION_SIZE}.')
    if not (0 <= min_value <= sum_):
        raise ValueError(f'`min_value` should be in [0, {sum_}] interval.')
    # For an integer ``size``, ``floor(q) < size`` is equivalent to
    # ``q < size``.  Comparing the quotient first avoids calling ``floor``
    # on infinity (which raises ``OverflowError``) when a tiny ``min_value``
    # makes the quotient overflow; inside the branch the quotient is finite.
    if min_value and sum_ / min_value < size:
        max_size = floor(sum_ / min_value)
        raise ValueError(f'`size` should not be greater than {max_size}.')

    def to_proportions(numbers: List[Real]) -> List[Real]:
        # ``2 * |x| / (1 + x^2)`` lies in [0, 1] for every real ``x``.
        return [2 * abs(number) / (1 + number * number) for number in numbers]

    def has_positive_total(proportions: List[Real]) -> bool:
        # Proportions of very large numbers may all underflow to zero, which
        # would make ``to_partition`` divide by zero.  A NaN total compares
        # false here and is rejected as well.
        return sum(proportions) > 0

    def to_partition(proportions: List[Real]) -> List[Real]:
        # Rescale the proportions so that they add up to ``sum_``.
        factor = sum_ / sum(proportions)
        return [proportion * factor for proportion in proportions]

    def bound_minimum(partition: List[Real]) -> List[Real]:
        minimum = min(partition)
        if minimum >= min_value:
            return partition
        # Affine map that sends the smallest element to ``min_value`` while
        # keeping the total equal to ``sum_``; ``max`` clamps results that
        # rounding leaves just below ``min_value``.
        partition_size = len(partition)
        denominator = sum_ - partition_size * minimum
        slope = sum_ - partition_size * min_value
        intercept = sum_ * (min_value - minimum)
        return [max((part * slope + intercept) / denominator, min_value)
                for part in partition]

    def normalize(partition: List[Real]) -> List[Real]:
        # Absorb the rounding difference into a single element (in place):
        # the shortfall goes to the smallest element, the excess is taken
        # from the largest one.
        partition_sum = sum(partition)
        if partition_sum < sum_:
            arg_min = min(range(len(partition)),
                          key=partition.__getitem__)
            partition[arg_min] += sum_ - partition_sum
        elif partition_sum > sum_:
            arg_max = max(range(len(partition)),
                          key=partition.__getitem__)
            partition[arg_max] -= partition_sum - sum_
        return partition

    def is_valid(partition: List[Real]) -> bool:
        # ``normalize`` may lower the largest element, so the lower bound is
        # re-checked together with the exact total.
        return sum(partition) == sum_ and min(partition) >= min_value

    return (strategies.lists(base,
                             min_size=size,
                             max_size=size)
            # An all-zero list has no element to distribute ``sum_`` over.
            .filter(any)
            .map(to_proportions)
            .filter(has_positive_total)
            .map(to_partition)
            .map(bound_minimum)
            .map(normalize)
            .filter(is_valid))
为了对数字做“归一化”,我们利用实数的一个众所周知的性质:
(x - 1) ^ 2 >= 0 | since square is non-negative
x^2 + 1 >= 2 * x | divide both sides by positive x^2 + 1
...
1 >= 2 * x / (x^2 + 1)
我们还加入了一些技巧来处理浮点数可能带来的问题(例如,在把下界平移到 min_value 之后,某个元素接近但略小于 min_value;或者分区的总和接近但不等于 sum_)。
测试
from math import floor
from numbers import Real
from typing import List
from hypothesis import (given,
strategies)
from hypothesis.strategies import DataObject
@given(strategies.data(), strategies.floats(0, 100))
def test_to_partitions(data: DataObject, sum_: Real) -> None:
    """Check the size, total and lower bound of a drawn partition."""
    min_value = data.draw(strategies.floats(0, sum_))
    # The size is capped at 100 and, for a positive ``min_value``, at the
    # number of ``min_value``-sized parts fitting into ``sum_``.  The
    # quotient is compared before flooring because it overflows to infinity
    # for a tiny ``min_value`` and ``floor(inf)`` raises ``OverflowError``.
    max_size = 100
    if min_value:
        ratio = sum_ / min_value
        if ratio < max_size:
            max_size = floor(ratio)
    size = data.draw(strategies.integers(MIN_PARTITION_SIZE, max_size))
    strategy = to_partitions(sum_,
                             min_value=min_value,
                             size=size)
    partition = data.draw(strategy)
    assert sum(partition) == sum_
    assert len(partition) == size
    assert all(part >= min_value for part in partition)
测试看起来可以通过。
这个问题是关于 Hypothesis 的——一个基于属性的测试(property-based testing)库。
我想要一种策略,该策略会为我提供具有固定大小、指定总和的正浮点数列表,并且任何元素都不会低于某个指定的最小阈值。
例如:
# Desired parameters of the generated lists.
size = 5
sum_ = 1
threshold = 0.1
# Placeholder: `magic_strategy` stands for the strategy being asked about;
# it is not defined in this snippet.
...
# Print three sample lists drawn from the strategy.
for _ in range(3):
    print(magic_strategy.example())
可以给出这样的东西:
[0.4, 0.2, 0.1, 0.1, 0.2]
[0.15, 0.25, 0.25, 0.2, 0.15]
[0.2, 0.2, 0.2, 0.2, 0.2]
如何编写这样的策略?
尝试 filtering 失败:
from hypothesis import strategies as st
# Parameters of the desired lists.
size = 5
sum_ = 1
threshold = 0.1


def _rescale_to_sum(values):
    """Scale ``values`` by ``sum_ / sum(values)``."""
    total = sum(values)
    return [value * sum_ / total for value in values]


def _all_above_threshold(values):
    """Tell whether every element is strictly greater than ``threshold``."""
    # NOTE: the comparison is strict, so an element equal to ``threshold``
    # makes the whole list be rejected.
    for value in values:
        if not value > threshold:
            return False
    return True


# Strictly positive floats without infinity.
domain_values = st.floats(min_value=0, allow_infinity=False, exclude_min=True)
# Lists of exactly ``size`` such floats.
domain_values_lists = st.lists(domain_values, min_size=size, max_size=size)
# The same lists rescaled towards the requested total.
normalized_lists = domain_values_lists.map(_rescale_to_sum)
# Only the rescaled lists whose elements all exceed the threshold.
lists_with_threshold = normalized_lists.filter(_all_above_threshold)
这种方法的问题在于:出于某种原因,每当我对这个策略调用 example() 时,无论给定的输入参数如何,它给出的列表中所有元素总是相同的值:
[0.2, 0.2, 0.2, ..., 0.2]
[0.01, 0.01, 0.01, ..., 0.01]
[4.0, 4.0, 4.0, ..., 4.0]
主要思想是:先构造一些值,把它们映射到区间 [0, 1],再映射到 [0, sum_],使其成为 sum_ 的一个分区(即各元素之和等于 sum_);最后把下界平移,使元素落在 [min_value, sum_] 内,同时保持分区性质:
from math import floor
from numbers import Real
from typing import List
from hypothesis import strategies
from hypothesis.strategies import SearchStrategy as Strategy
MIN_PARTITION_SIZE = 1
def to_partitions(sum_: Real,
                  *,
                  min_value: Real = 0,
                  size: int = MIN_PARTITION_SIZE,
                  base: Strategy[Real] = strategies.integers()
                  ) -> Strategy[List[Real]]:
    """Build a strategy of ``size``-element lists that partition ``sum_``.

    Every generated list has exactly ``size`` elements, each element is
    not less than ``min_value`` and ``sum(partition) == sum_`` holds
    exactly.  Candidates that still miss one of these properties after
    the floating-point corrections are filtered out.

    :param sum_: target total of every generated list.
    :param min_value: lower bound of every element, within ``[0, sum_]``.
    :param size: number of elements of every generated list.
    :param base: strategy of raw numbers the partitions are derived from.
    :returns: strategy producing the partitions.
    :raises ValueError: if ``size`` is less than ``MIN_PARTITION_SIZE``,
        if ``min_value`` lies outside ``[0, sum_]``, or if ``size``
        elements of at least ``min_value`` do not fit into ``sum_``.
    """
    if size < MIN_PARTITION_SIZE:
        raise ValueError('`size` should not be less '
                         f'than {MIN_PARTITION_SIZE}.')
    if not (0 <= min_value <= sum_):
        raise ValueError(f'`min_value` should be in [0, {sum_}] interval.')
    # For an integer ``size``, ``floor(q) < size`` is equivalent to
    # ``q < size``.  Comparing the quotient first avoids calling ``floor``
    # on infinity (which raises ``OverflowError``) when a tiny ``min_value``
    # makes the quotient overflow; inside the branch the quotient is finite.
    if min_value and sum_ / min_value < size:
        max_size = floor(sum_ / min_value)
        raise ValueError(f'`size` should not be greater than {max_size}.')

    def to_proportions(numbers: List[Real]) -> List[Real]:
        # ``2 * |x| / (1 + x^2)`` lies in [0, 1] for every real ``x``.
        return [2 * abs(number) / (1 + number * number) for number in numbers]

    def has_positive_total(proportions: List[Real]) -> bool:
        # Proportions of very large numbers may all underflow to zero, which
        # would make ``to_partition`` divide by zero.  A NaN total compares
        # false here and is rejected as well.
        return sum(proportions) > 0

    def to_partition(proportions: List[Real]) -> List[Real]:
        # Rescale the proportions so that they add up to ``sum_``.
        factor = sum_ / sum(proportions)
        return [proportion * factor for proportion in proportions]

    def bound_minimum(partition: List[Real]) -> List[Real]:
        minimum = min(partition)
        if minimum >= min_value:
            return partition
        # Affine map that sends the smallest element to ``min_value`` while
        # keeping the total equal to ``sum_``; ``max`` clamps results that
        # rounding leaves just below ``min_value``.
        partition_size = len(partition)
        denominator = sum_ - partition_size * minimum
        slope = sum_ - partition_size * min_value
        intercept = sum_ * (min_value - minimum)
        return [max((part * slope + intercept) / denominator, min_value)
                for part in partition]

    def normalize(partition: List[Real]) -> List[Real]:
        # Absorb the rounding difference into a single element (in place):
        # the shortfall goes to the smallest element, the excess is taken
        # from the largest one.
        partition_sum = sum(partition)
        if partition_sum < sum_:
            arg_min = min(range(len(partition)),
                          key=partition.__getitem__)
            partition[arg_min] += sum_ - partition_sum
        elif partition_sum > sum_:
            arg_max = max(range(len(partition)),
                          key=partition.__getitem__)
            partition[arg_max] -= partition_sum - sum_
        return partition

    def is_valid(partition: List[Real]) -> bool:
        # ``normalize`` may lower the largest element, so the lower bound is
        # re-checked together with the exact total.
        return sum(partition) == sum_ and min(partition) >= min_value

    return (strategies.lists(base,
                             min_size=size,
                             max_size=size)
            # An all-zero list has no element to distribute ``sum_`` over.
            .filter(any)
            .map(to_proportions)
            .filter(has_positive_total)
            .map(to_partition)
            .map(bound_minimum)
            .map(normalize)
            .filter(is_valid))
为了对数字做“归一化”,我们利用实数的一个众所周知的性质:
(x - 1) ^ 2 >= 0 | since square is non-negative
x^2 + 1 >= 2 * x | divide both sides by positive x^2 + 1
...
1 >= 2 * x / (x^2 + 1)
我们还加入了一些技巧来处理浮点数可能带来的问题(例如,在把下界平移到 min_value 之后,某个元素接近但略小于 min_value;或者分区的总和接近但不等于 sum_)。
测试
from math import floor
from numbers import Real
from typing import List
from hypothesis import (given,
strategies)
from hypothesis.strategies import DataObject
@given(strategies.data(), strategies.floats(0, 100))
def test_to_partitions(data: DataObject, sum_: Real) -> None:
    """Check the size, total and lower bound of a drawn partition."""
    min_value = data.draw(strategies.floats(0, sum_))
    # The size is capped at 100 and, for a positive ``min_value``, at the
    # number of ``min_value``-sized parts fitting into ``sum_``.  The
    # quotient is compared before flooring because it overflows to infinity
    # for a tiny ``min_value`` and ``floor(inf)`` raises ``OverflowError``.
    max_size = 100
    if min_value:
        ratio = sum_ / min_value
        if ratio < max_size:
            max_size = floor(ratio)
    size = data.draw(strategies.integers(MIN_PARTITION_SIZE, max_size))
    strategy = to_partitions(sum_,
                             min_value=min_value,
                             size=size)
    partition = data.draw(strategy)
    assert sum(partition) == sum_
    assert len(partition) == size
    assert all(part >= min_value for part in partition)
测试看起来可以通过。