在 Python 中随机选择数组中的连续元素
Random selection of contiguous elements in an array in Python
我有一个索引列表,例如 0...365,我想从中无放回地随机选出若干个连续子区域。
# Indices 1..365, the sequence the regions are drawn from.
index = [i + 1 for i in range(365)]
n = 3  # number of contiguous regions to draw
exclusion_regions = []
for _ in range(n):
    # get_random_contiguous_region() is the helper this question asks for.
    exclusion_regions.append(get_random_contiguous_region(index))
有没有人对 get_random_contiguous_region() 的实现有什么建议?
我们需要一个 while 循环来确保各个切片最终不会重叠;在循环中还可以检查切片的长度是否满足其他条件,而使用列表推导式则无法指定这些不同的条件:
如果您想要长度约为列表总长度 5% 到 15% 的随机切片,并且样本总量约为列表的 30%:
from random import choice, randrange

from numpy import arange

# The values 1..365 that the contiguous slices are cut from.
index = [i + 1 for i in range(365)]
choices = []  # accepted slices, in the order they were drawn
seen = set()  # every value already covered by an accepted slice
ar = arange(0.05, .16, .01)  # slice sizes as fractions of the list, 1% steps
ln = len(index)
sample_size = 0
# Keep drawing until the accepted slices cover about 30% of the list.
while sample_size < ln * .30:
    perc = choice(ar)  # random fraction between roughly 5% and 15%
    size = int(ln * perc)
    # Draw a start *position* (not a value taken from index) so the slice
    # always fits and the first element can be selected as well.
    start = randrange(ln - size + 1)
    rn = index[start:start + size]
    # Reject candidates that overlap an already accepted slice.
    if not seen.intersection(rn):
        seen.update(rn)
        choices.append(rn)
        sample_size += len(rn)
print(choices)
你可以这样做:
import random

n = 3  # number of contiguous regions to pick
index = [i + 1 for i in range(10)]
# Draw 2*n distinct cut points and sort them; consecutive pairs are then the
# (start, end) bounds of n non-overlapping slices. The candidates include
# len(index) so that a slice is able to reach the final element.
slices = sorted(random.sample(range(len(index) + 1), 2 * n))
regions = [index[start:end] for start, end in zip(slices[::2], slices[1::2])]
print(regions)
这是一个以符号方式处理范围(range 对象)的解决方案,而不是逐个考虑每个元素。
(对于您正在处理的小基数,它可能有点矫枉过正,但对于包含数万个项目的范围,它的效率会高得多。)
编辑:我已经更新了代码,允许将长度指定为整数,或指定为返回整数的无参数函数。这样您就可以把长度作为一个分布给出,而不仅仅是常数。
import random
def range_intersection(a, b):
    """Return the overlap of two unit-step ranges as a range.

    The result is an empty (and therefore falsy) range when ``a`` and ``b``
    share no values.

    Raises:
        NotImplementedError: if either range has a step other than 1.
    """
    if a.step == b.step == 1:
        return range(max(a.start, b.start), min(a.stop, b.stop), 1)
    # Stepped ranges are not supported. NotImplementedError is the exception
    # class; the NotImplemented constant cannot be raised.
    raise NotImplementedError("only ranges with step 1 are supported")
def random_subrange(length, range_):
    """Return a random run of ``length`` consecutive items of ``range_``.

    Args:
        length: number of items the sub-range should contain.
        range_: the ``range`` object to cut the sub-range from.

    Returns:
        A ``range`` with the same step as ``range_``.

    Raises:
        ValueError: if ``length`` is greater than ``len(range_)``.
    """
    # Choose the position of the first item so that every window that fits,
    # including the one ending on the last item, can be drawn.
    first = random.randrange(len(range_) - length + 1)
    # Slicing a range yields another range with the same step.
    return range_[first:first + length]
def const_fn(n):
    """Wrap ``n`` in a zero-argument callable that always returns it."""
    return lambda: n
def random_distinct_subranges(num, length, range_):
    """Draw ``num`` mutually non-overlapping random sub-ranges of ``range_``.

    ``length`` is either a fixed size or a zero-argument callable that is
    asked for a new size on every draw. Candidates that intersect a range
    picked earlier are discarded and redrawn. The result is sorted by start.
    """
    next_length = length if callable(length) else const_fn(length)
    chosen = []
    for _ in range(num):
        candidate = random_subrange(next_length(), range_)
        # Redraw until the candidate is clear of every accepted range.
        while any(range_intersection(candidate, taken) for taken in chosen):
            candidate = random_subrange(next_length(), range_)
        chosen.append(candidate)
    chosen.sort(key=lambda picked: picked.start)
    return chosen
然后
# One index per day, 1 through 365.
days = range(1, 366)
# Draw three non-overlapping periods, each lasting between 5 and 15 days.
periods = random_distinct_subranges(
    num=3,
    length=lambda: random.randint(5, 15),
    range_=days,
)
print(periods)
会给出类似下面的结果:
[range(78, 92), range(147, 155), range(165, 173)]
可以像这样迭代
from itertools import chain

# Flatten the selected periods into a single stream of individual days.
rand_days = chain.from_iterable(periods)
print(list(rand_days))
输出:
[78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 147, 148, 149, 150, 151, 152, 153, 154, 165, 166, 167, 168, 169, 170, 171, 172]
这是一种相当简单的递归方法:先把索引列表随机划分为长度在给定范围内的若干连续子序列,然后从中选出三个子序列。
from random import randint, sample

# A list (not a bare range) so that the pieces print as lists of indices.
indexes = list(range(1, 80))


def get_random_division(lst, minsize, maxsize):
    """Cut ``lst`` into consecutive pieces of random length.

    Each cut length is drawn with ``randint(minsize, maxsize)``. When a cut
    would leave fewer than ``minsize`` items behind, the remaining items are
    kept together as the final piece, which may therefore be longer than
    ``maxsize``.

    Returns:
        A list of slices of ``lst`` that concatenate back to ``lst``.

    Raises:
        ValueError: if ``maxsize`` is below 1 (no cut could make progress)
            or ``minsize`` exceeds ``maxsize``.
    """
    if maxsize < 1:
        raise ValueError("maxsize must be at least 1")
    pieces = []
    remaining = lst
    # Iterative so that long inputs cannot hit the recursion limit.
    while True:
        split_index = randint(minsize, maxsize)
        # If the rest would get too small, keep it unsplit and stop.
        if minsize > len(remaining) - split_index:
            pieces.append(remaining)
            return pieces
        pieces.append(remaining[:split_index])
        remaining = remaining[split_index:]


# Size range of the subdivisions: at least 5 items, at most 15% of the list.
minsize, maxsize = 5, int(0.15 * len(indexes))
# Choose three of the subdivided sequences.
chosen = sample(get_random_division(indexes, minsize, maxsize), 3)
print(chosen)
输出:
[[17, 18, 19, 20, 21, 22, 23, 24, 25, 26],
[36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46],
[1, 2, 3, 4, 5]]
我有一个索引列表,例如 0...365,我想从中无放回地随机选出若干个连续子区域。
# Indices 1..365, the sequence the regions are drawn from.
index = [i + 1 for i in range(365)]
n = 3  # number of contiguous regions to draw
exclusion_regions = []
for _ in range(n):
    # get_random_contiguous_region() is the helper this question asks for.
    exclusion_regions.append(get_random_contiguous_region(index))
有没有人对 get_random_contiguous_region() 的实现有什么建议?
我们需要一个 while 循环来确保各个切片最终不会重叠;在循环中还可以检查切片的长度是否满足其他条件,而使用列表推导式则无法指定这些不同的条件:
如果您想要长度约为列表总长度 5% 到 15% 的随机切片,并且样本总量约为列表的 30%:
from random import choice, randrange

from numpy import arange

# The values 1..365 that the contiguous slices are cut from.
index = [i + 1 for i in range(365)]
choices = []  # accepted slices, in the order they were drawn
seen = set()  # every value already covered by an accepted slice
ar = arange(0.05, .16, .01)  # slice sizes as fractions of the list, 1% steps
ln = len(index)
sample_size = 0
# Keep drawing until the accepted slices cover about 30% of the list.
while sample_size < ln * .30:
    perc = choice(ar)  # random fraction between roughly 5% and 15%
    size = int(ln * perc)
    # Draw a start *position* (not a value taken from index) so the slice
    # always fits and the first element can be selected as well.
    start = randrange(ln - size + 1)
    rn = index[start:start + size]
    # Reject candidates that overlap an already accepted slice.
    if not seen.intersection(rn):
        seen.update(rn)
        choices.append(rn)
        sample_size += len(rn)
print(choices)
你可以这样做:
import random

n = 3  # number of contiguous regions to pick
index = [i + 1 for i in range(10)]
# Draw 2*n distinct cut points and sort them; consecutive pairs are then the
# (start, end) bounds of n non-overlapping slices. The candidates include
# len(index) so that a slice is able to reach the final element.
slices = sorted(random.sample(range(len(index) + 1), 2 * n))
regions = [index[start:end] for start, end in zip(slices[::2], slices[1::2])]
print(regions)
这是一个以符号方式处理范围(range 对象)的解决方案,而不是逐个考虑每个元素。
(对于您正在处理的小基数,它可能有点矫枉过正,但对于包含数万个项目的范围,它的效率会高得多。)
编辑:我已经更新了代码,允许将长度指定为整数,或指定为返回整数的无参数函数。这样您就可以把长度作为一个分布给出,而不仅仅是常数。
import random
def range_intersection(a, b):
    """Return the overlap of two unit-step ranges as a range.

    The result is an empty (and therefore falsy) range when ``a`` and ``b``
    share no values.

    Raises:
        NotImplementedError: if either range has a step other than 1.
    """
    if a.step == b.step == 1:
        return range(max(a.start, b.start), min(a.stop, b.stop), 1)
    # Stepped ranges are not supported. NotImplementedError is the exception
    # class; the NotImplemented constant cannot be raised.
    raise NotImplementedError("only ranges with step 1 are supported")
def random_subrange(length, range_):
    """Return a random run of ``length`` consecutive items of ``range_``.

    Args:
        length: number of items the sub-range should contain.
        range_: the ``range`` object to cut the sub-range from.

    Returns:
        A ``range`` with the same step as ``range_``.

    Raises:
        ValueError: if ``length`` is greater than ``len(range_)``.
    """
    # Choose the position of the first item so that every window that fits,
    # including the one ending on the last item, can be drawn.
    first = random.randrange(len(range_) - length + 1)
    # Slicing a range yields another range with the same step.
    return range_[first:first + length]
def const_fn(n):
    """Wrap ``n`` in a zero-argument callable that always returns it."""
    return lambda: n
def random_distinct_subranges(num, length, range_):
    """Draw ``num`` mutually non-overlapping random sub-ranges of ``range_``.

    ``length`` is either a fixed size or a zero-argument callable that is
    asked for a new size on every draw. Candidates that intersect a range
    picked earlier are discarded and redrawn. The result is sorted by start.
    """
    next_length = length if callable(length) else const_fn(length)
    chosen = []
    for _ in range(num):
        candidate = random_subrange(next_length(), range_)
        # Redraw until the candidate is clear of every accepted range.
        while any(range_intersection(candidate, taken) for taken in chosen):
            candidate = random_subrange(next_length(), range_)
        chosen.append(candidate)
    chosen.sort(key=lambda picked: picked.start)
    return chosen
然后
# One index per day, 1 through 365.
days = range(1, 366)
# Draw three non-overlapping periods, each lasting between 5 and 15 days.
periods = random_distinct_subranges(
    num=3,
    length=lambda: random.randint(5, 15),
    range_=days,
)
print(periods)
会给出类似下面的结果:
[range(78, 92), range(147, 155), range(165, 173)]
可以像这样迭代
from itertools import chain

# Flatten the selected periods into a single stream of individual days.
rand_days = chain.from_iterable(periods)
print(list(rand_days))
输出:
[78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 147, 148, 149, 150, 151, 152, 153, 154, 165, 166, 167, 168, 169, 170, 171, 172]
这是一种相当简单的递归方法:先把索引列表随机划分为长度在给定范围内的若干连续子序列,然后从中选出三个子序列。
from random import randint, sample

# A list (not a bare range) so that the pieces print as lists of indices.
indexes = list(range(1, 80))


def get_random_division(lst, minsize, maxsize):
    """Cut ``lst`` into consecutive pieces of random length.

    Each cut length is drawn with ``randint(minsize, maxsize)``. When a cut
    would leave fewer than ``minsize`` items behind, the remaining items are
    kept together as the final piece, which may therefore be longer than
    ``maxsize``.

    Returns:
        A list of slices of ``lst`` that concatenate back to ``lst``.

    Raises:
        ValueError: if ``maxsize`` is below 1 (no cut could make progress)
            or ``minsize`` exceeds ``maxsize``.
    """
    if maxsize < 1:
        raise ValueError("maxsize must be at least 1")
    pieces = []
    remaining = lst
    # Iterative so that long inputs cannot hit the recursion limit.
    while True:
        split_index = randint(minsize, maxsize)
        # If the rest would get too small, keep it unsplit and stop.
        if minsize > len(remaining) - split_index:
            pieces.append(remaining)
            return pieces
        pieces.append(remaining[:split_index])
        remaining = remaining[split_index:]


# Size range of the subdivisions: at least 5 items, at most 15% of the list.
minsize, maxsize = 5, int(0.15 * len(indexes))
# Choose three of the subdivided sequences.
chosen = sample(get_random_division(indexes, minsize, maxsize), 3)
print(chosen)
输出:
[[17, 18, 19, 20, 21, 22, 23, 24, 25, 26],
[36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46],
[1, 2, 3, 4, 5]]