查找给定整数列表的唯一子集列表
Finding list of unique subsets of a given list of integers
寻找一种有效的方法来确定整数列表的所有唯一子集。
假设我有一个包含 340 个整数的整数列表。我想要一个所有可能子集的列表(每个子集有 5 个元素)。所有提供的整数都是唯一的,结果不应重复其子集中的任何元素。给定输入 1,2,3,4,5,6,7,8,9 的示例 我正在寻找
的输出
- 1,2,3,4,5
- 1,2,3,4,6
- 1,2,3,4,7
- 1,2,3,4,8
- 1,2,3,4,9
- 1,2,3,5,6
- 1,2,3,5,7
- ...
我必须在 CSharp 中执行此操作。这可以在 LINQ 中完成吗?
如果 tractable 9 个元素的集合(或最多 25-30 个)和 5 个元素的子集,代码可以基于递归函数
static void Main(string[] args)
{
foreach (var item in ListPerm())
{
Console.WriteLine(String.Join(",", item));
}
Console.Read();
}
private static List<List<int>> ListPerm(HashSet<int> mySet = null, int deep = 5)
{
if (mySet == null)
{
mySet = initSet(8);
}
if (deep <= 0)
{
return Enumerable.Empty<List<int>>().ToList();
}
List<List<int>> all = new List<List<int>>();
for (int i = 0; i < mySet.Count - deep + 1; i++)
{
if (deep == 1)
{
var list = new List<int>() { mySet.ElementAt(i) };
all.Add(list);
}
foreach (var item in ListPerm(new HashSet<int>(mySet.Skip(i+1)), deep - 1))
{
var list = new List<int>() { mySet.ElementAt(i) };
list.AddRange(item);
all.Add(list);
}
}
return all;
}
private static HashSet<int> initSet(int lenght)
{
HashSet<int> ret = new HashSet<int>();
for (int i = 0; i < lenght; i++)
{
ret.Add(i * 1 + 1); // just an example...
};
return ret;
}
再造
现在,让我把上面的代码优化成一个更性能更好的函数,它需要3.2秒来得到8个整数的组合在我的标准笔记本电脑上,满分 30。
private static int[][] ListPerm(int[] mySet, int deep)
{
var all = new List<int[]>();
if (deep == 1)
{
return mySet.Select(x => new int[] { x }).ToArray();
}
else
{
var mySubSet = new int[mySet.Length - 1];
Array.Copy(mySet, 1, mySubSet, 0, mySubSet.Length);
var perm1st = ListPerm(mySubSet, deep - 1);
for (int i = 0; i < mySet.Length - deep + 1; i++)
{
var permn = perm1st.Select(x =>
{
var z = new int[x.Length + 1];
z[0] = mySet[i];
x.CopyTo(z, 1);
return z;
}
);
all.AddRange(permn);
int start = Array.FindIndex(perm1st, item => item[0] != mySet[i + 1]);
if (start > 0)
{
var temp_cpy = new int[perm1st.Length - start][];
Array.Copy(perm1st, start, temp_cpy, 0, temp_cpy.Length);
perm1st = temp_cpy;
}
}
}
return all.ToArray();
}
基准
这里是 Ivan 的、我的和社区 wiki 算法对 20 中 5 整数组合的比较。
结果
wiki perm: 00:00:00.0950055
writing wiki perm: 00:00:00.0460026
Ivan perm: 00:00:00.0400023
writing Ivan perm: 00:00:00.0260015
my perm: 00:00:00.0110006
writing my perm: 00:00:00.0300017
测试代码
var input = Enumerable.Range(1, 20);
int deep = 5;
var start = DateTime.Now;
var wiki = Algorithms.Combinations(input, deep).ToArray();
Console.WriteLine("wiki perm: {0}", DateTime.Now - start);
start = DateTime.Now;
StreamWriter sw0 = new StreamWriter(@"C:\dev\SO\Algo\perm0.txt", false);
foreach (var item in wiki)
{
sw0.WriteLine(String.Join(",", item));
}
sw0.Close();
Console.WriteLine("writing wiki perm: {0}", DateTime.Now - start);
start = DateTime.Now;
start = DateTime.Now;
var result = input
.Distinct()
.ToArray()
.GetCombinations(deep)
.Select(c => (int[])c.Clone())
.ToList();
Console.WriteLine("Ivan perm: {0}", DateTime.Now - start);
start = DateTime.Now;
StreamWriter sw1 = new StreamWriter(@"C:\dev\SO\Algo\perm1.txt", false);
foreach (var item in result)
{
sw1.WriteLine(String.Join(",", item));
}
sw1.Close();
Console.WriteLine("writing Ivan perm: {0}", DateTime.Now - start);
start = DateTime.Now;
var myPerm = ListPermSO(input.ToArray(), deep);
Console.WriteLine("my perm: {0}", DateTime.Now - start);
start = DateTime.Now;
StreamWriter sw2 = new StreamWriter(@"C:\dev\SO\Algo\perm2.txt", false);
foreach (var item in myPerm)
{
sw2.WriteLine(String.Join(",", item));
}
sw2.Close();
Console.WriteLine("writing my perm: {0}", DateTime.Now - start);
Console.Read();
我已经回答了几个组合问题,而且我到处都使用非递归分配自由算法的变体。对于这种情况,它看起来像这样:
public static class Algorithms
{
public static IEnumerable<T[]> GetCombinations<T>(this T[] input, int N)
{
var result = new T[N];
var indices = new int[N];
for (int pos = 0, index = 0; ;)
{
for (; pos < N; pos++, index++)
{
indices[pos] = index;
result[pos] = input[index];
}
yield return result;
do
{
if (pos == 0) yield break;
index = indices[--pos] + 1;
}
while (index > input.Length - N + pos);
}
}
}
与其他实现一样,该方法产生一个相同的内部缓冲区,这在您只需要迭代和处理结果集一次时很有用。如果需要存储组合,则需要在存储之前克隆返回的数组。这是您的场景中的示例用法:
var input = Enumerable.Range(1, 20);
var result = input
.Distinct()
.ToArray()
.GetCombinations(5)
.Select(c => (int[])c.Clone())
.ToList();
更新: GetCombinations
方法基本上模拟了这样的 N
嵌套循环(在伪代码中):
for (int i0 = 0; i0 <= input.Length - N; i0++)
for (int i1 = i0 + 1; i1 <= input.Length - N + 1; i1++)
for (int i2 = i1 + 1; i2 <= input.Length - N + 2; i2++)
...
for (int iN-1 = iN-2 + 1; iN-1 <= input.Length - 1; iN-1++)
yield { input[i0], input[i1], input[i2], ..., input[iN-1] }
寻找一种有效的方法来确定整数列表的所有唯一子集。
假设我有一个包含 340 个整数的整数列表。我想要一个所有可能子集的列表(每个子集有 5 个元素)。所有提供的整数都是唯一的,结果不应重复其子集中的任何元素。给定输入 1,2,3,4,5,6,7,8,9 的示例 我正在寻找
的输出- 1,2,3,4,5
- 1,2,3,4,6
- 1,2,3,4,7
- 1,2,3,4,8
- 1,2,3,4,9
- 1,2,3,5,6
- 1,2,3,5,7
- ...
我必须在 CSharp 中执行此操作。这可以在 LINQ 中完成吗?
如果 tractable 9 个元素的集合(或最多 25-30 个)和 5 个元素的子集,代码可以基于递归函数
static void Main(string[] args)
{
foreach (var item in ListPerm())
{
Console.WriteLine(String.Join(",", item));
}
Console.Read();
}
private static List<List<int>> ListPerm(HashSet<int> mySet = null, int deep = 5)
{
if (mySet == null)
{
mySet = initSet(8);
}
if (deep <= 0)
{
return Enumerable.Empty<List<int>>().ToList();
}
List<List<int>> all = new List<List<int>>();
for (int i = 0; i < mySet.Count - deep + 1; i++)
{
if (deep == 1)
{
var list = new List<int>() { mySet.ElementAt(i) };
all.Add(list);
}
foreach (var item in ListPerm(new HashSet<int>(mySet.Skip(i+1)), deep - 1))
{
var list = new List<int>() { mySet.ElementAt(i) };
list.AddRange(item);
all.Add(list);
}
}
return all;
}
private static HashSet<int> initSet(int lenght)
{
HashSet<int> ret = new HashSet<int>();
for (int i = 0; i < lenght; i++)
{
ret.Add(i * 1 + 1); // just an example...
};
return ret;
}
再造
现在,让我把上面的代码优化成一个更性能更好的函数,它需要3.2秒来得到8个整数的组合在我的标准笔记本电脑上,满分 30。
private static int[][] ListPerm(int[] mySet, int deep)
{
var all = new List<int[]>();
if (deep == 1)
{
return mySet.Select(x => new int[] { x }).ToArray();
}
else
{
var mySubSet = new int[mySet.Length - 1];
Array.Copy(mySet, 1, mySubSet, 0, mySubSet.Length);
var perm1st = ListPerm(mySubSet, deep - 1);
for (int i = 0; i < mySet.Length - deep + 1; i++)
{
var permn = perm1st.Select(x =>
{
var z = new int[x.Length + 1];
z[0] = mySet[i];
x.CopyTo(z, 1);
return z;
}
);
all.AddRange(permn);
int start = Array.FindIndex(perm1st, item => item[0] != mySet[i + 1]);
if (start > 0)
{
var temp_cpy = new int[perm1st.Length - start][];
Array.Copy(perm1st, start, temp_cpy, 0, temp_cpy.Length);
perm1st = temp_cpy;
}
}
}
return all.ToArray();
}
基准
这里是 Ivan 的、我的和社区 wiki 算法对 20 中 5 整数组合的比较。
结果
wiki perm: 00:00:00.0950055
writing wiki perm: 00:00:00.0460026
Ivan perm: 00:00:00.0400023
writing Ivan perm: 00:00:00.0260015
my perm: 00:00:00.0110006
writing my perm: 00:00:00.0300017
测试代码
var input = Enumerable.Range(1, 20);
int deep = 5;
var start = DateTime.Now;
var wiki = Algorithms.Combinations(input, deep).ToArray();
Console.WriteLine("wiki perm: {0}", DateTime.Now - start);
start = DateTime.Now;
StreamWriter sw0 = new StreamWriter(@"C:\dev\SO\Algo\perm0.txt", false);
foreach (var item in wiki)
{
sw0.WriteLine(String.Join(",", item));
}
sw0.Close();
Console.WriteLine("writing wiki perm: {0}", DateTime.Now - start);
start = DateTime.Now;
start = DateTime.Now;
var result = input
.Distinct()
.ToArray()
.GetCombinations(deep)
.Select(c => (int[])c.Clone())
.ToList();
Console.WriteLine("Ivan perm: {0}", DateTime.Now - start);
start = DateTime.Now;
StreamWriter sw1 = new StreamWriter(@"C:\dev\SO\Algo\perm1.txt", false);
foreach (var item in result)
{
sw1.WriteLine(String.Join(",", item));
}
sw1.Close();
Console.WriteLine("writing Ivan perm: {0}", DateTime.Now - start);
start = DateTime.Now;
var myPerm = ListPermSO(input.ToArray(), deep);
Console.WriteLine("my perm: {0}", DateTime.Now - start);
start = DateTime.Now;
StreamWriter sw2 = new StreamWriter(@"C:\dev\SO\Algo\perm2.txt", false);
foreach (var item in myPerm)
{
sw2.WriteLine(String.Join(",", item));
}
sw2.Close();
Console.WriteLine("writing my perm: {0}", DateTime.Now - start);
Console.Read();
我已经回答了几个组合问题,而且我到处都使用非递归分配自由算法的变体。对于这种情况,它看起来像这样:
public static class Algorithms
{
public static IEnumerable<T[]> GetCombinations<T>(this T[] input, int N)
{
var result = new T[N];
var indices = new int[N];
for (int pos = 0, index = 0; ;)
{
for (; pos < N; pos++, index++)
{
indices[pos] = index;
result[pos] = input[index];
}
yield return result;
do
{
if (pos == 0) yield break;
index = indices[--pos] + 1;
}
while (index > input.Length - N + pos);
}
}
}
与其他实现一样,该方法产生一个相同的内部缓冲区,这在您只需要迭代和处理结果集一次时很有用。如果需要存储组合,则需要在存储之前克隆返回的数组。这是您的场景中的示例用法:
var input = Enumerable.Range(1, 20);
var result = input
.Distinct()
.ToArray()
.GetCombinations(5)
.Select(c => (int[])c.Clone())
.ToList();
更新: GetCombinations
方法基本上模拟了这样的 N
嵌套循环(在伪代码中):
for (int i0 = 0; i0 <= input.Length - N; i0++)
for (int i1 = i0 + 1; i1 <= input.Length - N + 1; i1++)
for (int i2 = i1 + 1; i2 <= input.Length - N + 2; i2++)
...
for (int iN-1 = iN-2 + 1; iN-1 <= input.Length - 1; iN-1++)
yield { input[i0], input[i1], input[i2], ..., input[iN-1] }