不同的选择,即使使用自定义的 EqualityComparer,仍然会留下重复的条目
Distinct selection, even with a custom EqualityComparer, still leaves duplicate entries
我对这个脚本很迷惑 - 我不明白 - 为什么它会留下重复的条目?
private static float GenerateMedian(IEnumerable<Collider> items, KDAxis axis)
{
float[] allValues = items.SelectMany(AxisSelector(axis)).ToArray();
Debug.LogFormat("{0} all values for {1} items: {2}.", allValues.Length, items.Count(), string.Join(", ", allValues.Select(v => v.ToString("F10")).ToArray()));
#if BASIC_DISTINCT
float[] values = allValues.Distinct().OrderBy(f => f).ToArray();
#else
float[] values = allValues.Distinct(new KDFloatComparer(0.0001f)).OrderBy(f => f).ToArray();
#endif
Debug.LogFormat("{0} distinct values for {1} items: {2}.", values.Length, items.Count(), string.Join(", ", values.Select(v => v.ToString("F10")).ToArray()));
int medianIndex = Mathf.CeilToInt(values.Length / 2f) - 1;
float medianValue = values[medianIndex];
Debug.LogFormat("Median index: {0} (left: {1}; right: {2}) value: {3}", medianIndex, medianIndex + 1, values.Length - 1 - medianIndex, medianValue);
return medianValue;
}
private static Func<Collider, IEnumerable<float>> AxisSelector(KDAxis axis)
{
switch (axis)
{
case KDAxis.X:
return XAxisSelector;
case KDAxis.Y:
return YAxisSelector;
case KDAxis.Z:
return ZAxisSelector;
}
return XAxisSelector;
}
private static IEnumerable<float> XAxisSelector(Collider collider)
{
yield return collider.bounds.max.x;
yield return collider.bounds.min.x;
}
private static IEnumerable<float> YAxisSelector(Collider collider)
{
yield return collider.bounds.max.y;
yield return collider.bounds.min.y;
}
private static IEnumerable<float> ZAxisSelector(Collider collider)
{
yield return collider.bounds.max.z;
yield return collider.bounds.min.z;
}
提供此输出:
28 all values for 14 items: 3.0000000000, 2.0000000000, 11.0000000000, -11.0000000000, -5.0000010000, -10.0000000000, 3.0000000000, 2.0000000000, 3.0000000000, 2.0000000000, 11.0000000000, -11.0000000000, -10.0000000000, -11.0000400000, 3.0000000000, 2.0000000000, 7.0000000000, 6.0000000000, -7.0000000000, -10.0000000000, 10.0000000000, -10.0000000000, 11.0000000000, 9.9999550000, -8.0000000000, -9.9999980000, 3.0000000000, 2.0000000000.
20 distinct values for 14 items: -11.0000400000, -11.0000000000, -10.0000000000, -10.0000000000, -9.9999980000, -8.0000000000, -7.0000000000, -5.0000010000, 2.0000000000, 2.0000000000, 2.0000000000, 3.0000000000, 3.0000000000, 3.0000000000, 6.0000000000, 7.0000000000, 9.9999550000, 10.0000000000, 11.0000000000, 11.0000000000.
而且它显然包含重复项 - 例如 3 x 2.0
和 3 x 3.0
。
即使我要实现一个自定义浮点数比较器,并用 new KDFloatComparer(0.0001f)
将其输入 Distinct()
:
public class KDFloatComparer : EqualityComparer<float>
{
public readonly float InternalEpsilon = 0.001f;
public KDFloatComparer(float epsilon) : base()
{
InternalEpsilon = epsilon;
}
//
public override bool Equals(float a, float b)
{
float absoluteA = Math.Abs(a);
float absoluteB = Math.Abs(b);
float absoluteDifference = Math.Abs(a - b);
if (a == b)
{
return true;
}
else if (a == 0 || b == 0 || absoluteDifference < float.Epsilon)
{
// a or b is zero or both are extremely close to it.
// Relative error is less meaningful here.
return absoluteDifference < InternalEpsilon;
}
else
{
// Use relative error.
return absoluteDifference / (absoluteA + absoluteB) < InternalEpsilon;
}
return true;
}
public override int GetHashCode(float value)
{
return value.GetHashCode();
}
}
结果完全一样
我确实尝试在 csharppad.com
上复制场景 - 它没有留下重复项。虽然,我没有使用 SelectMany
方法,但我使用报告的 ToString("F10")
值制作了原始数组,这让我认为问题出在浮点精度上,但是,无论我如何实现EqualityComparer
(在尝试使用 SO 之前有一些自定义变体),我似乎无法确定它。
我该如何解决这个问题?
你的Equals
是坏的,因为它不满足三角不等式。一定是那个a == b && b == c ==> a == c
。由于 epsilon 比较,情况并非如此。
真的,这没有意义。如果你有数字new [] { 0, epsilon, epsilon * 2 }
,你想保留这三个数字中的哪一个?!您需要更好地定义它并使用不同的算法。
当您违反 Equals
和 GetHashCode
的合同时,您会得到未定义的行为。
另一个问题是一些哈希码不相等的值在这里比较相等。
I did try to replicate the scenario over on csharppad.com - it didn't leave duplicates
未定义的行为有时意味着得到正确的结果。
我创建了一个小型控制台项目来测试它:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace TestEqual
{
class Program
{
static float[] values = new float[] { 3.0000000000f, 2.0000000000f, 11.0000000000f, -11.0000000000f, -5.0000010000f, -10.0000000000f, 3.0000000000f, 2.0000000000f, 3.0000000000f, 2.0000000000f, 11.0000000000f, -11.0000000000f, -10.0000000000f, -11.0000400000f, 3.0000000000f, 2.0000000000f, 7.0000000000f, 6.0000000000f, -7.0000000000f, -10.0000000000f, 10.0000000000f, -10.0000000000f, 11.0000000000f, 9.9999550000f, -8.0000000000f, -9.9999980000f, 3.0000000000f, 2.0000000000f };
static void Main(string[] args)
{
var distinct = values.Distinct(new KDFloatComparer(0.001f)).OrderBy(d => d).ToArray();
Console.WriteLine("Valores distintos: ");
foreach (var f in distinct)
Console.WriteLine(f);
Console.ReadKey();
}
public class KDFloatComparer : EqualityComparer<float>
{
public readonly float InternalEpsilon = 0.001f;
public KDFloatComparer(float epsilon)
: base()
{
InternalEpsilon = epsilon;
}
//
public override bool Equals(float a, float b)
{
float absoluteA = Math.Abs(a);
float absoluteB = Math.Abs(b);
float absoluteDifference = Math.Abs(a - b);
if (a == b)
{
return true;
}
else if (a == 0 || b == 0 || absoluteDifference < InternalEpsilon)
{
// a or b is zero or both are extremely close to it.
// Relative error is less meaningful here.
return absoluteDifference < InternalEpsilon;
}
else
{
// Use relative error.
return absoluteDifference / (absoluteA + absoluteB) < InternalEpsilon;
}
return true;
}
public override int GetHashCode(float value)
{
return value.GetHashCode();
}
}
public class FComparer : IEqualityComparer<float>
{
public bool Equals(float x, float y)
{
var dif = Math.Abs(x - y);
if ((x == 0 || y == 0) && dif < float.Epsilon)
return true;
if (Math.Sign(x) != Math.Sign(y))
return false;
return dif < float.Epsilon;
}
public int GetHashCode(float obj)
{
return obj.GetHashCode();
}
}
}
}
Linux/Mono V4.0.1 下的结果 其中:
Valores distintos:
-11,00004
-11
-10
-9,999998
-8
-7
-5,000001 2 3 6 7 9,999955 10 11
所以我唯一能想到的是你的单声道版本有浮点数学错误,确实有一些旧版本确实有一些问题。
尝试将你的单声道版本更新到最新版本,更好的是,从你机器上的最新源代码编译它。
此外,我还包含了一个较小的比较器,它产生了相同的结果。
编辑:我也更正了你的比较器,在一个地方你使用的是 InternalEpsilon,在其他地方 float.Epsilon,float.Epsilon 是 1,401298E- 45,这在您的字符串中无法表示,因为它们只有九位小数,如果存在低于 0.000000001 的差异,您在裁剪时看不到它。
EDIT:似乎 Distinct 只执行比较器的 Equals 仅当散列码相同,所以每个浮点数具有不同的哈希码 Equals 永远不会被执行。
此示例 100% 生成随机数。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace TestEqual
{
class Program
{
static void Main(string[] args)
{
Random rnd = new Random();
List<float> numbers = new List<float>();
for(int buc = 0; buc < 1000; buc++)
numbers.Add((float)rnd.NextDouble());
var distinct = numbers.OrderBy(d => d).Distinct(new FComparer()).OrderBy(d => d).ToArray();
Console.WriteLine(float.Epsilon);
Console.WriteLine("Valores distintos: ");
foreach (var f in distinct)
Console.WriteLine(f);
foreach (var f in distinct)
{
for (int buc = 0; buc < distinct.Length; buc++)
if (Math.Abs(f - distinct[buc]) < 0.001f && f != distinct[buc])
Console.WriteLine("Duplicate");
}
Console.ReadKey();
}
public class FComparer : IEqualityComparer<float>
{
public bool Equals(float x, float y)
{
var dif = Math.Abs(x - y);
if ((x == 0 || y == 0) && dif < 0.001f)
return true;
if (Math.Sign(x) != Math.Sign(y))
return false;
return dif < 0.001f;
}
public int GetHashCode(float obj)
{
//This is the key, if GetHashCode is different then Equals is not called
return 0;
}
}
}
}
我对这个脚本很迷惑 - 我不明白 - 为什么它会留下重复的条目?
private static float GenerateMedian(IEnumerable<Collider> items, KDAxis axis)
{
float[] allValues = items.SelectMany(AxisSelector(axis)).ToArray();
Debug.LogFormat("{0} all values for {1} items: {2}.", allValues.Length, items.Count(), string.Join(", ", allValues.Select(v => v.ToString("F10")).ToArray()));
#if BASIC_DISTINCT
float[] values = allValues.Distinct().OrderBy(f => f).ToArray();
#else
float[] values = allValues.Distinct(new KDFloatComparer(0.0001f)).OrderBy(f => f).ToArray();
#endif
Debug.LogFormat("{0} distinct values for {1} items: {2}.", values.Length, items.Count(), string.Join(", ", values.Select(v => v.ToString("F10")).ToArray()));
int medianIndex = Mathf.CeilToInt(values.Length / 2f) - 1;
float medianValue = values[medianIndex];
Debug.LogFormat("Median index: {0} (left: {1}; right: {2}) value: {3}", medianIndex, medianIndex + 1, values.Length - 1 - medianIndex, medianValue);
return medianValue;
}
private static Func<Collider, IEnumerable<float>> AxisSelector(KDAxis axis)
{
switch (axis)
{
case KDAxis.X:
return XAxisSelector;
case KDAxis.Y:
return YAxisSelector;
case KDAxis.Z:
return ZAxisSelector;
}
return XAxisSelector;
}
private static IEnumerable<float> XAxisSelector(Collider collider)
{
yield return collider.bounds.max.x;
yield return collider.bounds.min.x;
}
private static IEnumerable<float> YAxisSelector(Collider collider)
{
yield return collider.bounds.max.y;
yield return collider.bounds.min.y;
}
private static IEnumerable<float> ZAxisSelector(Collider collider)
{
yield return collider.bounds.max.z;
yield return collider.bounds.min.z;
}
提供此输出:
28 all values for 14 items: 3.0000000000, 2.0000000000, 11.0000000000, -11.0000000000, -5.0000010000, -10.0000000000, 3.0000000000, 2.0000000000, 3.0000000000, 2.0000000000, 11.0000000000, -11.0000000000, -10.0000000000, -11.0000400000, 3.0000000000, 2.0000000000, 7.0000000000, 6.0000000000, -7.0000000000, -10.0000000000, 10.0000000000, -10.0000000000, 11.0000000000, 9.9999550000, -8.0000000000, -9.9999980000, 3.0000000000, 2.0000000000.
20 distinct values for 14 items: -11.0000400000, -11.0000000000, -10.0000000000, -10.0000000000, -9.9999980000, -8.0000000000, -7.0000000000, -5.0000010000, 2.0000000000, 2.0000000000, 2.0000000000, 3.0000000000, 3.0000000000, 3.0000000000, 6.0000000000, 7.0000000000, 9.9999550000, 10.0000000000, 11.0000000000, 11.0000000000.
而且它显然包含重复项 - 例如 3 x 2.0
和 3 x 3.0
。
即使我要实现一个自定义浮点数比较器,并用 new KDFloatComparer(0.0001f)
将其输入 Distinct()
:
public class KDFloatComparer : EqualityComparer<float>
{
public readonly float InternalEpsilon = 0.001f;
public KDFloatComparer(float epsilon) : base()
{
InternalEpsilon = epsilon;
}
//
public override bool Equals(float a, float b)
{
float absoluteA = Math.Abs(a);
float absoluteB = Math.Abs(b);
float absoluteDifference = Math.Abs(a - b);
if (a == b)
{
return true;
}
else if (a == 0 || b == 0 || absoluteDifference < float.Epsilon)
{
// a or b is zero or both are extremely close to it.
// Relative error is less meaningful here.
return absoluteDifference < InternalEpsilon;
}
else
{
// Use relative error.
return absoluteDifference / (absoluteA + absoluteB) < InternalEpsilon;
}
return true;
}
public override int GetHashCode(float value)
{
return value.GetHashCode();
}
}
结果完全一样
我确实尝试在 csharppad.com
上复制场景 - 它没有留下重复项。虽然,我没有使用 SelectMany
方法,但我使用报告的 ToString("F10")
值制作了原始数组,这让我认为问题出在浮点精度上,但是,无论我如何实现EqualityComparer
(在尝试使用 SO 之前有一些自定义变体),我似乎无法确定它。
我该如何解决这个问题?
你的Equals
是坏的,因为它不满足三角不等式。一定是那个a == b && b == c ==> a == c
。由于 epsilon 比较,情况并非如此。
真的,这没有意义。如果你有数字new [] { 0, epsilon, epsilon * 2 }
,你想保留这三个数字中的哪一个?!您需要更好地定义它并使用不同的算法。
当您违反 Equals
和 GetHashCode
的合同时,您会得到未定义的行为。
另一个问题是一些哈希码不相等的值在这里比较相等。
I did try to replicate the scenario over on csharppad.com - it didn't leave duplicates
未定义的行为有时意味着得到正确的结果。
我创建了一个小型控制台项目来测试它:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace TestEqual
{
class Program
{
static float[] values = new float[] { 3.0000000000f, 2.0000000000f, 11.0000000000f, -11.0000000000f, -5.0000010000f, -10.0000000000f, 3.0000000000f, 2.0000000000f, 3.0000000000f, 2.0000000000f, 11.0000000000f, -11.0000000000f, -10.0000000000f, -11.0000400000f, 3.0000000000f, 2.0000000000f, 7.0000000000f, 6.0000000000f, -7.0000000000f, -10.0000000000f, 10.0000000000f, -10.0000000000f, 11.0000000000f, 9.9999550000f, -8.0000000000f, -9.9999980000f, 3.0000000000f, 2.0000000000f };
static void Main(string[] args)
{
var distinct = values.Distinct(new KDFloatComparer(0.001f)).OrderBy(d => d).ToArray();
Console.WriteLine("Valores distintos: ");
foreach (var f in distinct)
Console.WriteLine(f);
Console.ReadKey();
}
public class KDFloatComparer : EqualityComparer<float>
{
public readonly float InternalEpsilon = 0.001f;
public KDFloatComparer(float epsilon)
: base()
{
InternalEpsilon = epsilon;
}
//
public override bool Equals(float a, float b)
{
float absoluteA = Math.Abs(a);
float absoluteB = Math.Abs(b);
float absoluteDifference = Math.Abs(a - b);
if (a == b)
{
return true;
}
else if (a == 0 || b == 0 || absoluteDifference < InternalEpsilon)
{
// a or b is zero or both are extremely close to it.
// Relative error is less meaningful here.
return absoluteDifference < InternalEpsilon;
}
else
{
// Use relative error.
return absoluteDifference / (absoluteA + absoluteB) < InternalEpsilon;
}
return true;
}
public override int GetHashCode(float value)
{
return value.GetHashCode();
}
}
public class FComparer : IEqualityComparer<float>
{
public bool Equals(float x, float y)
{
var dif = Math.Abs(x - y);
if ((x == 0 || y == 0) && dif < float.Epsilon)
return true;
if (Math.Sign(x) != Math.Sign(y))
return false;
return dif < float.Epsilon;
}
public int GetHashCode(float obj)
{
return obj.GetHashCode();
}
}
}
}
Linux/Mono V4.0.1 下的结果 其中:
Valores distintos: -11,00004 -11 -10 -9,999998 -8 -7 -5,000001 2 3 6 7 9,999955 10 11
所以我唯一能想到的是你的单声道版本有浮点数学错误,确实有一些旧版本确实有一些问题。
尝试将你的单声道版本更新到最新版本,更好的是,从你机器上的最新源代码编译它。
此外,我还包含了一个较小的比较器,它产生了相同的结果。
编辑:我也更正了你的比较器,在一个地方你使用的是 InternalEpsilon,在其他地方 float.Epsilon,float.Epsilon 是 1,401298E- 45,这在您的字符串中无法表示,因为它们只有九位小数,如果存在低于 0.000000001 的差异,您在裁剪时看不到它。
EDIT:似乎 Distinct 只执行比较器的 Equals 仅当散列码相同,所以每个浮点数具有不同的哈希码 Equals 永远不会被执行。
此示例 100% 生成随机数。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace TestEqual
{
class Program
{
static void Main(string[] args)
{
Random rnd = new Random();
List<float> numbers = new List<float>();
for(int buc = 0; buc < 1000; buc++)
numbers.Add((float)rnd.NextDouble());
var distinct = numbers.OrderBy(d => d).Distinct(new FComparer()).OrderBy(d => d).ToArray();
Console.WriteLine(float.Epsilon);
Console.WriteLine("Valores distintos: ");
foreach (var f in distinct)
Console.WriteLine(f);
foreach (var f in distinct)
{
for (int buc = 0; buc < distinct.Length; buc++)
if (Math.Abs(f - distinct[buc]) < 0.001f && f != distinct[buc])
Console.WriteLine("Duplicate");
}
Console.ReadKey();
}
public class FComparer : IEqualityComparer<float>
{
public bool Equals(float x, float y)
{
var dif = Math.Abs(x - y);
if ((x == 0 || y == 0) && dif < 0.001f)
return true;
if (Math.Sign(x) != Math.Sign(y))
return false;
return dif < 0.001f;
}
public int GetHashCode(float obj)
{
//This is the key, if GetHashCode is different then Equals is not called
return 0;
}
}
}
}