不同的选择,即使使用自定义的 EqualityComparer,仍然会留下重复的条目

Distinct selection, even with a custom EqualityComparer, still leaves duplicate entries

我对这个脚本很迷惑 - 我不明白 - 为什么它会留下重复的条目?

private static float GenerateMedian(IEnumerable<Collider> items, KDAxis axis)
{
    float[] allValues = items.SelectMany(AxisSelector(axis)).ToArray();
    Debug.LogFormat("{0} all values for {1} items: {2}.", allValues.Length, items.Count(), string.Join(", ", allValues.Select(v => v.ToString("F10")).ToArray()));
    #if BASIC_DISTINCT
    float[] values = allValues.Distinct().OrderBy(f => f).ToArray();
    #else
    float[] values = allValues.Distinct(new KDFloatComparer(0.0001f)).OrderBy(f => f).ToArray();
    #endif
    Debug.LogFormat("{0} distinct values for {1} items: {2}.", values.Length, items.Count(), string.Join(", ", values.Select(v => v.ToString("F10")).ToArray()));

    int medianIndex = Mathf.CeilToInt(values.Length / 2f) - 1;
    float medianValue = values[medianIndex];

    Debug.LogFormat("Median index: {0} (left: {1}; right: {2}) value: {3}", medianIndex, medianIndex + 1, values.Length - 1 - medianIndex, medianValue);

    return medianValue;
}

private static Func<Collider, IEnumerable<float>> AxisSelector(KDAxis axis)
{
    switch (axis)
    {
        case KDAxis.X:
            return XAxisSelector;

        case KDAxis.Y:
            return YAxisSelector;

        case KDAxis.Z:
            return ZAxisSelector;
    }

    return XAxisSelector;
}

private static IEnumerable<float> XAxisSelector(Collider collider)
{
    yield return collider.bounds.max.x;
    yield return collider.bounds.min.x;
}

private static IEnumerable<float> YAxisSelector(Collider collider)
{
    yield return collider.bounds.max.y;
    yield return collider.bounds.min.y;
}

private static IEnumerable<float> ZAxisSelector(Collider collider)
{
    yield return collider.bounds.max.z;
    yield return collider.bounds.min.z;
}

提供此输出:

28 all values for 14 items: 3.0000000000, 2.0000000000, 11.0000000000, -11.0000000000, -5.0000010000, -10.0000000000, 3.0000000000, 2.0000000000, 3.0000000000, 2.0000000000, 11.0000000000, -11.0000000000, -10.0000000000, -11.0000400000, 3.0000000000, 2.0000000000, 7.0000000000, 6.0000000000, -7.0000000000, -10.0000000000, 10.0000000000, -10.0000000000, 11.0000000000, 9.9999550000, -8.0000000000, -9.9999980000, 3.0000000000, 2.0000000000.
20 distinct values for 14 items: -11.0000400000, -11.0000000000, -10.0000000000, -10.0000000000, -9.9999980000, -8.0000000000, -7.0000000000, -5.0000010000, 2.0000000000, 2.0000000000, 2.0000000000, 3.0000000000, 3.0000000000, 3.0000000000, 6.0000000000, 7.0000000000, 9.9999550000, 10.0000000000, 11.0000000000, 11.0000000000.

而且它显然包含重复项 - 例如 3 x 2.03 x 3.0

即使我要实现一个自定义浮点数比较器,并用 new KDFloatComparer(0.0001f) 将其输入 Distinct():

public class KDFloatComparer : EqualityComparer<float>
{
    public readonly float InternalEpsilon = 0.001f;

    public KDFloatComparer(float epsilon) : base()
    {
        InternalEpsilon = epsilon;
    }

    // 
    public override bool Equals(float a, float b)
    {
        float absoluteA = Math.Abs(a);
        float absoluteB = Math.Abs(b);
        float absoluteDifference = Math.Abs(a - b);

        if (a == b) 
        {
            return true;
        } 
        else if (a == 0 || b == 0 || absoluteDifference < float.Epsilon) 
        {
            // a or b is zero or both are extremely close to it.
            // Relative error is less meaningful here.
            return absoluteDifference < InternalEpsilon;
        } 
        else 
        { 
            // Use relative error.
            return absoluteDifference / (absoluteA + absoluteB) < InternalEpsilon;
        }

        return true;
    }

    public override int GetHashCode(float value)
    {
        return value.GetHashCode();
    }
}

结果完全一样

我确实尝试在 csharppad.com 上复制场景 - 它没有留下重复项。虽然,我没有使用 SelectMany 方法,但我使用报告的 ToString("F10") 值制作了原始数组,这让我认为问题出在浮点精度上,但是,无论我如何实现EqualityComparer(在尝试使用 SO 之前有一些自定义变体),我似乎无法确定它。

我该如何解决这个问题?

你的Equals是坏的,因为它不满足三角不等式。一定是那个a == b && b == c ==> a == c。由于 epsilon 比较,情况并非如此。

真的,这没有意义。如果你有数字new [] { 0, epsilon, epsilon * 2 },你想保留这三个数字中的哪一个?!您需要更好地定义它并使用不同的算法。

当您违反 EqualsGetHashCode 的合同时,您会得到未定义的行为。

另一个问题是一些哈希码不相等的值在这里比较相等。

I did try to replicate the scenario over on csharppad.com - it didn't leave duplicates

未定义的行为有时意味着得到正确的结果。

我创建了一个小型控制台项目来测试它:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace TestEqual
{
    class Program
    {

        static float[] values = new float[] { 3.0000000000f, 2.0000000000f, 11.0000000000f, -11.0000000000f, -5.0000010000f, -10.0000000000f, 3.0000000000f, 2.0000000000f, 3.0000000000f, 2.0000000000f, 11.0000000000f, -11.0000000000f, -10.0000000000f, -11.0000400000f, 3.0000000000f, 2.0000000000f, 7.0000000000f, 6.0000000000f, -7.0000000000f, -10.0000000000f, 10.0000000000f, -10.0000000000f, 11.0000000000f, 9.9999550000f, -8.0000000000f, -9.9999980000f, 3.0000000000f, 2.0000000000f };

        static void Main(string[] args)
        {
            var distinct = values.Distinct(new KDFloatComparer(0.001f)).OrderBy(d => d).ToArray();

            Console.WriteLine("Valores distintos: ");

            foreach (var f in distinct)
                Console.WriteLine(f);

            Console.ReadKey();
        }

        public class KDFloatComparer : EqualityComparer<float>
        {
            public readonly float InternalEpsilon = 0.001f;

            public KDFloatComparer(float epsilon)
                : base()
            {
                InternalEpsilon = epsilon;
            }

            // 
            public override bool Equals(float a, float b)
            {
                float absoluteA = Math.Abs(a);
                float absoluteB = Math.Abs(b);
                float absoluteDifference = Math.Abs(a - b);

                if (a == b)
                {
                    return true;
                }
                else if (a == 0 || b == 0 || absoluteDifference < InternalEpsilon)
                {
                    // a or b is zero or both are extremely close to it.
                    // Relative error is less meaningful here.
                    return absoluteDifference < InternalEpsilon;
                }
                else
                {
                    // Use relative error.
                    return absoluteDifference / (absoluteA + absoluteB) < InternalEpsilon;
                }

                return true;
            }

            public override int GetHashCode(float value)
            {
                return value.GetHashCode();
            }
        }

        public class FComparer : IEqualityComparer<float>
        {

            public bool Equals(float x, float y)
            {

                var dif = Math.Abs(x - y);

                if ((x == 0 || y == 0) && dif < float.Epsilon)
                    return true;

                if (Math.Sign(x) != Math.Sign(y))
                    return false;

                return dif < float.Epsilon;
            }

            public int GetHashCode(float obj)
            {
                return obj.GetHashCode();
            }
        }

    }
}

Linux/Mono V4.0.1 下的结果 其中:

Valores distintos: -11,00004 -11 -10 -9,999998 -8 -7 -5,000001 2 3 6 7 9,999955 10 11

所以我唯一能想到的是你的单声道版本有浮点数学错误,确实有一些旧版本确实有一些问题。

尝试将你的单声道版本更新到最新版本,更好的是,从你机器上的最新源代码编译它。

此外,我还包含了一个较小的比较器,它产生了相同的结果。

编辑:我也更正了你的比较器,在一个地方你使用的是 InternalEpsilon,在其他地方 float.Epsilon,float.Epsilon 是 1,401298E- 45,这在您的字符串中无法表示,因为它们只有九位小数,如果存在低于 0.000000001 的差异,您在裁剪时看不到它。

EDIT:似乎 Distinct 只执行比较器的 Equals 仅当散列码相同,所以每个浮点数具有不同的哈希码 Equals 永远不会被执行。

此示例 100% 生成随机数。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace TestEqual
{
    class Program
    {
        static void Main(string[] args)
        {

            Random rnd = new Random();

            List<float> numbers = new List<float>();

            for(int buc = 0; buc < 1000; buc++)
                numbers.Add((float)rnd.NextDouble());

            var distinct = numbers.OrderBy(d => d).Distinct(new FComparer()).OrderBy(d => d).ToArray();

            Console.WriteLine(float.Epsilon);

            Console.WriteLine("Valores distintos: ");

            foreach (var f in distinct)
                Console.WriteLine(f);

            foreach (var f in distinct)
            {

                for (int buc = 0; buc < distinct.Length; buc++)
                    if (Math.Abs(f - distinct[buc]) < 0.001f && f != distinct[buc])
                        Console.WriteLine("Duplicate");

            }

            Console.ReadKey();
        }

        public class FComparer : IEqualityComparer<float>
        {

            public bool Equals(float x, float y)
            {

                var dif = Math.Abs(x - y);

                if ((x == 0 || y == 0) && dif < 0.001f)
                    return true;

                if (Math.Sign(x) != Math.Sign(y))
                    return false;

                return dif < 0.001f;
            }

            public int GetHashCode(float obj)
            {
                //This is the key, if GetHashCode is different then Equals is not called
                return 0;
            }
        }

    }
}