数学运算比 C# 中 100 多个变量的逻辑检查更快?

Mathematical operations faster than logic checks for 100+ variables in C#?

我正在处理大量数据(数百万个对象,每个对象有 300 多个变量)。一个对象要被添加到数据库中,它的 100 个指定的 double? 变量里必须至少有一个被赋值。

// One data row with 100 nullable numeric columns (var1 .. var100).
// The properties are public so that code outside the class can read and assign them;
// without an access modifier C# class members default to private.
class RowObject {
    public double? var1 {get; set;}
    public double? var2 {get; set;}
    //Another 98 double? variables declared
    public double? var100 {get; set;}
}

我想出了两种检查方法。第一种是将所有变量加在一起,看结果是否大于 0 或不为 null。

// Populate a row; any property may be left null.
RowObject rO = new RowObject();
rO.var1 = 7250.345;
rO.var2 = null;
rO.var3 = 64.742;
//etc...

// The lifted + operator on double? yields null as soon as ANY operand is null,
// so sum != null holds only when every variable has a value.
var sum = rO.var1 + rO.var2 + rO.var3 + ... rO.var100;
if (sum != null) {
   //do something;
}

或者,很自然地,直接使用 if 语句:

// True when at least one variable is not null; || short-circuits, so evaluation
// stops at the first non-null variable.
if (rO.var1 != null || rO.var2 != null|| ... rO.var100 != null) {
    //do something;
}

除了速度之外,100 个变量还会大大降低可读性。所以,如果有一种方法在速度上的差异可以忽略不计,但更易读、更易理解,我认为那也是一个有效的答案。

(millions of 300 variable plus objects)

您是说数百万个对象,每个对象都有 300 多个属性?

// Sketch of the object shape being asked about: get-only nullable int
// properties a1 .. a301 (the middle ones are elided below).
class A {
  public int? a1 {get;}
  public int? a2 {get;}
  (...)
  public int? a301 {get;}
}

我做了一个快速测试。

两个选项我各运行了 4 次。

结果

if (a.A1 == null || a.A2 == null || a.A2 == null || a.A3 == null || a.A4 == null || a.A5 == null || a.A6 == null || a.A7 == null || a.A8 == null || a.A9 == null || a.A10 == null)
00:00:00.4341559, 00:00:00.4751146, 00:00:00.4799181, 00:00:00.4522816
var sum = a.A1 + a.A2 +a.A3 +a.A4 +a.A5 +a.A6 +a.A7 +a.A8 +a.A9 +a.A10;
if( sum == null )
00:00:00.6336356, 00:00:00.5714210, 00:00:00.6071693, 00:00:00.6795270

代码

/// <summary>
/// Immutable benchmark row holding ten nullable doubles (A1 .. A10).
/// Every value is supplied through the constructor and exposed read-only.
/// </summary>
class A
{
    public double? A1 { get; }
    public double? A2 { get; }
    public double? A3 { get; }
    public double? A4 { get; }
    public double? A5 { get; }
    public double? A6 { get; }
    public double? A7 { get; }
    public double? A8 { get; }
    public double? A9 { get; }
    public double? A10 { get; }

    /// <summary>Stores each argument in the property of the same number.</summary>
    public A(double? a1, double? a2, double? a3, double? a4, double? a5, double? a6, double? a7, double? a8, double? a9, double? a10)
    {
        A1 = a1;
        A2 = a2;
        A3 = a3;
        A4 = a4;
        A5 = a5;
        A6 = a6;
        A7 = a7;
        A8 = a8;
        A9 = a9;
        A10 = a10;
    }
}

/// <summary>
/// Micro-benchmark: times one pass over 1,000,000 <c>A</c> instances, counting those
/// that contain at least one null among A1 .. A10, and prints the elapsed time.
/// The alternative "sum" variant is kept below as a comment; swap it in to time it.
/// </summary>
static void Main(string[] args)
{
    var r = new Random(1); // fixed seed so every run sees the same data

    // Materialize the test data BEFORE starting the stopwatch. Enumerable.Select is
    // lazy: without ToList() the 1,000,000 allocations and 10,000,000 NextDouble()
    // calls would execute inside the timed foreach and dominate the measurement.
    var As = Enumerable.Range(0, 1000000)
        .Select(i => new A(
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble(),
            r.NextDouble()
        ))
        .ToList();

    var index = 0;
    var sw = Stopwatch.StartNew();
    foreach (var a in As)
    {
        // Each property is tested exactly once (A1 .. A10).
        if (a.A1 == null || a.A2 == null || a.A3 == null || a.A4 == null || a.A5 == null || a.A6 == null || a.A7 == null || a.A8 == null || a.A9 == null || a.A10 == null)
        {
            index++;
        }

        // Alternative variant under test (lifted + yields null if any operand is null):
        //var sum = a.A1 + a.A2 +a.A3 +a.A4 +a.A5 +a.A6 +a.A7 +a.A8 +a.A9 +a.A10;
        //if( sum == null )
        //{
        //    index++;
        //}

    }
    Console.WriteLine(sw.Elapsed);
}

好吧,在 if 语句中把每一项依次写出来可能是最高效的:由于短路求值,最好的情况下只需读取 1 项(遇到第一个 true 就停止),最坏的情况下读取 n 项。

全部加起来总是读取每一项,所以没有效率。

但是正如您所说,这些解决方案的可读性并不好。可读性的解决方案是编写一个函数,将每个元素放入 IEnumerable,然后使用 Linq 的 Any 来测试列表:

using System;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// Demo: copy a row's nullable values into a reusable list, then ask the list
/// whether it contains any null entry.
/// </summary>
public class SampleProgram
{

    /// <summary>Row with nullable numeric columns (only three of the 100 are spelled out).</summary>
    public class RowObject {
        public double? var1 {get; set;}
        public double? var2 {get; set;}
        //Another 98 double? variables declared
        public double? var100 {get; set;}
    }

    /// <summary>
    /// Empties <paramref name="rowList"/> and refills it with the values of
    /// <paramref name="obj"/>, in declaration order.
    /// </summary>
    private static void GetRowList(RowObject obj, List<Nullable<double>> rowList)
    {
        rowList.Clear();
        rowList.AddRange(new double?[]
        {
            obj.var1,
            obj.var2,
            //Another 98 double? variables declared
            obj.var100,
        });
    }

    /// <summary>Returns true as soon as a null entry is found; false if there is none.</summary>
    private static bool TestRow(List<Nullable<double>> rowList)
    {
        foreach (double? cell in rowList)
        {
            if (cell == null)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>Prints the check result for a row with a null and for a fully populated row.</summary>
    public static void Main(string[] args)
    {
        // One buffer is reused for both rows; GetRowList clears it on every call.
        var buffer = new List<double?>();

        var withGap = new RowObject { var1 = null, var2 = 2, var100 = 100 };
        GetRowList(withGap, buffer);
        Console.WriteLine(TestRow(buffer));

        var complete = new RowObject { var1 = 1, var2 = 2, var100 = 100 };
        GetRowList(complete, buffer);
        Console.WriteLine(TestRow(buffer));
    }
}

这需要对每一项进行一次读写,将其放入列表中,然后读取1-n项进行测试。但它更具可读性。

如果您不想对 GetRowList 中的属性进行硬编码并且愿意牺牲更多的速度,您可以使用 reflection 以这种方式将所有属性添加到列表中。

为什么不使用这样的嵌入式数组?

/// <summary>
/// Fixed-size row of nullable doubles backed by an array. It exposes a 0-based
/// indexer and is enumerable, so LINQ operators (Any, All, Sum, ...) work on it directly.
/// </summary>
public class RowObject : IEnumerable<double?>
{
  private double?[] vars { get; set; }

  /// <summary>Yields the stored values in index order.</summary>
  IEnumerator<double?> IEnumerable<double?>.GetEnumerator()
  {
    foreach ( var value in vars )
      yield return value;
  }

  /// <summary>Non-generic enumeration; yields the same sequence as the generic one.</summary>
  IEnumerator IEnumerable.GetEnumerator()
  {
    foreach ( var value in vars )
      yield return value;
  }

  /// <summary>
  /// Throws <see cref="ArgumentOutOfRangeException"/> unless
  /// <paramref name="min"/> &lt;= <paramref name="index"/> &lt;= <paramref name="max"/>
  /// (both bounds inclusive).
  /// </summary>
  private void CheckIndex(int index, int min, int max)
  {
    if ( index < min || index > max )
      throw new ArgumentOutOfRangeException("Index", $"Must be between {min} and {max}");
  }

  /// <summary>Gets or sets the value at a 0-based position.</summary>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="index"/> is negative or not less than the capacity.
  /// </exception>
  public double? this[int index]
  {
    get
    {
      // The last valid position is Length - 1; CheckIndex's max is inclusive.
      CheckIndex(index, 0, vars.Length - 1);
      return vars[index];
    }
    set
    {
      CheckIndex(index, 0, vars.Length - 1);
      vars[index] = value;
    }
  }

  /// <summary>Creates a row with <paramref name="capacity"/> slots, all initially null.</summary>
  public RowObject(int capacity)
  {
    vars = new double?[capacity];
  }
}

如果你想要一个从 1 开始的索引器

  /// <summary>Gets or sets the value at a 1-based position (1 .. capacity).</summary>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="index"/> is less than 1 or greater than the capacity.
  /// </exception>
  public double? this[int index]
  {
    get
    {
      // CheckIndex treats both bounds as inclusive, so valid positions are 1..Length.
      CheckIndex(index, 1, vars.Length);
      // Shift down by one to reach the 0-based backing array.
      return vars[index - 1];
    }
    set
    {
      CheckIndex(index, 1, vars.Length);
      vars[index - 1] = value;
    }
  }
}

测试:

/// <summary>
/// Builds three sample rows (mixed, all null, fully populated) and prints, for each,
/// its sum followed by the any/all null checks that hold for it.
/// </summary>
static void Test()
{
  // Index initializers call the indexer setter once per entry, in the order written.
  var r0 = new RowObject(3) { [0] = 7250.345, [1] = null, [2] = 64.742 };
  var r1 = new RowObject(3) { [0] = null, [1] = null, [2] = null };
  var r2 = new RowObject(3) { [0] = 7250.345, [1] = 1000.0, [2] = 64.742 };

  Action<RowObject, string> report = (row, label) =>
  {
    // Sum over double? skips null entries, so an all-null row sums to 0.
    double? total = row.Sum();
    Console.WriteLine(label + ".Sum() = " + total);

    if ( row.Any(v => v.HasValue) )
    {
      Console.WriteLine(label + " contains at least a not null value");
    }

    if ( row.Any(v => !v.HasValue) )
    {
      Console.WriteLine(label + " contains at least one null value");
    }

    if ( row.All(v => v.HasValue) )
    {
      Console.WriteLine(label + " contains no null value");
    }

    if ( row.All(v => !v.HasValue) )
    {
      Console.WriteLine(label + " contains only null values");
    }
  };

  report(r0, "r0");
  Console.WriteLine();
  report(r1, "r1");
  Console.WriteLine();
  report(r2, "r2");
}

输出:

r0.Sum() = 7315,087
r0 contains at least a not null value
r0 contains at least one null value

r1.Sum() = 0
r1 contains at least one null value
r1 contains only null values

r2.Sum() = 8315,087
r2 contains at least a not null value
r2 contains no null value