在 C# 中使用 Deedle 进行成对匹配和开窗
Pairwise matching and windowing using Deedle in C#
假设我们要计算一些水果的价格如何变化。从 CSV 文件开始:
Day,Name,Kind,Price
2019-09-04,"apple","red delicious",63.09
2019-09-04,"apple","ginger crisp",52.14
2019-09-04,"orange","navel",41.18
2019-09-03,"apple","red delicious",63.07
2019-09-03,"apple","ginger crisp",52.11
2019-09-03,"orange","navel",41.13
2019-09-02,"apple","red delicious",63.00
2019-09-02,"apple","ginger crisp",52.00
2019-09-02,"orange","navel",41.00
对于未知数量的水果和品种,我们可以读取数据框并构建一个额外的列用于匹配。
var fruits_file = Path.Combine(root, "fruits.csv");
Deedle.Frame<int, string> df = Frame.ReadCsv(fruits_file);
Series<int, string> name = df.GetColumn<string>("Name");
Series<int, string> kind = df.GetColumn<string>("Kind");
var namekind = name.ZipInner(kind).Select(t => t.Value.Item1 + t.Value.Item2);
df.AddColumn("NameKind", namekind);
但问题依旧。 Deedle.Series.Window()
和 Deedle.Series.Pairwise()
可以执行一阶差分,但不能基于某些字符串(同名类)进行匹配。
复制列 LastPrice
并随后计算 Change
的正确方法是什么?
Day,Name,Kind,Price,LastPrice,Change
2019-09-04,"apple","red delicious",63.09,63.07,0.02
2019-09-04,"apple","ginger crisp",52.14,52.11,0.03
2019-09-04,"orange","navel",41.18,41.13,0.05
2019-09-03,"apple","red delicious",63.07,63.00,0.07
2019-09-03,"apple","ginger crisp",52.11,52.00,0.11
2019-09-03,"orange","navel",41.13,41.00,0.13
2019-09-02,"apple","red delicious",63.00,,
2019-09-02,"apple","ginger crisp",52.00,,
2019-09-02,"orange","navel",41.00,,
查看下面的代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
namespace ConsoleApplication137
{
class Program
{
const string FILENAME = @"c:\temp\test.csv";
static void Main(string[] args)
{
Fruit fruit = new Fruit(FILENAME);
Fruit.PrintFruits();
Console.ReadLine();
}
}
public class Fruit
{
public static List<Fruit> fruits { get; set; }
public DateTime day { get; set; }
public string name { get; set; }
public string kind { get; set; }
public decimal price { get; set; }
public Fruit() { }
public Fruit(string filename)
{
int count = 0;
StreamReader reader = new StreamReader(filename);
string line = "";
while ((line = reader.ReadLine()) != null)
{
if (++count > 1)
{
string[] splitLine = line.Split(new char[] { ',' }).ToArray();
Fruit newFruit = new Fruit();
if (fruits == null) fruits = new List<Fruit>();
fruits.Add(newFruit);
newFruit.day = DateTime.Parse(splitLine[0]);
newFruit.name = splitLine[1];
newFruit.kind = splitLine[2];
newFruit.price = decimal.Parse(splitLine[3]);
}
}
reader.Close();
}
public static void PrintFruits()
{
var groups = fruits.OrderBy(x => x.day)
.GroupBy(x => new { name = x.name, kind = x.kind })
.ToList();
foreach (var group in groups)
{
for (int i = 0; i < group.Count() - 1; i++)
{
Console.WriteLine("Old Date : '{0}', New Date : '{1}', Name : '{2}', Kind : '{3}', Old Price '{4}', New Price '{5}', Delta Price '{6}'",
group.ToList()[i].day.ToString("yyyy-MM-dd"),
group.ToList()[i + 1].day.ToString("yyyy-MM-dd"),
group.ToList()[i].name,
group.ToList()[i].kind,
group.ToList()[i].price.ToString(),
group.ToList()[i + 1].price.ToString(),
(group.ToList()[i + 1].price - group.ToList()[i].price).ToString()
);
}
}
}
}
}
假设我们要计算一些水果的价格如何变化。从 CSV 文件开始:
Day,Name,Kind,Price
2019-09-04,"apple","red delicious",63.09
2019-09-04,"apple","ginger crisp",52.14
2019-09-04,"orange","navel",41.18
2019-09-03,"apple","red delicious",63.07
2019-09-03,"apple","ginger crisp",52.11
2019-09-03,"orange","navel",41.13
2019-09-02,"apple","red delicious",63.00
2019-09-02,"apple","ginger crisp",52.00
2019-09-02,"orange","navel",41.00
对于未知数量的水果和品种,我们可以读取数据框并构建一个额外的列用于匹配。
var fruits_file = Path.Combine(root, "fruits.csv");
Deedle.Frame<int, string> df = Frame.ReadCsv(fruits_file);
Series<int, string> name = df.GetColumn<string>("Name");
Series<int, string> kind = df.GetColumn<string>("Kind");
var namekind = name.ZipInner(kind).Select(t => t.Value.Item1 + t.Value.Item2);
df.AddColumn("NameKind", namekind);
但问题依旧。 Deedle.Series.Window()
和 Deedle.Series.Pairwise()
可以执行一阶差分,但不能基于某些字符串(同名类)进行匹配。
复制列 LastPrice
并随后计算 Change
的正确方法是什么?
Day,Name,Kind,Price,LastPrice,Change
2019-09-04,"apple","red delicious",63.09,63.07,0.02
2019-09-04,"apple","ginger crisp",52.14,52.11,0.03
2019-09-04,"orange","navel",41.18,41.13,0.05
2019-09-03,"apple","red delicious",63.07,63.00,0.07
2019-09-03,"apple","ginger crisp",52.11,52.00,0.11
2019-09-03,"orange","navel",41.13,41.00,0.13
2019-09-02,"apple","red delicious",63.00,,
2019-09-02,"apple","ginger crisp",52.00,,
2019-09-02,"orange","navel",41.00,,
查看下面的代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
namespace ConsoleApplication137
{
class Program
{
const string FILENAME = @"c:\temp\test.csv";
static void Main(string[] args)
{
Fruit fruit = new Fruit(FILENAME);
Fruit.PrintFruits();
Console.ReadLine();
}
}
public class Fruit
{
public static List<Fruit> fruits { get; set; }
public DateTime day { get; set; }
public string name { get; set; }
public string kind { get; set; }
public decimal price { get; set; }
public Fruit() { }
public Fruit(string filename)
{
int count = 0;
StreamReader reader = new StreamReader(filename);
string line = "";
while ((line = reader.ReadLine()) != null)
{
if (++count > 1)
{
string[] splitLine = line.Split(new char[] { ',' }).ToArray();
Fruit newFruit = new Fruit();
if (fruits == null) fruits = new List<Fruit>();
fruits.Add(newFruit);
newFruit.day = DateTime.Parse(splitLine[0]);
newFruit.name = splitLine[1];
newFruit.kind = splitLine[2];
newFruit.price = decimal.Parse(splitLine[3]);
}
}
reader.Close();
}
public static void PrintFruits()
{
var groups = fruits.OrderBy(x => x.day)
.GroupBy(x => new { name = x.name, kind = x.kind })
.ToList();
foreach (var group in groups)
{
for (int i = 0; i < group.Count() - 1; i++)
{
Console.WriteLine("Old Date : '{0}', New Date : '{1}', Name : '{2}', Kind : '{3}', Old Price '{4}', New Price '{5}', Delta Price '{6}'",
group.ToList()[i].day.ToString("yyyy-MM-dd"),
group.ToList()[i + 1].day.ToString("yyyy-MM-dd"),
group.ToList()[i].name,
group.ToList()[i].kind,
group.ToList()[i].price.ToString(),
group.ToList()[i + 1].price.ToString(),
(group.ToList()[i + 1].price - group.ToList()[i].price).ToString()
);
}
}
}
}
}