如何使用 C# 从文本文件的行中提取特定数据并输出到新的文本文件

How to extract specific data from lines in a text file and output to a new text file using C#

我有几个包含数据行的大型 .csv 文件。我只需要从每一行中提取数据的特定部分,从而忽略我不感兴趣的部分并将结果输出到一个新的文本文件中。

例如,这里有一段数据:

Fr 23:59:59 M40 N04161K RX LAG 2 JNYT  17 STORE OCC 1 PRUD 1 RAW  -9 LAG   0

Fr 23:59:59 M08  N09461M  %SAT   3  %CONG   0  MQ 0  EB 0  OSQ     0 NSQ     4

Fr 23:59:59 M20 N09461M SAT   3%  SQ     0  FLOW     4  GN  13  STOC  9

我希望编写一个如下所示的新文件:

5,23,59,59,2,17,1,1,-9,0

5,23,59,59,3,0,0,0,0,4

5,23,59,59,3,0,4,13,9

(您会注意到数据的开头是“5”,我也想用它代替代表 'Friday' 的 'Fr')

数据在数据集中由 'M' 引用(M40、M08 等)标识,如果能把同一数据集的所有数据一起输出会很有用(例如,把所有带有 M40 的数据都过滤到一个 .txt 文件中,这就是我使用 'if' 语句的原因)

我希望每个数字都用逗号分隔但不是必需的

到目前为止,这是我的代码:

/// <summary>
/// Console program that copies every line mentioning the "M40" or "M08" data set
/// from a fixed input .csv file to a fixed output text file.
/// </summary>
class Program
{
    /// <summary>
    /// Reads the input file line by line and writes each matching line, unchanged,
    /// to the output file. Any exception is reported on the console; the program
    /// then waits for a key press before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        try
        {
            // Verbatim literals (@"...") are required here: in a regular literal
            // "\M" and "\o" are unrecognized escape sequences and do not compile.
            // The using statements close both files even when an exception is thrown.
            using (StreamReader sr = new StreamReader(@"C:\MessExport_20110402_0000.csv"))
            using (StreamWriter sw = new StreamWriter(@"C:\output.txt"))
            {
                String line;

                // ReadLine returns null once the end of the file is reached.
                while ((line = sr.ReadLine()) != null)
                {
                    // A single combined test so that a line mentioning both
                    // references is written once rather than twice.
                    if (line.Contains("M40") || line.Contains("M08"))
                    {
                        sw.WriteLine(line);
                    }
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("Exception: " + e.Message);
        }
        finally
        {
            Console.WriteLine("Executing finally block.");
            Console.WriteLine("Press any key to exit.");
            Console.ReadKey();
        }
    }
}

读取下一个 .csv 文件并将结果再次输出到新的 .txt 文件会很有用

我对将任何代码与正则表达式和拆分结合使用非常陌生,因此非常感谢任何帮助。

只是一个简单的实现:

// Splits data.csv into one output file per data-set reference (third token of
// each line, e.g. "M40" -> M40.txt). Each output line has the form
// <day index>,<hh>,<mm>,<ss>,<every integer token after the reference>.
string workingDirectory = @"c:\";

// The position in this array is the day number written to the output
// (Su = 0 ... Fr = 5, Sa = 6); an unknown day abbreviation yields -1.
var days = new[] { "Su", "Mo", "Tu", "We", "Th", "Fr", "Sa" };

// One writer per reference, created lazily the first time the reference is seen.
var writers = new Dictionary<string, StreamWriter>();
try
{
    using (StreamReader sr = new StreamReader(Path.Combine(workingDirectory, "data.csv")))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            var items = line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // Blank or truncated lines have no day/time/reference triple; skip
            // them instead of failing on items[2].
            if (items.Length < 3)
            {
                continue;
            }

            string reference = items[2];
            StreamWriter output;
            if (!writers.TryGetValue(reference, out output))
            {
                output = new StreamWriter(Path.Combine(workingDirectory, reference + ".txt"));
                writers.Add(reference, output);
            }

            var times = items[1].Split(':');

            // Keep only the tokens that parse as integers; the invariant culture
            // makes the result independent of the machine's regional settings.
            var digits = items.Skip(3).Where(token =>
            {
                int ignored;
                return int.TryParse(
                    token,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out ignored);
            });

            var dayIndex = Array.IndexOf(days, items[0])
                .ToString(System.Globalization.CultureInfo.InvariantCulture);
            var data = new[] { dayIndex }.Concat(times).Concat(digits);
            output.WriteLine(String.Join(",", data));
        }
    }
}
finally
{
    // Flush and close every writer that was opened, even if reading failed
    // part-way through. Dispose already closes the writer.
    foreach (var openWriter in writers.Values)
    {
        openWriter.Dispose();
    }
}

这是一个快速的尝试,但我认为它会让您有所收获。

// Sample input lines, one per data-set reference.
var lines = new List<string> { 
    "Fr 23:59:59 M40 N04161K RX LAG 2 JNYT  17 STORE OCC 1 PRUD 1 RAW  -9 LAG   0",
    "Fr 23:59:59 M08  N09461M  %SAT   3  %CONG   0  MQ 0  EB 0  OSQ     0 NSQ     4",
    "Fr 23:59:59 M20 N09461M SAT   3%  SQ     0  FLOW     4  GN  13  STOC  9"
};

// IgnorePatternWhitespace lets the pattern below contain spaces for readability.
var options = RegexOptions.IgnorePatternWhitespace;

// Matches either the two-character day code at the start of the line, or a
// stand-alone (optionally negative) integer. The literal must be verbatim
// (@"..."): in a regular string "\w" and "\d" are invalid escape sequences and
// "\b" would be a backspace character instead of a word boundary.
var regex = new Regex(@"(?: ^\w\w | -?\b\d+\b )", options);

foreach (var l in lines)
{
    var matches = regex.Matches(l);

    // Each match is followed by a comma, so every output line ends with one.
    foreach (Match m in matches)
    {
        Console.Write("{0},", m.Value);
    }
    Console.WriteLine();
}

输出:

Fr,23,59,59,2,17,1,1,-9,0,
Fr,23,59,59,3,0,0,0,0,4,
Fr,23,59,59,3,0,4,13,9,
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

namespace Program
{
  /// <summary>
  /// Converts selected lines of a space-separated export file into comma-separated
  /// lines of the form &lt;day number&gt;,&lt;hh&gt;,&lt;mm&gt;,&lt;ss&gt;,&lt;integer values...&gt;.
  /// </summary>
  public class TransformCsv
  {
    /// <summary>
    /// Entry point: transforms the fixed input file into the fixed output file,
    /// keeping only the lines accepted by <see cref="LineFilterFunction"/>.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    [STAThread]
    public static void Main(String[] args)
    {
      (new TransformCsv()).Run(@"c:\temp\MessExport_20110402_0000.csv", @"c:\temp\output.txt", LineFilterFunction);
    }

    /// <summary>
    /// Default line filter: accepts lines that mention the "M40" or "M08" reference.
    /// </summary>
    /// <param name="line">The raw input line.</param>
    /// <returns><c>true</c> when the line contains "M40" or "M08".</returns>
    public static Boolean LineFilterFunction(String line)
    {
      return line.Contains("M40") || line.Contains("M08");
    }

    ////////////////////

    // Position + 1 is the day number written to the output (Mo = 1 ... Fr = 5, Su = 7).
    private static readonly List<String> _dayOfWeek = new List<String>() { "Mo", "Tu", "We", "Th", "Fr", "Sa", "Su" };

    private static readonly Char[] _spaceCharacter = " ".ToCharArray();

    /// <summary>
    /// Reads <paramref name="inputFilePath"/> line by line and writes the transformed
    /// form of every non-blank line accepted by <paramref name="lineFilterFunction"/>
    /// to <paramref name="outputFilePath"/>.
    /// </summary>
    /// <param name="inputFilePath">Path of the file to read.</param>
    /// <param name="outputFilePath">Path of the file to create or overwrite.</param>
    /// <param name="lineFilterFunction">Predicate deciding which lines are transformed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="lineFilterFunction"/> is null.</exception>
    public void Run(String inputFilePath, String outputFilePath, Func<String, Boolean> lineFilterFunction)
    {
      if (lineFilterFunction == null)
      {
        throw new ArgumentNullException("lineFilterFunction");
      }

      using (var reader = new StreamReader(inputFilePath))
      using (var writer = new StreamWriter(outputFilePath))
      {
        String line;
        while ((line = reader.ReadLine()) != null)
        {
          if (!String.IsNullOrWhiteSpace(line) && lineFilterFunction(line))
          {
            writer.WriteLine(this.GetTransformedLine(line));
          }
        }
      }
    }

    /// <summary>
    /// Builds the comma-separated output for one input line: the day number, the
    /// time split into its parts, then every integer token that follows the
    /// M-reference (third token). The M-reference itself contributes no field, so
    /// "Fr 23:59:59 M40 ... LAG 2 JNYT 17 ..." becomes "5,23,59,59,2,17,...".
    /// </summary>
    /// <param name="line">A non-blank input line.</param>
    /// <returns>The transformed line.</returns>
    private String GetTransformedLine(String line)
    {
      var elements = line.Split(_spaceCharacter, StringSplitOptions.RemoveEmptyEntries);

      var result = new List<String>();

      // An unrecognized day abbreviation gives IndexOf == -1 and therefore day number 0.
      result.Add((_dayOfWeek.IndexOf(elements[0]) + 1).ToString(System.Globalization.CultureInfo.InvariantCulture));

      // Guard against truncated lines that carry no time token.
      if (elements.Length > 1)
      {
        result.Add(elements[1].Replace(':', ','));
      }

      foreach (var element in elements.Skip(3))
      {
        // Values such as "3%" carry a unit marker; strip it so the number is kept.
        var candidate = element.Trim('%');
        if (this.IsInt32(candidate))
        {
          result.Add(candidate);
        }
      }

      return String.Join(",", result);
    }

    /// <summary>
    /// Tells whether <paramref name="s"/> is a valid 32-bit integer, independent of
    /// the current culture.
    /// </summary>
    /// <param name="s">The token to test.</param>
    /// <returns><c>true</c> when the token parses as an <see cref="Int32"/>.</returns>
    private Boolean IsInt32(String s)
    {
      Int32 parsed;
      return Int32.TryParse(
        s,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out parsed);
    }
  }
}