根据标题合并文本文件。
Merging text files based on headings.
我正在寻找有关如何根据标题合并多个文本文件的建议。
假设我有以下 3 个文本文件,它们的标题(header)如下所示。如您所见,有些元素在多个文件中重复出现,有些元素则是某个文本文件所独有的。每个文本文件包含数千行,每个 header 下都有各种类型的数据。
文本文件 1:
元素 1|元素 2|元素 4|元素 5|
00000001|00000002|00000004|00000005|
文本文件 2:
元素 2|元素 3|元素 4|元素 5|
00000002|00000003|00000004|00000005|
文本文件 3:
元素 1|元素 3|元素 4|元素 6|
00000001|00000003|00000004|00000006|
最终输出的文本文件将如下所示:
元素 1|元素 2|元素 4|元素 5|元素 3|元素 6|
00000001|00000002|00000004|00000005|________|________|
________|00000002|00000004|00000005|00000003|________|
00000001|________|00000004|________|00000003|00000006|
如您所见,输出文本文件将从原始 3 个文本文件中捕获每个 header。然后它将每个数据字段存储在适当的标题下。如果某个元素在特定文本文件中不可用,则该字段留空。
您应该向我们展示您已有的代码,然后询问如何处理您无法解决的部分。
我建议您把 header 存放在一个列表中,把各行存放在列表或字典中。这样您就可以检查某个 header 是否已经存在。
例如:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace TestElements
{
    /// <summary>
    /// Accumulates delimited tables read from several files into one combined table.
    /// Column headers are collected in first-seen order, and every data row is stored
    /// as a header-to-value map, so a row simply has no entry for a column that its
    /// source file did not contain.
    /// </summary>
    public class Elements
    {
        /// <summary>
        /// Strings that delimit rows inside a file.
        /// NOTE(review): this is a single space, which matches the sample data shown
        /// alongside this class; files that use line breaks between rows would need a
        /// different value here - confirm against the real input.
        /// </summary>
        public static string[] ROW_SEPARATOR = { " " };

        /// <summary>Strings that delimit the fields inside one row.</summary>
        public static string[] ELEMENT_SEPARATOR = { "|" };

        // Id that the next stored row will receive; starts at 0 and grows by one per row.
        private int _nextRowId;

        /// <summary>Every distinct header seen so far, in the order it was first seen.</summary>
        public List<string> ColumnHeaders;

        /// <summary>All stored rows, keyed by row id; each row maps header to field value.</summary>
        public Dictionary<int, Dictionary<string, string>> Rows;

        /// <summary>Creates an empty table with no headers and no rows.</summary>
        public Elements()
        {
            this._nextRowId = 0;
            this.ColumnHeaders = new List<string>();
            this.Rows = new Dictionary<int, Dictionary<string, string>>();
        }

        /// <summary>
        /// Reads one delimited file and merges it into this table. The first row supplies
        /// the headers; each following row is stored under those headers.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <remarks>
        /// The empty field produced by a row's terminating element separator is dropped,
        /// so it is not registered as a nameless column. Empty rows (for example after a
        /// trailing row separator) are skipped. A row with fewer fields than headers only
        /// fills the headers it has values for; fields beyond the last header are ignored.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The header row contains the same header more than once.
        /// </exception>
        public void AddFromFile(string path)
        {
            // Read the whole file and split it into rows
            string[] lines = File.ReadAllText(path).Split(ROW_SEPARATOR, StringSplitOptions.None);

            // Split always yields at least one entry, so lines[0] exists even for an empty file
            List<string> headers = SplitFields(lines[0]);

            // Register the headers this table has not seen yet
            foreach (string header in headers)
            {
                if (!this.ColumnHeaders.Contains(header))
                {
                    this.ColumnHeaders.Add(header);
                }
            }

            // Parse every data row
            for (int i = 1; i < lines.Length; i++)
            {
                // A trailing row separator leaves an empty last entry; it is not a data row
                if (lines[i].Length == 0)
                {
                    continue;
                }

                List<string> elements = SplitFields(lines[i]);

                // Never index past the shorter of the two lists
                int fieldCount = Math.Min(headers.Count, elements.Count);

                Dictionary<string, string> row = new Dictionary<string, string>();
                for (int j = 0; j < fieldCount; j++)
                {
                    row.Add(headers[j], elements[j]);
                }

                this.AddRow(row);
            }
        }

        // Splits one row into fields, dropping the empty last field that a terminating
        // element separator produces (e.g. "a|b|" gives "a", "b").
        private static List<string> SplitFields(string line)
        {
            List<string> fields = line.Split(ELEMENT_SEPARATOR, StringSplitOptions.None).ToList();
            if (fields.Count > 0 && fields[fields.Count - 1].Length == 0)
            {
                fields.RemoveAt(fields.Count - 1);
            }
            return fields;
        }

        // Stores a row under the next free id.
        private void AddRow(Dictionary<string, string> rowdata)
        {
            this.Rows.Add(this._nextRowId, rowdata);
            this._nextRowId++;
        }
    }
}
请注意,我在此示例中没有检查错误。
然后您可以根据需要构建输出文本。
如果您有任何更具体的问题,或者这没有帮助,请直接提问。
编辑:这是一个用法示例
// Usage example: writes three sample files, merges them with Elements and saves
// the combined table to Result.txt.

// Create a few files to use in a test
string TextFile1 =
    "Element1|Element2|Element4|Element5| " +
    "00000001|00000002|00000004|00000005| " +
    "00000011|00000012|00000014|00000015| " +
    "00000021|00000022|00000024|00000025| " +
    "00000031|00000032|00000034|00000035|";
string TextFile2 =
    "Element2|Element3|Element4|Element5| " +
    "00000002|00000003|00000004|00000005| " +
    "00000012|00000013|00000014|00000015| " +
    "00000022|00000023|00000024|00000025|";
string TextFile3 =
    "Element1|Element3|Element4|Element6| " +
    "00000001|00000003|00000004|00000006| " +
    "00000011|00000013|00000014|00000016| " +
    "00000021|00000023|00000024|00000026| " +
    "00000031|00000033|00000034|00000036| " +
    "00000041|00000042|00000044|00000045|";
File.WriteAllText("File1.txt", TextFile1);
File.WriteAllText("File2.txt", TextFile2);
File.WriteAllText("File3.txt", TextFile3);

// Read the files into our class
Elements elements = new Elements();
elements.AddFromFile("File1.txt");
elements.AddFromFile("File2.txt");
elements.AddFromFile("File3.txt");

// Build the result
StringBuilder sb = new StringBuilder();

// First build the header line
foreach (string header in elements.ColumnHeaders)
{
    sb.Append(header);
    sb.Append("|");
}
sb.AppendLine();

// Dictionary enumeration order is unspecified, so walk the row ids in ascending
// order to guarantee that rows come out in the order they were read.
List<int> rowIds = new List<int>(elements.Rows.Keys);
rowIds.Sort();

// Next add every row
foreach (int rowId in rowIds)
{
    Dictionary<string, string> row = elements.Rows[rowId];
    foreach (string header in elements.ColumnHeaders)
    {
        // A row has no entry for columns its source file lacked; print a placeholder
        string value;
        sb.Append(row.TryGetValue(header, out value) ? value : "________");
        sb.Append("|");
    }
    sb.AppendLine();
}

// Finally save the result into a file
File.WriteAllText("Result.txt", sb.ToString());
结果如下所示:
Element1|Element2|Element4|Element5|Element3|Element6|
00000001|00000002|00000004|00000005|________|________|
00000011|00000012|00000014|00000015|________|________|
00000021|00000022|00000024|00000025|________|________|
00000031|00000032|00000034|00000035|________|________|
________|00000002|00000004|00000005|00000003|________|
________|00000012|00000014|00000015|00000013|________|
________|00000022|00000024|00000025|00000023|________|
00000001|________|00000004|________|00000003|00000006|
00000011|________|00000014|________|00000013|00000016|
00000021|________|00000024|________|00000023|00000026|
00000031|________|00000034|________|00000033|00000036|
00000041|________|00000044|________|00000042|00000045|
我正在寻找有关如何根据标题合并多个文本文件的建议。
假设我有以下 3 个文本文件,它们的标题(header)如下所示。如您所见,有些元素在多个文件中重复出现,有些元素则是某个文本文件所独有的。每个文本文件包含数千行,每个 header 下都有各种类型的数据。
文本文件 1:
元素 1|元素 2|元素 4|元素 5|
00000001|00000002|00000004|00000005|
文本文件 2:
元素 2|元素 3|元素 4|元素 5|
00000002|00000003|00000004|00000005|
文本文件 3:
元素 1|元素 3|元素 4|元素 6|
00000001|00000003|00000004|00000006|
最终输出的文本文件将如下所示:
元素 1|元素 2|元素 4|元素 5|元素 3|元素 6|
00000001|00000002|00000004|00000005|________|________|
________|00000002|00000004|00000005|00000003|________|
00000001|________|00000004|________|00000003|00000006|
如您所见,输出文本文件将从原始 3 个文本文件中捕获每个 header。然后它将每个数据字段存储在适当的标题下。如果某个元素在特定文本文件中不可用,则该字段留空。
您应该向我们展示您已有的代码,然后询问如何处理您无法解决的部分。
我建议您把 header 存放在一个列表中,把各行存放在列表或字典中。这样您就可以检查某个 header 是否已经存在。
例如:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace TestElements
{
    /// <summary>
    /// Accumulates delimited tables read from several files into one combined table.
    /// Column headers are collected in first-seen order, and every data row is stored
    /// as a header-to-value map, so a row simply has no entry for a column that its
    /// source file did not contain.
    /// </summary>
    public class Elements
    {
        /// <summary>
        /// Strings that delimit rows inside a file.
        /// NOTE(review): this is a single space, which matches the sample data shown
        /// alongside this class; files that use line breaks between rows would need a
        /// different value here - confirm against the real input.
        /// </summary>
        public static string[] ROW_SEPARATOR = { " " };

        /// <summary>Strings that delimit the fields inside one row.</summary>
        public static string[] ELEMENT_SEPARATOR = { "|" };

        // Id that the next stored row will receive; starts at 0 and grows by one per row.
        private int _nextRowId;

        /// <summary>Every distinct header seen so far, in the order it was first seen.</summary>
        public List<string> ColumnHeaders;

        /// <summary>All stored rows, keyed by row id; each row maps header to field value.</summary>
        public Dictionary<int, Dictionary<string, string>> Rows;

        /// <summary>Creates an empty table with no headers and no rows.</summary>
        public Elements()
        {
            this._nextRowId = 0;
            this.ColumnHeaders = new List<string>();
            this.Rows = new Dictionary<int, Dictionary<string, string>>();
        }

        /// <summary>
        /// Reads one delimited file and merges it into this table. The first row supplies
        /// the headers; each following row is stored under those headers.
        /// </summary>
        /// <param name="path">Path of the file to read.</param>
        /// <remarks>
        /// The empty field produced by a row's terminating element separator is dropped,
        /// so it is not registered as a nameless column. Empty rows (for example after a
        /// trailing row separator) are skipped. A row with fewer fields than headers only
        /// fills the headers it has values for; fields beyond the last header are ignored.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The header row contains the same header more than once.
        /// </exception>
        public void AddFromFile(string path)
        {
            // Read the whole file and split it into rows
            string[] lines = File.ReadAllText(path).Split(ROW_SEPARATOR, StringSplitOptions.None);

            // Split always yields at least one entry, so lines[0] exists even for an empty file
            List<string> headers = SplitFields(lines[0]);

            // Register the headers this table has not seen yet
            foreach (string header in headers)
            {
                if (!this.ColumnHeaders.Contains(header))
                {
                    this.ColumnHeaders.Add(header);
                }
            }

            // Parse every data row
            for (int i = 1; i < lines.Length; i++)
            {
                // A trailing row separator leaves an empty last entry; it is not a data row
                if (lines[i].Length == 0)
                {
                    continue;
                }

                List<string> elements = SplitFields(lines[i]);

                // Never index past the shorter of the two lists
                int fieldCount = Math.Min(headers.Count, elements.Count);

                Dictionary<string, string> row = new Dictionary<string, string>();
                for (int j = 0; j < fieldCount; j++)
                {
                    row.Add(headers[j], elements[j]);
                }

                this.AddRow(row);
            }
        }

        // Splits one row into fields, dropping the empty last field that a terminating
        // element separator produces (e.g. "a|b|" gives "a", "b").
        private static List<string> SplitFields(string line)
        {
            List<string> fields = line.Split(ELEMENT_SEPARATOR, StringSplitOptions.None).ToList();
            if (fields.Count > 0 && fields[fields.Count - 1].Length == 0)
            {
                fields.RemoveAt(fields.Count - 1);
            }
            return fields;
        }

        // Stores a row under the next free id.
        private void AddRow(Dictionary<string, string> rowdata)
        {
            this.Rows.Add(this._nextRowId, rowdata);
            this._nextRowId++;
        }
    }
}
请注意,我在此示例中没有检查错误。
然后您可以根据需要构建输出文本。
如果您有任何更具体的问题,或者这没有帮助,请直接提问。
编辑:这是一个用法示例
// Usage example: writes three sample files, merges them with Elements and saves
// the combined table to Result.txt.

// Create a few files to use in a test
string TextFile1 =
    "Element1|Element2|Element4|Element5| " +
    "00000001|00000002|00000004|00000005| " +
    "00000011|00000012|00000014|00000015| " +
    "00000021|00000022|00000024|00000025| " +
    "00000031|00000032|00000034|00000035|";
string TextFile2 =
    "Element2|Element3|Element4|Element5| " +
    "00000002|00000003|00000004|00000005| " +
    "00000012|00000013|00000014|00000015| " +
    "00000022|00000023|00000024|00000025|";
string TextFile3 =
    "Element1|Element3|Element4|Element6| " +
    "00000001|00000003|00000004|00000006| " +
    "00000011|00000013|00000014|00000016| " +
    "00000021|00000023|00000024|00000026| " +
    "00000031|00000033|00000034|00000036| " +
    "00000041|00000042|00000044|00000045|";
File.WriteAllText("File1.txt", TextFile1);
File.WriteAllText("File2.txt", TextFile2);
File.WriteAllText("File3.txt", TextFile3);

// Read the files into our class
Elements elements = new Elements();
elements.AddFromFile("File1.txt");
elements.AddFromFile("File2.txt");
elements.AddFromFile("File3.txt");

// Build the result
StringBuilder sb = new StringBuilder();

// First build the header line
foreach (string header in elements.ColumnHeaders)
{
    sb.Append(header);
    sb.Append("|");
}
sb.AppendLine();

// Dictionary enumeration order is unspecified, so walk the row ids in ascending
// order to guarantee that rows come out in the order they were read.
List<int> rowIds = new List<int>(elements.Rows.Keys);
rowIds.Sort();

// Next add every row
foreach (int rowId in rowIds)
{
    Dictionary<string, string> row = elements.Rows[rowId];
    foreach (string header in elements.ColumnHeaders)
    {
        // A row has no entry for columns its source file lacked; print a placeholder
        string value;
        sb.Append(row.TryGetValue(header, out value) ? value : "________");
        sb.Append("|");
    }
    sb.AppendLine();
}

// Finally save the result into a file
File.WriteAllText("Result.txt", sb.ToString());
结果如下所示:
Element1|Element2|Element4|Element5|Element3|Element6|
00000001|00000002|00000004|00000005|________|________|
00000011|00000012|00000014|00000015|________|________|
00000021|00000022|00000024|00000025|________|________|
00000031|00000032|00000034|00000035|________|________|
________|00000002|00000004|00000005|00000003|________|
________|00000012|00000014|00000015|00000013|________|
________|00000022|00000024|00000025|00000023|________|
00000001|________|00000004|________|00000003|00000006|
00000011|________|00000014|________|00000013|00000016|
00000021|________|00000024|________|00000023|00000026|
00000031|________|00000034|________|00000033|00000036|
00000041|________|00000044|________|00000042|00000045|