SSIS:将记录集写入文件的脚本任务
SSIS: Script task to write recordset to file
我正在使用 SQL Server Data Tools 2013 创建 SSIS 包。这个包有一个带有完整结果集选项的执行 SQL 任务,用于将查询结果推送到对象类型的 SSIS 变量中。
我在脚本任务中使用以下命令获取存储在对象变量中的记录集并将其写入 CSV:
' Script Task entry point: loads the recordset held in the vResultSet variable into a
' DataTable and writes it out as comma-separated text to vDestinationPath + vFileName.
' The complete file content is built in memory first and written in a single call.
Public Sub Main()
Dim fileName As String = Dts.Variables("vFileName").Value.ToString
Dim destinationPath As String = Dts.Variables("vDestinationPath").Value.ToString
' Plain concatenation: no directory separator is inserted between the two parts.
Dim destinationPathAndFileName As String = destinationPath + fileName
' Accumulates the header line and every data line before anything is written.
Dim fileContents As String = ""
Dim oleDB As OleDbDataAdapter = New OleDbDataAdapter()
Dim table As DataTable = New DataTable()
Dim rs As System.Object = Dts.Variables("vResultSet").Value
' Populate DataTable with vResultSet data
oleDB.Fill(table, rs)
' Loop through columns and concatenate with commas
For Each col As DataColumn In table.Columns
fileContents &= col.ColumnName & ","
Next
' Remove final comma from columns string and append line break
' (Substring(0, Length - 1) assumes at least one column name was appended.)
fileContents = fileContents.Substring(0, fileContents.Length - 1)
fileContents &= Environment.NewLine
' Loop through rows and concatenate with commas
' NOTE: String is immutable in .NET, so every &= below allocates a new string holding
' everything accumulated so far plus the appended field.
Dim i As Integer
For Each row As DataRow In table.Rows
' i is 1-based here; row(i - 1) converts it to the 0-based column index.
For i = 1 To table.Columns.Count
fileContents &= row(i - 1).ToString() & ","
Next
' Remove final comma from row string and append line break
fileContents = fileContents.Substring(0, fileContents.Length - 1)
fileContents &= Environment.NewLine
Next
' Write all text to destination file. If file exists, this step will overwrite it.
System.IO.File.WriteAllText(destinationPathAndFileName, fileContents)
Dts.TaskResult = ScriptResults.Success
End Sub
这可行,但速度非常慢,将单个 14k 行数据集写入 CSV 需要 25 分钟以上。我不能使用数据流,因为这个过程存在一个循环,每个 table 导出的元数据是不同的。我很确定脚本任务是唯一的选择,但是有没有比遍历数据集的每一行更快的方法?如果我可以提供更多信息,请告诉我。
您可以随意翻译成 VB.NET。鉴于我已经为不同的项目编写了这段代码,我将你的请求与我的工作方式结合起来
传入 3 个 SSIS 变量:vFileName、vDestinationPath 和 vResultSet。Main 中的代码将 ADO 记录集转换为 DataTable,然后将其添加到 DataSet 并传递给 Persist 方法。Persist 的 delimiter 参数默认值为 |。
此实现完全没有尝试处理任何极端情况:它不使用文本限定符包裹文本列,不转义内嵌的限定符,也不对数据中的换行符做任何处理;OleDbDataAdapter 的 Fill 方法在遇到二进制数据时还会失败,等等。
/// <summary>
/// Script Task entry point. Reads the target file name, output folder and ADO recordset
/// from the SSIS variables, loads the recordset into a DataTable and hands it to
/// <c>Persist</c> to be written out as delimited text.
/// </summary>
/// <remarks>
/// <c>Dts.TaskResult</c> is set to Success only when the export completes. Any exception is
/// raised as an SSIS error event and the task reports Failure instead of being swallowed.
/// </remarks>
public void Main()
{
    string fileName = Dts.Variables["User::vFileName"].Value.ToString();
    string outputFolder = Dts.Variables["User::vDestinationPath"].Value.ToString();
    bool fireAgain = true;
    try
    {
        DataSet ds = new DataSet();
        DataTable dt = new DataTable();
        // Fill(DataTable, object) copies the ADO recordset stored in the Object variable into dt.
        using (System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter())
        {
            adapter.Fill(dt, Dts.Variables["User::vResultSet"].Value);
        }
        ds.Tables.Add(dt);
        Persist(ds, fileName, outputFolder);
        Dts.TaskResult = (int)ScriptResults.Success;
    }
    catch (Exception ex)
    {
        // Context first, so the log shows which export was being attempted.
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "fileName", fileName), string.Empty, 0, ref fireAgain);
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "outputFolder", outputFolder), string.Empty, 0, ref fireAgain);
        // ex.ToString() already contains the inner-exception chain and stack trace.
        Dts.Events.FireError(0, "Data Dumper", string.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), string.Empty, 0);
        Dts.TaskResult = (int)ScriptResults.Failure;
    }
}
/// <summary>
/// Writes every table in <paramref name="ds"/> to its own delimited text file named
/// "&lt;base name of originalFileName&gt;.&lt;TableName&gt;.csv" inside
/// <paramref name="outputFolder"/>. An existing file is overwritten.
/// </summary>
/// <param name="ds">Tables to export; a null reference is ignored.</param>
/// <param name="originalFileName">File name whose extension-less base names the output files.</param>
/// <param name="outputFolder">Folder that receives the output files.</param>
/// <param name="delimiter">Separator placed between fields; defaults to "|".</param>
/// <remarks>
/// Values are written with <c>ToString()</c> only: no text qualifiers and no escaping of
/// delimiters or line breaks. The output path is recorded in the table's "Path" extended property.
/// </remarks>
public static void Persist(System.Data.DataSet ds, string originalFileName, string outputFolder, string delimiter = "|")
{
    if (ds == null)
    {
        return;
    }
    string baseFileName = System.IO.Path.GetFileNameWithoutExtension(originalFileName);
    foreach (System.Data.DataTable table in ds.Tables)
    {
        string outFilePath = System.IO.Path.Combine(outputFolder, string.Format("{0}.{1}.csv", baseFileName, table.TableName));
        // Encoding.Default unless the table carries a true "Unicode" extended property.
        System.Text.Encoding e = System.Text.Encoding.Default;
        if (table.ExtendedProperties.ContainsKey("Unicode") && (bool)table.ExtendedProperties["Unicode"])
        {
            e = System.Text.Encoding.Unicode;
        }
        // The path-based constructor owns the file handle, so nothing leaks if construction
        // fails; append: false replaces an existing file.
        using (System.IO.StreamWriter file = new System.IO.StreamWriter(outFilePath, false, e))
        {
            // Indexer rather than Add: Add throws when the table has been persisted before.
            table.ExtendedProperties["Path"] = outFilePath;
            // Header row
            System.Collections.Generic.List<string> header = new System.Collections.Generic.List<string>(table.Columns.Count);
            foreach (System.Data.DataColumn item in table.Columns)
            {
                header.Add(item.ColumnName);
            }
            file.WriteLine(string.Join(delimiter, header));
            // One line per row, written immediately rather than accumulated in memory.
            foreach (System.Data.DataRow row in table.Rows)
            {
                // TODO: For string based fields, capture the max length
                IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());
                file.WriteLine(string.Join(delimiter, fields));
            }
        }
    }
}
(我得先离开一下)不过用 Biml 实现的话,大致如下:
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<Connections>
<OleDbConnection Name="tempdb" ConnectionString="Data Source=localhost\dev2014;Initial Catalog=AdventureWorksDW2014;Provider=SQLNCLI11.0;Integrated Security=SSPI;"/>
</Connections>
<Packages>
<Package Name="so_37059747" ConstraintMode="Linear">
<Variables>
<Variable DataType="String" Name="QuerySource"><![CDATA[SELECT
S.name
, T.name
FROM
sys.schemas AS S
INNER JOIN
sys.tables AS T
ON T.schema_id = S.schema_id;]]></Variable>
<Variable DataType="String" Name="SchemaName">dbo</Variable>
<Variable DataType="String" Name="TableName">foo</Variable>
<Variable DataType="String" Name="QueryTableDump" EvaluateAsExpression="true">"SELECT X.* FROM [" + @[User::SchemaName] + "].[" + @[User::TableName] + "] AS X;"</Variable>
<Variable DataType="Object" Name="rsTables"></Variable>
<Variable DataType="Object" Name="vResultSet"></Variable>
<Variable DataType="String" Name="vFileName" EvaluateAsExpression="true">@[User::SchemaName] + "_" + @[User::TableName] + ".txt"</Variable>
<Variable DataType="String" Name="vDestinationPath">c:\ssisdata\so\Output</Variable>
</Variables>
<Tasks>
<ExecuteSQL
ConnectionName="tempdb"
Name="SQL Generate Loop data"
ResultSet="Full">
<VariableInput VariableName="User.QuerySource" />
<Results>
<Result VariableName="User.rsTables" Name="0" />
</Results>
</ExecuteSQL>
<ForEachAdoLoop SourceVariableName="User.rsTables" Name="FELC Shred rs" ConstraintMode="Linear">
<VariableMappings>
<VariableMapping VariableName="User.SchemaName" Name="0" />
<VariableMapping VariableName="User.TableName" Name="1" />
</VariableMappings>
<Tasks>
<ExecuteSQL
ConnectionName="tempdb"
Name="SQL Generate Export data"
ResultSet="Full">
<VariableInput VariableName="User.QueryTableDump" />
<Results>
<Result VariableName="User.vResultSet" Name="0" />
</Results>
</ExecuteSQL>
<Script ProjectCoreName="ST_RS2CSV" Name="SCR Convert to text">
<ScriptTaskProjectReference ScriptTaskProjectName="ST_RS2CSV" />
</Script>
</Tasks>
</ForEachAdoLoop>
</Tasks>
</Package>
</Packages>
<ScriptProjects>
<ScriptTaskProject ProjectCoreName="ST_RS2CSV" Name="ST_RS2CSV" VstaMajorVersion="0">
<ReadOnlyVariables>
<Variable Namespace="User" VariableName="vFileName" DataType="String" />
<Variable Namespace="User" VariableName="vDestinationPath" DataType="String" />
<Variable Namespace="User" VariableName="vResultSet" DataType="Object" />
</ReadOnlyVariables>
<Files>
<File Path="ScriptMain.cs" BuildAction="Compile">
<![CDATA[namespace DataDumper
{
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Xml.Linq;
using Microsoft.SqlServer.Dts.Runtime;
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
/// <summary>
/// Script Task entry point. Reads the target file name, output folder and ADO recordset
/// from the SSIS variables, loads the recordset into a DataTable and hands it to
/// <c>Persist</c> to be written out as delimited text.
/// </summary>
/// <remarks>
/// <c>Dts.TaskResult</c> is set to Success only when the export completes. Any exception is
/// raised as an SSIS error event and the task reports Failure instead of being swallowed.
/// </remarks>
public void Main()
{
    string fileName = Dts.Variables["User::vFileName"].Value.ToString();
    string outputFolder = Dts.Variables["User::vDestinationPath"].Value.ToString();
    bool fireAgain = true;
    try
    {
        DataSet ds = new DataSet();
        DataTable dt = new DataTable();
        // Fill(DataTable, object) copies the ADO recordset stored in the Object variable into dt.
        using (System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter())
        {
            adapter.Fill(dt, Dts.Variables["User::vResultSet"].Value);
        }
        ds.Tables.Add(dt);
        Persist(ds, fileName, outputFolder);
        Dts.TaskResult = (int)ScriptResults.Success;
    }
    catch (Exception ex)
    {
        // Context first, so the log shows which export was being attempted.
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "fileName", fileName), string.Empty, 0, ref fireAgain);
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "outputFolder", outputFolder), string.Empty, 0, ref fireAgain);
        // ex.ToString() already contains the inner-exception chain and stack trace.
        Dts.Events.FireError(0, "Data Dumper", string.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), string.Empty, 0);
        Dts.TaskResult = (int)ScriptResults.Failure;
    }
}
/// <summary>
/// Writes every table in <paramref name="ds"/> to its own delimited text file named
/// "&lt;base name of originalFileName&gt;.&lt;TableName&gt;.csv" inside
/// <paramref name="outputFolder"/>. An existing file is overwritten.
/// </summary>
/// <param name="ds">Tables to export; a null reference is ignored.</param>
/// <param name="originalFileName">File name whose extension-less base names the output files.</param>
/// <param name="outputFolder">Folder that receives the output files.</param>
/// <param name="delimiter">Separator placed between fields; defaults to "|".</param>
/// <remarks>
/// Values are written with <c>ToString()</c> only: no text qualifiers and no escaping of
/// delimiters or line breaks. The output path is recorded in the table's "Path" extended property.
/// </remarks>
public static void Persist(System.Data.DataSet ds, string originalFileName, string outputFolder, string delimiter = "|")
{
    if (ds == null)
    {
        return;
    }
    string baseFileName = System.IO.Path.GetFileNameWithoutExtension(originalFileName);
    foreach (System.Data.DataTable table in ds.Tables)
    {
        string outFilePath = System.IO.Path.Combine(outputFolder, string.Format("{0}.{1}.csv", baseFileName, table.TableName));
        // Encoding.Default unless the table carries a true "Unicode" extended property.
        System.Text.Encoding e = System.Text.Encoding.Default;
        if (table.ExtendedProperties.ContainsKey("Unicode") && (bool)table.ExtendedProperties["Unicode"])
        {
            e = System.Text.Encoding.Unicode;
        }
        // The path-based constructor owns the file handle, so nothing leaks if construction
        // fails; append: false replaces an existing file.
        using (System.IO.StreamWriter file = new System.IO.StreamWriter(outFilePath, false, e))
        {
            // Indexer rather than Add: Add throws when the table has been persisted before.
            table.ExtendedProperties["Path"] = outFilePath;
            // Header row
            System.Collections.Generic.List<string> header = new System.Collections.Generic.List<string>(table.Columns.Count);
            foreach (System.Data.DataColumn item in table.Columns)
            {
                header.Add(item.ColumnName);
            }
            file.WriteLine(string.Join(delimiter, header));
            // One line per row, written immediately rather than accumulated in memory.
            foreach (System.Data.DataRow row in table.Rows)
            {
                // TODO: For string based fields, capture the max length
                IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());
                file.WriteLine(string.Join(delimiter, fields));
            }
        }
    }
}
// Outcome codes cast to int and assigned to Dts.TaskResult by Main; each member aliases
// the DTSExecResult value of the same name.
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
}
}
]]>
</File>
<File Path="Properties\AssemblyInfo.cs" BuildAction="Compile">
using System.Reflection;
using System.Runtime.CompilerServices;
[assembly: AssemblyTitle("AssemblyTitle")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Bill Fellows")]
[assembly: AssemblyProduct("ProductName")]
[assembly: AssemblyCopyright("Copyright @ 2016")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
[assembly: AssemblyVersion("1.0.*")]
</File>
</Files>
<AssemblyReferences>
<AssemblyReference AssemblyPath="System" />
<AssemblyReference AssemblyPath="System.Core" />
<AssemblyReference AssemblyPath="System.Data" />
<AssemblyReference AssemblyPath="System.Data.DataSetExtensions" />
<AssemblyReference AssemblyPath="System.Windows.Forms" />
<AssemblyReference AssemblyPath="System.Xml" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.ManagedDTS.dll" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.ScriptTask.dll" />
<AssemblyReference AssemblyPath="System.Linq" />
<AssemblyReference AssemblyPath="System.Xml.Linq" />
<AssemblyReference AssemblyPath="Microsoft.VisualBasic" />
</AssemblyReferences>
</ScriptTaskProject>
</ScriptProjects>
</Biml>
在 15 秒内转储了所有 AdventureworksDW2014
针对评论中提到的下面这一行执行失败的问题:IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());
请确保您的项目中包含以下 using 语句。我认为这些扩展方法位于 Linq 命名空间中,但也可能是在 Collections 中。
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Xml.Linq;
using Microsoft.SqlServer.Dts.Runtime;
为什么原来的速度很慢?
我的假设是缓慢归结为所有这些串联。字符串在 .Net 中是不可变的,每次向它添加一列时都会创建该字符串的新版本。当我构建我的行时,我使用 String.Join 方法将数组中的每个元素压缩成一个字符串。这也简化了附加字段分隔符所需的逻辑。
此外,我每处理完一行就立即把它写入文件,而不是先把全部内容堆在内存里、最后再调用 WriteAllText 一次性写出,从而避免内存膨胀。
这是@billinkc 的优秀答案的 VB.NET 版本,以防对任何人有用:
Imports System
Imports System.Data
Imports System.Math
Imports System.Collections
Imports System.Collections.Generic
Imports Microsoft.SqlServer.Dts.Runtime
Imports System.Linq
Imports System.Text
Imports System.Windows.Forms
''' <summary>
''' Script Task entry point. Reads the target file name, output folder and ADO recordset
''' from the SSIS variables, loads the recordset into a DataTable and hands it to
''' Persist to be written out as delimited text.
''' </summary>
''' <remarks>
''' Dts.TaskResult is set to Success only when the export completes. Any exception is
''' raised as an SSIS error event and the task reports Failure instead of being swallowed.
''' </remarks>
Public Sub Main()
    Dim fileName As String = Dts.Variables("User::vFileName").Value.ToString()
    Dim outputFolder As String = Dts.Variables("User::vDestinationPath").Value.ToString()
    Dim fireAgain As Boolean = True
    Try
        Dim ds As New DataSet()
        Dim dt As New DataTable()
        ' Fill(DataTable, Object) copies the ADO recordset stored in the Object variable into dt.
        Using adapter As New System.Data.OleDb.OleDbDataAdapter()
            adapter.Fill(dt, Dts.Variables("User::vResultSet").Value)
        End Using
        ds.Tables.Add(dt)
        Persist(ds, fileName, outputFolder)
        Dts.TaskResult = CInt(ScriptResults.Success)
    Catch ex As Exception
        ' Context first, so the log shows which export was being attempted.
        Dts.Events.FireInformation(0, "Data Dumper", String.Format("{0}|{1}", "fileName", fileName), String.Empty, 0, fireAgain)
        Dts.Events.FireInformation(0, "Data Dumper", String.Format("{0}|{1}", "outputFolder", outputFolder), String.Empty, 0, fireAgain)
        ' ex.ToString() already contains the inner-exception chain and stack trace.
        Dts.Events.FireError(0, "Data Dumper", String.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), String.Empty, 0)
        Dts.TaskResult = CInt(ScriptResults.Failure)
    End Try
End Sub
''' <summary>
''' Writes every table in <paramref name="ds"/> to a delimited text file inside
''' <paramref name="outputFolder"/>. An existing file is overwritten.
''' </summary>
''' <param name="ds">Tables to export; Nothing is ignored.</param>
''' <param name="originalFileName">File name whose extension-less base names the output files.</param>
''' <param name="outputFolder">Folder that receives the output files.</param>
''' <param name="delimiter">Separator placed between fields; defaults to ",".</param>
''' <remarks>
''' A single table is written to "&lt;base&gt;.csv". When the DataSet holds several tables the
''' table name is added ("&lt;base&gt;.&lt;TableName&gt;.csv") so they do not overwrite one another.
''' Values are written with ToString() only: no text qualifiers and no escaping of
''' delimiters or line breaks. The output path is recorded in the table's "Path" extended property.
''' </remarks>
Public Shared Sub Persist(ds As System.Data.DataSet, originalFileName As String, outputFolder As String, Optional delimiter As String = ",")
    If ds Is Nothing Then
        Return
    End If
    Dim baseFileName As String = System.IO.Path.GetFileNameWithoutExtension(originalFileName)
    ' Keep the plain name for the usual one-table case; disambiguate only when needed.
    Dim nameFormat As String = If(ds.Tables.Count > 1, "{0}.{1}.csv", "{0}.csv")
    For Each table As System.Data.DataTable In ds.Tables
        Dim outFilePath As String = System.IO.Path.Combine(outputFolder, String.Format(nameFormat, baseFileName, table.TableName))
        ' Encoding.Default unless the table carries a true "Unicode" extended property.
        Dim e As System.Text.Encoding = System.Text.Encoding.[Default]
        If table.ExtendedProperties.ContainsKey("Unicode") AndAlso CBool(table.ExtendedProperties("Unicode")) Then
            e = System.Text.Encoding.Unicode
        End If
        ' The path-based constructor owns the file handle, so nothing leaks if construction
        ' fails; append:=False replaces an existing file.
        Using file As New System.IO.StreamWriter(outFilePath, False, e)
            ' Indexer rather than Add: Add throws when the table has been persisted before.
            table.ExtendedProperties("Path") = outFilePath
            ' Header row
            Dim header As New System.Collections.Generic.List(Of String)(table.Columns.Count)
            For Each item As System.Data.DataColumn In table.Columns
                header.Add(item.ColumnName)
            Next
            file.WriteLine(String.Join(delimiter, header))
            ' One line per row, written immediately rather than accumulated in memory.
            For Each row As System.Data.DataRow In table.Rows
                ' TODO: For string based fields, capture the max length
                Dim fields As IEnumerable(Of String) = (row.ItemArray).[Select](Function(field) field.ToString())
                file.WriteLine(String.Join(delimiter, fields))
            Next
        End Using
    Next
End Sub
我正在使用 SQL Server Data Tools 2013 创建 SSIS 包。这个包有一个带有完整结果集选项的执行 SQL 任务,用于将查询结果推送到对象类型的 SSIS 变量中。
我在脚本任务中使用以下命令获取存储在对象变量中的记录集并将其写入 CSV:
' Script Task entry point: loads the recordset held in the vResultSet variable into a
' DataTable and writes it out as comma-separated text to vDestinationPath + vFileName.
' The complete file content is built in memory first and written in a single call.
Public Sub Main()
Dim fileName As String = Dts.Variables("vFileName").Value.ToString
Dim destinationPath As String = Dts.Variables("vDestinationPath").Value.ToString
' Plain concatenation: no directory separator is inserted between the two parts.
Dim destinationPathAndFileName As String = destinationPath + fileName
' Accumulates the header line and every data line before anything is written.
Dim fileContents As String = ""
Dim oleDB As OleDbDataAdapter = New OleDbDataAdapter()
Dim table As DataTable = New DataTable()
Dim rs As System.Object = Dts.Variables("vResultSet").Value
' Populate DataTable with vResultSet data
oleDB.Fill(table, rs)
' Loop through columns and concatenate with commas
For Each col As DataColumn In table.Columns
fileContents &= col.ColumnName & ","
Next
' Remove final comma from columns string and append line break
' (Substring(0, Length - 1) assumes at least one column name was appended.)
fileContents = fileContents.Substring(0, fileContents.Length - 1)
fileContents &= Environment.NewLine
' Loop through rows and concatenate with commas
' NOTE: String is immutable in .NET, so every &= below allocates a new string holding
' everything accumulated so far plus the appended field.
Dim i As Integer
For Each row As DataRow In table.Rows
' i is 1-based here; row(i - 1) converts it to the 0-based column index.
For i = 1 To table.Columns.Count
fileContents &= row(i - 1).ToString() & ","
Next
' Remove final comma from row string and append line break
fileContents = fileContents.Substring(0, fileContents.Length - 1)
fileContents &= Environment.NewLine
Next
' Write all text to destination file. If file exists, this step will overwrite it.
System.IO.File.WriteAllText(destinationPathAndFileName, fileContents)
Dts.TaskResult = ScriptResults.Success
End Sub
这可行,但速度非常慢,将单个 14k 行数据集写入 CSV 需要 25 分钟以上。我不能使用数据流,因为这个过程存在一个循环,每个 table 导出的元数据是不同的。我很确定脚本任务是唯一的选择,但是有没有比遍历数据集的每一行更快的方法?如果我可以提供更多信息,请告诉我。
您可以随意翻译成 VB.NET。鉴于我已经为不同的项目编写了这段代码,我将你的请求与我的工作方式结合起来
传入 3 个 SSIS 变量:vFileName、vDestinationPath 和 vResultSet。Main 中的代码将 ADO 记录集转换为 DataTable,然后将其添加到 DataSet 并传递给 Persist 方法。Persist 的 delimiter 参数默认值为 |。
此实现完全没有尝试处理任何极端情况:它不使用文本限定符包裹文本列,不转义内嵌的限定符,也不对数据中的换行符做任何处理;OleDbDataAdapter 的 Fill 方法在遇到二进制数据时还会失败,等等。
/// <summary>
/// Script Task entry point. Reads the target file name, output folder and ADO recordset
/// from the SSIS variables, loads the recordset into a DataTable and hands it to
/// <c>Persist</c> to be written out as delimited text.
/// </summary>
/// <remarks>
/// <c>Dts.TaskResult</c> is set to Success only when the export completes. Any exception is
/// raised as an SSIS error event and the task reports Failure instead of being swallowed.
/// </remarks>
public void Main()
{
    string fileName = Dts.Variables["User::vFileName"].Value.ToString();
    string outputFolder = Dts.Variables["User::vDestinationPath"].Value.ToString();
    bool fireAgain = true;
    try
    {
        DataSet ds = new DataSet();
        DataTable dt = new DataTable();
        // Fill(DataTable, object) copies the ADO recordset stored in the Object variable into dt.
        using (System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter())
        {
            adapter.Fill(dt, Dts.Variables["User::vResultSet"].Value);
        }
        ds.Tables.Add(dt);
        Persist(ds, fileName, outputFolder);
        Dts.TaskResult = (int)ScriptResults.Success;
    }
    catch (Exception ex)
    {
        // Context first, so the log shows which export was being attempted.
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "fileName", fileName), string.Empty, 0, ref fireAgain);
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "outputFolder", outputFolder), string.Empty, 0, ref fireAgain);
        // ex.ToString() already contains the inner-exception chain and stack trace.
        Dts.Events.FireError(0, "Data Dumper", string.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), string.Empty, 0);
        Dts.TaskResult = (int)ScriptResults.Failure;
    }
}
/// <summary>
/// Writes every table in <paramref name="ds"/> to its own delimited text file named
/// "&lt;base name of originalFileName&gt;.&lt;TableName&gt;.csv" inside
/// <paramref name="outputFolder"/>. An existing file is overwritten.
/// </summary>
/// <param name="ds">Tables to export; a null reference is ignored.</param>
/// <param name="originalFileName">File name whose extension-less base names the output files.</param>
/// <param name="outputFolder">Folder that receives the output files.</param>
/// <param name="delimiter">Separator placed between fields; defaults to "|".</param>
/// <remarks>
/// Values are written with <c>ToString()</c> only: no text qualifiers and no escaping of
/// delimiters or line breaks. The output path is recorded in the table's "Path" extended property.
/// </remarks>
public static void Persist(System.Data.DataSet ds, string originalFileName, string outputFolder, string delimiter = "|")
{
    if (ds == null)
    {
        return;
    }
    string baseFileName = System.IO.Path.GetFileNameWithoutExtension(originalFileName);
    foreach (System.Data.DataTable table in ds.Tables)
    {
        string outFilePath = System.IO.Path.Combine(outputFolder, string.Format("{0}.{1}.csv", baseFileName, table.TableName));
        // Encoding.Default unless the table carries a true "Unicode" extended property.
        System.Text.Encoding e = System.Text.Encoding.Default;
        if (table.ExtendedProperties.ContainsKey("Unicode") && (bool)table.ExtendedProperties["Unicode"])
        {
            e = System.Text.Encoding.Unicode;
        }
        // The path-based constructor owns the file handle, so nothing leaks if construction
        // fails; append: false replaces an existing file.
        using (System.IO.StreamWriter file = new System.IO.StreamWriter(outFilePath, false, e))
        {
            // Indexer rather than Add: Add throws when the table has been persisted before.
            table.ExtendedProperties["Path"] = outFilePath;
            // Header row
            System.Collections.Generic.List<string> header = new System.Collections.Generic.List<string>(table.Columns.Count);
            foreach (System.Data.DataColumn item in table.Columns)
            {
                header.Add(item.ColumnName);
            }
            file.WriteLine(string.Join(delimiter, header));
            // One line per row, written immediately rather than accumulated in memory.
            foreach (System.Data.DataRow row in table.Rows)
            {
                // TODO: For string based fields, capture the max length
                IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());
                file.WriteLine(string.Join(delimiter, fields));
            }
        }
    }
}
(我得先离开一下)不过用 Biml 实现的话,大致如下:
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<Connections>
<OleDbConnection Name="tempdb" ConnectionString="Data Source=localhost\dev2014;Initial Catalog=AdventureWorksDW2014;Provider=SQLNCLI11.0;Integrated Security=SSPI;"/>
</Connections>
<Packages>
<Package Name="so_37059747" ConstraintMode="Linear">
<Variables>
<Variable DataType="String" Name="QuerySource"><![CDATA[SELECT
S.name
, T.name
FROM
sys.schemas AS S
INNER JOIN
sys.tables AS T
ON T.schema_id = S.schema_id;]]></Variable>
<Variable DataType="String" Name="SchemaName">dbo</Variable>
<Variable DataType="String" Name="TableName">foo</Variable>
<Variable DataType="String" Name="QueryTableDump" EvaluateAsExpression="true">"SELECT X.* FROM [" + @[User::SchemaName] + "].[" + @[User::TableName] + "] AS X;"</Variable>
<Variable DataType="Object" Name="rsTables"></Variable>
<Variable DataType="Object" Name="vResultSet"></Variable>
<Variable DataType="String" Name="vFileName" EvaluateAsExpression="true">@[User::SchemaName] + "_" + @[User::TableName] + ".txt"</Variable>
<Variable DataType="String" Name="vDestinationPath">c:\ssisdata\so\Output</Variable>
</Variables>
<Tasks>
<ExecuteSQL
ConnectionName="tempdb"
Name="SQL Generate Loop data"
ResultSet="Full">
<VariableInput VariableName="User.QuerySource" />
<Results>
<Result VariableName="User.rsTables" Name="0" />
</Results>
</ExecuteSQL>
<ForEachAdoLoop SourceVariableName="User.rsTables" Name="FELC Shred rs" ConstraintMode="Linear">
<VariableMappings>
<VariableMapping VariableName="User.SchemaName" Name="0" />
<VariableMapping VariableName="User.TableName" Name="1" />
</VariableMappings>
<Tasks>
<ExecuteSQL
ConnectionName="tempdb"
Name="SQL Generate Export data"
ResultSet="Full">
<VariableInput VariableName="User.QueryTableDump" />
<Results>
<Result VariableName="User.vResultSet" Name="0" />
</Results>
</ExecuteSQL>
<Script ProjectCoreName="ST_RS2CSV" Name="SCR Convert to text">
<ScriptTaskProjectReference ScriptTaskProjectName="ST_RS2CSV" />
</Script>
</Tasks>
</ForEachAdoLoop>
</Tasks>
</Package>
</Packages>
<ScriptProjects>
<ScriptTaskProject ProjectCoreName="ST_RS2CSV" Name="ST_RS2CSV" VstaMajorVersion="0">
<ReadOnlyVariables>
<Variable Namespace="User" VariableName="vFileName" DataType="String" />
<Variable Namespace="User" VariableName="vDestinationPath" DataType="String" />
<Variable Namespace="User" VariableName="vResultSet" DataType="Object" />
</ReadOnlyVariables>
<Files>
<File Path="ScriptMain.cs" BuildAction="Compile">
<![CDATA[namespace DataDumper
{
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Xml.Linq;
using Microsoft.SqlServer.Dts.Runtime;
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
/// <summary>
/// Script Task entry point. Reads the target file name, output folder and ADO recordset
/// from the SSIS variables, loads the recordset into a DataTable and hands it to
/// <c>Persist</c> to be written out as delimited text.
/// </summary>
/// <remarks>
/// <c>Dts.TaskResult</c> is set to Success only when the export completes. Any exception is
/// raised as an SSIS error event and the task reports Failure instead of being swallowed.
/// </remarks>
public void Main()
{
    string fileName = Dts.Variables["User::vFileName"].Value.ToString();
    string outputFolder = Dts.Variables["User::vDestinationPath"].Value.ToString();
    bool fireAgain = true;
    try
    {
        DataSet ds = new DataSet();
        DataTable dt = new DataTable();
        // Fill(DataTable, object) copies the ADO recordset stored in the Object variable into dt.
        using (System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter())
        {
            adapter.Fill(dt, Dts.Variables["User::vResultSet"].Value);
        }
        ds.Tables.Add(dt);
        Persist(ds, fileName, outputFolder);
        Dts.TaskResult = (int)ScriptResults.Success;
    }
    catch (Exception ex)
    {
        // Context first, so the log shows which export was being attempted.
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "fileName", fileName), string.Empty, 0, ref fireAgain);
        Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "outputFolder", outputFolder), string.Empty, 0, ref fireAgain);
        // ex.ToString() already contains the inner-exception chain and stack trace.
        Dts.Events.FireError(0, "Data Dumper", string.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), string.Empty, 0);
        Dts.TaskResult = (int)ScriptResults.Failure;
    }
}
/// <summary>
/// Writes every table in <paramref name="ds"/> to its own delimited text file named
/// "&lt;base name of originalFileName&gt;.&lt;TableName&gt;.csv" inside
/// <paramref name="outputFolder"/>. An existing file is overwritten.
/// </summary>
/// <param name="ds">Tables to export; a null reference is ignored.</param>
/// <param name="originalFileName">File name whose extension-less base names the output files.</param>
/// <param name="outputFolder">Folder that receives the output files.</param>
/// <param name="delimiter">Separator placed between fields; defaults to "|".</param>
/// <remarks>
/// Values are written with <c>ToString()</c> only: no text qualifiers and no escaping of
/// delimiters or line breaks. The output path is recorded in the table's "Path" extended property.
/// </remarks>
public static void Persist(System.Data.DataSet ds, string originalFileName, string outputFolder, string delimiter = "|")
{
    if (ds == null)
    {
        return;
    }
    string baseFileName = System.IO.Path.GetFileNameWithoutExtension(originalFileName);
    foreach (System.Data.DataTable table in ds.Tables)
    {
        string outFilePath = System.IO.Path.Combine(outputFolder, string.Format("{0}.{1}.csv", baseFileName, table.TableName));
        // Encoding.Default unless the table carries a true "Unicode" extended property.
        System.Text.Encoding e = System.Text.Encoding.Default;
        if (table.ExtendedProperties.ContainsKey("Unicode") && (bool)table.ExtendedProperties["Unicode"])
        {
            e = System.Text.Encoding.Unicode;
        }
        // The path-based constructor owns the file handle, so nothing leaks if construction
        // fails; append: false replaces an existing file.
        using (System.IO.StreamWriter file = new System.IO.StreamWriter(outFilePath, false, e))
        {
            // Indexer rather than Add: Add throws when the table has been persisted before.
            table.ExtendedProperties["Path"] = outFilePath;
            // Header row
            System.Collections.Generic.List<string> header = new System.Collections.Generic.List<string>(table.Columns.Count);
            foreach (System.Data.DataColumn item in table.Columns)
            {
                header.Add(item.ColumnName);
            }
            file.WriteLine(string.Join(delimiter, header));
            // One line per row, written immediately rather than accumulated in memory.
            foreach (System.Data.DataRow row in table.Rows)
            {
                // TODO: For string based fields, capture the max length
                IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());
                file.WriteLine(string.Join(delimiter, fields));
            }
        }
    }
}
// Outcome codes cast to int and assigned to Dts.TaskResult by Main; each member aliases
// the DTSExecResult value of the same name.
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
}
}
]]>
</File>
<File Path="Properties\AssemblyInfo.cs" BuildAction="Compile">
using System.Reflection;
using System.Runtime.CompilerServices;
[assembly: AssemblyTitle("AssemblyTitle")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("Bill Fellows")]
[assembly: AssemblyProduct("ProductName")]
[assembly: AssemblyCopyright("Copyright @ 2016")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
[assembly: AssemblyVersion("1.0.*")]
</File>
</Files>
<AssemblyReferences>
<AssemblyReference AssemblyPath="System" />
<AssemblyReference AssemblyPath="System.Core" />
<AssemblyReference AssemblyPath="System.Data" />
<AssemblyReference AssemblyPath="System.Data.DataSetExtensions" />
<AssemblyReference AssemblyPath="System.Windows.Forms" />
<AssemblyReference AssemblyPath="System.Xml" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.ManagedDTS.dll" />
<AssemblyReference AssemblyPath="Microsoft.SqlServer.ScriptTask.dll" />
<AssemblyReference AssemblyPath="System.Linq" />
<AssemblyReference AssemblyPath="System.Xml.Linq" />
<AssemblyReference AssemblyPath="Microsoft.VisualBasic" />
</AssemblyReferences>
</ScriptTaskProject>
</ScriptProjects>
</Biml>
在 15 秒内转储了所有 AdventureworksDW2014
针对评论中提到的下面这一行执行失败的问题:IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());
请确保您的项目中包含以下 using 语句。我认为这些扩展方法位于 Linq 命名空间中,但也可能是在 Collections 中。
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Xml.Linq;
using Microsoft.SqlServer.Dts.Runtime;
为什么原来的速度很慢?
我的假设是缓慢归结为所有这些串联。字符串在 .Net 中是不可变的,每次向它添加一列时都会创建该字符串的新版本。当我构建我的行时,我使用 String.Join 方法将数组中的每个元素压缩成一个字符串。这也简化了附加字段分隔符所需的逻辑。
此外,我每处理完一行就立即把它写入文件,而不是先把全部内容堆在内存里、最后再调用 WriteAllText 一次性写出,从而避免内存膨胀。
这是@billinkc 的优秀答案的 VB.NET 版本,以防对任何人有用:
Imports System
Imports System.Data
Imports System.Math
Imports System.Collections
Imports System.Collections.Generic
Imports Microsoft.SqlServer.Dts.Runtime
Imports System.Linq
Imports System.Text
Imports System.Windows.Forms
''' <summary>
''' Script Task entry point. Reads the target file name, output folder and ADO recordset
''' from the SSIS variables, loads the recordset into a DataTable and hands it to
''' Persist to be written out as delimited text.
''' </summary>
''' <remarks>
''' Dts.TaskResult is set to Success only when the export completes. Any exception is
''' raised as an SSIS error event and the task reports Failure instead of being swallowed.
''' </remarks>
Public Sub Main()
    Dim fileName As String = Dts.Variables("User::vFileName").Value.ToString()
    Dim outputFolder As String = Dts.Variables("User::vDestinationPath").Value.ToString()
    Dim fireAgain As Boolean = True
    Try
        Dim ds As New DataSet()
        Dim dt As New DataTable()
        ' Fill(DataTable, Object) copies the ADO recordset stored in the Object variable into dt.
        Using adapter As New System.Data.OleDb.OleDbDataAdapter()
            adapter.Fill(dt, Dts.Variables("User::vResultSet").Value)
        End Using
        ds.Tables.Add(dt)
        Persist(ds, fileName, outputFolder)
        Dts.TaskResult = CInt(ScriptResults.Success)
    Catch ex As Exception
        ' Context first, so the log shows which export was being attempted.
        Dts.Events.FireInformation(0, "Data Dumper", String.Format("{0}|{1}", "fileName", fileName), String.Empty, 0, fireAgain)
        Dts.Events.FireInformation(0, "Data Dumper", String.Format("{0}|{1}", "outputFolder", outputFolder), String.Empty, 0, fireAgain)
        ' ex.ToString() already contains the inner-exception chain and stack trace.
        Dts.Events.FireError(0, "Data Dumper", String.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), String.Empty, 0)
        Dts.TaskResult = CInt(ScriptResults.Failure)
    End Try
End Sub
''' <summary>
''' Writes every table in <paramref name="ds"/> to a delimited text file inside
''' <paramref name="outputFolder"/>. An existing file is overwritten.
''' </summary>
''' <param name="ds">Tables to export; Nothing is ignored.</param>
''' <param name="originalFileName">File name whose extension-less base names the output files.</param>
''' <param name="outputFolder">Folder that receives the output files.</param>
''' <param name="delimiter">Separator placed between fields; defaults to ",".</param>
''' <remarks>
''' A single table is written to "&lt;base&gt;.csv". When the DataSet holds several tables the
''' table name is added ("&lt;base&gt;.&lt;TableName&gt;.csv") so they do not overwrite one another.
''' Values are written with ToString() only: no text qualifiers and no escaping of
''' delimiters or line breaks. The output path is recorded in the table's "Path" extended property.
''' </remarks>
Public Shared Sub Persist(ds As System.Data.DataSet, originalFileName As String, outputFolder As String, Optional delimiter As String = ",")
    If ds Is Nothing Then
        Return
    End If
    Dim baseFileName As String = System.IO.Path.GetFileNameWithoutExtension(originalFileName)
    ' Keep the plain name for the usual one-table case; disambiguate only when needed.
    Dim nameFormat As String = If(ds.Tables.Count > 1, "{0}.{1}.csv", "{0}.csv")
    For Each table As System.Data.DataTable In ds.Tables
        Dim outFilePath As String = System.IO.Path.Combine(outputFolder, String.Format(nameFormat, baseFileName, table.TableName))
        ' Encoding.Default unless the table carries a true "Unicode" extended property.
        Dim e As System.Text.Encoding = System.Text.Encoding.[Default]
        If table.ExtendedProperties.ContainsKey("Unicode") AndAlso CBool(table.ExtendedProperties("Unicode")) Then
            e = System.Text.Encoding.Unicode
        End If
        ' The path-based constructor owns the file handle, so nothing leaks if construction
        ' fails; append:=False replaces an existing file.
        Using file As New System.IO.StreamWriter(outFilePath, False, e)
            ' Indexer rather than Add: Add throws when the table has been persisted before.
            table.ExtendedProperties("Path") = outFilePath
            ' Header row
            Dim header As New System.Collections.Generic.List(Of String)(table.Columns.Count)
            For Each item As System.Data.DataColumn In table.Columns
                header.Add(item.ColumnName)
            Next
            file.WriteLine(String.Join(delimiter, header))
            ' One line per row, written immediately rather than accumulated in memory.
            For Each row As System.Data.DataRow In table.Rows
                ' TODO: For string based fields, capture the max length
                Dim fields As IEnumerable(Of String) = (row.ItemArray).[Select](Function(field) field.ToString())
                file.WriteLine(String.Join(delimiter, fields))
            Next
        End Using
    Next
End Sub