且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

解析 SSIS .xml 源以检索表映射

更新时间:2023-09-17 22:58:34

以下代码专为 SSIS 包 PackageFormatVersion=3

Following code is designed for SSIS packages PackageFormatVersion=3

它并不漂亮,但对于一种方式的 XML 转换来说还可以.

It is not pretty but it's ok for one way XML transformation.

解析源

private static Mapping<ColumnMapping> ParseSourceComponent(XElement source)
{
    var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;


    var nonErrorOutput = source.XPathSelectElement("outputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));

    var outputColumns = nonErrorOutput.XPathSelectElement("outputColumns").Elements().Select(x => 
        new ColumnMapping
        {
            Id = (int)x.Attribute("id"),
            Name = (string)x.Attribute("name")
        }).ToList();

    return new Mapping<ColumnMapping>
    {
        TableName = NormalizeTableNames(table),
        Columns = outputColumns
    };
}
static readonly Regex tableNameRegex = new Regex("\\[dbo\\]\\.\\[(.*)\\]");
private static string NormalizeTableNames(string rawTableName)
{
    var matches = tableNameRegex.Match(rawTableName);
    if (matches.Success) 
        return matches.Groups[1].Value;
    return rawTableName;

}

解析目的地

private static Mapping<InputColumnMapping> ParseDestinationComponent(string ssisName,XElement source)
{
    var table = source.XPathSelectElement("properties/property[@name='OpenRowset']").Value;


    var nonErrorOutput = source.XPathSelectElement("inputs").Elements().First(x => !((string)x.Attribute("name")).Contains("Error"));

    var inputColumns = nonErrorOutput.XPathSelectElement("inputColumns").Elements().Select(x =>
        new
        {
            lineageId = (int)x.Attribute("lineageId"),
            externalMetadataColumnId = (int)x.Attribute("externalMetadataColumnId")
        }).ToList();

    var externalMetadataColumns = nonErrorOutput.XPathSelectElement("externalMetadataColumns").Elements().Select(x =>
        new InputColumnMapping
        {
            Id = (int)x.Attribute("id"),
            Name = (string)x.Attribute("name")
        }).ToList();
    foreach (var externalMetadataColumn in externalMetadataColumns.ToList())
    {
        var inputMapping =
            inputColumns.FirstOrDefault(x => x.externalMetadataColumnId == externalMetadataColumn.Id);
        if (inputMapping == null)
        {
            Console.WriteLine("{0} | destination external column {1} with id {2} was not found in input mappings", ssisName, externalMetadataColumn.Name, externalMetadataColumn.Id);
            externalMetadataColumns.Remove(externalMetadataColumn);
            continue;
        }
        externalMetadataColumn.MappsToId = inputMapping.lineageId;
    }
    return new Mapping<InputColumnMapping>
    {
        TableName = NormalizeTableNames(table),
        Columns = externalMetadataColumns
    };
}

处理整个 .dtsx 文件

Processing the whole .dtsx file

private static RemoteMappingFile ParseDtsx(string ssisName)
{
    var xml = XDocument.Load(@"ssis/"+ssisName);

    if (xml.Root == null)
    {
        throw new Exception("Root is null");
    }
    var mappings = new List<RemoteMapping>();

    XNamespace ns = "www.microsoft.com/SqlServer/Dts";
    XmlNamespaceManager man = new XmlNamespaceManager(new NameTable());
    man.AddNamespace("DTS", "www.microsoft.com/SqlServer/Dts");
    var executables = xml.Root.Descendants(ns + "Executable").Select(x => x).ToList();
    foreach (var executable in executables)
    {
        var components = executable.Descendants(ns + "ObjectData").First().XPathSelectElement("pipeline/components").Elements().ToList();
        if (components.Count != 2)
        {
            Console.WriteLine("{0} | WARN - 2 components expected. Found {1} with names: {2}", ssisName, components.Count, string.Join(",",components.Select(x=>((string)x.Attribute("name"))).ToList()));
        }
        var source = components.First(x => ((string)x.Attribute("name")).Contains("Source"));
        var destination = components.First(x => ((string)x.Attribute("name")).Contains("Destination"));
        var sourceMapping = ParseSourceComponent(source);
        var destinationMapping = ParseDestinationComponent(ssisName,destination);
        var remoteMapping = new RemoteMapping
        {
            TableNames = new Column { Source = sourceMapping.TableName, Destination = destinationMapping.TableName },
            Columns = new List<Column>()
        };
        foreach (var sourceItem in sourceMapping.Columns)
        {
            var foundMatchingDestinationColumn =
                destinationMapping.Columns.FirstOrDefault(x => x.MappsToId == sourceItem.Id);
            if (foundMatchingDestinationColumn == null)
            {
                Console.WriteLine("{0} | input mapping {1} with id {2} was not found in destination mappings",
                    ssisName, sourceItem.Name, sourceItem.Id);
                continue;
            }
            remoteMapping.Columns.Add(new Column
            {
                Destination = foundMatchingDestinationColumn.Name,
                Source = sourceItem.Name
            });
        }
        mappings.Add(remoteMapping);
    }

    return new RemoteMappingFile
    {
        RemoteMappings = mappings,
        SSISName = ssisName
    };
}

需要的数据结构

public class ColumnMapping
{
    public int Id { get; set; }
    public string Name { get; set; }
}
public class InputColumnMapping : ColumnMapping
{
    public int MappsToId { get; set; }
}
public class Mapping<T> where T : ColumnMapping
{
    [XmlAttribute]
    public string TableName { get; set; }
    public List<T> Columns { get; set; }
}
public class RemoteMapping
{
    public Column TableNames { get; set; }
    public List<Column> Columns { get; set; }
}

public class Column
{
    [XmlAttribute]
    public string Source { get; set; }

[XmlAttribute]
    public string Destination { get; set; }
}
public class RemoteMappingFile
{
    [XmlAttribute]
    public string SSISName { get; set; }
    public List<RemoteMapping> RemoteMappings { get; set; }
}
public class MappingsXml
{
    public List<RemoteMappingFile> Mappings { get; set; }
}

main 方法获取 ssis 文件夹中的所有 .dtsx 文件

The main method takes all .dtsx files in ssis folder

internal class Program
{
    private static void Main()
    {
        //var mappings = Directory.EnumerateFiles("ssis","*.dtsx").Select(x=>ParseDtsx(Path.GetFileName(x).ToString())).ToList();
        var list = new MappingsXml
        {
            Mappings =
                Directory.EnumerateFiles("ssis", "*.dtsx")
                    .Select(x => ParseDtsx((Path.GetFileName(x) ?? "").ToString()))
                    .ToList()
        };
        var xsSubmit = new XmlSerializer(typeof (MappingsXml));

        using (var file = new StreamWriter(
            @"AutoRemoteMappingXmls.xml"))
        {
            xsSubmit.Serialize(file, list);
        }
    }
}

最终输出:

<?xml version="1.0" encoding="utf-8"?>
<MappingsXml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <Mappings>
    <RemoteMappingFile SSISName="ssis1.dtsx">
      <RemoteMappings>
        <RemoteMapping>
          <TableNames Source="sourceTable1" Destination="destinationTable1" />
          <Columns>
            <Column Source="sourceColumn1" Destination="destinationColumn1" />
            <Column Source="sourceColumn2" Destination="destinationColumn2" />
          </Columns>
        </RemoteMapping>
        <RemoteMapping>
          <TableNames Source="sourceTable2" Destination="destinationTable2" />
          <Columns>
            <Column Source="sourceColumn3" Destination="destinationColumn3" />
            <Column Source="sourceColumn4" Destination="destinationColumn4" />
          </Columns>
        </RemoteMapping>
      </RemoteMappings>
    </RemoteMappingFile>
  </Mappings>
</MappingsXml>

如果:

  1. 有超过 2 个 DTS:ObjectData/pipeline/components/component(我们只期望OLE DB 源"和OLE DB 目标".有些时候有一些数据转换组件,所以可能有一些为此需要额外的工作
  2. 有些源列未映射到目标列
  3. 有一些目标列没有映射到源列
  4. 源表名和目标表名不匹配(不是真正的问题)