【发布时间】:2017-03-02 12:10:47
【问题描述】:
我不确定这个问题是否已经有人回答过;我在 Stack Overflow 上搜索了很久,但没有找到相关内容。如果这是重复提问,还请见谅。
我们有一个要求,我们需要编写一个 API 解析器,该解析器适用于任何提供 XML 输出的 API。
我们不会事先知道 XML 结构。
解决方案应转换 XML 文件并将其保存在通用 tsql 表中,并将 XML 元素/属性名称作为第一行。
所以基本上它是任何 API 的 XML 反序列化器。
我们的 C# 类不能使用任何第三方 dll。
我对C#一无所知,所以不知道它是否可能。但是我已经能够使用 OPENXML 在 tsql 中编写一个通用的 XML->row 转换器。 tsql解决方案的问题是我们无法将一个巨大的XML文件成功导入数据库。
我可以提供任何需要的细节。请在评论/回答中告诉我。
我不希望任何人为我写代码,任何合适的指针就足够了
资源: JSON
[
{
"id" : 21953,
"mainReqIdentity" : "xxxx",
"itemName" : "xxxx",
"kanbanPhase" : "xxxx",
"kanbanStatus" : "xxxx",
"backlogItemType" : "xxxx",
"identityDomain" : "xxxx",
"fromDatetime" : "2016-08-05 17:52:34",
"teams" : [],
"releases" : [{
"id" : 1229,
"release_name" : "xxxx",
"release_connection_type" : "xxxx"
}
],
"fpReleases" : [],
"sources" : [{
"sourceName" : "xxxx",
"sourceRecordUrl" : "xxxx",
"sourceRecordIdentity" : "xxxx"
}
],
"productNumbers" : [],
"tags" : [],
"productComponents" : [],
"ranPlatforms" : [],
"subReleases" : [],
"requirementAreaId" : xxxx,
"requirementArea" : "xxxx",
"toBeHandledAtxxxx" : "xxxx"
}, {
"id" : 22014,
"mainReqIdentity" : "xxxx",
"itemName" : "xxxx",
"kanbanPhase" : "xxxx",
"kanbanStatus" : "xxxx",
"backlogItemType" : "xxxx",
"identityDomain" : "xxxx",
"fromDatetime" : "2016-08-05 17:52:34",
"teams" : [],
"releases" : [{
"id" : xxxx,
"release_name" : "xxxx",
"release_connection_type" : "xxxx"
}
],
"fpReleases" : [],
"sources" : [{
"sourceName" : "xxxx",
"sourceRecordUrl" : "xxxx",
"sourceRecordIdentity" : "xxxx"
}
],
"productNumbers" : [],
"tags" : [],
"productComponents" : [],
"ranPlatforms" : [],
"subReleases" : [],
"requirementAreaId" : xxxx,
"requirementArea" : "xxxx",
"f0Date" : "2015-10-01",
"f1Date" : "2015-10-01",
"f2Date" : "2016-02-01",
"f4Date" : "2016-03-31",
"fgDate" : "2016-04-29",
"toBeHandledAtxxxx" : "xxxx"
}
]
XML:2 个样本
示例 1
<root type="array">
<item type="object">
<id type="number">21286</id>
<mainReqIdentity type="string">xxxxxx</mainReqIdentity>
<itemName type="string">xxxxxx</itemName>
<kanbanPhase type="string">xxxxxx</kanbanPhase>
<kanbanStatus type="string">xxxxxx</kanbanStatus>
<kanbanNote type="string">xxxxxx</kanbanNote>
<backlogItemType type="string">xxxxxx</backlogItemType>
<identityDomain type="string">xxxxxx</identityDomain>
<fromDatetime type="string">2016-08-23 17:01:52</fromDatetime>
<teams type="array">
<item type="object">
<team_name type="string">xxxxxx</team_name>
<preliminary type="boolean">xxxxxx</preliminary>
</item>
</teams>
<releases type="array">
<item type="object">
<id type="number">xxxxxx</id>
<release_name type="string">xxxxxx</release_name>
<release_connection_type type="string">xxxxxx</release_connection_type>
</item>
</releases>
<fpReleases type="array">
</fpReleases>
<sources type="array">
<item type="object">
<sourceName type="string">xxxxxx</sourceName>
<sourceRecordUrl type="string">xxxxxx</sourceRecordUrl>
</item>
</sources>
<productNumbers type="array">
</productNumbers>
<tags type="array">
</tags>
<productComponents type="array">
</productComponents>
<ranPlatforms type="array">
</ranPlatforms>
<subReleases type="array">
</subReleases>
<requirementAreaId type="number">xxxxxx</requirementAreaId>
<requirementArea type="string">xxxxxx</requirementArea>
<itemContact type="string">xxxxxx</itemContact>
<toBeHandledAtxxx type="string">xxxxxx</toBeHandledAtxxx>
</item>
<item type="object">
<id type="number">xxxxxx</id>
<mainReqIdentity type="string">xxxxxx</mainReqIdentity>
<itemName type="string">xxxxxx</itemName>
<kanbanPhase type="string">xxxxxx</kanbanPhase>
<kanbanStatus type="string">xxxxxx</kanbanStatus>
<kanbanNote type="string">xxxxxx</kanbanNote>
<backlogItemType type="string">xxxxxx</backlogItemType>
<identityDomain type="string">xxxxxx</identityDomain>
<fromDatetime type="string">2016-08-23 17:01:52</fromDatetime>
<teams type="array">
<item type="object">
<team_name type="string">xxxxxx</team_name>
<preliminary type="boolean">xxxxxx</preliminary>
</item>
</teams>
<releases type="array">
<item type="object">
<id type="number">xxxxxx</id>
<release_name type="string">xxxxxx</release_name>
<release_connection_type type="string">xxxxxx</release_connection_type>
</item>
</releases>
<fpReleases type="array">
</fpReleases>
<sources type="array">
<item type="object">
<sourceName type="string">xxxxxx</sourceName>
<sourceRecordUrl type="string">xxxxxx</sourceRecordUrl>
</item>
</sources>
<productNumbers type="array">
</productNumbers>
<tags type="array">
</tags>
<productComponents type="array">
</productComponents>
<ranPlatforms type="array">
</ranPlatforms>
<subReleases type="array">
</subReleases>
<requirementAreaId type="number">xxxxxx</requirementAreaId>
<requirementArea type="string">xxxxxx</requirementArea>
<oaResultReference type="string">xxxxxx</oaResultReference>
<itemContact type="string">xxxxxx</itemContact>
<f0Date type="string">2014-10-17</f0Date>
<f1Date type="string">2015-01-16</f1Date>
<f2Date type="string">2015-02-13</f2Date>
<f4Date type="string">2015-06-12</f4Date>
<faDate type="string">2015-06-12</faDate>
<fgDate type="string">2015-06-12</fgDate>
<toBeHandledAtxxx type="string">xxxxxx</toBeHandledAtxxx>
</item>
</root>
示例 2
<ROOT>
<Customer CustomerID="VINET" ContactName="Paul Henriot">
<Order CustomerID="VINET" EmployeeID="5" OrderDate="1996-07-04T00:00:00">
<OrderDetail OrderID="10248" ProductID="11" Quantity="12"/>
<OrderDetail OrderID="10248" ProductID="42" Quantity="10"/>
</Order>
</Customer>
<Customer CustomerID="LILAS" ContactName="Carlos Gonzlez">
<Order CustomerID="LILAS" EmployeeID="3" OrderDate="1996-08-16T00:00:00">
<OrderDetail OrderID="10283" ProductID="72" Quantity="3"/>
</Order>
</Customer>
</ROOT>
SQL
通用暂存表
-- Generic staging table: five bookkeeping columns followed by 150
-- untyped VARCHAR(500) slots (col1 .. col150) that hold the shredded values.
-- NOTE(review): presumably the 'H'/'D' rows produced by the XML shredding
-- procedure are what lands here (api_id, record type, record id, run time,
-- run by, then the data columns) -- confirm against the loader.
CREATE TABLE ZZZZZZZZZ
(
    api_id        INT,
    record_type   CHAR(1),
    record_id     INT,
    last_run_time DATETIME,
    last_run_by   VARCHAR(500),
    -- generic data slots
    col1   VARCHAR(500),
    col2   VARCHAR(500),
    col3   VARCHAR(500),
    col4   VARCHAR(500),
    col5   VARCHAR(500),
    col6   VARCHAR(500),
    col7   VARCHAR(500),
    col8   VARCHAR(500),
    col9   VARCHAR(500),
    col10  VARCHAR(500),
    col11  VARCHAR(500),
    col12  VARCHAR(500),
    col13  VARCHAR(500),
    col14  VARCHAR(500),
    col15  VARCHAR(500),
    col16  VARCHAR(500),
    col17  VARCHAR(500),
    col18  VARCHAR(500),
    col19  VARCHAR(500),
    col20  VARCHAR(500),
    col21  VARCHAR(500),
    col22  VARCHAR(500),
    col23  VARCHAR(500),
    col24  VARCHAR(500),
    col25  VARCHAR(500),
    col26  VARCHAR(500),
    col27  VARCHAR(500),
    col28  VARCHAR(500),
    col29  VARCHAR(500),
    col30  VARCHAR(500),
    col31  VARCHAR(500),
    col32  VARCHAR(500),
    col33  VARCHAR(500),
    col34  VARCHAR(500),
    col35  VARCHAR(500),
    col36  VARCHAR(500),
    col37  VARCHAR(500),
    col38  VARCHAR(500),
    col39  VARCHAR(500),
    col40  VARCHAR(500),
    col41  VARCHAR(500),
    col42  VARCHAR(500),
    col43  VARCHAR(500),
    col44  VARCHAR(500),
    col45  VARCHAR(500),
    col46  VARCHAR(500),
    col47  VARCHAR(500),
    col48  VARCHAR(500),
    col49  VARCHAR(500),
    col50  VARCHAR(500),
    col51  VARCHAR(500),
    col52  VARCHAR(500),
    col53  VARCHAR(500),
    col54  VARCHAR(500),
    col55  VARCHAR(500),
    col56  VARCHAR(500),
    col57  VARCHAR(500),
    col58  VARCHAR(500),
    col59  VARCHAR(500),
    col60  VARCHAR(500),
    col61  VARCHAR(500),
    col62  VARCHAR(500),
    col63  VARCHAR(500),
    col64  VARCHAR(500),
    col65  VARCHAR(500),
    col66  VARCHAR(500),
    col67  VARCHAR(500),
    col68  VARCHAR(500),
    col69  VARCHAR(500),
    col70  VARCHAR(500),
    col71  VARCHAR(500),
    col72  VARCHAR(500),
    col73  VARCHAR(500),
    col74  VARCHAR(500),
    col75  VARCHAR(500),
    col76  VARCHAR(500),
    col77  VARCHAR(500),
    col78  VARCHAR(500),
    col79  VARCHAR(500),
    col80  VARCHAR(500),
    col81  VARCHAR(500),
    col82  VARCHAR(500),
    col83  VARCHAR(500),
    col84  VARCHAR(500),
    col85  VARCHAR(500),
    col86  VARCHAR(500),
    col87  VARCHAR(500),
    col88  VARCHAR(500),
    col89  VARCHAR(500),
    col90  VARCHAR(500),
    col91  VARCHAR(500),
    col92  VARCHAR(500),
    col93  VARCHAR(500),
    col94  VARCHAR(500),
    col95  VARCHAR(500),
    col96  VARCHAR(500),
    col97  VARCHAR(500),
    col98  VARCHAR(500),
    col99  VARCHAR(500),
    col100 VARCHAR(500),
    col101 VARCHAR(500),
    col102 VARCHAR(500),
    col103 VARCHAR(500),
    col104 VARCHAR(500),
    col105 VARCHAR(500),
    col106 VARCHAR(500),
    col107 VARCHAR(500),
    col108 VARCHAR(500),
    col109 VARCHAR(500),
    col110 VARCHAR(500),
    col111 VARCHAR(500),
    col112 VARCHAR(500),
    col113 VARCHAR(500),
    col114 VARCHAR(500),
    col115 VARCHAR(500),
    col116 VARCHAR(500),
    col117 VARCHAR(500),
    col118 VARCHAR(500),
    col119 VARCHAR(500),
    col120 VARCHAR(500),
    col121 VARCHAR(500),
    col122 VARCHAR(500),
    col123 VARCHAR(500),
    col124 VARCHAR(500),
    col125 VARCHAR(500),
    col126 VARCHAR(500),
    col127 VARCHAR(500),
    col128 VARCHAR(500),
    col129 VARCHAR(500),
    col130 VARCHAR(500),
    col131 VARCHAR(500),
    col132 VARCHAR(500),
    col133 VARCHAR(500),
    col134 VARCHAR(500),
    col135 VARCHAR(500),
    col136 VARCHAR(500),
    col137 VARCHAR(500),
    col138 VARCHAR(500),
    col139 VARCHAR(500),
    col140 VARCHAR(500),
    col141 VARCHAR(500),
    col142 VARCHAR(500),
    col143 VARCHAR(500),
    col144 VARCHAR(500),
    col145 VARCHAR(500),
    col146 VARCHAR(500),
    col147 VARCHAR(500),
    col148 VARCHAR(500),
    col149 VARCHAR(500),
    col150 VARCHAR(500)
)
样本输出
用 TSQL 编写的通用 XML 解析器。代码中有少量 hack,以及一些需要删除的多余代码。它本身工作正常;问题在于如何从 C# 代码把整个 XML 文档作为输入参数传入(无论是直接调用还是通过文件)。
-- Generic XML -> rowset shredder built on OPENXML.
--
-- The procedure reads the OPENXML edge table for the document, walks it with a
-- recursive CTE to derive every element/attribute path, builds an
-- OPENXML ... WITH (...) column list from those paths, and executes the
-- resulting dynamic SELECT. The result set has five leading columns
-- (api id, record type, record id, run time, run by) followed by one
-- varchar(500) column per distinct <parent>_<name> and NULL padding.
--
-- Parameters:
--   @in_api_id            value emitted as the first column of every row
--   @in_xml_doc           XML document to shred
--   @in_xml_root          root element name WITHOUT the leading '/'
--                         (the procedure prefixes it)
--   @in_tot_result_col    total number of data columns wanted; the result is
--                         padded with NULL columns up to this count
--   @in_need_colnm_result 'Y' => prepend a header row (record type 'H',
--                         record id 0) carrying the column names
--   @in_debug_flg         'Y' => return the generated SQL and the column
--                         structure instead of executing the SQL
CREATE PROC ZZZZZZZ
(
@in_api_id int,
@in_xml_doc XML,
@in_xml_root varchar(100),
@in_tot_result_col int = 150,
@in_need_colnm_result CHAR(1) = 'Y',
@in_debug_flg CHAR(1) = 'N'
)
AS
BEGIN
    DECLARE
        @idoc int,
        @sqlstr nvarchar(max) = '',
        @param nvarchar(200) = '',
        @runtime datetime = getdate(),
        -- suser_name() returns up to 128 characters; a shorter variable
        -- would silently truncate long login names
        @runby varchar(128) = suser_name(),
        @runtime_lit varchar(23),
        @runby_lit varchar(256),
        @max_lvl int,
        -- one character wider than the node path: holds the path plus a trailing '/'
        @max_node varchar(501) = '',
        @max_node_wo_slash varchar(500) = '',
        @xml_col nvarchar(max) = '',
        @unq_col nvarchar(max) = '',
        @unq_xml_col nvarchar(max) = ''

    -- Both values are spliced into the dynamic SQL as string literals.
    -- Style 126 (ISO 8601) keeps seconds/milliseconds and does not depend on
    -- the session language; the implicit conversion done by CONCAT would
    -- produce 'mon dd yyyy hh:miAM' and drop the seconds.
    set @runtime_lit = convert(varchar(23), @runtime, 126)
    -- Double embedded single quotes so a login name cannot break the literal.
    set @runby_lit = replace(@runby, '''', '''''')

    --Create an internal representation of the XML document.
    EXEC sp_xml_preparedocument @idoc OUTPUT, @in_xml_doc;

    BEGIN TRY
        -- Read the edge table (OPENXML without a WITH clause) below the root.
        set @in_xml_root = concat('/',@in_xml_root)
        SELECT * into #tmp FROM OPENXML (@idoc, @in_xml_root,2) where id <> 0;

        -- Walk the edge table top-down. lvl only increases for element nodes
        -- (nodetype = 1); node accumulates the element path.
        ;with xml_cte(id, parentid, nodetype, localname, prefix, namespaceuri, datatype, prev, text, lvl,node,parent_localname)
        AS
        (
            select id,
                parentid,
                nodetype,
                localname,
                prefix,
                namespaceuri,
                datatype,
                prev,
                text,
                1 as lvl,
                cast(CONCAT(@in_xml_root,'/',localname) as varchar(500)) node,
                cast('' as varchar(200))
            from #tmp
            where parentid = 0
            UNION all
            select t.id,
                t.parentid,
                t.nodetype,
                t.localname,
                t.prefix,
                t.namespaceuri,
                t.datatype,
                t.prev,
                t.text,
                iif(t.nodetype = 1,xc.lvl+1,xc.lvl),
                cast(
                    CONCAT (
                        xc.node
                        ,iif(t.nodetype = 1,
                            CONCAT (
                                '/'
                                ,t.localname
                            )
                            ,''
                        )
                    ) AS VARCHAR(500)
                ),
                cast(xc.localname as varchar(200))
            from #tmp t
            inner join xml_cte xc
                on xc.id = t.parentid
        )
        select * into #xmlcte from xml_cte

        -- The deepest element level becomes the row pattern for the final OPENXML.
        select @max_lvl = max(lvl)
        from #xmlcte

        select
            @max_node = concat(max(node),'/'),
            @max_node_wo_slash = max(node)
        from #xmlcte
        where lvl = @max_lvl

        -- One row per element/attribute node (nodetype <= 2) with:
        --   fnl_col_nm : column definition for the WITH clause
        --   col_Struct : column pattern relative to the row pattern
        --                ('../' steps for shallower nodes, '@' for attributes)
        --   col_unq_nm : <parent>_<name>, used as the output column name
        select *,concat(parent_localname,'_',localname,' varchar(500)') fnl_col_nm,
            case
                when lvl<@max_lvl then concat(replicate('../',@max_lvl-lvl+iif(nodetype=1,nodetype,0)),iif(nodetype=1,'','@'),localname) --v2 change
                when lvl>@max_lvl then concat(replace(node,@max_node,''),iif(nodetype=1,'','/@'),localname)--v2 change
                else concat('../',iif(nodetype=1,'',concat(parent_localname,'/@')),localname)--v2 change
            end col_Struct
            ,concat(parent_localname,'_',localname) col_unq_nm
            ,ROW_NUMBER() over (order by(select 100)) sno
            ,concat('xmlname.value(''/Names[1]/name[',ROW_NUMBER() over (order by(select 100)),']'',''varchar(500)'') AS ',concat(parent_localname,'_',localname)) col_splt_nm
        into #xml_col_struct
        from #xmlcte
        where nodetype <= 2--v2 change

        -- WITH-clause column list: "<name> varchar(500) '<pattern>'", sorted.
        select @xml_col =
        (
            select distinct concat(',',fnl_col_nm,' ''',col_Struct,'''',char(10))
            from #xml_col_struct
            order by 1
            for xml path('')
        )
        set @xml_col = stuff(@xml_col,1,1,'')

        -- Comma-separated, sorted list of the distinct column names (header values).
        select @unq_col =
        (
            select distinct concat(',',col_unq_nm )
            from #xml_col_struct
            order by 1
            for xml path('')
        )
        set @unq_col = stuff(@unq_col,1,1,'')

        -- Remaining slots are filled with NULL columns.
        select @in_tot_result_col = @in_tot_result_col - count(distinct col_unq_nm)
        from #xml_col_struct

        -- Header-row expressions: the i-th expression picks the i-th name out of
        -- @unq_col. Numbering and output order are both by col_unq_nm so they
        -- line up with the sorted @unq_col list deterministically.
        select @unq_xml_col =
        (
            select
                concat(',xmlname.value(''/Names[1]/name[',ROW_NUMBER() over (order by col_unq_nm),']'',''varchar(500)'') AS ',col_unq_nm,char(10))
            from (select distinct col_unq_nm from #xml_col_struct) t
            order by col_unq_nm
            for xml path('')
        )
        set @unq_xml_col = stuff(@unq_xml_col,1,1,'')

        -- Assemble the dynamic statement: optional header row ('H') UNION ALL
        -- the detail rows ('D') read through OPENXML with the generated WITH clause.
        -- NOTE(review): element/attribute names are spliced in unquoted, so XML
        -- names containing '-' or '.' would presumably produce invalid SQL -- confirm.
        set @sqlstr =
        concat(
            iif(@in_need_colnm_result = 'Y',
                concat('
;WITH Split_Names (xmlname)
AS
(
SELECT
CONVERT(XML,''<Names><name>''
+ REPLACE(''',@unq_col,''','','', ''</name><name>'') + ''</name></Names>'') AS xmlname
)
'
                ,char(10),
                ' SELECT ',@in_api_id,',''H'',0,''',@runtime_lit,''',''',@runby_lit,''',',char(10)
                ,@unq_xml_col,replicate(',NULL',@in_tot_result_col)--v2 change
                ,char(10)
                ,'FROM Split_Names'
                ,char(10)
                ,'union all'
                )
                ,''
            )
            ,'
SELECT ',@in_api_id,',''D'',ROW_NUMBER() over (order by(select 100)),''',@runtime_lit,''',''',@runby_lit,''',*'
            ,replicate(',NULL',@in_tot_result_col)--v2 change
            ,char(10)
            ,'FROM OPENXML (@idoc_inn, ''',@max_node_wo_slash,''',2)'
            ,char(10)
            ,'WITH (',@xml_col,')'
        )

        if @in_debug_flg = 'Y'
        begin
            -- Debug mode: show the intermediate values instead of running the SQL.
            select @max_lvl+1,@max_lvl,@max_node_wo_slash,@xml_col,@unq_col,@sqlstr,@unq_xml_col
            select * from #xml_col_struct--v2 change
        end
        else
        begin
            -- The document handle is passed in as a parameter of the dynamic batch.
            set @param = '@idoc_inn int'
            exec sys.sp_executesql @sqlstr,@param,@idoc_inn = @idoc
        end
    END TRY
    BEGIN CATCH
        -- Release the parsed document even when shredding fails; otherwise the
        -- handle stays allocated for the rest of the session.
        IF @idoc IS NOT NULL EXEC sp_xml_removedocument @idoc;
        THROW;
    END CATCH

    EXEC sp_xml_removedocument @idoc
END
读取由 C# 类保存的 XML 文件的 SQL 代码。这也可以正常工作,但问题是文件的每一行都被加载为单独的一行记录,而把这些行重新拼接起来时,结果会在某个长度之后被截断。
-- Load the saved XML file as ONE value instead of one row per text line.
-- BULK INSERT with ROWTERMINATOR = '\n' splits the document at every line
-- break, so the pieces have to be re-concatenated before they can be passed
-- on as XML. OPENROWSET(BULK ..., SINGLE_BLOB) returns the whole file as a
-- single varbinary(max) value; casting that to XML lets the XML parser work
-- out the encoding from the file itself.
create table #tmp(data_line nvarchar(max))

insert into #tmp (data_line)
select cast(cast(src.BulkColumn as xml) as nvarchar(max))
from openrowset(
        bulk '\\Server\\ZZZZ\\Downloads\\Data.xml',
        single_blob
     ) as src;

-- #tmp now holds exactly one row containing the complete document.
select data_line from #tmp
C# 类
// Downloads the payload through the "HTTP" connection manager, converts the
// JSON bytes to an XML document and saves that document to disk.
// NOTE(review): Dts.Connections suggests this runs inside an SSIS Script Task -- confirm.
Object httpConn = Dts.Connections["HTTP"].AcquireConnection(null);
HttpClientConnection myConnection = new HttpClientConnection(httpConn);
// NOTE(review): the format string contains no {0}/{1} placeholders, so the
// "userid"/"password" arguments have no effect on the resulting URL -- confirm intent.
myConnection.ServerURL = string.Format(("http://xxxx.com/jjjj"),"userid","password");
byte[] webdata = myConnection.DownloadData();
XmlDocument xd = new XmlDocument();
// The JSON reader exposes the payload as XML nodes; loading straight from the
// reader avoids materialising the whole document as an intermediate string,
// and the using block disposes the reader when loading is done.
using (XmlDictionaryReader xr = JsonReaderWriterFactory.CreateJsonReader(webdata, XmlDictionaryReaderQuotas.Max))
{
    xd.Load(xr);
}
// NOTE(review): in a regular (non-verbatim) C# string this literal denotes
// \Server\ZZZZ\Downloads\Data.xml (single leading backslash), i.e. a path
// rooted on the current drive rather than a UNC share -- confirm.
xd.Save("\\Server\\ZZZZ\\Downloads\\Data.xml");
【问题讨论】:
-
您在这里可能遇到的是 SQL Server 的一些固有限制。每个非宽表的列数 1024,每个宽表的列数 30000。如果您使用的是非宽表并且 xml 每条记录的元素超过 1024 个,则表创建将失败。
-
我们确信,在任何一个 API 中都不会出现超过 100 个元素/属性。
-
好吧,但这不是你在问题中所说的。你说的是任何 XML。
-
对不起……
-
重点在于这句话:“保存在通用 tsql 表中”。实际的问题是:基本结构是简单的(类似键值对),还是可能存在 1:n 关系、数据深度嵌套?XML 是一个大文档,还是可能携带许多条记录?您需要按行的结构(即键值对),还是需要带命名列的宽表?请提供一些(简化的)XML 示例,说明它们的样子以及您希望如何存储。
标签: c# sql-server xml tsql sql-server-2014