感谢(巨大的)样本文件。我已经处理 xml 文件超过 15 年了。我一直怀疑 Access 女士在接近 GB 限制时的表现如何。
根据我的经验,现在已经确认,只有一位获胜者:
Open FileURL For Input As #FileNum。与InputLine = Input(1000, #FileNum) ' read some 1.000 characters 结合使用。基本上,只需将 XML 视为纯文本文件。
如果可以使用Line Input 代替Input,编码会更容易,但在您的示例中并非如此。您的示例文件使用vbLf 标记文本中的行尾,而Line Input 需要vbCrLf 才能正常工作。
我最终得到了一个小型应用程序,它首先扫描文件以查找不同出现的标签。之后,这些标签可以分配给几个任务:
在第二次完整读取中,所有值都分配给数据库中的目标字段。
我将尝试通过插入一些代码(as of 02 Feb 2018 15h London time, I have to dash, I am gonna come back to it at a later point of time)来澄清一下
Option Compare Database
Option Explicit
Dim marrKnownTags() As String
Public Sub ReadFile2GB()
Dim FileNum As Integer
Dim InputLine As String
Call init_marrKnownTags
FileNum = FreeFile
Open "X:\20180128-gleif-concatenated-file-lei2.xml" For Input As #FileNum
Do While Not EOF(FileNum)
InputLine = Input(99000, #FileNum) ' read some 99.000 characters
Call processTemporaryBlock(InputLine)
...
Loop
Close #FileNum
End Sub
Public Function positionCrOfLf(PieceToScan As String) As Long
Dim Pos As Long
Pos = 0
If Pos = 0 Then
Pos = InStr(PieceToScan, vbCrLf)
End If
If Pos = 0 Then
Pos = InStr(PieceToScan, vbLf)
End If
If Pos = 0 Then
Pos = InStr(PieceToScan, vbCr)
End If
'Debug.Print "fie positionCrOfLf := " & Pos
positionCrOfLf = Pos
End Function
Private Sub init_marrKnownTags()
ReDim Preserve marrKnownTags(333)
marrKnownTags(1) = "<?xml version=" ' start of xml
marrKnownTags(10) = "<lei:LEIData" ' Table_01 Open
marrKnownTags(20) = "<lei:LEIHeader>" ' Table_02 Open
marrKnownTags(21) = "<lei:ContentDate>" ' field
marrKnownTags(22) = "<lei:FileContent>" ' field
marrKnownTags(23) = "<lei:RecordCount>" ' field
marrKnownTags(30) = "<lei:Extension>" ' Table_03 Open
marrKnownTags(40) = "<gleif:Sources>" ' Table_04 Open
marrKnownTags(41) = "<gleif:Source>" ' addnew record Table_04
marrKnownTags(42) = "<gleif:ContentDate>" ' field
marrKnownTags(43) = "<gleif:Originator>" ' field
marrKnownTags(44) = "<gleif:RecordCount>" ' field
marrKnownTags(45) = "</gleif:Source>" ' save this new record Table_04
marrKnownTags(46) = "</gleif:Sources>" ' Table_04 Close
marrKnownTags(31) = "</lei:Extension>" ' Table_03 Close
' ... some more child-tables in the future ??
marrKnownTags(129) = "</lei:Entity>" ' Table_12 Close ' close child table
marrKnownTags(140) = "<lei:Registration>" ' Table_14 Open
marrKnownTags(141) = "<lei:LastUpdateDate>" ' DO NOT SKIP field with "2017-11-30T15:06:27Z" =?= 2017-11-30 15:06:27
marrKnownTags(142) = "<lei:RegistrationStatus>" ' DO NOT SKIP field with "ISSUED"
marrKnownTags(149) = "</lei:Registration>" ' Table_14 Close
marrKnownTags(2) = "</lei:LEIRecord>" ' save this new record
marrKnownTags(2) = "</lei:LEIRecords>" ' Table_11 Close ' close child table
End Sub
Public Function processTemporaryBlock(ByVal TemporaryBlock As String)
Dim positionStart As Long, positionEnd As Long, positionLength As Long
Dim OneLine As String, searchTag As String
Dim indexArray As Long
Dim tagFoundYN As Boolean
positionStart = 1
positionEnd = positionCrOfLf(TemporaryBlock)
Do While positionEnd > 0
OneLine = trim(Mid(TemporaryBlock, positionStart, positionEnd - 1))
Debug.Print "OneLine := " & OneLine
tagFoundYN = False
For indexArray = LBound(marrKnownTags) To UBound(marrKnownTags)
searchTag = marrKnownTags(indexArray)
searchTag = Trim(searchTag)
If searchTag = "" Then
' skip
Else
If Left(OneLine, Len(searchTag)) = searchTag Then
' Call processTag(OneLine)
tagFoundYN = True
exit for
End If
End If
Next
positionStart = positionStart + positionEnd
positionEnd = positionCrOfLf(Mid(TemporaryBlock, positionStart))
Loop
End Sub