【问题标题】:Change code to pull additional data from site更改代码以从站点中提取其他数据
【发布时间】:2019-10-11 06:49:43
【问题描述】:

我正在从网站提取数据,但需要帮助才能提取整个字符串。

例子:

我试图查看网站源代码以理解它,但不同的更改会产生不好的结果

Option Explicit

Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues() '<  VBE > Tools > References > Microsoft HTML Object Library
    Set html = New HTMLDocument
    Dim ws As Worksheet, i As Long
    Set ws = ThisWorkbook.Worksheets("Sheet4")
    lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
    sourceValues = ws.Range("B2:D" & lastRow).Value
    Dim results()
    ReDim results(1 To UBound(sourceValues, 1), 1 To 4)
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send
        html.body.innerHTML = .responseText

        csrft = html.querySelector("[name=CSRFToken]").Value
        Stop
        For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
            If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
                DoEvents
                .Open "POST", "https://www.partstown.com/track-my-order", False
                .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                .setRequestHeader "User-Agent", "Mozilla/5.0"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                .setRequestHeader "Accept-Encoding", "gzip, deflate"
                .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                .send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft

                html.body.innerHTML = .responseText

                Dim shipping As String, order As String, items() As String

                shipping = html.querySelector("[data-label=Shipping]").innerText
                order = html.querySelector(".order-history__item-descript--min").innerText
                items = Split(order, vbNewLine)

                Dim qtyOrdered As Long, qtyShipped As String, product As String

                qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                product = html.querySelector(".details-table a").Title

                results(i, 1) = shipping
                results(i, 2) = qtyOrdered
                results(i, 3) = qtyShipped
                results(i, 4) = product
            End If
            'Application.Wait Now + TimeSerial(0, 0, 1)
        Next
    End With
    'results written out from row 2 column E
    ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results

End Sub

我输入了一个包含多个部分的 PO 和 zip(4500987740 和 33314),返回的数据只是第一部分,而不是所有部分。

示例 2:

我需要返回所有数据: 跟踪、订购的数量、发货的数量、产品、订购的数量、发货的产品数量等 - 基本上作为一个字符串,直到显示所有部件

【问题讨论】:

  • 已编辑以尝试制作更令人心酸的内容。
  • 您的代码如上面发布的值 4500987740 和 33314 - 您是说产生示例 2 中的输出吗?
  • 是的,这是正确的,我想要的是它可以将所有值都串起来,而不仅仅是顶行。

标签: excel vba web-scraping xmlhttprequest


【解决方案1】:

问题在于querySelector的使用。 querySelector 仅返回第一个匹配项。在这种情况下,这意味着您只考虑第一个 排。所需的修改是使用querySelectorAll 返回所有匹配项。然后循环这些匹配以提取每一行的信息。

此外,此选择器 .details-table a 必须更改为仅返回感兴趣的项目,即 .details-table a[title] - 那些具有 title 属性。

为了适当地写出每一行,一个辅助函数用于查找下一个空闲行。由于事先不知道行数,因此无法将适当大小的数组设置为容纳所有结果 - 尽管您可以从一开始就加大数组的大小。后一点是您可以做出的修正。我改为循环批量写出数组。

Option Explicit
Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String, ws As Worksheet
    Dim lastRow As Long, wsTarget As Worksheet, j As Long '<  VBE > Tools > References > Microsoft HTML Object Library
    Dim sourceValues()

    Set html = New HTMLDocument
    Set ws = ThisWorkbook.Worksheets("Sheet4")
    Set wsTarget = ThisWorkbook.Worksheets("Sheet1")
    lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
    sourceValues = ws.Range("B2:D" & lastRow).Value

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send

        html.body.innerHTML = .responseText

        csrft = html.querySelector("[name=CSRFToken]").Value
        For j = LBound(sourceValues, 1) To UBound(sourceValues, 1)
            If sourceValues(j, 1) <> vbNullString And sourceValues(j, 3) <> vbNullString Then
                DoEvents
                .Open "POST", "https://www.partstown.com/track-my-order", False
                .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                .setRequestHeader "User-Agent", "Mozilla/5.0"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                .setRequestHeader "Accept-Encoding", "gzip, deflate"
                .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                .send "orderNo=" & sourceValues(j, 1) & "&postalCode=" & sourceValues(j, 3) & "&CSRFToken=" & csrft
                html.body.innerHTML = .responseText

                Dim shipping As String, orders As Object, items() As String

                shipping = html.querySelector("[data-label=Shipping]").innerText
                Set orders = html.querySelectorAll(".order-history__item-descript--min")

                Dim i As Long, c As Long, results(), products As Object
                ReDim results(1 To 1, 1 To 4 * orders.length)
                Dim qtyOrdered As Long, qtyShipped As String, product As String
                Set products = html.querySelectorAll(".details-table a[title]")
                c = 1
                For i = 0 To orders.length - 1
                    items = Split(orders.item(i).innerText, vbNewLine)
                    qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                    qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                    results(1, c) = shipping
                    results(1, c + 1) = qtyOrdered
                    results(1, c + 2) = qtyShipped
                    results(1, c + 3) = products.item(i).Title
                    c = c + 4
                Next
                wsTarget.Cells(GetLastRow(wsTarget) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
            End If
        Next
    End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

【讨论】:

  • 这实际上非常接近。有没有办法让它们从左到右串起来? imgur.com/YJ8bzAs
  • 什么意思?我正在逐行写它们。
  • 我提供了一个屏幕截图链接
  • 将函数更改为查找最后一列函数并添加一个。然后将 wsTarget.Cells(GetLastRow(wsTarget) + 1, 1) 切换为 wsTarget.Cells(1, GetLastColumn(1))
  • 好的,所以现在它所做的就是移动它放置在数据右侧的行,我想我要问的是这个。有没有办法获取垂直数据,并将其水平粘贴以对应每个 PO 和邮政编码?像截图描述的那样? imgur.com/qz4ekcj我保证我不会让这变得困难
猜你喜欢
  • 2012-05-01
  • 2014-04-16
  • 1970-01-01
  • 2011-11-05
  • 1970-01-01
  • 2013-01-10
  • 2019-05-13
  • 2015-02-18
相关资源
最近更新 更多