【发布时间】:2023-03-29 07:40:01
【问题描述】:
我想找到一种比现有做法更快的 Captcha 加载方式——现有做法是先在 WebBrowser 控件中呈现验证码,再复制/粘贴图像并将其显示到 PictureBox 中。
为什么不把图片直接下载到 PictureBox 中呢?这样做的优点是占用更少的 CPU 和内存。这个方案几乎适用于其他任何验证码服务,甚至包括更高级的 Solve Media(对于 Solve Media,如果您查看过某个图像 URL,下次再尝试查看该 URL 时,它会返回一张伪造的错误 Captcha 图像)。
但现在我需要支持 ReCaptcha 验证码系统,好让我的机器人以更快的速度自动运行,而不是每次都刷新网页并等待它呈现。
下面是我目前的代码。据我所知,我在模拟 HTTP 请求时只缺少一个属性:Cookie。似乎某个地方以某种方式生成了一个 cookie,但我不知道是在哪里生成的;另外,我认为在下载 Javascript 文件时也会得到一个 Cookie。
无论如何,Google ReCaptcha 会用一张无法辨认的假 Captcha 来戏弄您,以此当面表明您的做法不对。我发现,只要图中出现 2 个黑色圆圈,就明显是假的。
这里是 Bad Captcha 和 Good Captcha 的示例
我记得 ReCaptcha 曾经还有另一项安全功能:它能以某种方式判断您是否是从放置它的实际域加载 Captcha 图像的。我不知道它是如何做到的,毕竟我是在本地下载所有内容的,对吧?不过他们似乎已经移除了该功能。(其实它在某些网站上仍然存在,只是似乎默认禁用,而且使用 Referer 头很容易骗过它。)
我并不是想作弊,我仍然会手动输入这些 Captcha,只是希望输入得比通常需要渲染整个页面的方式更快。
我希望拿到的 Captcha 是那种街道门牌号图片……或者至少是 2 个不带那些黑色圆圈的单词。
无论如何,这是我当前的代码。
' Fetches a reCAPTCHA challenge over plain HTTP requests and shows its image
' in a Captcha form instead of rendering the page in a WebBrowser control.
' Steps: download the demo page, extract the site key, request the challenge
' script, extract the challenge token, then load the challenge image into
' newCaptcha.picCaptcha. On success a Work item is queued and the form shown.
' NOTE(review): HTTPGET and LoadWebImageToPictureBox are invoked through the
' name HTTP, while the HTTP class declares them as instance methods - HTTP
' presumably refers to an instance declared elsewhere; confirm.
' NOTE(review): GetBetween is defined elsewhere; presumably it returns the
' text found between the two given delimiters - confirm.
Dim newCaptcha = New Captcha
' Left empty so that HTTPGET / LoadWebImageToPictureBox fall back to their built-in default User-Agent.
Dim myUserAgent As String = ""
' The demo page URL is sent as the Referer header on every request below.
Dim myReferer As String = "http://www.google.com/recaptcha/demo/"
' Step 1: download the demo page that embeds the reCAPTCHA widget.
Dim outputSite As String = HTTP.HTTPGET("http://www.google.com/recaptcha/demo/", "", "", "", myUserAgent, myReferer)
' Step 2: pull the key (k= parameter) out of the challenge script URL found in the page.
Dim recaptchaChallengeKey = GetBetween(outputSite, "http://www.google.com/recaptcha/api/challenge?k=", """")
'Google ReCaptcha Captcha
' Step 3: request the challenge script for that key; outputSite now holds the script text.
outputSite = HTTP.HTTPGET("http://www.google.com/recaptcha/api/challenge?k=" & recaptchaChallengeKey, "", "", "", myUserAgent, myReferer)
' Disabled alternative: rewrite the script text into JSON and deserialize it.
'outputSite = outputSite.Replace("var RecaptchaState = {", "{""RecaptchaState"": {")
'outputSite = outputSite.Replace("};", "}}")
'Dim jsonDictionary As Dictionary(Of String, Object) = New JavaScriptSerializer().Deserialize(Of Dictionary(Of String, Object))(outputSite)
' Step 4: extract the challenge token from the "challenge : '...'," entry of the script.
Dim recaptchaChallenge = GetBetween(outputSite, "challenge : '", "',")
' Step 5: fetch recaptcha.js; the returned text is stored in outputSite but not read again in this snippet.
outputSite = HTTP.HTTPGET("http://www.google.com/recaptcha/api/js/recaptcha.js", "", "", "", myUserAgent, myReferer) 'This page looks useless but it seems the javascript loads this anyways, maybe this why I get bad captchas?
' Step 6: download the image for the challenge token straight into the form's picture box.
If HTTP.LoadWebImageToPictureBox(newCaptcha.picCaptcha, "http://www.google.com/recaptcha/api/image?c=" & recaptchaChallenge, myUserAgent, myReferer) = False Then
MessageBox.Show("Recaptcha Image loading failed!")
Else
' Image loaded: record the pending captcha as a Work item and show the form.
Dim newWork As New Work
newWork.CaptchaForm = newCaptcha
newWork.AccountId = 1234 'ID of Accounts.
' The challenge token is stored already formatted as a recaptcha_challenge_field=... pair.
newWork.CaptchaHash = "recaptcha_challenge_field=" & recaptchaChallenge
newWork.CaptchaType = "ReCaptcha"
Works.Add(newWork)
newCaptcha.Show()
End If
这是我使用的 HTTP 类。
Imports System.Collections.Generic
Imports System.Linq
Imports System.Text
Imports System.Net
Imports System.IO
' Small HTTP helper built on HttpWebRequest.
' Every request created by one instance uses the same StoredCookies container,
' so cookies received by an earlier request are sent with later ones.
Public Class HTTP
    ' Cookie container attached to every request made through this instance.
    Public StoredCookies As New CookieContainer

    ' User-Agent used whenever the caller passes an empty userAgent.
    Private Const DefaultUserAgent As String = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)"

    ' Builds the GET request shared by HTTPGET and LoadWebImageToPictureBox:
    ' same headers, redirect handling, timeout and cookie container for both.
    ' An empty userAgent is replaced by DefaultUserAgent.
    Private Function CreateRequest(ByVal url As String, ByVal userAgent As String, ByVal referer As String) As HttpWebRequest
        Dim req As HttpWebRequest = DirectCast(WebRequest.Create(url), HttpWebRequest)
        If userAgent = "" Then
            userAgent = DefaultUserAgent
        End If
        req.UserAgent = userAgent
        req.Referer = referer
        req.AllowAutoRedirect = True
        req.ReadWriteTimeout = 5000
        req.CookieContainer = StoredCookies
        req.Headers.Set("Accept-Language", "en-us")
        req.KeepAlive = True
        req.Method = "GET"
        Return req
    End Function

    ' True when the URL already starts with http:// or https:// (case-insensitive).
    Private Shared Function HasHttpScheme(ByVal url As String) As Boolean
        Return url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) OrElse
               url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase)
    End Function

    ' Performs a GET request and returns the response body as text.
    '
    ' url           - address to fetch.
    ' proxyname     - "host:port" of a proxy, or "" for a direct connection.
    '                 A non-empty value without ":port" raises an exception,
    '                 because proxy parsing happens outside the Try block
    '                 (unchanged from the previous behaviour).
    ' proxylogin    - proxy user name; "" means an open proxy (no credentials).
    ' proxypassword - proxy password, used only when proxylogin is not empty.
    ' userAgent     - User-Agent header; "" selects DefaultUserAgent.
    ' referer       - Referer header value.
    '
    ' Returns the body, or "" when the request or the read fails. Failures are
    ' reported through Debug output instead of being dropped silently.
    Public Function HTTPGET(ByVal url As String, ByVal proxyname As String, ByVal proxylogin As String, ByVal proxypassword As String, ByVal userAgent As String, ByVal referer As String) As String
        Dim req As HttpWebRequest = CreateRequest(url, userAgent, referer)

        If proxyname <> "" Then
            ' Split once instead of once per component.
            Dim proxyParts As String() = proxyname.Split(":"c)
            Dim proxy As New WebProxy(proxyParts(0), CInt(proxyParts(1)))
            'if proxylogin is an empty string then don't use proxy credentials (open proxy)
            If proxylogin <> "" Then
                proxy.Credentials = New NetworkCredential(proxylogin, proxypassword)
            End If
            req.Proxy = proxy
        End If

        Dim response As String = ""
        Try
            ' Using guarantees the response and reader are released even when
            ' reading throws; an unreleased response keeps its pooled
            ' connection busy and can stall later requests to the same host.
            Using resp As HttpWebResponse = DirectCast(req.GetResponse(), HttpWebResponse)
                StoredCookies.Add(resp.Cookies)
                Using reader As New StreamReader(resp.GetResponseStream())
                    response = reader.ReadToEnd()
                End Using
            End Using
        Catch ex As Exception
            ' Best effort: callers rely on "" to signal failure, so do not rethrow.
            System.Diagnostics.Debug.WriteLine("HTTPGET failed for " & url & ": " & ex.Message)
        End Try
        Return response
    End Function

    ' Downloads an image and assigns it to pb.Image.
    '
    ' pb        - picture box that receives the image.
    ' ImageURL  - image address; "http://" is prepended when the URL has
    '             neither an http:// nor an https:// prefix.
    ' userAgent - User-Agent header; "" selects DefaultUserAgent.
    ' referer   - Referer header value.
    '
    ' Returns True when the image was downloaded and decoded, False on any error.
    Public Function LoadWebImageToPictureBox(ByVal pb As PictureBox, ByVal ImageURL As String, ByVal userAgent As String, ByVal referer As String) As Boolean
        Try
            Dim sURL As String = Trim(ImageURL)
            If Not HasHttpScheme(sURL) Then sURL = "http://" & sURL

            Dim req As HttpWebRequest = CreateRequest(sURL, userAgent, referer)

            ' The image bytes are buffered in memory so the network response can
            ' be released as soon as the download completes.
            Dim imageData As New MemoryStream()
            Using resp As WebResponse = req.GetResponse()
                Using remoteStream As Stream = resp.GetResponseStream()
                    Dim buffer(4095) As Byte
                    Dim bytesRead As Integer = remoteStream.Read(buffer, 0, buffer.Length)
                    Do While bytesRead > 0
                        imageData.Write(buffer, 0, bytesRead)
                        bytesRead = remoteStream.Read(buffer, 0, buffer.Length)
                    Loop
                End Using
            End Using

            ' Rewind so decoding starts at the first byte regardless of how the
            ' decoder treats the stream's current position.
            imageData.Position = 0
            ' imageData is deliberately NOT closed: Image.FromStream requires the
            ' stream to stay open for the lifetime of the Image.
            pb.Image = Image.FromStream(imageData)
            Return True
        Catch ex As Exception
            Return False
        End Try
    End Function
End Class
编辑:我发现问题出在 Google 的 Javascript 客户端混淆加密系统上:
http://www.google.com/js/th/1lOyLe_nzkTfeM2GpTkE65M1Lr8y0MC8hybXoEd-x1s.js
我仍然希望能够在不使用笨重的网络浏览器的情况下攻克它,也许可以用某种轻量、快速的 javascript 求值控件?对它进行反混淆并将其移植到 VB.NET 是没有意义的,因为一旦我这样做,他们可能就会彻底更改一些变量或加密方式,我所做的一切就都白费了,所以我想要更智能的办法。目前我甚至不知道这个 URL 是如何生成的,它现在看起来确实是静态的,可能是一个真实存在的文件,而不是即时生成的文件。
原来,为图像提供 challenge 的 _challenge 页面给出的只是一个诱饵 challenge,之后该 challenge 会在客户端借助变量 t1、t2、t3 被替换(可能经过加密?)。这种加密似乎不是每次都会启用;只要通过一次,就可以做到我想做的事情。我的代码基本可以工作,但会在非常随机的时间间隔后失效,而我想要的是更可靠、可以连续几周无人值守运行的方案。
【问题讨论】:
标签: javascript vb.net cookies recaptcha