这里的问题是,页面源码中的验证码 URL 并不是
实际的图片地址,而是一个动态生成验证码图片的脚本。
因此,当您调用验证码识别 API 时,所识别的图片
与浏览器中加载的图片并不是同一张。为了解决这个问题,我们必须
保存与浏览器加载的完全相同的那张图片。我跟踪了图片请求,
发现它使用的是浏览器加载页面时
生成的 cookie。
使用 Selenium:
from selenium import webdriver
from python3_anticaptcha import ImageToTextTask, CallbackClient
from time import sleep
import requests
def GetImageCookies():
    """Build the ``Cookie`` header value used to fetch the captcha image.

    Reads the cookies currently held by the module-level Selenium ``browser``
    and keeps only those named ``ssc`` and ``ghsdfkjlksssalk35bbr``, in the
    order the browser reports them.

    Returns:
        str: The selected cookies as concatenated ``name=value;`` pairs, or
        an empty string when neither cookie is present.
    """
    print('Extracting Browser Cookies')
    wanted_names = ('ssc', 'ghsdfkjlksssalk35bbr')
    return ''.join(
        '{}={};'.format(entry['name'], entry['value'])
        for entry in browser.get_cookies()
        if entry['name'] in wanted_names
    )
def SaveImage(captcha_file="master.jpg"):
    """Download the captcha image with the browser's cookies and save it.

    The request carries the cookie string returned by ``GetImageCookies()``
    together with browser-like headers. The image bytes are written to
    ``captcha_file`` only when the server answers with HTTP 200; on any other
    status nothing is written.

    Args:
        captcha_file: Path of the file the image bytes are written to.
    """
    print('Saving the captcha image')
    cookie_header = GetImageCookies()
    request_headers = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Cookie': cookie_header,
        'Host': 'masked',
        'Referer': 'masked',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    }
    # NOTE: verify=False turns off TLS certificate verification for this call.
    response = requests.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=request_headers,
    )
    if response.status_code != 200:
        return
    with open(captcha_file, 'wb') as image_out:
        image_out.write(response.content)
def SolveCapcha(captcha_file="master.jpg"):
    """Send the saved captcha image to the anti-captcha service.

    Args:
        captcha_file: Path of the captcha image file to submit.

    Returns:
        The value stored under ``['solution']['text']`` in the handler's
        result.
    """
    print('Solving the captcha image')
    ANTICAPTCHA_KEY = 'masked'
    solver = ImageToTextTask.ImageToTextTask(anticaptcha_key=ANTICAPTCHA_KEY)
    result = solver.captcha_handler(captcha_file=captcha_file)
    solution = result['solution']
    captcha_text = solution['text']
    print('Captcha text is :', captcha_text)
    return captcha_text
# Address of the login page that is opened in the browser.
url = 'https://masked/'

# Start a Firefox instance driven by Selenium and load the page; the other
# functions in this script use this module-level ``browser`` object.
browser = webdriver.Firefox()
browser.get(url)
def Login():
    """Fill in the login form once, including a freshly solved captcha.

    Saves the captcha image, types the user name and password, solves the
    captcha, submits the form and then inspects the error element.

    Returns:
        bool: ``False`` when the error element exists and its text is
        'The verification code is incorrect.'; ``True`` in every other case.
    """
    SaveImage()
    sleep(5)

    # User name and password are filled in the same way: find, clear, type.
    credentials = (
        ("masked_username", "testuser"),
        ("masked", "testpass"),
    )
    for element_id, value in credentials:
        field = browser.find_element_by_id(element_id)
        field.clear()
        field.send_keys(value)

    # The captcha box is cleared first, then the saved image is solved and
    # the answer typed in.
    captcha_box = browser.find_element_by_id("masked")
    captcha_box.clear()
    captcha_box.send_keys(SolveCapcha())

    browser.find_element_by_id("masked").click()
    sleep(5)

    errors = browser.find_elements_by_id('masked')
    if errors and errors[0].text == 'The verification code is incorrect.':
        print(errors[0].text)
        return False
    return True
"""The logic here is that the image gets downloaded using the cookies but sometimes
the letters are hard to solve. Each time we download the image with the
same cookies, its text stays the same but the way it is drawn is different,
so we keep retrying until the solver gets it right. """
# Retry until Login() reports success. Login() must be called exactly once per
# iteration: calling it again in the loop body would throw away that attempt's
# result and then run one more attempt from the loop condition, even if the
# discarded attempt had already logged in.
while not Login():
    pass
使用 requests 和 BeautifulSoup:
以下只是一个思路,我不确定它是否有效,您需要自行测试:
from bs4 import BeautifulSoup
def SaveImage(captcha_file="master.jpg"):
    """Download the captcha image through the shared session and save it.

    The request is made with the module-level ``session`` object and
    browser-like headers. The image bytes are written to ``captcha_file``
    only when the server answers with HTTP 200; on any other status nothing
    is written.

    Args:
        captcha_file: Path of the file the image bytes are written to.
    """
    print('Saving the captcha image')
    request_headers = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Host': 'masked',
        'Referer': 'https://masked/',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    }
    # NOTE: verify=False turns off TLS certificate verification for this call.
    response = session.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=request_headers,
    )
    if response.status_code != 200:
        return
    with open(captcha_file, 'wb') as image_out:
        image_out.write(response.content)
# Run the whole login flow inside one requests session: page fetch, captcha
# download, captcha solving and form submission.
with requests.Session() as session:
    # First request: fetch the page to get the initial cookies.
    landing_page = session.get('https://masked/', verify=False)

    # Read the value of the named <input> element from the returned HTML.
    page_dom = BeautifulSoup(landing_page.text, 'html.parser')
    token_input = page_dom.find('input', {'name': 'masked'})
    token = token_input.get('value')

    SaveImage()
    captcha_text = SolveCapcha()

    # NOTE(review): the key "masked" appears twice in this literal (the field
    # names were redacted). As written, the last entry overwrites the token,
    # so substitute the real, distinct field names before use.
    post_data = {
        "masked": token,
        'masked[username]': 'testuser',
        'masked[password]': 'testpass',
        'masked[captcha]': captcha_text,
        'masked': '',
    }
    session.post('https://masked/', data=post_data, verify=False)