代码不全,仅展示大致思路。
原理:
浏览器(客户端):在浏览器中注入一段JS代码,与WebSocket服务端建立连接;收到参数后调用浏览器中的js方法,并把返回的数据发送给服务端。
node(服务端):用node启动js代码,监听某端口;把python发过来的参数转发给浏览器处理,接收处理结果后,再把结果转发回python。
python(调用者):把参数发送给node服务端,并接收node传回来的数据。

优点:
  1.对于js混淆加密较深的网站,可以采用此方法。
  2.不用抠js加密代码,直接调用浏览器环境。
缺点:
  1.如果网站有selenium检测,要想使用此方法,必须先绕过selenium检测,否则只能使用真机进行js注入。
  2.需要node环境,并且要写一个websocket服务端和客户端。
  3.速度没有直接破解js快。

服务端--WebSocketServer.js

let iconv = require('iconv-lite')
var ws = require("nodejs-websocket");
 
console.log("开始建立连接...")

// First-frame handshake strings a peer sends to register itself with the relay.
var REGISTRATION_MESSAGES = ["Browser", "Python"];

/**
 * Relay server listening on port 10512.
 *
 * A connection registers by sending "Browser" or "Python" as a text frame.
 * Every later text frame from a registered connection is forwarded verbatim
 * to all other open connections; frames from unregistered connections are
 * dropped.
 */
var server = ws.createServer(function (conn) {
    // True once this connection has sent a registration handshake. The
    // original kept a per-connection map that could only ever contain this
    // connection's own key, which is equivalent to this flag.
    var registered = false;

    conn.on("text", function (msg) {
        if (!msg) return;

        if (REGISTRATION_MESSAGES.includes(msg)) {
            // First contact from the browser or from Python.
            registered = true;
            return;
        }
        if (!registered) return;

        // Forward the payload to every connection except the sender.
        server.connections.forEach(function (peer) {
            if (peer.key !== conn.key) {
                peer.send(msg);
            }
        });
    })
    conn.on("close", function (code, reason) {
        // Nothing to clean up: registration state lives in this closure.
    });
    // The "error" event carries the error object; log it so failures are
    // diagnosable. A listener must stay attached, otherwise the error would
    // be thrown as an unhandled "error" event.
    conn.on("error", function (err) {
        console.log("异常关闭", err)
    });
}).listen(10512)

console.log("WebSocket建立完毕")

客户端注入JS代码

createSocket();

/**
 * Connect the page to the local relay server and serve requests from it.
 *
 * On open the socket registers itself by sending "Browser". Every incoming
 * message is treated as a URL: it is fetched with a GET request and the JSON
 * response body is sent back over the socket. If the request fails or the
 * body is not valid JSON, a JSON object with an "error" field is sent back
 * instead, so the peer waiting for the answer is not left hanging.
 * When the socket closes, a reconnect is scheduled after 60 seconds.
 */
function createSocket() {
    window.ws = new WebSocket('ws://127.0.0.1:10512/');
    window.ws.onopen = function (e) {
        console.log("连接服务器成功");
        // Register this peer with the relay.
        window.ws.send("Browser");
    }
    window.ws.onclose = function (e) {
        console.log("服务器关闭");
        // Try to reconnect after one minute.
        setTimeout(createSocket, 60000);
    }
    window.ws.onerror = function () {
        console.log("连接出错");
    }

    window.ws.onmessage = function (e) {
        var requestUrl = e.data;
        // NOTE(review): `glb` is not defined in this snippet; presumably a
        // global provided by the target page. Confirm before reuse.
        var xmlhttp = new glb.XMLHttpRequest();

        // Report a failure to the peer instead of calling alert(): a modal
        // dialog blocks the page and leaves the peer waiting forever.
        function reportFailure(reason) {
            console.error("Problem retrieving XML data", reason);
            window.ws.send(JSON.stringify({error: reason, url: requestUrl}));
        }

        function state_Change() {
            if (xmlhttp.readyState != 4) {
                return;
            }
            if (xmlhttp.status != 200) {
                reportFailure("HTTP status " + xmlhttp.status);
                return;
            }
            var result;
            try {
                // Round-trip through JSON so only well-formed JSON is relayed.
                result = JSON.stringify(JSON.parse(xmlhttp.responseText));
            } catch (err) {
                reportFailure("invalid JSON response: " + err.message);
                return;
            }
            // Send the result back (relayed to Python by the server).
            window.ws.send(result);
        }
        xmlhttp.onreadystatechange = state_Change;
        xmlhttp.open('GET', requestUrl, true);
        xmlhttp.send(null);
    }
}

python开端口

# -*- coding: utf-8 -*-
from sanic import Sanic
from sanic.response import json
import os
import urllib3

from toutiao2_文件方式.get_data import get_data
from toutiao2_文件方式.get_user_id import get_user

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
app = Sanic(__name__)




# An explicit route name keeps this route distinct from the other handler that
# shares the function name `captcha_server`; Sanic derives route names from the
# handler name by default and rejects duplicates.
@app.route("/get_user_id", methods=["GET"], name="get_user_id")
def captcha_server(request):
    """Handle GET /get_user_id?media_id=... by triggering a user lookup.

    Returns a JSON response: ``{"result": ...}`` on success, a 400 error when
    ``media_id`` is missing, or a 500 error carrying the exception message
    when the lookup fails (previously every failure was silently swallowed and
    no response object was returned).
    """
    try:
        media_id = request.args['media_id'][0]
    except (KeyError, IndexError):
        return json({"error": "missing required query parameter: media_id"}, status=400)
    try:
        result = get_user_id(media_id)
    except Exception as e:  # top-level boundary: report instead of hiding it
        return json({"error": str(e)}, status=500)
    return json({"result": result})


# An explicit route name keeps this route distinct from the other handler that
# shares the function name `captcha_server`; Sanic derives route names from the
# handler name by default and rejects duplicates.
@app.route("/get_data", methods=["GET"], name="get_data")
def captcha_server(request):
    """Handle GET /get_data?user_id=...&offset=... by triggering a data fetch.

    Returns a JSON response: ``{"result": ...}`` on success, a 400 error when
    a query parameter is missing, or a 500 error carrying the exception
    message when the fetch fails (previously every failure was silently
    swallowed and no response object was returned).
    """
    try:
        user_id = request.args['user_id'][0]
        offset = request.args['offset'][0]
    except (KeyError, IndexError):
        return json({"error": "missing required query parameters: user_id, offset"}, status=400)
    try:
        result = get_res(user_id, offset)
    except Exception as e:  # top-level boundary: report instead of hiding it
        return json({"error": str(e)}, status=500)
    return json({"result": result})


def get_user_id(media_id):
    """Delegate to ``get_user`` for ``media_id`` and pass its result through."""
    return get_user(media_id)



def get_res(user_id, offset):
    """Delegate to ``get_data`` for ``user_id``/``offset`` and pass its result through."""
    return get_data(user_id, offset)


if __name__ == "__main__":
    # Serve the HTTP endpoints on the loopback interface, port 4007.
    app.run(host="127.0.0.1", port=4007)

get_data.py 文件方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale

_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

class CG_Client(WebSocketClient):
    """WebSocket client that registers as the Python peer and stores one reply.

    The first message received is written to ``./data.txt`` via ``write_data``
    and the connection is then closed.
    """

    def opened(self):
        """Register with the relay by sending "Python" once connected."""
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Connection-closed hook; intentionally does nothing."""
        # print("Closed down:", code, reason)
        pass

    def received_message(self, resp):
        """Persist the received payload, then close this connection."""
        data = resp.data.decode("utf-8")
        try:
            write_data(data)
        finally:
            # Close through ``self``: the original referenced a name ``ws``
            # that only exists as a local variable inside ``get_data``, so the
            # call raised NameError here.
            self.close()




def write_data(data):
    """Overwrite ``./data.txt`` (UTF-8) with the text ``data``."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./data.txt', 'w', encoding='utf-8') as f:
        f.write(data)


def get_data(user_id, offset):
    """Send the profile-feed request path for ``user_id``/``offset`` through the relay.

    Opens a ``CG_Client`` connection to the local relay, sends the request
    path and blocks in ``run_forever()`` until the client stops. Returns None.
    """
    client = CG_Client('ws://127.0.0.1:10512/')
    client.connect()
    try:
        request_path = f"/api/feed_backflow/profile_share/v1/?category=profile_article&visited_uid={user_id}&stream_api_version=82&request_source=1&offset={offset}&user_id={user_id}&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=288&screen_height=511&browser_language=zh-CN&browser_platform=MacIntel&browser_name=firefox&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
        # Short pause before sending; presumably gives the "Python"
        # registration from opened() time to go out first. TODO confirm.
        time.sleep(0.1)
        client.send(request_path)
        client.run_forever()
    except KeyboardInterrupt:
        print('异常关闭')
        client.close()

get_user_id.py 文件方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# media_id = sys.argv[1].split(',', 1)[0]   # sys.argv--> [get_attention.py,user_id,cursor]



class CG_Client(WebSocketClient):
    """WebSocket client that registers as the Python peer and stores one reply.

    The first message received is written to ``./user.txt`` via ``write_user``
    and the connection is then closed.
    """

    def opened(self):
        """Register with the relay by sending "Python" once connected."""
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Connection-closed hook; intentionally does nothing."""
        # print("Closed down:", code, reason)
        pass

    def received_message(self, resp):
        """Persist the received payload, then close this connection."""
        data = resp.data.decode("utf-8")
        try:
            write_user(data)
        finally:
            # Close through ``self``: the original referenced a name ``ws``
            # that only exists as a local variable inside ``get_user``, so the
            # call raised NameError here.
            self.close()
def write_user(data):
    """Overwrite ``./user.txt`` (UTF-8) with the text ``data``."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./user.txt', 'w', encoding='utf-8') as f:
        f.write(data)

def get_user(media_id):
    """Send the profile-homepage request path for ``media_id`` through the relay.

    Opens a ``CG_Client`` connection to the local relay, sends the request
    path and blocks in ``run_forever()`` until the client stops. Returns None.
    """
    client = CG_Client('ws://127.0.0.1:10512/')
    client.connect()
    try:
        request_path = f"/user/profile/homepage/share/v7/?media_id={media_id}&request_source=1&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=393&screen_height=882&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
        # Short pause before sending; presumably gives the "Python"
        # registration from opened() time to go out first. TODO confirm.
        time.sleep(0.1)
        client.send(request_path)
        client.run_forever()
    except KeyboardInterrupt:
        print('异常关闭')
        client.close()

get_data.py 终端方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale

_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


# Re-wrap stdout so that printed text is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# First CLI argument: the part before the first comma is used as the user id.
user_id = sys.argv[1].split(',', 1)[0]   # sys.argv--> [get_attention.py,user_id,cursor]
# Second CLI argument: the paging offset, kept as a string.
offset = str(sys.argv[2])

class CG_Client(WebSocketClient):
    """WebSocket client that registers as the Python peer and prints one reply."""

    def opened(self):
        """Announce the connection and register with the relay as "Python"."""
        print("连接成功")
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Log the close code and reason."""
        print("Closed down:", code, reason)

    def received_message(self, resp):
        """Print the received payload, then close this connection."""
        data = resp.data.decode("utf-8")
        print(data)
        # Close through ``self`` rather than the module-level ``ws`` variable,
        # so the class does not depend on how the script names its instance.
        self.close()


# Script body: connect to the local relay, send one request path built from the
# CLI arguments, and block until the client stops.
try:
    ws = CG_Client('ws://127.0.0.1:10512/')
    ws.connect()

    real_arg = f"/api/feed_backflow/profile_share/v1/?category=profile_article&visited_uid={user_id}&stream_api_version=82&request_source=1&offset={offset}&user_id={user_id}&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=288&screen_height=511&browser_language=zh-CN&browser_platform=MacIntel&browser_name=firefox&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
    # Short pause before sending; presumably gives the "Python" registration
    # from opened() time to go out first. TODO confirm.
    time.sleep(0.1)
    ws.send(real_arg)
    ws.run_forever()
except KeyboardInterrupt:
    # Ctrl-C: close the connection before exiting.
    ws.close()

get_user_id.py 终端方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


# Re-wrap stdout so that printed text is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# First CLI argument: the part before the first comma is used as the media id.
media_id = sys.argv[1].split(',', 1)[0]   # sys.argv--> [get_attention.py,user_id,cursor]

class CG_Client(WebSocketClient):
    """WebSocket client that registers as the Python peer and prints one reply."""

    def opened(self):
        """Announce the connection and register with the relay as "Python"."""
        print("连接成功")
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        """Log the close code and reason."""
        print("Closed down:", code, reason)

    def received_message(self, resp):
        """Print the received payload, then close this connection."""
        data = resp.data.decode("utf-8")
        print(data)
        # Close through ``self`` rather than the module-level ``ws`` variable,
        # so the class does not depend on how the script names its instance.
        self.close()


# Script body: connect to the local relay, send one request path built from the
# CLI argument, and block until the client stops.
try:
    ws = CG_Client('ws://127.0.0.1:10512/')
    ws.connect()

    real_arg = f"/user/profile/homepage/share/v7/?media_id={media_id}&request_source=1&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=393&screen_height=882&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
    # Short pause before sending; presumably gives the "Python" registration
    # from opened() time to go out first. TODO confirm.
    time.sleep(0.1)
    ws.send(real_arg)
    ws.run_forever()
except KeyboardInterrupt:
    # Ctrl-C: close the connection before exiting.
    ws.close()

爬虫调用者

import time

import requests
import json
import urllib3

from toutiao2_文件方式.get_user_id import get_user, CG_Client

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def open_user():
    """Load and return the JSON document stored in ``./user.txt`` (UTF-8)."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./user.txt', 'r', encoding='utf-8') as f:
        return json.load(f)

def open_data():
    """Load and return the JSON document stored in ``./data.txt`` (UTF-8)."""
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('./data.txt', 'r', encoding='utf-8') as f:
        return json.load(f)

# Exchange a media_id for the matching user_id
def start_ocean_toutiao_user_id(media_id):
    """Look up the user_id that belongs to ``media_id``.

    Asks the local HTTP service to fetch the profile, waits two seconds, then
    reads the result from ``./user.txt``. Returns the ``user_id`` when the
    stored ``media_id`` matches the requested one; otherwise prints a warning
    and returns None. A payload without a usable ``data``/``media_id`` entry
    is treated as a mismatch instead of raising.
    """
    data = {
        'media_id': media_id,
    }
    requests.get('http://127.0.0.1:4007/get_user_id', params=data, timeout=3)
    time.sleep(2)
    response = open_user()
    payload = response.get('data') if isinstance(response, dict) else None
    if not isinstance(payload, dict):
        payload = {}
    expected_media_id = int(media_id)
    try:
        matches = int(payload.get('media_id')) == expected_media_id
    except (TypeError, ValueError):
        # Missing or non-numeric media_id in the stored response.
        matches = False
    if matches:
        return payload.get('user_id')
    print('media不对应,请检查')
    return None


# Fetch data through the websocket relay
def start_ocean_toutiao_data(user_id, offset):
    """Fetch the feed data for ``user_id`` starting at ``offset``.

    Returns None (after printing a hint) when ``user_id`` is None. Otherwise
    asks the local HTTP service to fetch the data and returns the JSON
    document read back from ``./data.txt``.
    """
    if user_id is None:
        print('没有获取到user_id,请检查原因。可能消息堆积错误')
        return None
    data = {
        'user_id': user_id,
        'offset': offset
    }
    requests.get('http://127.0.0.1:4007/get_data', params=data, timeout=3)
    response = open_data()
    return response

def get_response(media_id, offset):
    """Resolve ``media_id`` to a user id, fetch that user's data, and return it.

    Both intermediate results are printed as they become available.
    """
    resolved_user_id = start_ocean_toutiao_user_id(media_id)
    print(resolved_user_id)
    feed = start_ocean_toutiao_data(resolved_user_id, offset)
    print(feed)
    return feed


if __name__ == '__main__':
    # Demo run: a single iteration with hard-coded sample ids.
    for i in range(1):
        offset = 1587744000
        # media_id = 6860767764
        media_id = 6989633739
        user_id = start_ocean_toutiao_user_id(media_id)
        print(user_id)
        # user_id = 6860406890
        data = start_ocean_toutiao_data(user_id, offset)
        print(data)
        # Runs the same two-step lookup again through the combined helper.
        get_response(media_id, offset)
    pass

分类:

技术点:

相关文章: