【问题标题】:Why am I getting response 400 in Python requests? 为什么我在使用 Python requests 时收到响应 400?
【发布时间】:2021-07-19 06:52:23
【问题描述】:

我想爬取https://www.ketto.org/crowdfunding/fundraisers。我找到了一个 url https://nn2uorrizx-dsn.algolia.net/1/indexes/*/queries ,我可以从中获取来自 post 请求的数据,但我得到的响应是 400 而不是 200。请帮我抓取数据!

这是我的代码:

    import requests

    # HTTP headers sent with the POST below (passed as `headers=`).
    # Note that Content-Type declares a form-encoded body.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0',
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.5',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://www.ketto.org',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'cross-site',
        'Referer': 'https://www.ketto.org/',
        'Connection': 'keep-alive',
    }

    # Passed as `params=` below, so requests appends these pairs to the URL
    # query string.
    params = (
        ('x-algolia-agent', 'Algolia for JavaScript (3.35.1); Browser (lite); angular (8.2.14); angular-instantsearch (3.0.0-beta.4); instantsearch.js (3.7.0); JS Helper (2.28.1)'),
        ('x-algolia-application-id', 'NN2UORRIZX'),
        ('x-algolia-api-key', 'b2caa1b0589e8db9398d5fe2a40bbaed'),
    )

    # Passed as `data=` below: requests form-encodes a list of (key, value)
    # tuples into the request body. The first key and several of the
    # 'facetFilters' values contain fragments of a JSON-like structure
    # ('{requests:[{indexName:...', '}]}').
    # NOTE(review): this looks like a JSON body that was split on '&' and '='
    # into form fields -- confirm against the request shown in the browser.
    data = [
      ('{requests:[{indexName:fundraiser_prod,params:query', ''),
      ('hitsPerPage', '9'),
      ('hitsPerPage', '1'),
      ('hitsPerPage', '1'),
      ('hitsPerPage', '1'),
      ('maxValuesPerFacet', '10'),
      ('maxValuesPerFacet', '10'),
      ('maxValuesPerFacet', '10'),
      ('maxValuesPerFacet', '10'),
      ('page', '1'),
      ('page', '0'),
      ('page', '0'),
      ('page', '0'),
      ('highlightPreTag', '__ais-highlight__'),
      ('highlightPreTag', '__ais-highlight__'),
      ('highlightPreTag', '__ais-highlight__'),
      ('highlightPreTag', '__ais-highlight__'),
      ('highlightPostTag', '__/ais-highlight__'),
      ('highlightPostTag', '__/ais-highlight__'),
      ('highlightPostTag', '__/ais-highlight__'),
      ('highlightPostTag', '__/ais-highlight__'),
      ('facets', '["cause.label","tags","address"]'),
      ('facets', '["cause.label"]'),
      ('facets', '["tags"]'),
      ('facets', '["address"]'),
      ('tagFilters', ''),
      ('tagFilters', ''),
      ('tagFilters', ''),
      ('tagFilters', ''),
      ('facetFilters', '[["cause.label:"],["tags:"],["address:"]]},{indexName:fundraiser_prod,params:query='),
      ('facetFilters', '[["tags:"],["address:"]]},{indexName:fundraiser_prod,params:query='),
      ('facetFilters', '[["cause.label:"],["address:"]]},{indexName:fundraiser_prod,params:query='),
      ('facetFilters', '[["cause.label:"],["tags:"]]}]}'),
      ('attributesToRetrieve', '[]'),
      ('attributesToRetrieve', '[]'),
      ('attributesToRetrieve', '[]'),
      ('attributesToHighlight', '[]'),
      ('attributesToHighlight', '[]'),
      ('attributesToHighlight', '[]'),
      ('attributesToSnippet', '[]'),
      ('attributesToSnippet', '[]'),
      ('attributesToSnippet', '[]'),
      ('analytics', 'false'),
      ('analytics', 'false'),
      ('analytics', 'false'),
      ('clickAnalytics', 'false'),
      ('clickAnalytics', 'false'),
      ('clickAnalytics', 'false'),
    ]

    # Send the POST with the headers, query-string parameters and form body
    # defined above.
    response = requests.post('https://nn2uorrizx-dsn.algolia.net/1/indexes/*/queries', headers=headers, params=params, data=data)

    # Prints the Response object itself (its repr shows the status code),
    # not the response body.
    print(response)

如果有其他使用 Python requests 模块抓取 https://www.ketto.org/crowdfunding/fundraisers 的建议,我很乐意听取。提前谢谢你。

【问题讨论】:

    标签: python-3.x post web-scraping python-requests web-crawler


    【解决方案1】:

    要从服务器获得正确的响应,请在requests 中使用json= 参数:

    import json
    import requests
    
    # Endpoint that receives the batch of search queries in a single POST.
    url = "https://nn2uorrizx-dsn.algolia.net/1/indexes/*/queries"

    # Client identification and credentials; `params=` puts them in the URL
    # query string.
    params = {
        "x-algolia-agent": "Algolia for JavaScript (3.35.1); Browser (lite); angular (8.2.14); angular-instantsearch (3.0.0-beta.4); instantsearch.js (3.7.0); JS Helper (2.28.1)",
        "x-algolia-application-id": "NN2UORRIZX",
        "x-algolia-api-key": "b2caa1b0589e8db9398d5fe2a40bbaed",
    }

    # Request body: a "requests" list with four queries against the
    # "fundraiser_prod" index. Each "params" value is an already URL-encoded
    # query string, so it must be sent verbatim inside the JSON document.
    data = {
        "requests": [
            {
                "indexName": "fundraiser_prod",
                "params": "query=&hitsPerPage=9&maxValuesPerFacet=10&page=0&highlightPreTag=__ais-highlight__&highlightPostTag=__%2Fais-highlight__&facets=%5B%22cause.label%22%2C%22tags%22%2C%22address%22%5D&tagFilters=&facetFilters=%5B%5B%22cause.label%3A%22%5D%2C%5B%22tags%3A%22%5D%2C%5B%22address%3A%22%5D%5D",
            },
            {
                "indexName": "fundraiser_prod",
                "params": "query=&hitsPerPage=1&maxValuesPerFacet=10&page=0&highlightPreTag=__ais-highlight__&highlightPostTag=__%2Fais-highlight__&attributesToRetrieve=%5B%5D&attributesToHighlight=%5B%5D&attributesToSnippet=%5B%5D&tagFilters=&analytics=false&clickAnalytics=false&facets=%5B%22cause.label%22%5D&facetFilters=%5B%5B%22tags%3A%22%5D%2C%5B%22address%3A%22%5D%5D",
            },
            {
                "indexName": "fundraiser_prod",
                "params": "query=&hitsPerPage=1&maxValuesPerFacet=10&page=0&highlightPreTag=__ais-highlight__&highlightPostTag=__%2Fais-highlight__&attributesToRetrieve=%5B%5D&attributesToHighlight=%5B%5D&attributesToSnippet=%5B%5D&tagFilters=&analytics=false&clickAnalytics=false&facets=%5B%22tags%22%5D&facetFilters=%5B%5B%22cause.label%3A%22%5D%2C%5B%22address%3A%22%5D%5D",
            },
            {
                "indexName": "fundraiser_prod",
                "params": "query=&hitsPerPage=1&maxValuesPerFacet=10&page=0&highlightPreTag=__ais-highlight__&highlightPostTag=__%2Fais-highlight__&attributesToRetrieve=%5B%5D&attributesToHighlight=%5B%5D&attributesToSnippet=%5B%5D&tagFilters=&analytics=false&clickAnalytics=false&facets=%5B%22address%22%5D&facetFilters=%5B%5B%22cause.label%3A%22%5D%2C%5B%22tags%3A%22%5D%5D",
            },
        ]
    }

    # `json=` serialises `data` to a JSON body (instead of form-encoding it).
    # The timeout keeps the script from blocking forever if the server never
    # answers; requests applies no timeout by default.
    response = requests.post(url, params=params, json=data, timeout=30)

    # Fail loudly on a 4xx/5xx status instead of decoding an error body.
    response.raise_for_status()

    # Keep the payload (`data`) and the decoded response separate.
    print(json.dumps(response.json(), indent=4))
    

    打印:

    {
        "results": [
            {
                "hits": [
                    {
                        "id": 180773,
                        "title": "Feeding From Far-Ration Distribution Amid Lockdown",
                        "end_date": "2021-09-11 23:59:59",
                        "amount_requested": 65000000,
                        "entity_details_id": 1505699,
                        "creator_entity_details_id": 1463388,
                        "address_1": "Mumbai",
                        "creation_date": "2020-04-05 15:10:12",
                        "parent_cause_id": 48,
                        "event_entity_details_id": 1399904,
                        "sucess_story_flag": 0,
                        "custom_tag": "FeedingFromFarForCorona",
                        "beneficiary": {
                            "full_name": "Prayatna "
                        },
                        "raised": {
                            "campaign_id": 180773,
                            "backers": 10616,
                            "raised": "45094771.40",
                            "usdraised": "644211.02",
                            "currencies": {
                                "INR": 45094771,
                                "USD": 644211,
                                "GBP": 501053,
                                "EUR": 536843,
                                "AED": 2254739,
                                "SGD": 867207,
                                "SAR": 2254739
                            }
                        },
                        "widget": {
                            "media_type": "image",
                            "file_name": "wid60a133a89542c.jpg",
                            "path": "/media/campaign/180000/180773/image/",
                            "cdn_path": "https://d1vdjc70h9nzd9.cloudfront.net/media/campaign/180000/180773/image/wid60a133a89542c.jpg"
                        },
                        "cause": {
                            "info_1": "Food & Hunger",
                            "info_3": "Orange",
                            "label": "Food & Hunger"
                        },
                        "campaigner": {
                            "id": 1463388,
                            "fname": "Pooja Reddy",
                            "lname": null,
                            "full_name": "Pooja Reddy ",
                            "disable_foreign_donation": 0,
                            "no_80g": false,
                            "user_details_id": 1470493,
                            "entity_type": "individual",
                            "tax_benefit": false,
                            "avtar": {
                                "entity_type_id": 1463388,
                                "entity_type": "individual",
                                "media_type": "image",
                                "file_name": "60efdb6db4758.jpg",
                                "path": "/media/individual/1463000/1463388/image/",
                                "cdn_path": "https://d1vdjc70h9nzd9.cloudfront.net/media/individual/1463000/1463388/image/60efdb6db4758.jpg"
                            }
                        },
                        "taxexempted": {
                            "id": 1505699,
                            "no_80g": true,
                            "entity_type": "ngo",
                            "tax_benefit": true
                        },
    
    ... and so on.
    }
    

    【讨论】:

    • 非常感谢。请问您是在哪里找到 data 变量中的 requests 列表的?
    • @SachinGupta 我在 Firefox 开发者工具 -> 网络选项卡中打开了 https://www.ketto.org/crowdfunding/fundraisers(Chrome 有类似的东西)并在那里看到了请求(以及所有需要的参数)。
    • 我在 Firefox 中看到了,但我无法像你一样格式化它。你是怎么做到的?
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2012-10-04
    • 2014-12-06
    • 2012-06-03
    • 1970-01-01
    • 2022-01-19
    相关资源
    最近更新 更多