【问题标题】:PhantomJS persistent cookies and Javascript(PhantomJS 持久性 cookie 和 Javascript)
【发布时间】:2016-08-25 22:42:39
【问题描述】:

一天中的大部分时间我都在苦苦挣扎。简而言之,我正在尝试通过 Node.js 模块使用 PhantomJS 登录亚马逊。我的问题的简短版本是亚马逊给我一条消息说使用该网站需要 cookie。

这是我目前的资源...

NPM's phantom module

Working example of logging into Amazon using PhantomJS

SO question addressing persistent cookies in PhantomJS

Another SO question about cookies set by Javascript

最后一个问题特别有趣,因为第一个答案针对的是用户代理(我已经尝试了至少 3 或 4 个相同的结果),而第二个答案指向我认为可能是我的问题。简而言之,亚马逊可能会尝试通过 javascript 设置一个测试 cookie,然后检查该 cookie 是否设置成功,以确定用户是否允许使用 cookie。我可以成功地确认我的 cookie 文件正在创建,并且亚马逊已经在文件中设置了 cookie,但是在提交登录表单时这些显然是不够的,因为在下一页我被 cookie 警告阻止了。这让我相信最后一个问题中的用户是正确的 - 我的页面的 Javascript 没有触发,尽管试图确保它是。

最后,我的 page.render 显示一条亚马逊消息,说我需要启用 cookie 才能继续。这是我的代码...

'use strict';

/**
 * Module dependencies.
 */
var mongoose = require('mongoose'),
  phantom = require('phantom'),
  // Admin = mongoose.model('Admin'),
  Item = mongoose.model('Item'),
  config = require('../config/config');


/**
 * Check function.
 *
 * Starts a PhantomJS instance through the `phantom` module, creates a page and
 * walks it through the `steps` array: open the Amazon sign-in URL, fill in the
 * `ap_email` / `ap_password` fields, submit the `signIn` form, then open the
 * orders report URL (`url2`). A 50 ms polling loop runs the next step whenever
 * `loadInProgress` is false and renders a screenshot to
 * `config/images/step<N>.png` right after each step function returns.
 *
 * Takes no arguments and returns nothing; results are only visible through
 * console output and the rendered screenshots.
 */
module.exports.check= function() {
  var loadInProgress = false,
    // This outer binding is shadowed by the `var interval` declared inside the
    // createPage callback below and is not read anywhere in this function.
    interval = '',
    // Index of the next entry in `steps` to run.
    testindex = 0,
    // NOTE(review): __dirname is concatenated directly with 'cookies.txt' with
    // no path separator in between — confirm this is the intended file location.
    cookiePath = __dirname + 'cookies.txt',
    // Sign-in page; the openid.return_to parameter points at the affiliate-program home.
    url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
    // tag, periodType and preSelectedPeriod are not referenced again in this function.
    tag = config.defaultAffiliateTag,
    periodType = 'preSelected',
    preSelectedPeriod = 'yesterday',
    // url2 is for order data
    url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX',
    // url3 is for earnings data (declared here but not opened by any step below)
    url3 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=earnings&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Cseller%2Cprice%2Crate%2Cqty%2Crevenue%2Cearnings%2Cdevicetype&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

  // Launch PhantomJS with relaxed SSL/web-security flags and a cookies file at cookiePath.
  phantom.create([/* '--debug=true', */ '--ignore-ssl-errors=true', '--ssl-protocol=any', '--web-security=false', '--cookies-file=' + cookiePath]).then(function(ph) {
    ph.createPage().then(function(page) {

      // Mark a navigation as in flight so the polling loop pauses.
      page.on('onLoadStarted', function() {
        loadInProgress = true;
      });

      // Clear the in-flight flag only on a successful load; on any other
      // status the flag is left unchanged and a message is logged.
      page.on('onLoadFinished', function(response) {
        if (response === 'success') {
          loadInProgress = false;
        } else {
          console.log('Phantom page failed to load.');
        }
      });

      // Log page errors together with their stack trace, one frame per line.
      page.on('onError', function(msg, trace) {
        var msgStack = ['ERROR: ' + msg];
        if (trace && trace.length) {
          msgStack.push('TRACE:');
          trace.forEach(function(t) {
            msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
          });
        }
        console.error(msgStack.join('\n'));
      });

      // Log failed resource requests and clear the in-flight flag so the
      // polling loop can continue with the next step.
      page.on('onResourceError', function(resourceError) {
        console.log('= onResourceError()');
        console.log('  - unable to load url: "' + resourceError.url + '"');
        console.log('  - error code: ' + resourceError.errorCode + ', description: ' + resourceError.errorString);
        loadInProgress = false;
      });

      // Ordered list of actions; the polling loop below calls them one by one.
      var steps = [
        // Step 1
        function() {
          // Load the initial login page.
          console.log('--- JAVASCRIPT ---')

          // This is where I try to ensure my page has Javascript Enabled.
          // val outputs true here.
          // The step function returns before this promise callback runs, so
          // loadInProgress is still false when control goes back to the
          // polling loop; later steps may therefore start before page.open.
          page.setting('javascriptEnabled').then(function(val) {
            console.log('val: ' + val);
            // NOTE(review): the setting key used here is 'settings.userAgent';
            // confirm against the phantom module docs whether 'userAgent' is expected.
            page.setting('settings.userAgent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36');
            loadInProgress = true;
            page.open(url);
          })
        },
        // Step 2
        function() {
          // Update username/password.
          // Runs inside the page context and writes the two inputs by element id.
          page.evaluate(function() {
            document.getElementById('ap_email').value = 'XXXX';
            document.getElementById('ap_password').value = 'XXXX';
          });
        },
        // Step 3
        function() {
          // Login.
          // Flag a load up front so the polling loop waits for the form submission.
          loadInProgress = true;
          page.evaluate(function() {
            document.forms['signIn'].submit();
          });
        },
        // Step 4
        function() {
          // Open the orders report URL.
          loadInProgress = true;
          page.open(url2);
        }
      ];

      // Poll every 50 ms: when no load is in flight, run the next step, log
      // its 1-based index and request a screenshot. The render call is issued
      // immediately after the step function returns; the promises returned by
      // the page calls inside the steps are not awaited.
      var interval = setInterval(function() {
        if (!loadInProgress && typeof steps[testindex] === 'function') {
          steps[testindex]();
          console.log('Test Index: ' + (testindex + 1));
          page.render('config/images/step' + (testindex + 1) + '.png');
          testindex++;
        }
        // Once every step has been dispatched, stop polling and exit
        // PhantomJS after a 5 second delay.
        if (typeof steps[testindex] !== 'function') {
          clearInterval(interval);
          setTimeout(function() {
            ph.exit();
          }, 5000);
        }
      }, 50);
    });
  });
};

我得到的结果是以下输出:

    --- JAVASCRIPT ---
    Test Index: 1
    val: true
    Test Index: 2
    Test Index: 3
    Test Index: 4
    = onResourceError()
      - unable to load url: "https://sentry.amazon.com/SSO/redirect?response_typ
e=id_token&client_id=affiliate-program.amazon.com%3A443&redirect_uri=https%3A%2F
%2Faffiliate-program.amazon.com%3A443%2Fhome%2Freports%2Ftable.json%3Fquery%255B
type%255D%3Dorders%26query%255Bstart_date%255D%3D2016-05-28%26query%255Bend_date
%255D%3D2016-06-26%26query%255Btag_id%255D%3D189318233%26query%255Bdevice_type%2
55D%3Dall%26query%255Blast_accessed_row_index%255D%3D0%26query%255Bcolumns%255D%
3Dtitle%252Casin%252Ccategory%252Cclicks%252Cconversion%252Cseller%252Cdqty%252C
nqty%252Cqty%26query%255Bskip%255D%3D0%26query%255Bsort%255D%3Dasin%26query%255B
limit%255D%3D25%26store_id%3XXXX&scope=openid&nonce=5d8a3f10bb3746c799
a05a927b0204f3c0629d5c8c5646bb49ccdcd93f07247e&sentry_handler_version=TomcatSSOF
ilter-1.1-1"
      - error code: 5, description: Operation canceled
    Phantom page failed to load.

谁能告诉我我可能缺少什么?

【问题讨论】:

    标签: javascript node.js cookies phantomjs


    【解决方案1】:

    这似乎是 PhantomJS 2.1.1(由 NPM 模块实现的版本)或 NPM 模块本身的问题。

    我使用 Horseman 和 PhantomJS 2.0.0 完全重写了脚本,并且立即就能正常工作了。为了方便后来者参考,下面给出可以正常工作的实现。我接触 Horseman 才一天,但已经喜欢上它的链式实现——比我用过的任何其他 Phantom 包装器都更简洁。

    'use strict';
    
    /**
     * Module dependencies.
     */
    var mongoose = require('mongoose'),
      Horseman = require('node-horseman'),
      phPath = __dirname + '\\phantomjs-2.0.0-windows\\bin\\phantomjs.exe',
      Item = mongoose.model('Item'),
      config = require('../config/config');
    
    
    /**
     * Logs in to Amazon through Horseman/PhantomJS and prints the affiliate
     * report pages.
     *
     * Flow: open the sign-in URL, fill in `#ap_email` / `#ap_password`, click
     * `#signInSubmit`, wait for the next page, then open the orders report
     * (`url2`) and the earnings report (`url3`) and print each page's plain
     * text. A screenshot is written to `config/images/step<N>.png` after each
     * stage.
     *
     * @returns {Promise} The Horseman chain, so callers can wait for completion
     *   or handle a rejection. Callers that ignore the return value behave as
     *   before.
     */
    module.exports.updateItems = function() {
      // __dirname has no trailing separator, so plain string concatenation
      // would yield "<dir>cookies.txt" next to the module directory instead of
      // a file inside it. path.join inserts the platform separator.
      var path = require('path'),
        cookiePath = path.join(__dirname, 'cookies.txt'),
        url = 'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Faffiliate%2Dprogram.amazon.com%2Fhome',
        // url2 is for order data
        url2 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=orders&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Ccategory%2Cclicks%2Cconversion%2Cseller%2Cdqty%2Cnqty%2Cqty&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX',
        // url3 is for earnings data
        url3 = 'https://affiliate-program.amazon.com/home/reports/table.json?query%5Btype%5D=earnings&query%5Bstart_date%5D=2016-05-28&query%5Bend_date%5D=2016-06-26&query%5Btag_id%5D=189318233&query%5Bdevice_type%5D=all&query%5Blast_accessed_row_index%5D=0&query%5Bcolumns%5D=title%2Casin%2Cseller%2Cprice%2Crate%2Cqty%2Crevenue%2Cearnings%2Cdevicetype&query%5Bskip%5D=0&query%5Bsort%5D=asin&query%5Blimit%5D=25&store_id=XXXX';

      var horseman = new Horseman({
        cookiesFile: cookiePath,
        ignoreSSLErrors: true,
        sslProtocol: 'any',
        webSecurity: false,
        timeout: 15000,
        phantomPath: phPath
      });

      // Shared by both report pages: print the page's plain text.
      function logPageText(txt) {
        console.log('Page results: ');
        console.dir(txt);
      }

      return horseman
        .userAgent('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36')
        .authentication('XXXX', 'XXXX')
        // Forward console output from the page to the Node console.
        .on('consoleMessage', function(msg) {
          console.log(msg);
        })
        // Log page errors together with their stack trace, one frame per line.
        .on('error', function(msg, trace) {
          var msgStack = ['ERROR: ' + msg];
          if (trace && trace.length) {
            msgStack.push('TRACE:');
            trace.forEach(function(t) {
              msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
            });
          }
          console.error(msgStack.join('\n'));
        })
        .open(url)
        .screenshot('config/images/step1.png')
        .waitForSelector('#ap_email')
        .value('#ap_email', 'XXXX')
        .waitForSelector('#ap_password')
        .value('#ap_password', 'XXXX')
        .screenshot('config/images/step2.png')
        .click('#signInSubmit')
        .waitForNextPage()
        .screenshot('config/images/step3.png')
        .open(url2)
        .screenshot('config/images/step4.png')
        .plainText()
        .then(logPageText)
        .open(url3)
        .screenshot('config/images/step5.png')
        .plainText()
        .then(logPageText)
        // Close in finally rather than as a chained action: chained actions
        // are skipped once an earlier step rejects (e.g. a selector timeout),
        // which would leave the PhantomJS process running.
        .finally(function() {
          return horseman.close();
        });
    };
    

    祝你好运!

    【讨论】:

    • 有趣!如果必须做出 if/then/else 逻辑决策,该如何在 Horseman 的链式脚本中处理?
    • @Vaviloff 好问题。Horseman 有一个 do 函数 (github.com/johntitus/node-horseman#dofn),它允许您运行任意函数而不会破坏链。据我了解,结果将传递给链中的下一个函数,所以您可以运行 .do(function() { return stuff; }).then(function(stuffFromDo) { return moreStuff; }); 在我看来,这相当漂亮。
    【解决方案2】:

    我最近遇到了同样的问题,简单的解决方案是为新创建的页面设置用户代理(user agent)。如果您使用的是 phantomjs-node 模块,代码如下。

    page.setting("userAgent", "your user agent here");

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2013-03-06
      • 1970-01-01
      • 1970-01-01
      • 2016-04-24
      • 2012-08-10
      • 1970-01-01
      • 2017-07-25
      • 1970-01-01
      相关资源
      最近更新 更多