抓取 Netflix 数据

我正尝试以编程方式从 Netflix 获取最近的观看记录,但在登录(login)阶段就遇到了问题。我目前的代码只会让 Netflix 返回 “We were unable to process your request.” 页面:

 // First attempt: load the login page, read the auth token from the login
 // form, then POST the credentials appended to the URL as a query string.
 var request = require('request').defaults({jar: true});
 var cheerio = require('cheerio');

 var url = "https://www.netflix.com/Login?locale=en-GB&nextpage=https%3A%2F%2Fwww.netflix.com%2FWiViewingActivity";

 // Prints the body of whatever page the login POST returns.
 function printLoginResponse(err, response, body) {
   console.log(body);
 }

 // Handles the login page: bails out on any failure, otherwise submits the
 // credentials together with the auth token scraped from the form.
 function handleLoginPage(error, response, body) {
   if (error || response.statusCode != 200) {
     return;
   }

   var $ = cheerio.load(body);
   var authCode = $("#login-form > input").attr("value");
   var loginUrl = url + "?email=myemail%40gmail.com&password=mypassword&RememberMe=on&authURL=" + authCode;

   request.post(loginUrl, { }, printLoginResponse);
 }

 request(url, handleLoginPage);

有任何想法吗?

令人惊讶的是,在 Google 上几乎搜不到关于抓取 Netflix(Scraping Netflix)的有用信息。

想通了,需要:

  1. 发送任意一个 User-Agent(用户代理)字符串
  2. 使用 request 库的 form 参数发送表单数据
  3. 手动发送cookie

这是我的最终代码,它会获取最近观看的那个项目:

 // Logs in to Netflix and prints the title of the first entry on the
 // viewing-activity page.
 //
 // Flow:
 //   1. GET the login page and read the auth token from the login form.
 //   2. POST the credentials as form data (with a User-Agent header).
 //   3. GET the viewing-activity page, passing the login cookies explicitly.
 //
 // Every callback checks its error argument before touching the response, so
 // a network failure is reported instead of crashing on an undefined response.
 var request = require('request').defaults({jar: true});
 var cheerio = require('cheerio');

 var url = "https://www.netflix.com/Login";
 var activityUrl = "https://www.netflix.com/WiViewingActivity";
 var userAgent = "NodeScrape";

 request(url, function (error, response, body) {
   if (error) {
     console.error("Could not load the login page:", error);
     return;
   }
   if (response.statusCode !== 200) {
     console.error("Unexpected status for the login page:", response.statusCode);
     return;
   }

   var $ = cheerio.load(body);
   // The token is read from the "value" attribute of the input matched
   // directly under #login-form.
   var authCode = $("#login-form > input").attr("value");

   var loginOptions = {
     url: url,
     form: {
       "email": "email@gmail.com",
       "password": "password",
       "authURL": authCode,
       "RememberMe": "on"
     },
     headers: {
       'User-Agent': userAgent
     }
   };

   request.post(loginOptions, function (err, response, body) {
     if (err) {
       console.error("Login request failed:", err);
       return;
     }

     var activityHeaders = {'User-Agent': userAgent};
     var cookies = response.headers['set-cookie'];
     // Only attach the Cookie header when the login response actually set
     // cookies, so an undefined header value is never sent.
     if (cookies) {
       activityHeaders['Cookie'] = cookies;
     }

     request({url: activityUrl, headers: activityHeaders}, function (error, response, body) {
       if (error) {
         console.error("Could not load the viewing activity page:", error);
         return;
       }

       var $ = cheerio.load(body);
       // First ".seriestitle" element on the page.
       console.log($(".seriestitle").eq(0).text());
     });
   });
 });