将Node request-promise的响应结果上传(upsert)到MongoDB

我在Node里尝试使用Cheerio。我写了一个爬虫:先抓取一个文章列表页,提取所有文章的URL,然后逐一访问每篇文章,抓取标题和URL。一切工作正常,只是当我尝试把结果写入我的MongoDB时,得到的是undefined。

我猜它是在值被定义之前就尝试插入了……但即使使用了request-promise,我也无法让它正常工作。任何帮助都将不胜感激!由于代码不算太长,我把整段代码都贴出来,这样更容易看出我想做什么。再强调一次,主要的问题是让upsertArticle真正把这些变量插入数据库。

 const request = require('request'); const cheerio = require('cheerio'); const rp = require('request-promise'); const mongoose = require('mongoose'); const Article = require('./models/article'); var urls = []; //get the list of articles to scrape rp('https://www.somesite.com/', function(error, response, html) { if (!error && response.statusCode == 200) { var $ = cheerio.load(html); $('.c-entry-box--compact__title').each(function(i, element) { var a = $(this); urls.push(a.children().attr('href')); }); } }) //scrape over each article individually .then(function(getStuff) { var arrayLength = urls.length; //get the list of articles to scrape and upsert each one for (var i = 0; i < arrayLength; i++) { const result = rp(urls[i], function(error, response, html) { if (!error && response.statusCode == 200) { var $ = cheerio.load(html); var parsedResults = []; $('.l-main-content').each(function(n, element) { var a = $(this); var title = a.find('.c-page-title').text(); var url = response.request.uri.href; //I also tried upserting the variables right here, that didn't work return { title, url }; }); } else {console.log(error);} }).then(function(upsertStuff) { //also tried returning and upserting stuff here... but nothing gets upserted upsertArticle({ title: result.title, source: result.url, dateCrawled: new Date() }); console.log('Upserted ' + result.title); }).catch(function(err) {console.log(err); }); } }) .catch(function(err) {console.log(err); }); function upsertArticle(userObj) { const DB_URL = 'mongodb://localhost/articles'; if (mongoose.connection.readyState == 0) { mongoose.connect(DB_URL, { useMongoClient: true }); } let conditions = { title: userObj.title }; let options = { upsert: true, new: true, setDefaultsOnInsert: true }; Article.findOneAndUpdate(conditions, userObj, options, (err, result) => { if (err) throw err; }); } 

我对你提供的代码做了一些更改。也就是说,我统一使用Promise而不是回调(callback),以保持逻辑的一致性,并确保一切都能正常运行。

对于for循环,我把upsertArticle({...})移回到each函数的内部,这样title和url在调用时就已经有定义了。

最后,我使用了Bluebird的Promise.all(request-promise本身已经依赖Bluebird)来表示所有的链接都已经被插入。这个更改是可选的,但我认为在所有操作完成时得到反馈会非常有用:

试试这个:

 const request = require('request');
 const cheerio = require('cheerio');
 const rp = require('request-promise');
 const mongoose = require('mongoose');
 const Article = require('./models/article');
 const Promise = require("bluebird");

 // Have mongoose hand out Bluebird promises so query promises compose with
 // Promise.all below.
 mongoose.Promise = Promise;

 // Scrape the index page, then every linked article, and only report success
 // once every upsert has actually been acknowledged by the database.
 fetchPage('https://www.somesite.com')
   .then(function(response) {
     const $ = cheerio.load(response.body);
     const urls = [];
     $('.c-entry-box--compact__title').each(function(i, element) {
       const href = $(element).children().attr('href');
       // Skip entries without a link; requesting `undefined` would fail.
       if (href) {
         urls.push(href);
       }
     });
     return urls;
   })
   .then(function(urls) {
     return Promise.all(urls.map(function(articleUrl) {
       return scrapeArticle(articleUrl);
     }));
   })
   .then(function() {
     console.log("Done upserting!");
   })
   .catch(function(err) {
     console.log(err);
   });

 /**
  * Requests a page and resolves with the full response object.
  *
  * @param {string} uri - Address of the page to download.
  * @returns {Promise<Object>} Resolves with the response (body in `response.body`).
  * @throws {Error} Rejects when the status code is not 200.
  */
 function fetchPage(uri) {
   return rp({ uri: uri, resolveWithFullResponse: true }).then(function(response) {
     if (response.statusCode !== 200) {
       throw new Error('Response: ' + response.statusCode);
     }
     return response;
   });
 }

 /**
  * Downloads one article and upserts a document for every `.l-main-content`
  * element found in it.
  *
  * @param {string} articleUrl - Address of the article page.
  * @returns {Promise<Array>} Resolves once every upsert for this page is done.
  */
 function scrapeArticle(articleUrl) {
   return fetchPage(articleUrl).then(function(response) {
     const $ = cheerio.load(response.body);
     const url = response.request.uri.href;
     const upserts = [];
     $('.l-main-content').each(function(n, element) {
       const title = $(element).find('.c-page-title').text();
       // Keep the promise so the caller can wait for the write, and log only
       // after the write has really happened.
       const upsert = upsertArticle({ title: title, source: url, dateCrawled: new Date() })
         .then(function() {
           console.log('Upserted ' + title);
         });
       upserts.push(upsert);
     });
     return Promise.all(upserts);
   });
 }

 /**
  * Upserts one article document, matched by its title.
  *
  * Connects mongoose to DB_URL first when the connection's readyState is 0.
  *
  * @param {Object} userObj - Fields to store; `title` is used as the match key.
  * @returns {Promise<Object>} Resolves with the stored document; rejects on a
  *   database error so the caller's `.catch` sees it.
  */
 function upsertArticle(userObj) {
   const DB_URL = 'mongodb://localhost/articles';
   if (mongoose.connection.readyState === 0) {
     mongoose.connect(DB_URL, { useMongoClient: true });
   }
   const conditions = { title: userObj.title };
   const options = { upsert: true, new: true, setDefaultsOnInsert: true };
   // exec() returns a promise instead of taking a callback, so errors travel
   // down the chain rather than being thrown from inside a driver callback.
   return Article.findOneAndUpdate(conditions, userObj, options).exec();
 }

由于不知道https://www.somesite.com对应的真实网址,我无法测试这段代码,所以如果代码出现任何新的错误,请告诉我。