Web Crawler – 返回一个数组,用于下一个函数

我可以确认的第一个function正常工作。

我想返回一个数组到variablesAtoZLinks,所以它可以在后面的函数中使用。 我将向数组中的每个URL发出请求,并从这些链接中提取更多信息。

非常感谢,我已经作为一个项目工作了一段时间,我是jQuery,Web爬网,JS,NodeJS和expressJS的初学者。 为自己的工作而深深地投入。

var express = require('express'); var request = require('request'); var cheerio = require('cheerio'); var router = express.Router(); var fullUrl; fullUrl = []; var AtoZLinks = function(){ var url = 'http://example1.com'; request(url, function(error, response, html) { if (!error && response.statusCode === 200) { var $ = cheerio.load(html); var fullUrl = []; var places = "Places"; $('.clear a').each(function() { var link = $(this); link.each(function(index) { var href = link.attr('href'); if (href.match(places)) { // The urls from fullUrl here to be returned to parent variable. fullUrl[index] = url + href; // Think something is wrong here... I've also tried "fullUrl.push(url + href);" console.log(fullUrl); // This prints out all urls correctly } }); }); for (var i = 0; i < fullUrl.length; i++) { console.log(fullUrl[i]; } // This code only prints out the last url stored (So I'm thinking the urls are being stored incorrectly...) } }); }; /* GET crawler page. */ router.get('/crawler', function(req, res, next) { AtoZLinks(); next(); }, function(req, res) { }); module.exports = router; // Feel free to ignore the following work I've done or.. // Your support with the the following function will be a bonus! // I need to use the links in the previous array variable in the following // function to extract further urls within those urls that I will work with. var url = AtoZLinks; request(AtoZLinks, function(error, response, html) { if (!error && response.statusCode === 200) { var $ = cheerio.load(html); // This selector is the code needed to extract the links from within the // links in the AtoZLinks array. $('div.fclist-section.clear.list-four').each(function() { $(this).find('a').each(function() { var link = $(this); var href = link.attr('href'); fullUrl = url + href; console.log(fullUrl); }); }); } ); 

你的意思是这样吗?

 var arrURLs; arrURLs = [ 'www.ask.com', 'www.google.com', 'www.bing.com', 'www.yahoo.com' ]; AtoZLinks(arrURLs); var AtoZLinks = function(theURLs){ for (var i = 0; i < theURLs.length; i++) { var url = theURLs[i]; request(url, function(error, response, html) { if (!error && response.statusCode === 200) { var $ = cheerio.load(html); var fullUrl = []; var places = "Places"; $('.clear a').each(function() { var link = $(this); link.each(function(index) { var href = link.attr('href'); //absolute match if (href === url) { //true } else { //false } //href contains url if (href.indexOf(url) > -1) { //true } else { //false } if (href.match(places)) { // The urls from fullUrl here to be returned to parent variable. fullUrl.push(url + href); console.log(JSON.stringify(fullUrl)); } }); }); } }); } };