Node.JS WebCrawler 超出最大调用堆栈大小

我在Node.JS中编写了这个WebCrawler。它抓取页面并将其保存到Redis中。我使用setImmediate和process.nextTick，但它仍然会引发此错误。我做错了什么？

// Simple web crawler: fetches a page, stores its title in Redis, then records
// every link found on the page and schedules newly seen links for crawling.
var request = require("request");
var validator = require("validator");
var $ = require("cheerio");
var _ = require("underscore");
var s = require("underscore.string");
var urlhelper = require("url");
var urls = [];
var redis = require("redis");
var client = redis.createClient();
var f;

/**
 * Queues a crawl of `href` through a zero-delay timer.
 *
 * @param {string} href - URL to hand to `f` once the timer fires.
 */
function scheduleCrawl(href) {
  setTimeout(function () {
    f(href);
  }, 0);
}

/**
 * Records one discovered link in Redis.
 *
 * When the key already exists its "refs" counter is incremented. Otherwise a
 * hash with refs "1" and an empty title is created, the link is added to the
 * "sites" set, and a crawl of the link is scheduled.
 *
 * @param {string} href - Link to record.
 * @throws Rethrows the error reported by `client.exists` from inside its callback.
 */
function registerLink(href) {
  client.exists(href, function (error, reply) {
    if (error) {
      throw error;
    }

    if (reply !== 1) {
      client.hmset(href, { "refs": "1", "title": "" }, function () {
        client.sadd("sites", href, function () {
          scheduleCrawl(href);
        });
      });
      return;
    }

    client.hincrby(href, "refs", 1, function () {});
  });
}

/**
 * Returns the target of an anchor element: the raw `href` attribute when the
 * validator accepts it as a URL, otherwise the attribute (or "" when it is
 * missing) resolved against the page URL.
 *
 * @param {string} pageUrl - URL of the page the anchor was found on.
 * @param {object} anchor - Element yielded by iterating the cheerio selection.
 * @returns {string} The link target.
 */
function resolveHref(pageUrl, anchor) {
  var rawHref = anchor.attribs["href"];
  if (validator.isURL(rawHref)) {
    return rawHref;
  }
  return urlhelper.resolve(pageUrl, rawHref || "");
}

/**
 * Crawls a single URL on the next tick.
 *
 * The URL is appended to `urls`; if the validator rejects it nothing else
 * happens. On a 200 response the page title is stored under the URL's Redis
 * hash and, once that write calls back, every anchor on the page is logged
 * and recorded.
 *
 * @param {string} url - Page to fetch.
 */
f = function (url) {
  return process.nextTick(function () {
    urls.push(url);

    if (validator.isURL(url) !== true) {
      return;
    }

    request(url, function (error, response, body) {
      // Failed requests and non-200 responses are ignored.
      if (error || response.statusCode !== 200) {
        return;
      }

      var page = $.load(body);
      var title = page("title").text() || "";

      client.hset(url, "title", title, function () {
        _.each(page("a"), function (anchor) {
          var href = resolveHref(url, anchor);
          console.log(href);
          registerLink(href);
        });
      });
    });
  });
};

// Start crawling from the seed URL once the Redis client reports "connect".
client.on("connect", function () {
  f("http://www.apple.com");
});

我将不胜感激任何帮助。

非常感谢，

马克斯

你写的是一个递归函数，它必须遍历整个万维网（或者至少是从www.apple.com可以到达的所有内容）之后才能结束。除非它能够在内存中容纳几十亿个网页，否则它将耗尽堆栈空间。你需要重写它，在某种数据库中维护一个单独的待爬取页面队列——这不是一个适合使用递归的地方。或者，你可以设置递归的最大深度。

Node.JS WebCrawler 超出最大调用堆栈大小

使用Node.js集群模块在不同的用户下绑定一个工作者？

NodeJS – 通过管道连接多个FFMPEG进程

执行shell命令并获取日志nodejs

通过nodejs重新附加到产生的进程

将标准输出读取到变量中

如何用node.js模拟bash的进程替换？

递归调用process.nextTick是否会让其他进程或线程得以运行？

如何在Node.js应用程序中安装和管理Windows服务？

无法通过管道获取特定应用程序spawn后的stdout输出

Node.js产生的颜色？