如何使用 cheerio.js 检查一个元素是否含有重复的属性



<div class="logo-center" data-something-very-long="something long" ... class="logo" data-more-stuff> 


 // Read the file as UTF-8 text and hand the resulting string to cheerio.load;
 // `$` is the value cheerio.load returns for that document.
 var fileContents = fs.readFileSync(file, "utf8");
 var $ = cheerio.load(fileContents);


你需要重新解析被检测的元素。为了做到这一点,必须深入到由 cheerio / htmlparser2 生成的原始 DOM 对象。这里用到的属性(startIndex、endIndex)在 domhandler 的文档中有说明,但在 cheerio 的文档中没有,因此需要留意所使用的版本。我已在以下版本上测试过:

 └─┬ cheerio@1.0.0-rc.1 ├─┬ htmlparser2@3.9.2 │ ├── domhandler@2.4.1 



 const fileContents = fs.readFileSync(file, "utf8");
 // Start and end indices are requested so that every DOM node can be mapped
 // back to the exact slice of `fileContents` it was parsed from.
 const $ = cheerio.load(fileContents, {
     useHtmlParser2: true,
     withStartIndices: true,
     withEndIndices: true
 });

 /**
  * Finds the attributes that are written more than once in the start tag of
  * the first element of a cheerio selection.
  *
  * The parsed DOM keeps each attribute name only once, so the start tag is
  * re-read from the source text (`fileContents`) and tokenised a single time.
  * Double-quoted, single-quoted, unquoted and valueless attributes are all
  * recognised; text inside a quoted value is never mistaken for an attribute.
  *
  * @param {object} $elem - cheerio selection; only its first node is inspected.
  * @returns {Map<string, string[]>} attribute name (as keyed in `dom.attribs`)
  *   mapped to every value found in the source, in source order. Values are
  *   the raw source text (valueless attributes yield ''). Only attributes
  *   that occur at least twice are included; an empty selection or a node
  *   without attributes yields an empty Map.
  */
 function getDuplicateAttributes ($elem) {
     const dom = $elem.get(0);
     // empty selection, or a node type that carries no attributes
     if (!dom || !dom.attribs) {
         return new Map();
     }

     // identify tag text position in string
     const start = dom.startIndex;
     const end = dom.children.length ? dom.children[0].startIndex : dom.endIndex + 1;
     // extract
     const html = fileContents.slice(start, end);

     // generator function walks the start tag once and yields a
     // [name, rawValue] pair for every attribute occurrence
     function* attributePairs () {
         // a token is either the closing ">" of the start tag, or an attribute
         // name optionally followed by = and a "..." / '...' / unquoted value
         const token = />|([^\s=\/>]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]*)))?/g;
         // start scanning right after "<tagname"
         const tagName = /^<[^\s\/>]*/.exec(html);
         token.lastIndex = tagName ? tagName[0].length : 0;

         let match;
         while ((match = token.exec(html)) !== null && match[0] !== '>') {
             // exactly one of the three value groups is set when a value exists
             const value = [match[2], match[3], match[4]].find((v) => v !== undefined);
             yield [match[1], value === undefined ? '' : value];
         }
     }

     // collect every value per attribute name
     const valuesByName = new Map();
     for (const [name, value] of attributePairs()) {
         // the parser may have lower-cased the name; fall back to that spelling
         const key = Object.prototype.hasOwnProperty.call(dom.attribs, name)
             ? name
             : name.toLowerCase();
         if (!valuesByName.has(key)) {
             valuesByName.set(key, []);
         }
         valuesByName.get(key).push(value);
     }

     // the DOM will contain all attribute names once
     const doubleAttributeList = Object.keys(dom.attribs)
         // compound attribute names with all found values
         .map((attr) => [attr, valuesByName.get(attr) || []])
         // filter for doubles
         .filter((entry) => entry[1].length > 1);

     return new Map(doubleAttributeList);
 }



 var file = 'some file';
 var fileContents = fs.readFileSync(file, 'utf8');
 // Start and end indices are requested so that every DOM node can be mapped
 // back to the exact slice of `fileContents` it was parsed from.
 var $ = cheerio.load(fileContents, {
     useHtmlParser2: true,
     withStartIndices: true,
     withEndIndices: true
 });

 /**
  * Lists the attributes that are written more than once in the start tag of
  * the first element of a cheerio selection (ES5 version, names only).
  *
  * The start tag is re-read from `fileContents` and split into tokens with a
  * single string.match() call, so attribute names never have to be embedded
  * in a regular expression and quoted values are skipped as a whole.
  *
  * @param {object} $elem - cheerio selection; only its first node is inspected.
  * @returns {string[]} names (as keyed in `dom.attribs`) of the attributes
  *   that occur at least twice; empty for an empty selection or for a node
  *   without attributes.
  */
 function getDuplicateAttributes ($elem) {
     var dom = $elem.get(0);
     // empty selection, or a node type that carries no attributes
     if (!dom || !dom.attribs) {
         return [];
     }

     // identify tag text position in fileContents
     var start = dom.startIndex;
     var end = dom.children.length ? dom.children[0].startIndex : dom.endIndex + 1;
     // extract
     var html = fileContents.slice(start, end);

     // Tokens are: "<tagname", each attribute (name plus optional value), and
     // the ">" that closes the start tag. match() yields null without a match.
     var tokens = html.match(/>|[^\s=\/>]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]*))?/g) || [];
     var tagEnd = tokens.indexOf('>');
     // drop the tag-name token and everything from the closing ">" onwards
     var attrTokens = tokens.slice(1, tagEnd === -1 ? tokens.length : tagEnd);

     // count occurrences per attribute name; no prototype, so names such as
     // "constructor" cannot collide with inherited properties
     var counts = Object.create(null);
     attrTokens.forEach(function (token) {
         // a token always starts with the attribute name
         var name = token.match(/^[^\s=]+/)[0];
         // the parser may have lower-cased the name; fall back to that spelling
         var key = Object.prototype.hasOwnProperty.call(dom.attribs, name)
             ? name
             : name.toLowerCase();
         counts[key] = (counts[key] || 0) + 1;
     });

     // the DOM will contain all attribute names once
     return Object.keys(dom.attribs)
         // filter for doubles
         .filter(function (attr) {
             return counts[attr] > 1;
         });
 }

 var duplicatedAttrs = getDuplicateAttributes($(".some-elem"));


  • 移除生成器函数(generator)
  • 将 ES6 语法改写为 ES5
  • 改进正则表达式
  • 使用 string.match() 代替 regexp.exec()。