使用无头 Chrome 拦截图像请求数据

我有一个用例,需要使用无头 Chrome 浏览器( https://chromedevtools.github.io/devtools-protocol/tot/Network/ )拦截所有的图片请求,获取每张图片的尺寸,然后保存它(基本上是丢弃图标之类的小图像)。

但是,我找不到在保存之前把图像数据加载到内存中的方法。我需要把它加载到 Img 对象中才能获得 width 和 height。Network.getResponseBody 需要 requestId,而我在 Network.requestIntercepted 中无法获取它。另外,Network.loadingFinished 的 encodedDataLength 变量总是给我“0”。我不知道为什么。所以我的问题是:

  1. 如何拦截 jpg / png 请求的所有响应并获取图像数据?而不必先通过 URL 字符串把文件保存到磁盘、再加载回来。

  2. 最好是:如何从响应头获取图像尺寸?这样我就不必将数据读入内存。

我的代码如下:

 const chromeLauncher = require('chrome-launcher');
 const CDP = require('chrome-remote-interface');
 const file = require('fs');

 /**
  * Launches headless Chrome, intercepts every request, logs details for
  * requests whose URL contains ".jpg" or ".png", logs the fields of each
  * Network.loadingFinished event, and shuts everything down once the page's
  * load event has fired.
  */
 (async function () {
   /**
    * Starts a headless Chrome instance.
    * @returns {Promise<object>} the chrome-launcher handle (exposes `port` and `kill()`).
    */
   function launchChrome() {
     return chromeLauncher.launch({
       chromeFlags: ['--disable-gpu', '--headless'],
     });
   }

   const chrome = await launchChrome();
   let protocol;

   try {
     protocol = await CDP({ port: chrome.port });
     const { DOM, Network, Page, Runtime } = protocol;

     await Promise.all([
       Network.enable(),
       Page.enable(),
       Runtime.enable(),
       DOM.enable(),
     ]);

     // NOTE(review): newer protocol versions appear to expose interception as
     // Network.setRequestInterception / the Fetch domain - confirm against the
     // Chrome version in use.
     await Network.setRequestInterceptionEnabled({ enabled: true });

     Network.requestIntercepted(({ interceptionId, request, resourceType }) => {
       if (request.url.includes('.jpg') || request.url.includes('.png')) {
         console.log(JSON.stringify(request));
         console.log(resourceType);
         if (request.url.includes('/unspecified.jpg')) {
           console.log('FOUND unspecified.jpg');
           console.log(JSON.stringify(interceptionId));
           // Disabled attempt: only an interceptionId is available in this
           // handler, not the requestId that getResponseBody is called with
           // elsewhere.
           // console.log(JSON.stringify(Network.getResponseBody(interceptionId)));
         }
       }

       // Every intercepted request must be resumed, image or not. The
       // rejection handler keeps a failure here (e.g. the session closing
       // mid-flight) from becoming an unhandled rejection.
       Network.continueInterceptedRequest({ interceptionId }).catch((err) => {
         console.error(err);
       });
     });

     Network.loadingFinished(({ requestId, timestamp, encodedDataLength }) => {
       console.log(requestId);
       console.log(timestamp);
       console.log(encodedDataLength);
     });

     // Subscribe to the load event before navigating so it cannot be missed.
     const pageLoaded = new Promise((resolve) => {
       Page.loadEventFired(resolve);
     });

     await Page.navigate({ url: 'https://www.yahoo.com/' });
     await pageLoaded;
   } finally {
     // Always release the debugging session and the browser process, even
     // when connecting or navigating failed.
     if (protocol) {
       await protocol.close();
     }
     await chrome.kill();
   }
 })().catch((err) => {
   console.error(err);
   process.exitCode = 1;
 });

这应该能帮你完成 90% 的工作。它会获取每个图像请求的响应体。你仍然需要自己进行 base64 解码、检查大小并保存等……

 const CDP = require('chrome-remote-interface');

 // Intended minimum body length for an image worth keeping. Not applied yet;
 // see the TODO in run().
 const sizeThreshold = 1024;

 /**
  * Tells whether a URL looks like a JPEG or PNG resource.
  * @param {string} url - URL to inspect.
  * @returns {boolean} true when the URL contains ".jpg" or ".png".
  */
 function isImageUrl(url) {
   return url.includes('.jpg') || url.includes('.png');
 }

 /**
  * Connects to an already-running Chrome, navigates to a page and collects
  * the response body of every request whose URL looks like a JPEG/PNG image.
  * Connection and navigation errors are logged, not rethrown.
  * @returns {Promise<void>}
  */
 async function run() {
   let client;
   try {
     client = await CDP();
     const { Network, Page } = client;

     const targetUrl = 'https://google.co.za';
     const pics = [];
     // requestId -> url for image responses whose body has not been read yet.
     const imageUrls = new Map();
     // In-flight body reads, awaited before the connection is closed.
     const pendingReads = [];

     // Reads one response body and records it; a failed read is logged and
     // skipped so that a single bad resource cannot abort the whole run.
     const readBody = async (requestId, url) => {
       try {
         const { body, base64Encoded } = await Network.getResponseBody({ requestId });
         pics.push({ url, body, base64Encoded });
         console.log(url, body, base64Encoded);
       } catch (err) {
         console.error(`Could not read response body for ${url}:`, err);
       }
     };

     Network.responseReceived(({ requestId, response }) => {
       const url = response ? response.url : null;
       // Guard against a missing response/url before inspecting the string.
       if (url && isImageUrl(url)) {
         imageUrls.set(requestId, url);
       }
     });

     // Only the headers have arrived when responseReceived fires, so the body
     // is requested once loadingFinished reports that the resource is
     // complete; asking earlier is what made getResponseBody reject.
     Network.loadingFinished(({ requestId }) => {
       const url = imageUrls.get(requestId);
       if (url === undefined) {
         return;
       }
       imageUrls.delete(requestId);
       pendingReads.push(readBody(requestId, url));
     });

     // enable events
     await Promise.all([Network.enable(), Page.enable()]);

     // commands
     await Page.navigate({ url: targetUrl });
     await sleep(5000);
     await Promise.all(pendingReads);

     // TODO: process pics - base64Encoded, check body.length > sizeThreshold, save etc...
   } catch (err) {
     if (err && err.message === "No inspectable targets") {
       console.error("Either chrome isn't running or you already have another app connected to chrome - eg `chrome-remote-interface inspect`");
     } else {
       console.error(err);
     }
   } finally {
     if (client) {
       await client.close();
     }
   }
 }

 /**
  * Resolves after the given delay.
  * @param {number} [miliseconds=1000] - delay in milliseconds; 0 resolves immediately.
  * @returns {Promise<void>}
  */
 function sleep(miliseconds = 1000) {
   if (miliseconds === 0) {
     return Promise.resolve();
   }
   return new Promise((resolve) => setTimeout(resolve, miliseconds));
 }

 // run() handles its own errors; this only catches a failure while closing
 // the client in its finally block.
 run().catch((err) => {
   console.error(err);
 });