如何以某个单词作为分隔键,按块读取文本文件?

我有这种格式的.txt文件:

Part #368 - XXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX Part #369 - XXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX Part #370 - XXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 

我是这样读取文件的:

 // Load file.txt in one go, cut it on '\n' and discard the empty lines.
 var lines = fs
   .readFileSync('file.txt', 'utf-8')
   .split('\n')
   .filter((line) => line.length > 0);

这样会返回一个由文件各行组成的数组。如何获取以“Part”字符串开头的文件块?

 // Keep only the lines that contain the text 'Part' somewhere in them.
 var parts = _.filter(lines, (line) => line.includes('Part'));

类似这样,但我想要的不只是以“Part”开头的那些行,而是从一个“Part”行到下一个“Part”行之间的所有行。

这将创建一个由行数组组成的数组(每个“Part”块对应一个行数组)。

 // Group the lines into blocks: every line containing 'Part' opens a new block
 // and each following line is appended to the most recently opened block.
 var parts = _.reduce(lines, function (blocks, line) {
   // Also open a block for lines that come before the first 'Part' line;
   // otherwise _.last(blocks) is undefined and the push below throws.
   if (line.indexOf('Part') !== -1 || blocks.length === 0) {
     blocks.push([]);
   }
   _.last(blocks).push(line);
   return blocks;
 }, []);

JSON 流

根据 @Brad 的建议,下面是一个继承自 stream.Transform 的类,它将文件流按分隔符切分,并转换为一个 JSON 数组流:

 const { Transform } = require('stream');
 const { StringDecoder } = require('string_decoder');

 /**
  * Transform stream that splits incoming text on a delimiter and writes the
  * sections out incrementally as one JSON array of strings.
  *
  * The text after the last delimiter is held back until more input arrives or
  * the stream ends, so a section is only emitted once it is complete. An empty
  * input produces `[""]`.
  */
 class Delimited extends Transform {
   /**
    * @param {object} [options]
    * @param {RegExp|string} [options.delimiter] Section separator, defaults to
    *   a line break. A string is compiled as a regular-expression pattern; it
    *   is not matched literally.
    * @param {string} [options.encoding='utf8'] Encoding used to decode binary
    *   chunks into text.
    */
   constructor({ delimiter = /\r?\n/g, encoding = 'utf8' } = {}) {
     super();

     this._delimiter = delimiter instanceof RegExp ? delimiter : new RegExp(delimiter, 'g');
     this._encoding = encoding;
     // Decodes binary chunks while carrying over the bytes of a multi-byte
     // character that was cut in half at a chunk boundary.
     this._stringDecoder = new StringDecoder(encoding);
     // Text received so far that does not yet end in a delimiter.
     this._buffer = '';
     // True until the opening '[' of the JSON array has been written.
     this._first = true;
   }

   /**
    * Appends the chunk to the pending text and emits every completed section
    * as a fragment of the JSON array.
    */
   _transform(chunk, encoding, callback) {
     if (Buffer.isBuffer(chunk)) {
       this._buffer += this._stringDecoder.write(chunk);
     } else if (
       typeof chunk === 'string' &&
       encoding !== this._encoding &&
       Buffer.isEncoding(encoding)
     ) {
       // A string is a complete unit, so it can be transcoded directly.
       this._buffer += Buffer.from(chunk, encoding).toString(this._encoding);
     } else {
       this._buffer += chunk;
     }

     // Split unconditionally instead of guarding with delimiter.test(): test()
     // on a global regex keeps lastIndex between calls and would skip
     // delimiters in later chunks. Without a match split() returns the whole
     // buffer as its only element, which pop() puts straight back.
     const sections = this._buffer.split(this._delimiter);
     this._buffer = sections.pop();

     let partialJSON = '';
     for (const section of sections) {
       partialJSON += `${this._first ? '[' : ','}${JSON.stringify(section)}`;
       this._first = false;
     }

     callback(null, partialJSON);
   }

   /**
    * Emits the remaining text as the last array element and closes the array.
    */
   _flush(callback) {
     // Pick up any bytes the decoder was still holding back.
     this._buffer += this._stringDecoder.end();
     callback(null, `${this._first ? '[' : ','}${JSON.stringify(this._buffer)}]`);
   }
 }

用法示例:

 const fs = require('fs');

 // Stream file.txt through Delimited, cutting wherever a blank line is
 // followed by a "Part #<digit>" header, and gather the emitted text.
 const source = fs.createReadStream('file.txt', 'utf8');
 const splitter = new Delimited({ delimiter: /\n\n(?=Part #\d)/g });
 const pieces = [];

 splitter.on('data', (piece) => {
   pieces.push(piece);
 });

 // When the stream has ended, parse the concatenated text as JSON and print it.
 splitter.on('end', () => {
   console.log(JSON.parse(pieces.join('')));
 });

 source.pipe(splitter);

在线试用!

字符串流

或者,如果您不需要把 JSON 传输到其他文件、进程或作为客户端响应,则可以将输出流设置为 objectMode: true,从而把每个段作为一个独立的块输出:

 const { Transform } = require('stream');
 const { StringDecoder } = require('string_decoder');

 /**
  * Object-mode Transform stream that splits incoming text on a delimiter and
  * emits each section as its own string chunk.
  *
  * The text after the last delimiter is held back until more input arrives or
  * the stream ends, so a section is only emitted once it is complete. The
  * remaining text is always emitted at the end, even when it is empty.
  */
 class Delimited extends Transform {
   /**
    * @param {RegExp|string} [delimiter] Section separator, defaults to a line
    *   break. A string is compiled as a regular-expression pattern; it is not
    *   matched literally.
    */
   constructor(delimiter = /\r?\n/g) {
     super({ objectMode: true });

     this._delimiter = delimiter instanceof RegExp ? delimiter : new RegExp(delimiter, 'g');
     this._encoding = 'utf8';
     // Decodes binary chunks while carrying over the bytes of a multi-byte
     // character that was cut in half at a chunk boundary.
     this._stringDecoder = new StringDecoder(this._encoding);
     // Text received so far that does not yet end in a delimiter.
     this._buffer = '';
   }

   /**
    * Appends the chunk to the pending text and pushes every completed section.
    */
   _transform(chunk, encoding, callback) {
     // Test the chunk itself rather than the encoding argument: the original
     // check for encoding === 'buffer' did not recognise Buffers written to
     // this object-mode stream.
     if (Buffer.isBuffer(chunk)) {
       this._buffer += this._stringDecoder.write(chunk);
     } else if (
       typeof chunk === 'string' &&
       encoding !== this._encoding &&
       Buffer.isEncoding(encoding)
     ) {
       // A string is a complete unit, so it can be transcoded directly.
       this._buffer += Buffer.from(chunk, encoding).toString(this._encoding);
     } else {
       this._buffer += chunk;
     }

     // Split unconditionally instead of guarding with delimiter.test(): test()
     // on a global regex keeps lastIndex between calls and would skip
     // delimiters in later chunks. Without a match split() returns the whole
     // buffer as its only element, which pop() puts straight back.
     const sections = this._buffer.split(this._delimiter);
     this._buffer = sections.pop();

     // Push one section at a time so forEach's extra (index, array) arguments
     // are never passed on to push().
     for (const section of sections) {
       this.push(section);
     }

     callback();
   }

   /**
    * Pushes the remaining text as the final section.
    */
   _flush(callback) {
     // Pick up any bytes the decoder was still holding back.
     callback(null, this._buffer + this._stringDecoder.end());
   }
 }

用法示例:

 const fs = require('fs');

 // Stream file.txt through Delimited, cutting wherever a blank line is
 // followed by a "Part #<digit>" header, and gather the emitted sections.
 const source = fs.createReadStream('file.txt', 'utf8');
 const splitter = new Delimited(/\n\n(?=Part #\d)/g);
 const sections = [];

 splitter.on('data', (section) => {
   sections.push(section);
 });

 // Print the collected sections once the stream has ended.
 splitter.on('end', () => {
   console.log(sections);
 });

 source.pipe(splitter);

在线试用!