// parse-chunked.cjs
  1. 'use strict';
  2. const utils = require('./utils.cjs');
  3. const STACK_OBJECT = 1;
  4. const STACK_ARRAY = 2;
  5. const decoder = new TextDecoder();
  6. function adjustPosition(error, parser) {
  7. if (error.name === 'SyntaxError' && parser.jsonParseOffset) {
  8. error.message = error.message.replace(/at position (\d+)/, (_, pos) =>
  9. 'at position ' + (Number(pos) + parser.jsonParseOffset)
  10. );
  11. }
  12. return error;
  13. }
  14. function append(array, elements) {
  15. // Note: Avoid to use array.push(...elements) since it may lead to
  16. // "RangeError: Maximum call stack size exceeded" for a long arrays
  17. const initialLength = array.length;
  18. array.length += elements.length;
  19. for (let i = 0; i < elements.length; i++) {
  20. array[initialLength + i] = elements[i];
  21. }
  22. }
  23. async function parseChunked(chunkEmitter) {
  24. const iterable = typeof chunkEmitter === 'function'
  25. ? chunkEmitter()
  26. : chunkEmitter;
  27. if (utils.isIterable(iterable)) {
  28. let parser = new ChunkParser();
  29. try {
  30. for await (const chunk of iterable) {
  31. if (typeof chunk !== 'string' && !ArrayBuffer.isView(chunk)) {
  32. throw new TypeError('Invalid chunk: Expected string, TypedArray or Buffer');
  33. }
  34. parser.push(chunk);
  35. }
  36. return parser.finish();
  37. } catch (e) {
  38. throw adjustPosition(e, parser);
  39. }
  40. }
  41. throw new TypeError(
  42. 'Invalid chunk emitter: Expected an Iterable, AsyncIterable, generator, ' +
  43. 'async generator, or a function returning an Iterable or AsyncIterable'
  44. );
  45. }
  46. class ChunkParser {
  47. constructor() {
  48. this.value = undefined;
  49. this.valueStack = null;
  50. this.stack = new Array(100);
  51. this.lastFlushDepth = 0;
  52. this.flushDepth = 0;
  53. this.stateString = false;
  54. this.stateStringEscape = false;
  55. this.pendingByteSeq = null;
  56. this.pendingChunk = null;
  57. this.chunkOffset = 0;
  58. this.jsonParseOffset = 0;
  59. }
  60. parseAndAppend(fragment, wrap) {
  61. // Append new entries or elements
  62. if (this.stack[this.lastFlushDepth - 1] === STACK_OBJECT) {
  63. if (wrap) {
  64. this.jsonParseOffset--;
  65. fragment = '{' + fragment + '}';
  66. }
  67. Object.assign(this.valueStack.value, JSON.parse(fragment));
  68. } else {
  69. if (wrap) {
  70. this.jsonParseOffset--;
  71. fragment = '[' + fragment + ']';
  72. }
  73. append(this.valueStack.value, JSON.parse(fragment));
  74. }
  75. }
  76. prepareAddition(fragment) {
  77. const { value } = this.valueStack;
  78. const expectComma = Array.isArray(value)
  79. ? value.length !== 0
  80. : Object.keys(value).length !== 0;
  81. if (expectComma) {
  82. // Skip a comma at the beginning of fragment, otherwise it would
  83. // fail to parse
  84. if (fragment[0] === ',') {
  85. this.jsonParseOffset++;
  86. return fragment.slice(1);
  87. }
  88. // When value (an object or array) is not empty and a fragment
  89. // doesn't start with a comma, a single valid fragment starting
  90. // is a closing bracket. If it's not, a prefix is adding to fail
  91. // parsing. Otherwise, the sequence of chunks can be successfully
  92. // parsed, although it should not, e.g. ["[{}", "{}]"]
  93. if (fragment[0] !== '}' && fragment[0] !== ']') {
  94. this.jsonParseOffset -= 3;
  95. return '[[]' + fragment;
  96. }
  97. }
  98. return fragment;
  99. }
  100. flush(chunk, start, end) {
  101. let fragment = chunk.slice(start, end);
  102. // Save position correction an error in JSON.parse() if any
  103. this.jsonParseOffset = this.chunkOffset + start;
  104. // Prepend pending chunk if any
  105. if (this.pendingChunk !== null) {
  106. fragment = this.pendingChunk + fragment;
  107. this.jsonParseOffset -= this.pendingChunk.length;
  108. this.pendingChunk = null;
  109. }
  110. if (this.flushDepth === this.lastFlushDepth) {
  111. // Depth didn't changed, so it's a root value or entry/element set
  112. if (this.flushDepth > 0) {
  113. this.parseAndAppend(this.prepareAddition(fragment), true);
  114. } else {
  115. // That's an entire value on a top level
  116. this.value = JSON.parse(fragment);
  117. this.valueStack = {
  118. value: this.value,
  119. prev: null
  120. };
  121. }
  122. } else if (this.flushDepth > this.lastFlushDepth) {
  123. // Add missed closing brackets/parentheses
  124. for (let i = this.flushDepth - 1; i >= this.lastFlushDepth; i--) {
  125. fragment += this.stack[i] === STACK_OBJECT ? '}' : ']';
  126. }
  127. if (this.lastFlushDepth === 0) {
  128. // That's a root value
  129. this.value = JSON.parse(fragment);
  130. this.valueStack = {
  131. value: this.value,
  132. prev: null
  133. };
  134. } else {
  135. this.parseAndAppend(this.prepareAddition(fragment), true);
  136. }
  137. // Move down to the depths to the last object/array, which is current now
  138. for (let i = this.lastFlushDepth || 1; i < this.flushDepth; i++) {
  139. let value = this.valueStack.value;
  140. if (this.stack[i - 1] === STACK_OBJECT) {
  141. // find last entry
  142. let key;
  143. // eslint-disable-next-line curly
  144. for (key in value);
  145. value = value[key];
  146. } else {
  147. // last element
  148. value = value[value.length - 1];
  149. }
  150. this.valueStack = {
  151. value,
  152. prev: this.valueStack
  153. };
  154. }
  155. } else /* this.flushDepth < this.lastFlushDepth */ {
  156. fragment = this.prepareAddition(fragment);
  157. // Add missed opening brackets/parentheses
  158. for (let i = this.lastFlushDepth - 1; i >= this.flushDepth; i--) {
  159. this.jsonParseOffset--;
  160. fragment = (this.stack[i] === STACK_OBJECT ? '{' : '[') + fragment;
  161. }
  162. this.parseAndAppend(fragment, false);
  163. for (let i = this.lastFlushDepth - 1; i >= this.flushDepth; i--) {
  164. this.valueStack = this.valueStack.prev;
  165. }
  166. }
  167. this.lastFlushDepth = this.flushDepth;
  168. }
  169. push(chunk) {
  170. if (typeof chunk !== 'string') {
  171. // Suppose chunk is Buffer or Uint8Array
  172. // Prepend uncompleted byte sequence if any
  173. if (this.pendingByteSeq !== null) {
  174. const origRawChunk = chunk;
  175. chunk = new Uint8Array(this.pendingByteSeq.length + origRawChunk.length);
  176. chunk.set(this.pendingByteSeq);
  177. chunk.set(origRawChunk, this.pendingByteSeq.length);
  178. this.pendingByteSeq = null;
  179. }
  180. // In case Buffer/Uint8Array, an input is encoded in UTF8
  181. // Seek for parts of uncompleted UTF8 symbol on the ending
  182. // This makes sense only if we expect more chunks and last char is not multi-bytes
  183. if (chunk[chunk.length - 1] > 127) {
  184. for (let seqLength = 0; seqLength < chunk.length; seqLength++) {
  185. const byte = chunk[chunk.length - 1 - seqLength];
  186. // 10xxxxxx - 2nd, 3rd or 4th byte
  187. // 110xxxxx – first byte of 2-byte sequence
  188. // 1110xxxx - first byte of 3-byte sequence
  189. // 11110xxx - first byte of 4-byte sequence
  190. if (byte >> 6 === 3) {
  191. seqLength++;
  192. // If the sequence is really incomplete, then preserve it
  193. // for the future chunk and cut off it from the current chunk
  194. if ((seqLength !== 4 && byte >> 3 === 0b11110) ||
  195. (seqLength !== 3 && byte >> 4 === 0b1110) ||
  196. (seqLength !== 2 && byte >> 5 === 0b110)) {
  197. this.pendingByteSeq = chunk.slice(chunk.length - seqLength);
  198. chunk = chunk.slice(0, -seqLength);
  199. }
  200. break;
  201. }
  202. }
  203. }
  204. // Convert chunk to a string, since single decode per chunk
  205. // is much effective than decode multiple small substrings
  206. chunk = decoder.decode(chunk);
  207. }
  208. const chunkLength = chunk.length;
  209. let lastFlushPoint = 0;
  210. let flushPoint = 0;
  211. // Main scan loop
  212. scan: for (let i = 0; i < chunkLength; i++) {
  213. if (this.stateString) {
  214. for (; i < chunkLength; i++) {
  215. if (this.stateStringEscape) {
  216. this.stateStringEscape = false;
  217. } else {
  218. switch (chunk.charCodeAt(i)) {
  219. case 0x22: /* " */
  220. this.stateString = false;
  221. continue scan;
  222. case 0x5C: /* \ */
  223. this.stateStringEscape = true;
  224. }
  225. }
  226. }
  227. break;
  228. }
  229. switch (chunk.charCodeAt(i)) {
  230. case 0x22: /* " */
  231. this.stateString = true;
  232. this.stateStringEscape = false;
  233. break;
  234. case 0x2C: /* , */
  235. flushPoint = i;
  236. break;
  237. case 0x7B: /* { */
  238. // Open an object
  239. flushPoint = i + 1;
  240. this.stack[this.flushDepth++] = STACK_OBJECT;
  241. break;
  242. case 0x5B: /* [ */
  243. // Open an array
  244. flushPoint = i + 1;
  245. this.stack[this.flushDepth++] = STACK_ARRAY;
  246. break;
  247. case 0x5D: /* ] */
  248. case 0x7D: /* } */
  249. // Close an object or array
  250. flushPoint = i + 1;
  251. this.flushDepth--;
  252. if (this.flushDepth < this.lastFlushDepth) {
  253. this.flush(chunk, lastFlushPoint, flushPoint);
  254. lastFlushPoint = flushPoint;
  255. }
  256. break;
  257. case 0x09: /* \t */
  258. case 0x0A: /* \n */
  259. case 0x0D: /* \r */
  260. case 0x20: /* space */
  261. // Move points forward when they points on current position and it's a whitespace
  262. if (lastFlushPoint === i) {
  263. lastFlushPoint++;
  264. }
  265. if (flushPoint === i) {
  266. flushPoint++;
  267. }
  268. break;
  269. }
  270. }
  271. if (flushPoint > lastFlushPoint) {
  272. this.flush(chunk, lastFlushPoint, flushPoint);
  273. }
  274. // Produce pendingChunk if something left
  275. if (flushPoint < chunkLength) {
  276. if (this.pendingChunk !== null) {
  277. // When there is already a pending chunk then no flush happened,
  278. // appending entire chunk to pending one
  279. this.pendingChunk += chunk;
  280. } else {
  281. // Create a pending chunk, it will start with non-whitespace since
  282. // flushPoint was moved forward away from whitespaces on scan
  283. this.pendingChunk = chunk.slice(flushPoint, chunkLength);
  284. }
  285. }
  286. this.chunkOffset += chunkLength;
  287. }
  288. finish() {
  289. if (this.pendingChunk !== null) {
  290. this.flush('', 0, 0);
  291. this.pendingChunk = null;
  292. }
  293. return this.value;
  294. }
  295. }
  296. exports.parseChunked = parseChunked;