/** * EventSource/Server-Sent Events parser * @see https://html.spec.whatwg.org/multipage/server-sent-events.html */ import {ParseError} from './errors.ts' import type {EventSourceParser, ParserCallbacks} from './types.ts' // ASCII codes used in the hot parsing paths. const LF = 10 const CR = 13 const SPACE = 32 // oxlint-disable-next-line no-unused-vars function noop(_arg: unknown) { // intentional noop } /** * Creates a new EventSource parser. * * @param callbacks - Callbacks to invoke on different parsing events: * - `onEvent` when a new event is parsed * - `onError` when an error occurs * - `onRetry` when a new reconnection interval has been sent from the server * - `onComment` when a comment is encountered in the stream * * @returns A new EventSource parser, with `parse` and `reset` methods. * @public */ export function createParser(callbacks: ParserCallbacks): EventSourceParser { if (typeof callbacks === 'function') { throw new TypeError( '`callbacks` must be an object, got a function instead. Did you mean `{onEvent: fn}`?', ) } const {onEvent = noop, onError = noop, onRetry = noop, onComment} = callbacks // Trailing bytes from prior `feed()` calls that did not yet form a complete line. // Stored as an array of fragments and only joined when a line terminator arrives. // Concatenating per-feed (`prefix + chunk`) is O(N²) when a single SSE line spans // many chunks (e.g. a large `data:` payload streamed in tiny slices, or an MCP-style // server that emits one giant content block). Buffering as fragments + joining once // makes the same workload linear. const pendingFragments: string[] = [] let isFirstChunk = true let id: string | undefined let data = '' let dataLines = 0 let eventType: string | undefined /** * Feeds a chunk of the SSE stream to the parser. Any trailing bytes that do * not yet form a complete line are held back and prepended to the next chunk, * so callers can pass arbitrary slices of the stream without worrying about * line boundaries. * * Per the SSE spec, a UTF-8 BOM (0xEF 0xBB 0xBF) at the start of the very * first chunk is stripped before parsing. * * @see https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream */ function feed(chunk: string) { if (isFirstChunk) { isFirstChunk = false // Match and strip UTF-8 BOM from the start of the stream, if present. // (Per the spec, this is only valid at the very start of the stream) if ( chunk.charCodeAt(0) === 0xef && chunk.charCodeAt(1) === 0xbb && chunk.charCodeAt(2) === 0xbf ) { chunk = chunk.slice(3) } } // Hot path: no buffered prefix from a prior partial line. Hand the chunk // straight to `processLines`, exactly like the original implementation. // Zero new work in the common case (every chunk ends with `\n\n`). if (pendingFragments.length === 0) { const trailing = processLines(chunk) if (trailing !== '') pendingFragments.push(trailing) return } // We have a buffered prefix. If this chunk also has no terminator, append // to the buffer without concatenating — that's the O(N²) trap we're // avoiding (large single `data:` payload split across many tiny chunks). if (chunk.indexOf('\n') === -1 && chunk.indexOf('\r') === -1) { pendingFragments.push(chunk) return } // Terminator arrived. Join the accumulated fragments + this chunk once, // process, and buffer any new trailing partial line. pendingFragments.push(chunk) const input = pendingFragments.join('') pendingFragments.length = 0 const trailing = processLines(input) if (trailing !== '') pendingFragments.push(trailing) } /** * Splits `chunk` into SSE lines and dispatches each to the appropriate handler. * Returns any trailing bytes that did not terminate with a line break, so the * caller can prepend them to the next chunk. * * The SSE spec permits three line terminators: `\n`, `\r`, and `\r\n`. Real-world * streams almost always use plain `\n`, so we take a fast path when no `\r` is * present in the chunk. The slow path is spec-correct but does more work per line. */ function processLines(chunk: string): string { let searchIndex = 0 // Fast path: LF-only chunk (the common case for typical SSE servers). // We can scan forward with a single `indexOf('\n')` per line and inline // the hot-path branches for `data:` and `event:` without the CR bookkeeping // the slow path needs. if (chunk.indexOf('\r') === -1) { let lfIndex = chunk.indexOf('\n', searchIndex) while (lfIndex !== -1) { // Blank line: end-of-event marker. Dispatch the accumulated event (if any) // and reset the buffered fields. This is hoisted out of `parseLine` because // it's the single most common line shape after `data:` lines. if (searchIndex === lfIndex) { if (dataLines > 0) { onEvent({id, event: eventType, data}) } id = undefined data = '' dataLines = 0 eventType = undefined searchIndex = lfIndex + 1 lfIndex = chunk.indexOf('\n', searchIndex) continue } const firstCharCode = chunk.charCodeAt(searchIndex) if (isDataPrefix(chunk, searchIndex, firstCharCode)) { // `data:` line — append the value to the event's data buffer. // 'data:'.length === 5, 'data: '.length === 6 const valueStart = chunk.charCodeAt(searchIndex + 5) === SPACE ? searchIndex + 6 : searchIndex + 5 const value = chunk.slice(valueStart, lfIndex) // Fast path within a fast path: if this is the first data line AND the // next char is another LF (i.e. `data:foo\n\n`), dispatch immediately // without ever writing to the `data` buffer. This is the shape of a // typical single-line SSE event (ChatGPT-style streams, etc.) and is // hot enough to be worth the duplication. if (dataLines === 0 && chunk.charCodeAt(lfIndex + 1) === LF) { onEvent({id, event: eventType, data: value}) id = undefined data = '' eventType = undefined searchIndex = lfIndex + 2 lfIndex = chunk.indexOf('\n', searchIndex) continue } // Multi-line data: concatenate with newline separator per spec. data = dataLines === 0 ? value : `${data}\n${value}` dataLines++ } else if (isEventPrefix(chunk, searchIndex, firstCharCode)) { // `event:` line — set the event type for the next dispatch. Per spec, // an empty value resets `event type` to its default (undefined here). // 'event:'.length === 6, 'event: '.length === 7 eventType = chunk.slice( chunk.charCodeAt(searchIndex + 6) === SPACE ? searchIndex + 7 : searchIndex + 6, lfIndex, ) || undefined } else { // Everything else: `id:`, `retry:`, comment lines (`:` prefix), unknown // fields, or malformed lines. These are rarer and go through the full // per-line parser, which handles the SSE field grammar in detail. parseLine(chunk, searchIndex, lfIndex) } searchIndex = lfIndex + 1 lfIndex = chunk.indexOf('\n', searchIndex) } return chunk.slice(searchIndex) } // Slow path: the chunk contains at least one `\r`, so lines may be terminated // by `\r`, `\n`, or `\r\n`. We locate the next terminator by looking at both // the nearest `\r` and `\n` and picking whichever comes first. while (searchIndex < chunk.length) { const crIndex = chunk.indexOf('\r', searchIndex) const lfIndex = chunk.indexOf('\n', searchIndex) let lineEnd = -1 if (crIndex !== -1 && lfIndex !== -1) { lineEnd = crIndex < lfIndex ? crIndex : lfIndex } else if (crIndex !== -1) { // A trailing `\r` at the very end of the chunk is ambiguous: it could be // a bare-CR terminator, or the first half of a `\r\n` whose `\n` arrives // in the next chunk. Defer until we see more input. if (crIndex === chunk.length - 1) { lineEnd = -1 } else { lineEnd = crIndex } } else if (lfIndex !== -1) { lineEnd = lfIndex } if (lineEnd === -1) { break } parseLine(chunk, searchIndex, lineEnd) searchIndex = lineEnd + 1 // If we just consumed a `\r` and the next char is `\n`, skip it so the // pair is treated as a single terminator rather than an empty line. if (chunk.charCodeAt(searchIndex - 1) === CR && chunk.charCodeAt(searchIndex) === LF) { searchIndex++ } } return chunk.slice(searchIndex) } function parseLine(chunk: string, start: number, end: number) { if (start === end) { dispatchEvent() return } const firstCharCode = chunk.charCodeAt(start) if (isDataPrefix(chunk, start, firstCharCode)) { // 'data:'.length === 5, 'data: '.length === 6 const valueStart = chunk.charCodeAt(start + 5) === SPACE ? start + 6 : start + 5 const value = chunk.slice(valueStart, end) data = dataLines === 0 ? value : `${data}\n${value}` dataLines++ return } if (isEventPrefix(chunk, start, firstCharCode)) { // 'event:'.length === 6, 'event: '.length === 7 eventType = chunk.slice(chunk.charCodeAt(start + 6) === SPACE ? start + 7 : start + 6, end) || undefined return } // Fast path for "id:" — 'i' = 105, 'd' = 100, ':' = 58 if ( firstCharCode === 105 && chunk.charCodeAt(start + 1) === 100 && chunk.charCodeAt(start + 2) === 58 ) { // 'id:'.length === 3, 'id: '.length === 4 const value = chunk.slice(chunk.charCodeAt(start + 3) === SPACE ? start + 4 : start + 3, end) id = value.includes('\0') ? undefined : value return } // Comment line — ':' = 58 if (firstCharCode === 58) { if (onComment) { const line = chunk.slice(start, end) // skip ':' (+1), or ': ' (+2) when a space follows onComment(line.slice(chunk.charCodeAt(start + 1) === SPACE ? 2 : 1)) } return } const line = chunk.slice(start, end) const fieldSeparatorIndex = line.indexOf(':') if (fieldSeparatorIndex === -1) { processField(line, '', line) return } const field = line.slice(0, fieldSeparatorIndex) // skip ':' (+1), or ': ' (+2) when a space follows const offset = line.charCodeAt(fieldSeparatorIndex + 1) === SPACE ? 2 : 1 const value = line.slice(fieldSeparatorIndex + offset) processField(field, value, line) } function processField(field: string, value: string, line: string) { // Field names must be compared literally, with no case folding performed. switch (field) { case 'event': // Set the `event type` buffer to field value eventType = value || undefined break case 'data': data = dataLines === 0 ? value : `${data}\n${value}` dataLines++ break case 'id': // If the field value does not contain U+0000 NULL, then set the `ID` buffer to // the field value. Otherwise, ignore the field. id = value.includes('\0') ? undefined : value break case 'retry': // If the field value consists of only ASCII digits, then interpret the field value as an // integer in base ten, and set the event stream's reconnection time to that integer. // Otherwise, ignore the field. if (/^\d+$/.test(value)) { onRetry(parseInt(value, 10)) } else { onError( new ParseError(`Invalid \`retry\` value: "${value}"`, { type: 'invalid-retry', value, line, }), ) } break default: // Otherwise, the field is ignored. onError( new ParseError( `Unknown field "${field.length > 20 ? `${field.slice(0, 20)}…` : field}"`, {type: 'unknown-field', field, value, line}, ), ) break } } function dispatchEvent() { if (dataLines > 0) { onEvent({ id, event: eventType, data, }) } id = undefined data = '' dataLines = 0 eventType = undefined } function reset(options: {consume?: boolean} = {}) { if (options.consume && pendingFragments.length > 0) { const incompleteLine = pendingFragments.join('') parseLine(incompleteLine, 0, incompleteLine.length) } isFirstChunk = true id = undefined data = '' dataLines = 0 eventType = undefined pendingFragments.length = 0 } return {feed, reset} } /** * Checks if `chunk` starts with the literal `data:` at index `i`. * * Equivalent to `chunk.startsWith('data:', i)`, but benchmarks show this * hand-unrolled char-code comparison is ~20% faster on common event types. * The caller passes `firstCharCode` (the code at `i`) so it can be reused * across prefix checks. * * ASCII: 'd' = 100, 'a' = 97, 't' = 116, 'a' = 97, ':' = 58 */ function isDataPrefix(chunk: string, i: number, firstCharCode: number): boolean { return ( firstCharCode === 100 && chunk.charCodeAt(i + 1) === 97 && chunk.charCodeAt(i + 2) === 116 && chunk.charCodeAt(i + 3) === 97 && chunk.charCodeAt(i + 4) === 58 ) } /** * Checks if `chunk` starts with the literal `event:` at index `i`. * * See {@link isDataPrefix} for why this is hand-unrolled rather than using * `String.prototype.startsWith`. * * ASCII: 'e' = 101, 'v' = 118, 'e' = 101, 'n' = 110, 't' = 116, ':' = 58 */ function isEventPrefix(chunk: string, i: number, firstCharCode: number): boolean { return ( firstCharCode === 101 && chunk.charCodeAt(i + 1) === 118 && chunk.charCodeAt(i + 2) === 101 && chunk.charCodeAt(i + 3) === 110 && chunk.charCodeAt(i + 4) === 116 && chunk.charCodeAt(i + 5) === 58 ) }