// smart-management-auto-test/node_modules/eventsource-parser/src/parse.ts

/**
 * EventSource/Server-Sent Events parser
 * @see https://html.spec.whatwg.org/multipage/server-sent-events.html
 */
import {ParseError} from './errors.ts'
import type {EventSourceParser, ParserCallbacks} from './types.ts'

// Character codes compared against `charCodeAt()` results in the hot parsing paths.
const LF = 0x0a // '\n' (line feed)
const CR = 0x0d // '\r' (carriage return)
const SPACE = 0x20 // ' '

// Default callback used when the caller does not supply one: accepts a single
// argument, ignores it, and returns nothing.
// oxlint-disable-next-line no-unused-vars
function noop(_arg: unknown): void {
  return
}

/**
 * Creates a new EventSource parser.
 *
 * @param callbacks - Callbacks to invoke on different parsing events:
 *   - `onEvent` when a new event is parsed
 *   - `onError` when an error occurs
 *   - `onRetry` when a new reconnection interval has been sent from the server
 *   - `onComment` when a comment is encountered in the stream
 *
 * @returns A new EventSource parser, with `feed` and `reset` methods.
 * @throws TypeError if `callbacks` is a function instead of an object.
 * @public
 */
export function createParser(callbacks: ParserCallbacks): EventSourceParser {
  if (typeof callbacks === 'function') {
    throw new TypeError(
      '`callbacks` must be an object, got a function instead. Did you mean `{onEvent: fn}`?',
    )
  }

  const {onEvent = noop, onError = noop, onRetry = noop, onComment} = callbacks

  // Trailing text from prior `feed()` calls that did not yet form a complete line.
  // Stored as an array of fragments and only joined when a line terminator arrives.
  // Concatenating per-feed (`prefix + chunk`) is O(N²) when a single SSE line spans
  // many chunks (e.g. a large `data:` payload streamed in tiny slices). Buffering as
  // fragments + joining once makes the same workload linear.
  // Invariant: fragments are never empty strings.
  const pendingFragments: string[] = []

  // True when the buffered text ends with a `\r` that `processLines` deferred because
  // it could be either a bare-CR terminator or the first half of a `\r\n` pair.
  // The very next non-empty chunk resolves the ambiguity, so it must be processed
  // immediately rather than just appended to the buffer.
  let pendingEndsWithCR = false

  let isFirstChunk = true
  let id: string | undefined
  let data = ''
  let dataLines = 0
  let eventType: string | undefined

  /**
   * Feeds a chunk of the SSE stream to the parser. Any trailing text that does
   * not yet form a complete line is held back and prepended to the next chunk,
   * so callers can pass arbitrary slices of the stream without worrying about
   * line boundaries.
   *
   * A byte order mark at the very start of the stream is stripped before parsing.
   * Both the decoded form (U+FEFF) and the three UTF-8 BOM bytes surfaced as
   * individual characters (`\xEF\xBB\xBF`) are recognized.
   *
   * @param chunk - The next slice of the decoded stream. Empty chunks are ignored.
   * @see https://html.spec.whatwg.org/multipage/server-sent-events.html#parsing-an-event-stream
   */
  function feed(chunk: string) {
    // An empty chunk carries no information. Ignoring it here means it neither
    // uses up the "first chunk" BOM check nor adds an empty fragment to the buffer.
    if (chunk === '') {
      return
    }

    let input = chunk
    if (isFirstChunk) {
      isFirstChunk = false
      // The BOM is only valid at the very start of the stream.
      const leadingCharCode = input.charCodeAt(0)
      if (leadingCharCode === 0xfeff) {
        // Properly decoded text: the BOM is the single code unit U+FEFF.
        input = input.slice(1)
      } else if (
        leadingCharCode === 0xef &&
        input.charCodeAt(1) === 0xbb &&
        input.charCodeAt(2) === 0xbf
      ) {
        // Raw BOM bytes that were surfaced as three separate characters.
        input = input.slice(3)
      }
    }

    // Hot path: no buffered prefix from a prior partial line. Hand the chunk
    // straight to `processLines` (the common case: every chunk ends with `\n\n`).
    if (pendingFragments.length === 0) {
      holdBack(processLines(input))
      return
    }

    // We have a buffered prefix. If this chunk has no terminator either, append it
    // to the buffer without concatenating — that's the O(N²) trap we're avoiding.
    // Exception: when the buffer ends with a deferred `\r`, this chunk proves the
    // `\r` was a bare-CR terminator, so the buffered line must be processed now.
    if (!pendingEndsWithCR && input.indexOf('\n') === -1 && input.indexOf('\r') === -1) {
      pendingFragments.push(input)
      return
    }

    // Join the accumulated fragments + this chunk once, process, and buffer any
    // new trailing partial line.
    pendingFragments.push(input)
    const joined = pendingFragments.join('')
    pendingFragments.length = 0
    pendingEndsWithCR = false
    holdBack(processLines(joined))
  }

  /**
   * Buffers the unterminated remainder returned by `processLines` and records
   * whether it ends with a deferred `\r`.
   */
  function holdBack(trailing: string) {
    if (trailing === '') {
      pendingEndsWithCR = false
      return
    }
    pendingFragments.push(trailing)
    pendingEndsWithCR = trailing.charCodeAt(trailing.length - 1) === CR
  }

  /**
   * Splits `chunk` into SSE lines and dispatches each to the appropriate handler.
   * Returns any trailing text that did not terminate with a line break, so the
   * caller can prepend it to the next chunk. The returned text contains no line
   * terminator except, possibly, a single deferred `\r` as its last character.
   *
   * The SSE spec permits three line terminators: `\n`, `\r`, and `\r\n`. We take a
   * fast path when no `\r` is present in the chunk. The slow path handles all three
   * terminators but does more work per line.
   */
  function processLines(chunk: string): string {
    let searchIndex = 0

    // Fast path: LF-only chunk. We can scan forward with a single `indexOf('\n')`
    // per line and inline the branches for `data:` and `event:` without the CR
    // bookkeeping the slow path needs.
    if (chunk.indexOf('\r') === -1) {
      let lfIndex = chunk.indexOf('\n', searchIndex)
      while (lfIndex !== -1) {
        // Blank line: end-of-event marker. Dispatch the accumulated event (if any)
        // and reset the buffered fields (same effect as `dispatchEvent`, inlined).
        if (searchIndex === lfIndex) {
          if (dataLines > 0) {
            onEvent({id, event: eventType, data})
          }
          id = undefined
          data = ''
          dataLines = 0
          eventType = undefined
          searchIndex = lfIndex + 1
          lfIndex = chunk.indexOf('\n', searchIndex)
          continue
        }
        const firstCharCode = chunk.charCodeAt(searchIndex)
        if (isDataPrefix(chunk, searchIndex, firstCharCode)) {
          // `data:` line — append the value to the event's data buffer.
          // 'data:'.length === 5, 'data: '.length === 6
          const valueStart =
            chunk.charCodeAt(searchIndex + 5) === SPACE ? searchIndex + 6 : searchIndex + 5
          const value = chunk.slice(valueStart, lfIndex)
          // If this is the first data line AND the next char is another LF
          // (i.e. `data:foo\n\n`), dispatch immediately without ever writing to
          // the `data` buffer. `dataLines` is already 0 here, so it needs no reset.
          if (dataLines === 0 && chunk.charCodeAt(lfIndex + 1) === LF) {
            onEvent({id, event: eventType, data: value})
            id = undefined
            data = ''
            eventType = undefined
            // Skip both the line's LF and the blank line's LF.
            searchIndex = lfIndex + 2
            lfIndex = chunk.indexOf('\n', searchIndex)
            continue
          }
          // Multi-line data: concatenate with newline separator per spec.
          data = dataLines === 0 ? value : `${data}\n${value}`
          dataLines++
        } else if (isEventPrefix(chunk, searchIndex, firstCharCode)) {
          // `event:` line — set the event type for the next dispatch. An empty
          // value resets the event type to its default (undefined here).
          // 'event:'.length === 6, 'event: '.length === 7
          eventType =
            chunk.slice(
              chunk.charCodeAt(searchIndex + 6) === SPACE ? searchIndex + 7 : searchIndex + 6,
              lfIndex,
            ) || undefined
        } else {
          // Everything else: `id:`, `retry:`, comment lines (`:` prefix), unknown
          // fields, or malformed lines go through the full per-line parser.
          parseLine(chunk, searchIndex, lfIndex)
        }
        searchIndex = lfIndex + 1
        lfIndex = chunk.indexOf('\n', searchIndex)
      }
      return chunk.slice(searchIndex)
    }

    // Slow path: the chunk contains at least one `\r`, so lines may be terminated
    // by `\r`, `\n`, or `\r\n`. We locate the next terminator by looking at both
    // the nearest `\r` and `\n` and picking whichever comes first.
    while (searchIndex < chunk.length) {
      const crIndex = chunk.indexOf('\r', searchIndex)
      const lfIndex = chunk.indexOf('\n', searchIndex)

      let lineEnd = -1
      if (crIndex !== -1 && lfIndex !== -1) {
        lineEnd = crIndex < lfIndex ? crIndex : lfIndex
      } else if (crIndex !== -1) {
        // A trailing `\r` at the very end of the chunk is ambiguous: it could be
        // a bare-CR terminator, or the first half of a `\r\n` whose `\n` arrives
        // in the next chunk. Defer until we see more input.
        if (crIndex === chunk.length - 1) {
          lineEnd = -1
        } else {
          lineEnd = crIndex
        }
      } else if (lfIndex !== -1) {
        lineEnd = lfIndex
      }

      if (lineEnd === -1) {
        break
      }

      parseLine(chunk, searchIndex, lineEnd)
      searchIndex = lineEnd + 1
      // If we just consumed a `\r` and the next char is `\n`, skip it so the
      // pair is treated as a single terminator rather than an empty line.
      if (chunk.charCodeAt(searchIndex - 1) === CR && chunk.charCodeAt(searchIndex) === LF) {
        searchIndex++
      }
    }

    return chunk.slice(searchIndex)
  }

  /**
   * Parses the single line `chunk[start, end)` (terminator excluded) and updates
   * the event buffers or invokes the matching callback. An empty line dispatches
   * the buffered event.
   */
  function parseLine(chunk: string, start: number, end: number) {
    if (start === end) {
      dispatchEvent()
      return
    }

    const firstCharCode = chunk.charCodeAt(start)

    if (isDataPrefix(chunk, start, firstCharCode)) {
      // 'data:'.length === 5, 'data: '.length === 6
      const valueStart = chunk.charCodeAt(start + 5) === SPACE ? start + 6 : start + 5
      const value = chunk.slice(valueStart, end)
      data = dataLines === 0 ? value : `${data}\n${value}`
      dataLines++
      return
    }

    if (isEventPrefix(chunk, start, firstCharCode)) {
      // 'event:'.length === 6, 'event: '.length === 7
      eventType =
        chunk.slice(chunk.charCodeAt(start + 6) === SPACE ? start + 7 : start + 6, end) || undefined
      return
    }

    // Fast path for "id:" — 'i' = 105, 'd' = 100, ':' = 58
    if (
      firstCharCode === 105 &&
      chunk.charCodeAt(start + 1) === 100 &&
      chunk.charCodeAt(start + 2) === 58
    ) {
      // 'id:'.length === 3, 'id: '.length === 4
      const value = chunk.slice(chunk.charCodeAt(start + 3) === SPACE ? start + 4 : start + 3, end)
      // A value containing U+0000 NULL means the field is ignored: any id set
      // earlier in this event is left untouched.
      if (!value.includes('\0')) {
        id = value
      }
      return
    }

    // Comment line — ':' = 58
    if (firstCharCode === 58) {
      if (onComment) {
        const line = chunk.slice(start, end)
        // skip ':' (+1), or ': ' (+2) when a space follows
        onComment(line.slice(chunk.charCodeAt(start + 1) === SPACE ? 2 : 1))
      }
      return
    }

    const line = chunk.slice(start, end)
    const fieldSeparatorIndex = line.indexOf(':')
    if (fieldSeparatorIndex === -1) {
      // No colon: the whole line is the field name, with an empty value.
      processField(line, '', line)
      return
    }

    const field = line.slice(0, fieldSeparatorIndex)
    // skip ':' (+1), or ': ' (+2) when a space follows
    const offset = line.charCodeAt(fieldSeparatorIndex + 1) === SPACE ? 2 : 1
    const value = line.slice(fieldSeparatorIndex + offset)
    processField(field, value, line)
  }

  /**
   * Applies one parsed `field`/`value` pair. `line` is the full source line and is
   * only used for error reporting.
   */
  function processField(field: string, value: string, line: string) {
    // Field names must be compared literally, with no case folding performed.
    switch (field) {
      case 'event':
        // Set the `event type` buffer to field value
        eventType = value || undefined
        break
      case 'data':
        data = dataLines === 0 ? value : `${data}\n${value}`
        dataLines++
        break
      case 'id':
        // If the field value does not contain U+0000 NULL, then set the `ID` buffer to
        // the field value. Otherwise, ignore the field (keep any previously set id).
        if (!value.includes('\0')) {
          id = value
        }
        break
      case 'retry':
        // If the field value consists of only ASCII digits, then interpret the field value as an
        // integer in base ten, and report it as the new reconnection time.
        // Otherwise, report the field as invalid.
        if (/^\d+$/.test(value)) {
          onRetry(parseInt(value, 10))
        } else {
          onError(
            new ParseError(`Invalid \`retry\` value: "${value}"`, {
              type: 'invalid-retry',
              value,
              line,
            }),
          )
        }
        break
      default:
        // Unknown fields do not affect the event; they are reported via `onError`.
        onError(
          new ParseError(
            `Unknown field "${field.length > 20 ? `${field.slice(0, 20)}…` : field}"`,
            {type: 'unknown-field', field, value, line},
          ),
        )
        break
    }
  }

  /**
   * Emits the buffered event through `onEvent` if at least one `data` line was
   * seen, then clears the per-event buffers either way.
   */
  function dispatchEvent() {
    if (dataLines > 0) {
      onEvent({
        id,
        event: eventType,
        data,
      })
    }

    id = undefined
    data = ''
    dataLines = 0
    eventType = undefined
  }

  /**
   * Resets the parser to its initial state, discarding buffered fields and text.
   *
   * @param options - When `consume` is true, the buffered incomplete line (if any)
   *   is first parsed as if it had been terminated.
   */
  function reset(options: {consume?: boolean} = {}) {
    if (options.consume && pendingFragments.length > 0) {
      const incompleteLine = pendingFragments.join('')
      // A deferred trailing `\r` is a line terminator, not part of the line's content.
      const lineEnd = pendingEndsWithCR ? incompleteLine.length - 1 : incompleteLine.length
      parseLine(incompleteLine, 0, lineEnd)
    }

    isFirstChunk = true
    id = undefined
    data = ''
    dataLines = 0
    eventType = undefined
    pendingFragments.length = 0
    pendingEndsWithCR = false
  }

  return {feed, reset}
}

/**
 * Reports whether the literal `data:` occurs in `chunk` starting at index `i`.
 *
 * The first character is not read from `chunk`: the caller supplies its code as
 * `firstCharCode` so one `charCodeAt(i)` lookup can be shared between several
 * prefix checks. The remaining four characters are compared by char code, one
 * at a time, bailing out on the first mismatch.
 *
 * @param chunk - Text being scanned.
 * @param i - Index in `chunk` where the candidate prefix begins.
 * @param firstCharCode - Char code the caller read at index `i`.
 * @returns `true` when `firstCharCode` is `d` and `chunk` continues with `ata:`.
 */
function isDataPrefix(chunk: string, i: number, firstCharCode: number): boolean {
  if (firstCharCode !== 0x64) return false // 'd'
  if (chunk.charCodeAt(i + 1) !== 0x61) return false // 'a'
  if (chunk.charCodeAt(i + 2) !== 0x74) return false // 't'
  if (chunk.charCodeAt(i + 3) !== 0x61) return false // 'a'
  return chunk.charCodeAt(i + 4) === 0x3a // ':'
}

/**
 * Reports whether the literal `event:` occurs in `chunk` starting at index `i`.
 *
 * The first character is not read from `chunk`: the caller supplies its code as
 * `firstCharCode` so one `charCodeAt(i)` lookup can be shared between several
 * prefix checks. The remaining five characters are compared by char code, one
 * at a time, bailing out on the first mismatch.
 *
 * @param chunk - Text being scanned.
 * @param i - Index in `chunk` where the candidate prefix begins.
 * @param firstCharCode - Char code the caller read at index `i`.
 * @returns `true` when `firstCharCode` is `e` and `chunk` continues with `vent:`.
 */
function isEventPrefix(chunk: string, i: number, firstCharCode: number): boolean {
  if (firstCharCode !== 0x65) return false // 'e'
  if (chunk.charCodeAt(i + 1) !== 0x76) return false // 'v'
  if (chunk.charCodeAt(i + 2) !== 0x65) return false // 'e'
  if (chunk.charCodeAt(i + 3) !== 0x6e) return false // 'n'
  if (chunk.charCodeAt(i + 4) !== 0x74) return false // 't'
  return chunk.charCodeAt(i + 5) === 0x3a // ':'
}