`: generic parser that hides inline tags and extracts their contents
+- `CitationStreamParser`: convenience wrapper for `<oai-mem-citation>...</oai-mem-citation>`
+- `strip_citations(...)`: one-shot helper for non-streamed strings
+- `Utf8StreamParser`: adapter for raw `&[u8]` streams that may split UTF-8 code points
+
+## Why this exists
+
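+Some model outputs arrive as a stream and may contain hidden markup (for example
+`<oai-mem-citation>...</oai-mem-citation>`) split across chunk boundaries. Parsing each chunk
+independently is incorrect because tags can be split (for example `<oai-mem-` at the end of
+one chunk and `citation>` at the start of the next).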
+
+This crate keeps parser state across chunks, returns visible text safe to render
+immediately, and extracts hidden payloads separately.
+
+## Example: citation streaming
+
+```rust
+use codex_utils_stream_parser::CitationStreamParser;
+use codex_utils_stream_parser::StreamTextParser;
+
+let mut parser = CitationStreamParser::new();
+
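+let first = parser.push_str("Hello <oai-mem-citation>doc A</oai-mem-");
+assert_eq!(first.visible_text, "Hello ");
+assert!(first.extracted.is_empty());
+
+let second = parser.push_str("citation> world");
+assert_eq!(second.visible_text, " world");
+assert_eq!(second.extracted, vec!["doc A".to_string()]);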
+
+let tail = parser.finish();
+assert!(tail.visible_text.is_empty());
+assert!(tail.extracted.is_empty());
+```
+
+## Example: raw byte streaming with split UTF-8 code points
+
+```rust
+use codex_utils_stream_parser::CitationStreamParser;
+use codex_utils_stream_parser::Utf8StreamParser;
+
+# fn demo() -> Result<(), codex_utils_stream_parser::Utf8StreamParserError> {
+let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
+
+// "é" split across chunks: 0xC3 + 0xA9
+let first = parser.push_bytes(&[b'H', 0xC3])?;
+assert_eq!(first.visible_text, "H");
+
+let second = parser.push_bytes(&[0xA9, b'!'])?;
+assert_eq!(second.visible_text, "é!");
+
+let tail = parser.finish()?;
+assert!(tail.visible_text.is_empty());
+# Ok(())
+# }
+```
+
+## Example: custom hidden tags
+
+```rust
+use codex_utils_stream_parser::InlineHiddenTagParser;
+use codex_utils_stream_parser::InlineTagSpec;
+use codex_utils_stream_parser::StreamTextParser;
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+enum Tag {
+ Secret,
+}
+
+let mut parser = InlineHiddenTagParser::new(vec![InlineTagSpec {
+    tag: Tag::Secret,
+    open: "<secret>",
+    close: "</secret>",
+}]);
+
+let out = parser.push_str("a<secret>x</secret>b");
+assert_eq!(out.visible_text, "ab");
+assert_eq!(out.extracted.len(), 1);
+assert_eq!(out.extracted[0].content, "x");
+```
+
+## Known limitations
+
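+- Tags are matched literally and case-sensitively.
+- Nested tags are not supported.
+- A single `push_str` or `finish` call may return an empty chunk (no visible text and no
+  extracted payloads), for example while a possible partial tag is still being buffered.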
\ No newline at end of file
diff --git a/codex-rs/utils/stream-parser/src/assistant_text.rs b/codex-rs/utils/stream-parser/src/assistant_text.rs
new file mode 100644
index 000000000..931c30bd5
--- /dev/null
+++ b/codex-rs/utils/stream-parser/src/assistant_text.rs
@@ -0,0 +1,130 @@
+use crate::CitationStreamParser;
+use crate::ProposedPlanParser;
+use crate::ProposedPlanSegment;
+use crate::StreamTextChunk;
+use crate::StreamTextParser;
+
+/// Output of one [`AssistantTextStreamParser`] step.
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub struct AssistantTextChunk {
+    /// Assistant text left after hidden markup has been removed.
+    pub visible_text: String,
+    /// Citation bodies extracted while producing this chunk.
+    pub citations: Vec<String>,
+    /// Plan segments produced while producing this chunk; stays empty outside plan mode.
+    pub plan_segments: Vec<ProposedPlanSegment>,
+}
+
+impl AssistantTextChunk {
+    /// Returns `true` when the chunk carries no visible text, citations, or plan segments.
+    pub fn is_empty(&self) -> bool {
+        let Self {
+            visible_text,
+            citations,
+            plan_segments,
+        } = self;
+        visible_text.is_empty() && citations.is_empty() && plan_segments.is_empty()
+    }
+}
+
+/// Parses assistant text streaming markup in one pass:
+/// - strips `<oai-mem-citation>` tags and extracts citation payloads
+/// - in plan mode, also strips `<proposed_plan>` blocks and emits plan segments
+#[derive(Debug, Default)]
+pub struct AssistantTextStreamParser {
+    plan_mode: bool,
+    citations: CitationStreamParser,
+    plan: ProposedPlanParser,
+}
+
+impl AssistantTextStreamParser {
+    /// Creates a parser. When `plan_mode` is `true`, text that survives citation stripping is
+    /// additionally run through the proposed-plan parser.
+    pub fn new(plan_mode: bool) -> Self {
+        Self {
+            plan_mode,
+            ..Self::default()
+        }
+    }
+
+    /// Feeds one streamed chunk and returns the visible text, citations, and (in plan mode)
+    /// plan segments that can be emitted so far.
+    pub fn push_str(&mut self, chunk: &str) -> AssistantTextChunk {
+        let citation_chunk = self.citations.push_str(chunk);
+        let mut out = self.parse_visible_text(citation_chunk.visible_text);
+        out.citations = citation_chunk.extracted;
+        out
+    }
+
+    /// Flushes both parsers at end of stream and returns whatever they were still buffering.
+    pub fn finish(&mut self) -> AssistantTextChunk {
+        let citation_chunk = self.citations.finish();
+        let mut out = self.parse_visible_text(citation_chunk.visible_text);
+        if self.plan_mode {
+            // Flush the plan parser only after it has seen the citation parser's tail.
+            let tail = self.plan.finish();
+            out.visible_text.push_str(&tail.visible_text);
+            out.plan_segments.extend(tail.extracted);
+        }
+        out.citations = citation_chunk.extracted;
+        out
+    }
+
+    /// Routes citation-stripped text through the plan parser in plan mode; otherwise passes it
+    /// through unchanged as visible text.
+    fn parse_visible_text(&mut self, visible_text: String) -> AssistantTextChunk {
+        if !self.plan_mode {
+            return AssistantTextChunk {
+                visible_text,
+                ..AssistantTextChunk::default()
+            };
+        }
+        let plan_chunk: StreamTextChunk<ProposedPlanSegment> = self.plan.push_str(&visible_text);
+        AssistantTextChunk {
+            visible_text: plan_chunk.visible_text,
+            plan_segments: plan_chunk.extracted,
+            ..AssistantTextChunk::default()
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::AssistantTextStreamParser;
+    use crate::ProposedPlanSegment;
+    use pretty_assertions::assert_eq;
+
+    /// A citation split across two pushes is extracted once its close tag arrives.
+    #[test]
+    fn parses_citations_across_seed_and_delta_boundaries() {
+        let mut parser = AssistantTextStreamParser::new(false);
+
+        let seeded = parser.push_str("hello <oai-mem-citation>doc");
+        let parsed = parser.push_str("1</oai-mem-citation> world");
+        let tail = parser.finish();
+
+        assert_eq!(seeded.visible_text, "hello ");
+        assert_eq!(seeded.citations, Vec::<String>::new());
+        assert_eq!(parsed.visible_text, " world");
+        assert_eq!(parsed.citations, vec!["doc1".to_string()]);
+        assert_eq!(tail.visible_text, "");
+        assert_eq!(tail.citations, Vec::<String>::new());
+    }
+
+    /// In plan mode the plan parser only sees text that already had citations removed.
+    #[test]
+    fn parses_plan_segments_after_citation_stripping() {
+        let mut parser = AssistantTextStreamParser::new(true);
+
+        let seeded = parser.push_str("Intro\n<proposed");
+        let parsed =
+            parser.push_str("_plan>\n- step <oai-mem-citation>doc</oai-mem-citation>\n");
+        let tail = parser.push_str("</proposed_plan>\nOutro");
+        let finish = parser.finish();
+
+        assert_eq!(seeded.visible_text, "Intro\n");
+        assert_eq!(
+            seeded.plan_segments,
+            vec![ProposedPlanSegment::Normal("Intro\n".to_string())]
+        );
+        assert_eq!(parsed.visible_text, "");
+        assert_eq!(parsed.citations, vec!["doc".to_string()]);
+        assert_eq!(
+            parsed.plan_segments,
+            vec![
+                ProposedPlanSegment::ProposedPlanStart,
+                ProposedPlanSegment::ProposedPlanDelta("- step \n".to_string()),
+            ]
+        );
+        assert_eq!(tail.visible_text, "Outro");
+        assert_eq!(
+            tail.plan_segments,
+            vec![
+                ProposedPlanSegment::ProposedPlanEnd,
+                ProposedPlanSegment::Normal("Outro".to_string()),
+            ]
+        );
+        assert!(finish.is_empty());
+    }
+}
diff --git a/codex-rs/utils/stream-parser/src/citation.rs b/codex-rs/utils/stream-parser/src/citation.rs
new file mode 100644
index 000000000..d7be6dd5f
--- /dev/null
+++ b/codex-rs/utils/stream-parser/src/citation.rs
@@ -0,0 +1,179 @@
+use crate::InlineHiddenTagParser;
+use crate::InlineTagSpec;
+use crate::StreamTextChunk;
+use crate::StreamTextParser;
+
+/// Tag identifier for the single citation spec registered with the inner parser.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum CitationTag {
+    Citation,
+}
+
+// Literal delimiters of a citation; `InlineHiddenTagParser::new` rejects empty delimiters.
+const CITATION_OPEN: &str = "<oai-mem-citation>";
+const CITATION_CLOSE: &str = "</oai-mem-citation>";
+
+/// Stream parser for `<oai-mem-citation>...</oai-mem-citation>` tags.
+///
+/// This is a thin convenience wrapper around [`InlineHiddenTagParser`]. It returns citation bodies
+/// as plain strings and omits the citation tags from visible text.
+///
+/// Matching is literal and non-nested. If EOF is reached before a closing
+/// `</oai-mem-citation>`, the parser auto-closes the tag and returns the buffered body as an
+/// extracted citation.
+#[derive(Debug)]
+pub struct CitationStreamParser {
+    inner: InlineHiddenTagParser<CitationTag>,
+}
+
+impl CitationStreamParser {
+    /// Builds a parser with the single citation tag spec registered.
+    pub fn new() -> Self {
+        let citation_spec = InlineTagSpec {
+            tag: CitationTag::Citation,
+            open: CITATION_OPEN,
+            close: CITATION_CLOSE,
+        };
+        let inner = InlineHiddenTagParser::new(vec![citation_spec]);
+        Self { inner }
+    }
+}
+
+impl Default for CitationStreamParser {
+    /// Equivalent to [`CitationStreamParser::new`].
+    fn default() -> Self {
+        CitationStreamParser::new()
+    }
+}
+
+impl StreamTextParser for CitationStreamParser {
+    type Extracted = String;
+
+    /// Feeds a chunk to the inner parser and keeps only the body of each extracted citation.
+    fn push_str(&mut self, chunk: &str) -> StreamTextChunk<Self::Extracted> {
+        let inner = self.inner.push_str(chunk);
+        StreamTextChunk {
+            visible_text: inner.visible_text,
+            extracted: inner.extracted.into_iter().map(|tag| tag.content).collect(),
+        }
+    }
+
+    /// Flushes the inner parser and keeps only the body of each extracted citation.
+    fn finish(&mut self) -> StreamTextChunk<Self::Extracted> {
+        let inner = self.inner.finish();
+        StreamTextChunk {
+            visible_text: inner.visible_text,
+            extracted: inner.extracted.into_iter().map(|tag| tag.content).collect(),
+        }
+    }
+}
+
+/// Strip citation tags from a complete string and return `(visible_text, citations)`.
+///
+/// This uses [`CitationStreamParser`] internally, so it inherits the same semantics:
+/// literal, non-nested matching and auto-closing unterminated citations at EOF.
+pub fn strip_citations(text: &str) -> (String, Vec<String>) {
+    let mut parser = CitationStreamParser::new();
+    let mut out = parser.push_str(text);
+    // `finish` releases anything still buffered (partial opener or unterminated citation).
+    let tail = parser.finish();
+    out.visible_text.push_str(&tail.visible_text);
+    out.extracted.extend(tail.extracted);
+    (out.visible_text, out.extracted)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::CitationStreamParser;
+ use super::strip_citations;
+ use crate::StreamTextChunk;
+ use crate::StreamTextParser;
+ use pretty_assertions::assert_eq;
+
+ fn collect_chunks(parser: &mut P, chunks: &[&str]) -> StreamTextChunk
+ where
+ P: StreamTextParser,
+ {
+ let mut all = StreamTextChunk::default();
+ for chunk in chunks {
+ let next = parser.push_str(chunk);
+ all.visible_text.push_str(&next.visible_text);
+ all.extracted.extend(next.extracted);
+ }
+ let tail = parser.finish();
+ all.visible_text.push_str(&tail.visible_text);
+ all.extracted.extend(tail.extracted);
+ all
+ }
+
+ #[test]
+ fn citation_parser_streams_across_chunk_boundaries() {
+ let mut parser = CitationStreamParser::new();
+ let out = collect_chunks(
+ &mut parser,
+ &[
+ "Hello source A world",
+ ],
+ );
+
+ assert_eq!(out.visible_text, "Hello world");
+ assert_eq!(out.extracted, vec!["source A".to_string()]);
+ }
+
+ #[test]
+ fn citation_parser_buffers_partial_open_tag_prefix() {
+ let mut parser = CitationStreamParser::new();
+
+ let first = parser.push_str("abc ::new());
+
+ let second = parser.push_str("citation>xz");
+ let tail = parser.finish();
+
+ assert_eq!(second.visible_text, "z");
+ assert_eq!(second.extracted, vec!["x".to_string()]);
+ assert!(tail.is_empty());
+ }
+
+ #[test]
+ fn citation_parser_auto_closes_unterminated_tag_on_finish() {
+ let mut parser = CitationStreamParser::new();
+ let out = collect_chunks(&mut parser, &["xsource"]);
+
+ assert_eq!(out.visible_text, "x");
+ assert_eq!(out.extracted, vec!["source".to_string()]);
+ }
+
+ #[test]
+ fn citation_parser_preserves_partial_open_tag_at_eof_if_not_a_full_tag() {
+ let mut parser = CitationStreamParser::new();
+ let out = collect_chunks(&mut parser, &["hello ::new());
+ }
+
+ #[test]
+ fn strip_citations_collects_all_citations() {
+ let (visible, citations) = strip_citations(
+ "aonebtwoc",
+ );
+
+ assert_eq!(visible, "abc");
+ assert_eq!(citations, vec!["one".to_string(), "two".to_string()]);
+ }
+
+ #[test]
+ fn strip_citations_auto_closes_unterminated_citation_at_eof() {
+ let (visible, citations) = strip_citations("xy");
+
+ assert_eq!(visible, "x");
+ assert_eq!(citations, vec!["y".to_string()]);
+ }
+
+ #[test]
+ fn citation_parser_does_not_support_nested_tags() {
+ let (visible, citations) = strip_citations(
+ "axyzb",
+ );
+
+ assert_eq!(visible, "azb");
+ assert_eq!(citations, vec!["xy".to_string()]);
+ }
+}
diff --git a/codex-rs/utils/stream-parser/src/inline_hidden_tag.rs b/codex-rs/utils/stream-parser/src/inline_hidden_tag.rs
new file mode 100644
index 000000000..0b7501cdb
--- /dev/null
+++ b/codex-rs/utils/stream-parser/src/inline_hidden_tag.rs
@@ -0,0 +1,323 @@
+use crate::StreamTextChunk;
+use crate::StreamTextParser;
+
+/// One hidden inline tag extracted by [`InlineHiddenTagParser`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ExtractedInlineTag<T> {
+    /// Identifier of the spec whose delimiters matched.
+    pub tag: T,
+    /// Text found between the open and close delimiters.
+    pub content: String,
+}
+
+/// Literal tag specification used by [`InlineHiddenTagParser`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct InlineTagSpec<T> {
+    /// Identifier reported with every extraction made through this spec.
+    pub tag: T,
+    /// Literal opening delimiter.
+    pub open: &'static str,
+    /// Literal closing delimiter.
+    pub close: &'static str,
+}
+
+/// State of the tag that is currently open, if any.
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct ActiveTag<T> {
+    tag: T,
+    /// Close delimiter that ends this tag.
+    close: &'static str,
+    /// Hidden content accumulated so far.
+    content: String,
+}
+
+/// Generic streaming parser that hides configured inline tags and extracts their contents.
+///
+/// Example:
+/// - input: `hello <oai-mem-citation>doc A</oai-mem-citation> world`
+/// - visible output: `hello  world`
+/// - extracted: `["doc A"]`
+///
+/// Matching is literal and non-nested. If EOF is reached while a tag is still open, the parser
+/// auto-closes it and returns the buffered content as extracted data.
+#[derive(Debug)]
+pub struct InlineHiddenTagParser<T>
+where
+    T: Clone + Eq,
+{
+    specs: Vec<InlineTagSpec<T>>,
+    /// Input received but not yet emitted as visible text or tag content.
+    pending: String,
+    active: Option<ActiveTag<T>>,
+}
+
+impl<T> InlineHiddenTagParser<T>
+where
+    T: Clone + Eq,
+{
+    /// Create a parser for one or more hidden inline tags.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `specs` is empty or if any spec has an empty `open` or `close` delimiter.
+    pub fn new(specs: Vec<InlineTagSpec<T>>) -> Self {
+        assert!(
+            !specs.is_empty(),
+            "InlineHiddenTagParser requires at least one tag spec"
+        );
+        for spec in &specs {
+            assert!(
+                !spec.open.is_empty(),
+                "InlineHiddenTagParser requires non-empty open delimiters"
+            );
+            assert!(
+                !spec.close.is_empty(),
+                "InlineHiddenTagParser requires non-empty close delimiters"
+            );
+        }
+        Self {
+            specs,
+            pending: String::new(),
+            active: None,
+        }
+    }
+
+    /// Finds the earliest opener in `pending`, returning `(byte offset, spec index)`.
+    ///
+    /// Ties at the same offset go to the longest opener, then to the lowest spec index.
+    fn find_next_open(&self) -> Option<(usize, usize)> {
+        self.specs
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, spec)| {
+                self.pending
+                    .find(spec.open)
+                    .map(|pos| (pos, spec.open.len(), idx))
+            })
+            .min_by(|(pos_a, len_a, idx_a), (pos_b, len_b, idx_b)| {
+                pos_a
+                    .cmp(pos_b)
+                    .then_with(|| len_b.cmp(len_a))
+                    .then_with(|| idx_a.cmp(idx_b))
+            })
+            .map(|(pos, _len, idx)| (pos, idx))
+    }
+
+    /// Length of the longest suffix of `pending` that is a proper prefix of any opener.
+    fn max_open_prefix_suffix_len(&self) -> usize {
+        self.specs
+            .iter()
+            .map(|spec| longest_suffix_prefix_len(&self.pending, spec.open))
+            .max()
+            .unwrap_or(0)
+    }
+
+    /// Appends `pending` to the chunk's visible text.
+    fn push_visible_prefix(out: &mut StreamTextChunk<ExtractedInlineTag<T>>, pending: &str) {
+        if !pending.is_empty() {
+            out.visible_text.push_str(pending);
+        }
+    }
+
+    /// Emits everything in `pending` except the last `keep_suffix_len` bytes as visible text.
+    fn drain_visible_to_suffix_match(
+        &mut self,
+        out: &mut StreamTextChunk<ExtractedInlineTag<T>>,
+        keep_suffix_len: usize,
+    ) {
+        let take = self.pending.len().saturating_sub(keep_suffix_len);
+        if take == 0 {
+            return;
+        }
+        Self::push_visible_prefix(out, &self.pending[..take]);
+        self.pending.drain(..take);
+    }
+}
+
+impl<T> StreamTextParser for InlineHiddenTagParser<T>
+where
+    T: Clone + Eq,
+{
+    type Extracted = ExtractedInlineTag<T>;
+
+    /// Appends `chunk` to the pending buffer and emits everything that can be classified now.
+    fn push_str(&mut self, chunk: &str) -> StreamTextChunk<Self::Extracted> {
+        self.pending.push_str(chunk);
+        let mut out = StreamTextChunk::default();
+
+        loop {
+            if let Some(close) = self.active.as_ref().map(|active| active.close) {
+                // Inside a tag: everything up to the close delimiter is hidden content.
+                if let Some(close_idx) = self.pending.find(close) {
+                    let Some(mut active) = self.active.take() else {
+                        continue;
+                    };
+                    active.content.push_str(&self.pending[..close_idx]);
+                    out.extracted.push(ExtractedInlineTag {
+                        tag: active.tag,
+                        content: active.content,
+                    });
+                    let close_len = close.len();
+                    self.pending.drain(..close_idx + close_len);
+                    continue;
+                }
+
+                // No full close yet: hold back only a suffix that may be the start of the
+                // close delimiter and move the rest into the tag content.
+                let keep = longest_suffix_prefix_len(&self.pending, close);
+                let take = self.pending.len().saturating_sub(keep);
+                if take > 0 {
+                    if let Some(active) = self.active.as_mut() {
+                        active.content.push_str(&self.pending[..take]);
+                    }
+                    self.pending.drain(..take);
+                }
+                break;
+            }
+
+            // Outside a tag: text before the next opener is visible.
+            if let Some((open_idx, spec_idx)) = self.find_next_open() {
+                Self::push_visible_prefix(&mut out, &self.pending[..open_idx]);
+                let spec = &self.specs[spec_idx];
+                let open_len = spec.open.len();
+                self.pending.drain(..open_idx + open_len);
+                self.active = Some(ActiveTag {
+                    tag: spec.tag.clone(),
+                    close: spec.close,
+                    content: String::new(),
+                });
+                continue;
+            }
+
+            // No opener: hold back only a suffix that may be the start of one.
+            let keep = self.max_open_prefix_suffix_len();
+            self.drain_visible_to_suffix_match(&mut out, keep);
+            break;
+        }
+
+        out
+    }
+
+    /// Flushes the buffer: an open tag is auto-closed with the pending text as its content,
+    /// otherwise the pending text is released as visible text.
+    fn finish(&mut self) -> StreamTextChunk<Self::Extracted> {
+        let mut out = StreamTextChunk::default();
+
+        if let Some(mut active) = self.active.take() {
+            if !self.pending.is_empty() {
+                active.content.push_str(&self.pending);
+                self.pending.clear();
+            }
+            out.extracted.push(ExtractedInlineTag {
+                tag: active.tag,
+                content: active.content,
+            });
+            return out;
+        }
+
+        if !self.pending.is_empty() {
+            out.visible_text.push_str(&self.pending);
+            self.pending.clear();
+        }
+
+        out
+    }
+}
+
+/// Returns the byte length of the longest proper prefix of `needle` that `s` ends with, or 0
+/// when there is none. A complete `needle` at the end of `s` is deliberately not counted.
+fn longest_suffix_prefix_len(s: &str, needle: &str) -> usize {
+    let longest_candidate = needle.len().saturating_sub(1).min(s.len());
+    (1..=longest_candidate)
+        .rev()
+        // Check the boundary first: slicing `needle` inside a code point would panic.
+        .find(|&len| needle.is_char_boundary(len) && s.ends_with(&needle[..len]))
+        .unwrap_or(0)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::InlineHiddenTagParser;
+ use super::InlineTagSpec;
+ use crate::StreamTextChunk;
+ use crate::StreamTextParser;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ enum Tag {
+ A,
+ B,
+ }
+
+ fn collect_chunks(parser: &mut P, chunks: &[&str]) -> StreamTextChunk
+ where
+ P: StreamTextParser,
+ {
+ let mut all = StreamTextChunk::default();
+ for chunk in chunks {
+ let next = parser.push_str(chunk);
+ all.visible_text.push_str(&next.visible_text);
+ all.extracted.extend(next.extracted);
+ }
+ let tail = parser.finish();
+ all.visible_text.push_str(&tail.visible_text);
+ all.extracted.extend(tail.extracted);
+ all
+ }
+
+ #[test]
+ fn generic_inline_parser_supports_multiple_tag_types() {
+ let mut parser = InlineHiddenTagParser::new(vec![
+ InlineTagSpec {
+ tag: Tag::A,
+ open: "",
+ close: "",
+ },
+ InlineTagSpec {
+ tag: Tag::B,
+ open: "",
+ close: "",
+ },
+ ]);
+
+ let out = collect_chunks(&mut parser, &["1x2y3"]);
+
+ assert_eq!(out.visible_text, "123");
+ assert_eq!(out.extracted.len(), 2);
+ assert_eq!(out.extracted[0].tag, Tag::A);
+ assert_eq!(out.extracted[0].content, "x");
+ assert_eq!(out.extracted[1].tag, Tag::B);
+ assert_eq!(out.extracted[1].content, "y");
+ }
+
+ #[test]
+ fn generic_inline_parser_supports_non_ascii_tag_delimiters() {
+ let mut parser = InlineHiddenTagParser::new(vec![InlineTagSpec {
+ tag: Tag::A,
+ open: "<é>",
+ close: "é>",
+ }]);
+
+ let out = collect_chunks(&mut parser, &["a<", "é>中", "é>b"]);
+
+ assert_eq!(out.visible_text, "ab");
+ assert_eq!(out.extracted.len(), 1);
+ assert_eq!(out.extracted[0].tag, Tag::A);
+ assert_eq!(out.extracted[0].content, "中");
+ }
+
+ #[test]
+ fn generic_inline_parser_prefers_longest_opener_at_same_offset() {
+ let mut parser = InlineHiddenTagParser::new(vec![
+ InlineTagSpec {
+ tag: Tag::A,
+ open: "",
+ close: "",
+ },
+ InlineTagSpec {
+ tag: Tag::B,
+ open: "",
+ close: " ",
+ },
+ ]);
+
+ let out = collect_chunks(&mut parser, &["xy z"]);
+
+ assert_eq!(out.visible_text, "xz");
+ assert_eq!(out.extracted.len(), 1);
+ assert_eq!(out.extracted[0].tag, Tag::B);
+ assert_eq!(out.extracted[0].content, "y");
+ }
+
+ #[test]
+ #[should_panic(expected = "non-empty open delimiters")]
+ fn generic_inline_parser_rejects_empty_open_delimiter() {
+ let _ = InlineHiddenTagParser::new(vec![InlineTagSpec {
+ tag: Tag::A,
+ open: "",
+ close: "",
+ }]);
+ }
+
+ #[test]
+ #[should_panic(expected = "non-empty close delimiters")]
+ fn generic_inline_parser_rejects_empty_close_delimiter() {
+ let _ = InlineHiddenTagParser::new(vec![InlineTagSpec {
+ tag: Tag::A,
+ open: "",
+ close: "",
+ }]);
+ }
+}
diff --git a/codex-rs/utils/stream-parser/src/lib.rs b/codex-rs/utils/stream-parser/src/lib.rs
new file mode 100644
index 000000000..2cf91ed45
--- /dev/null
+++ b/codex-rs/utils/stream-parser/src/lib.rs
@@ -0,0 +1,23 @@
+mod assistant_text;
+mod citation;
+mod inline_hidden_tag;
+mod proposed_plan;
+mod stream_text;
+mod tagged_line_parser;
+mod utf8_stream;
+
+pub use assistant_text::AssistantTextChunk;
+pub use assistant_text::AssistantTextStreamParser;
+pub use citation::CitationStreamParser;
+pub use citation::strip_citations;
+pub use inline_hidden_tag::ExtractedInlineTag;
+pub use inline_hidden_tag::InlineHiddenTagParser;
+pub use inline_hidden_tag::InlineTagSpec;
+pub use proposed_plan::ProposedPlanParser;
+pub use proposed_plan::ProposedPlanSegment;
+pub use proposed_plan::extract_proposed_plan_text;
+pub use proposed_plan::strip_proposed_plan_blocks;
+pub use stream_text::StreamTextChunk;
+pub use stream_text::StreamTextParser;
+pub use utf8_stream::Utf8StreamParser;
+pub use utf8_stream::Utf8StreamParserError;
diff --git a/codex-rs/utils/stream-parser/src/proposed_plan.rs b/codex-rs/utils/stream-parser/src/proposed_plan.rs
new file mode 100644
index 000000000..cd3a2a352
--- /dev/null
+++ b/codex-rs/utils/stream-parser/src/proposed_plan.rs
@@ -0,0 +1,212 @@
+use crate::StreamTextChunk;
+use crate::StreamTextParser;
+use crate::tagged_line_parser::TagSpec;
+use crate::tagged_line_parser::TaggedLineParser;
+use crate::tagged_line_parser::TaggedLineSegment;
+
+// Literal tag lines that open and close a proposed-plan block.
+const OPEN_TAG: &str = "<proposed_plan>";
+const CLOSE_TAG: &str = "</proposed_plan>";
+
+/// Tag identifier for the single plan spec registered with the line parser.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum PlanTag {
+    ProposedPlan,
+}
+
+/// One ordered piece of output produced by [`ProposedPlanParser`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ProposedPlanSegment {
+    /// Text outside any plan block; the same text is also appended to `visible_text`.
+    Normal(String),
+    /// A plan block opened.
+    ProposedPlanStart,
+    /// Text inside the currently open plan block.
+    ProposedPlanDelta(String),
+    /// The plan block closed.
+    ProposedPlanEnd,
+}
+
+/// Parser for `<proposed_plan>` blocks emitted in plan mode.
+///
+/// Implements [`StreamTextParser`] so callers can consume:
+/// - `visible_text`: normal assistant text with plan blocks removed
+/// - `extracted`: ordered plan segments (includes `Normal(...)` segments for ordering fidelity)
+#[derive(Debug)]
+pub struct ProposedPlanParser {
+    parser: TaggedLineParser<PlanTag>,
+}
+
+impl ProposedPlanParser {
+    /// Builds a parser that recognizes the single proposed-plan tag pair.
+    pub fn new() -> Self {
+        let plan_spec = TagSpec {
+            open: OPEN_TAG,
+            close: CLOSE_TAG,
+            tag: PlanTag::ProposedPlan,
+        };
+        let parser = TaggedLineParser::new(vec![plan_spec]);
+        Self { parser }
+    }
+}
+
+impl Default for ProposedPlanParser {
+    /// Equivalent to [`ProposedPlanParser::new`].
+    fn default() -> Self {
+        ProposedPlanParser::new()
+    }
+}
+
+impl StreamTextParser for ProposedPlanParser {
+    type Extracted = ProposedPlanSegment;
+
+    /// Parses one streamed chunk into plan segments and visible text.
+    fn push_str(&mut self, chunk: &str) -> StreamTextChunk<Self::Extracted> {
+        map_segments(self.parser.parse(chunk))
+    }
+
+    /// Flushes the underlying line parser and maps whatever it still held.
+    fn finish(&mut self) -> StreamTextChunk<Self::Extracted> {
+        map_segments(self.parser.finish())
+    }
+}
+
+/// Converts line-parser segments into plan segments, mirroring `Normal` text into
+/// `visible_text` so that only text outside plan blocks is rendered.
+fn map_segments(
+    segments: Vec<TaggedLineSegment<PlanTag>>,
+) -> StreamTextChunk<ProposedPlanSegment> {
+    let mut out = StreamTextChunk::default();
+    for segment in segments {
+        let mapped = match segment {
+            TaggedLineSegment::Normal(text) => {
+                out.visible_text.push_str(&text);
+                ProposedPlanSegment::Normal(text)
+            }
+            TaggedLineSegment::TagStart(PlanTag::ProposedPlan) => {
+                ProposedPlanSegment::ProposedPlanStart
+            }
+            TaggedLineSegment::TagDelta(PlanTag::ProposedPlan, text) => {
+                ProposedPlanSegment::ProposedPlanDelta(text)
+            }
+            TaggedLineSegment::TagEnd(PlanTag::ProposedPlan) => {
+                ProposedPlanSegment::ProposedPlanEnd
+            }
+        };
+        out.extracted.push(mapped);
+    }
+    out
+}
+
+/// Returns `text` with plan blocks removed, keeping only the parser's visible text.
+pub fn strip_proposed_plan_blocks(text: &str) -> String {
+    let mut parser = ProposedPlanParser::new();
+    let streamed = parser.push_str(text);
+    let flushed = parser.finish();
+    let mut visible = streamed.visible_text;
+    visible.push_str(&flushed.visible_text);
+    visible
+}
+
+/// Returns the text inside the last plan block found in `text`, or `None` when no plan block
+/// is opened at all.
+pub fn extract_proposed_plan_text(text: &str) -> Option<String> {
+    let mut parser = ProposedPlanParser::new();
+    let mut plan_text = String::new();
+    let mut saw_plan_block = false;
+    let streamed = parser.push_str(text).extracted;
+    let flushed = parser.finish().extracted;
+    for segment in streamed.into_iter().chain(flushed) {
+        match segment {
+            ProposedPlanSegment::ProposedPlanStart => {
+                saw_plan_block = true;
+                // A later block replaces the text of any earlier one.
+                plan_text.clear();
+            }
+            ProposedPlanSegment::ProposedPlanDelta(delta) => {
+                plan_text.push_str(&delta);
+            }
+            ProposedPlanSegment::ProposedPlanEnd | ProposedPlanSegment::Normal(_) => {}
+        }
+    }
+    saw_plan_block.then_some(plan_text)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::ProposedPlanParser;
+ use super::ProposedPlanSegment;
+ use super::extract_proposed_plan_text;
+ use super::strip_proposed_plan_blocks;
+ use crate::StreamTextChunk;
+ use crate::StreamTextParser;
+ use pretty_assertions::assert_eq;
+
+ fn collect_chunks(parser: &mut P, chunks: &[&str]) -> StreamTextChunk
+ where
+ P: StreamTextParser,
+ {
+ let mut all = StreamTextChunk::default();
+ for chunk in chunks {
+ let next = parser.push_str(chunk);
+ all.visible_text.push_str(&next.visible_text);
+ all.extracted.extend(next.extracted);
+ }
+ let tail = parser.finish();
+ all.visible_text.push_str(&tail.visible_text);
+ all.extracted.extend(tail.extracted);
+ all
+ }
+
+ #[test]
+ fn streams_proposed_plan_segments_and_visible_text() {
+ let mut parser = ProposedPlanParser::new();
+ let out = collect_chunks(
+ &mut parser,
+ &[
+ "Intro text\n\n- step 1\n",
+ "
\nOutro",
+ ],
+ );
+
+ assert_eq!(out.visible_text, "Intro text\nOutro");
+ assert_eq!(
+ out.extracted,
+ vec![
+ ProposedPlanSegment::Normal("Intro text\n".to_string()),
+ ProposedPlanSegment::ProposedPlanStart,
+ ProposedPlanSegment::ProposedPlanDelta("- step 1\n".to_string()),
+ ProposedPlanSegment::ProposedPlanEnd,
+ ProposedPlanSegment::Normal("Outro".to_string()),
+ ]
+ );
+ }
+
+ #[test]
+ fn preserves_non_tag_lines() {
+ let mut parser = ProposedPlanParser::new();
+ let out = collect_chunks(&mut parser, &[" extra\n"]);
+
+ assert_eq!(out.visible_text, " extra\n");
+ assert_eq!(
+ out.extracted,
+ vec![ProposedPlanSegment::Normal(
+ " extra\n".to_string()
+ )]
+ );
+ }
+
+ #[test]
+ fn closes_unterminated_plan_block_on_finish() {
+ let mut parser = ProposedPlanParser::new();
+ let out = collect_chunks(&mut parser, &["\n- step 1\n"]);
+
+ assert_eq!(out.visible_text, "");
+ assert_eq!(
+ out.extracted,
+ vec![
+ ProposedPlanSegment::ProposedPlanStart,
+ ProposedPlanSegment::ProposedPlanDelta("- step 1\n".to_string()),
+ ProposedPlanSegment::ProposedPlanEnd,
+ ]
+ );
+ }
+
+ #[test]
+ fn strips_proposed_plan_blocks_from_text() {
+ let text = "before\n\n- step\n\nafter";
+ assert_eq!(strip_proposed_plan_blocks(text), "before\nafter");
+ }
+
+ #[test]
+ fn extracts_proposed_plan_text() {
+ let text = "before\n\n- step\n\nafter";
+ assert_eq!(
+ extract_proposed_plan_text(text),
+ Some("- step\n".to_string())
+ );
+ }
+}
diff --git a/codex-rs/utils/stream-parser/src/stream_text.rs b/codex-rs/utils/stream-parser/src/stream_text.rs
new file mode 100644
index 000000000..2ba16ea3a
--- /dev/null
+++ b/codex-rs/utils/stream-parser/src/stream_text.rs
@@ -0,0 +1,36 @@
+/// Incremental parser result for one pushed chunk (or final flush).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct StreamTextChunk<T> {
+    /// Text safe to render immediately.
+    pub visible_text: String,
+    /// Hidden payloads extracted from the chunk.
+    pub extracted: Vec<T>,
+}
+
+// Implemented by hand so that `T` itself does not have to implement `Default`.
+impl<T> Default for StreamTextChunk<T> {
+    fn default() -> Self {
+        Self {
+            visible_text: String::new(),
+            extracted: Vec::new(),
+        }
+    }
+}
+
+impl<T> StreamTextChunk<T> {
+    /// Returns true when no visible text or extracted payloads were produced.
+    pub fn is_empty(&self) -> bool {
+        self.visible_text.is_empty() && self.extracted.is_empty()
+    }
+}
+
+/// Trait for parsers that consume streamed text and emit visible text plus extracted payloads.
+pub trait StreamTextParser {
+    /// Payload extracted by this parser (for example a citation body).
+    type Extracted;
+
+    /// Feed a new text chunk.
+    fn push_str(&mut self, chunk: &str) -> StreamTextChunk<Self::Extracted>;
+
+    /// Flush any buffered state at end-of-stream (or end-of-item).
+    fn finish(&mut self) -> StreamTextChunk<Self::Extracted>;
+}
diff --git a/codex-rs/core/src/tagged_block_parser.rs b/codex-rs/utils/stream-parser/src/tagged_line_parser.rs
similarity index 74%
rename from codex-rs/core/src/tagged_block_parser.rs
rename to codex-rs/utils/stream-parser/src/tagged_line_parser.rs
index 46ec012c3..dadc77ec3 100644
--- a/codex-rs/core/src/tagged_block_parser.rs
+++ b/codex-rs/utils/stream-parser/src/tagged_line_parser.rs
@@ -1,9 +1,7 @@
//! Line-based tag block parsing for streamed text.
//!
//! The parser buffers each line until it can disprove that the line is a tag,
-//! which is required for tags that must appear alone on a line. For example,
-//! Proposed Plan output uses `` and `` tags
-//! on their own lines so clients can stream plan content separately.
+//! which is required for tags that must appear alone on a line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct TagSpec {
@@ -21,17 +19,6 @@ pub(crate) enum TaggedLineSegment {
}
/// Stateful line parser that splits input into normal text vs tag blocks.
-///
-/// How it works:
-/// - While reading a line, we buffer characters until the line either finishes
-/// (`\n`) or stops matching any tag prefix (after `trim_start`).
-/// - If it stops matching a tag prefix, the buffered line is immediately
-/// emitted as text and we continue in "plain text" mode until the next
-/// newline.
-/// - When a full line is available, we compare it to the open/close tags; tag
-/// lines emit TagStart/TagEnd, otherwise the line is emitted as text.
-/// - `finish()` flushes any buffered line and auto-closes an unterminated tag,
-/// which keeps streaming resilient to missing closing tags.
#[derive(Debug, Default)]
pub(crate) struct TaggedLineParser
where
@@ -56,7 +43,6 @@ where
}
}
- /// Parse a streamed delta into line-aware segments.
pub(crate) fn parse(&mut self, delta: &str) -> Vec> {
let mut segments = Vec::new();
let mut run = String::new();
@@ -75,7 +61,6 @@ where
if slug.is_empty() || self.is_tag_prefix(slug) {
continue;
}
- // This line cannot be a tag line, so flush it immediately.
let buffered = std::mem::take(&mut self.line_buffer);
self.detect_tag = false;
self.push_text(buffered, &mut segments);
@@ -96,7 +81,6 @@ where
segments
}
- /// Flush any buffered text and close an unterminated tag block.
pub(crate) fn finish(&mut self) -> Vec> {
let mut segments = Vec::new();
if !self.line_buffer.is_empty() {
@@ -115,7 +99,6 @@ where
push_segment(&mut segments, TaggedLineSegment::TagEnd(tag));
self.active_tag = None;
} else {
- // The buffered line never proved to be a tag line.
self.push_text(buffered, &mut segments);
}
}
@@ -210,12 +193,8 @@ where
}
segments.push(TaggedLineSegment::TagDelta(tag, delta));
}
- TaggedLineSegment::TagStart(tag) => {
- segments.push(TaggedLineSegment::TagStart(tag));
- }
- TaggedLineSegment::TagEnd(tag) => {
- segments.push(TaggedLineSegment::TagEnd(tag));
- }
+ TaggedLineSegment::TagStart(tag) => segments.push(TaggedLineSegment::TagStart(tag)),
+ TaggedLineSegment::TagEnd(tag) => segments.push(TaggedLineSegment::TagEnd(tag)),
}
}
@@ -267,48 +246,4 @@ mod tests {
vec![TaggedLineSegment::Normal(" extra\n".to_string())]
);
}
-
- #[test]
- fn closes_unterminated_tag_on_finish() {
- let mut parser = parser();
- let mut segments = parser.parse("\nline\n");
- segments.extend(parser.finish());
-
- assert_eq!(
- segments,
- vec![
- TaggedLineSegment::TagStart(Tag::Block),
- TaggedLineSegment::TagDelta(Tag::Block, "line\n".to_string()),
- TaggedLineSegment::TagEnd(Tag::Block),
- ]
- );
- }
-
- #[test]
- fn accepts_tags_with_trailing_whitespace() {
- let mut parser = parser();
- let mut segments = parser.parse(" \nline\n \n");
- segments.extend(parser.finish());
-
- assert_eq!(
- segments,
- vec![
- TaggedLineSegment::TagStart(Tag::Block),
- TaggedLineSegment::TagDelta(Tag::Block, "line\n".to_string()),
- TaggedLineSegment::TagEnd(Tag::Block),
- ]
- );
- }
-
- #[test]
- fn passes_through_plain_text() {
- let mut parser = parser();
- let mut segments = parser.parse("plain text\n");
- segments.extend(parser.finish());
-
- assert_eq!(
- segments,
- vec![TaggedLineSegment::Normal("plain text\n".to_string())]
- );
- }
}
diff --git a/codex-rs/utils/stream-parser/src/utf8_stream.rs b/codex-rs/utils/stream-parser/src/utf8_stream.rs
new file mode 100644
index 000000000..f9cd31eec
--- /dev/null
+++ b/codex-rs/utils/stream-parser/src/utf8_stream.rs
@@ -0,0 +1,333 @@
+use std::error::Error;
+use std::fmt;
+
+use crate::StreamTextChunk;
+use crate::StreamTextParser;
+
+/// Error returned by [`Utf8StreamParser`] when streamed bytes cannot be decoded as UTF-8.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Utf8StreamParserError {
+    /// The buffered bytes contain a sequence that is not valid UTF-8.
+    InvalidUtf8 {
+        /// Offset of the first bad byte, counted over buffered carry-over plus the new chunk.
+        valid_up_to: usize,
+        /// Length in bytes of the invalid sequence, as reported by the decoder.
+        error_len: usize,
+    },
+    /// The stream ended while a partial UTF-8 code point was still buffered.
+    IncompleteUtf8AtEof,
+}
+
+impl fmt::Display for Utf8StreamParserError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::IncompleteUtf8AtEof => {
+                f.write_str("incomplete UTF-8 code point at end of stream")
+            }
+            Self::InvalidUtf8 {
+                valid_up_to,
+                error_len,
+            } => write!(
+                f,
+                "invalid UTF-8 in streamed bytes at offset {valid_up_to} (error length {error_len})"
+            ),
+        }
+    }
+}
+
+impl Error for Utf8StreamParserError {}
+
+/// Wraps a [`StreamTextParser`] and accepts raw bytes, buffering partial UTF-8 code points.
+///
+/// This is useful when upstream data arrives as `&[u8]` and a code point may be split across
+/// chunk boundaries (for example `0xC3` followed by `0xA9` for `é`).
+#[derive(Debug)]
+pub struct Utf8StreamParser<P> {
+    inner: P,
+    pending_utf8: Vec<u8>,
+}
+
+impl<P> Utf8StreamParser<P>
+where
+    P: StreamTextParser,
+{
+    /// Wrap `inner` with an empty byte buffer.
+    pub fn new(inner: P) -> Self {
+        Self {
+            inner,
+            pending_utf8: Vec::new(),
+        }
+    }
+
+    /// Feed a raw byte chunk.
+    ///
+    /// If the chunk contains invalid UTF-8, this returns an error and rolls back the entire
+    /// pushed chunk so callers can decide how to recover without the inner parser seeing a partial
+    /// prefix from that chunk.
+    ///
+    /// A chunk that merely ends inside a code point is not an error: any decodable prefix is
+    /// forwarded to the wrapped parser and the unfinished tail stays buffered for the next call.
+    pub fn push_bytes(
+        &mut self,
+        chunk: &[u8],
+    ) -> Result<StreamTextChunk<P::Extracted>, Utf8StreamParserError> {
+        let old_len = self.pending_utf8.len();
+        self.pending_utf8.extend_from_slice(chunk);
+
+        match std::str::from_utf8(&self.pending_utf8) {
+            Ok(text) => {
+                let out = self.inner.push_str(text);
+                self.pending_utf8.clear();
+                Ok(out)
+            }
+            Err(err) => {
+                if let Some(error_len) = err.error_len() {
+                    // Genuinely invalid bytes: drop this chunk, keep the earlier carry-over.
+                    self.pending_utf8.truncate(old_len);
+                    return Err(Utf8StreamParserError::InvalidUtf8 {
+                        valid_up_to: err.valid_up_to(),
+                        error_len,
+                    });
+                }
+
+                // No error length means the buffer just stops part-way through a code point.
+                let valid_up_to = err.valid_up_to();
+                if valid_up_to == 0 {
+                    // Nothing decodable yet; everything stays buffered.
+                    return Ok(StreamTextChunk::default());
+                }
+
+                // Re-decode the valid prefix rather than converting unchecked; the error arm is
+                // purely defensive.
+                let text = match std::str::from_utf8(&self.pending_utf8[..valid_up_to]) {
+                    Ok(text) => text,
+                    Err(prefix_err) => {
+                        self.pending_utf8.truncate(old_len);
+                        let error_len = prefix_err.error_len().unwrap_or(0);
+                        return Err(Utf8StreamParserError::InvalidUtf8 {
+                            valid_up_to: prefix_err.valid_up_to(),
+                            error_len,
+                        });
+                    }
+                };
+                let out = self.inner.push_str(text);
+                self.pending_utf8.drain(..valid_up_to);
+                Ok(out)
+            }
+        }
+    }
+
+    /// Signal end of input: flush the wrapped parser and return its remaining output.
+    ///
+    /// # Errors
+    ///
+    /// Fails, without finishing the wrapped parser, when the buffered bytes are not complete
+    /// UTF-8 (for example a code point cut off by the end of the stream).
+    pub fn finish(&mut self) -> Result<StreamTextChunk<P::Extracted>, Utf8StreamParserError> {
+        let mut out = if self.pending_utf8.is_empty() {
+            StreamTextChunk::default()
+        } else {
+            match std::str::from_utf8(&self.pending_utf8) {
+                Ok(text) => {
+                    let out = self.inner.push_str(text);
+                    self.pending_utf8.clear();
+                    out
+                }
+                Err(err) => return Err(Self::pending_error(err)),
+            }
+        };
+
+        let mut tail = self.inner.finish();
+        out.visible_text.push_str(&tail.visible_text);
+        out.extracted.append(&mut tail.extracted);
+        Ok(out)
+    }
+
+    /// Return the wrapped parser if no undecoded UTF-8 bytes are buffered.
+    ///
+    /// Use [`Self::finish`] first if you want to flush buffered text into the wrapped parser.
+    pub fn into_inner(self) -> Result<P, Utf8StreamParserError> {
+        if self.pending_utf8.is_empty() {
+            return Ok(self.inner);
+        }
+        match std::str::from_utf8(&self.pending_utf8) {
+            Ok(_) => Ok(self.inner),
+            Err(err) => Err(Self::pending_error(err)),
+        }
+    }
+
+    /// Return the wrapped parser without validating or flushing buffered undecoded bytes.
+    ///
+    /// This may drop a partial UTF-8 code point that was buffered across chunk boundaries.
+    pub fn into_inner_lossy(self) -> P {
+        self.inner
+    }
+
+    /// Map a decode failure on the leftover buffer to the matching error variant.
+    ///
+    /// No error length means the buffer ends mid code point instead of holding bad bytes.
+    fn pending_error(err: std::str::Utf8Error) -> Utf8StreamParserError {
+        match err.error_len() {
+            Some(error_len) => Utf8StreamParserError::InvalidUtf8 {
+                valid_up_to: err.valid_up_to(),
+                error_len,
+            },
+            None => Utf8StreamParserError::IncompleteUtf8AtEof,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Utf8StreamParser;
+    use super::Utf8StreamParserError;
+    use crate::CitationStreamParser;
+    use crate::StreamTextChunk;
+    use crate::StreamTextParser;
+
+    use pretty_assertions::assert_eq;
+
+    fn collect_bytes(
+        parser: &mut Utf8StreamParser<CitationStreamParser>,
+        chunks: &[&[u8]],
+    ) -> Result<StreamTextChunk<String>, Utf8StreamParserError> {
+        let mut all = StreamTextChunk::default();
+        for chunk in chunks {
+            let next = parser.push_bytes(chunk)?;
+            all.visible_text.push_str(&next.visible_text);
+            all.extracted.extend(next.extracted);
+        }
+        let tail = parser.finish()?;
+        all.visible_text.push_str(&tail.visible_text);
+        all.extracted.extend(tail.extracted);
+        Ok(all)
+    }
+
+    #[test]
+    fn utf8_stream_parser_handles_split_code_points_across_chunks() {
+        let chunks: [&[u8]; 3] = [
+            b"A\xC3", // "A" + first byte of "é"
+            b"\xA9<oai-mem-citation>\xE4", // rest of "é", open tag, first byte of "中"
+            b"\xB8\xAD</oai-mem-citation>Z", // rest of "中", close tag, "Z"
+        ];
+
+        let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
+        let out = match collect_bytes(&mut parser, &chunks) {
+            Ok(out) => out,
+            Err(err) => panic!("valid UTF-8 stream should parse: {err}"),
+        };
+
+        assert_eq!(out.visible_text, "AéZ");
+        assert_eq!(out.extracted, vec!["中".to_string()]);
+    }
+
+    #[test]
+    fn utf8_stream_parser_rolls_back_on_invalid_utf8_chunk() {
+        let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
+
+        let first = match parser.push_bytes(&[0xC3]) {
+            Ok(out) => out,
+            Err(err) => panic!("leading byte may be buffered until next chunk: {err}"),
+        };
+        assert!(first.is_empty());
+
+        let err = match parser.push_bytes(&[0x28]) {
+            Ok(out) => panic!("invalid continuation byte should error, got output: {out:?}"),
+            Err(err) => err,
+        };
+        assert_eq!(
+            err,
+            Utf8StreamParserError::InvalidUtf8 {
+                valid_up_to: 0,
+                error_len: 1,
+            }
+        );
+
+        let second = match parser.push_bytes(&[0xA9, b'x']) {
+            Ok(out) => out,
+            Err(err) => panic!("state should still allow a valid continuation: {err}"),
+        };
+        let tail = match parser.finish() {
+            Ok(out) => out,
+            Err(err) => panic!("stream should finish: {err}"),
+        };
+
+        assert_eq!(second.visible_text, "éx");
+        assert!(second.extracted.is_empty());
+        assert!(tail.is_empty());
+    }
+
+    #[test]
+    fn utf8_stream_parser_rolls_back_entire_chunk_when_invalid_byte_follows_valid_prefix() {
+        let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
+
+        let err = match parser.push_bytes(b"ok\xFF") {
+            Ok(out) => panic!("invalid byte should error, got output: {out:?}"),
+            Err(err) => err,
+        };
+        assert_eq!(
+            err,
+            Utf8StreamParserError::InvalidUtf8 {
+                valid_up_to: 2,
+                error_len: 1,
+            }
+        );
+
+        let next = match parser.push_bytes(b"!") {
+            Ok(out) => out,
+            Err(err) => panic!("parser should recover after rollback: {err}"),
+        };
+
+        assert_eq!(next.visible_text, "!");
+        assert!(next.extracted.is_empty());
+    }
+
+    #[test]
+    fn utf8_stream_parser_errors_on_incomplete_code_point_at_eof() {
+        let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
+
+        let out = match parser.push_bytes(&[0xE2, 0x82]) {
+            Ok(out) => out,
+            Err(err) => panic!("partial code point should be buffered: {err}"),
+        };
+        assert!(out.is_empty());
+
+        let err = match parser.finish() {
+            Ok(out) => panic!("unfinished code point should error, got output: {out:?}"),
+            Err(err) => err,
+        };
+        assert_eq!(err, Utf8StreamParserError::IncompleteUtf8AtEof);
+    }
+
+    #[test]
+    fn utf8_stream_parser_into_inner_errors_when_partial_code_point_is_buffered() {
+        let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
+
+        let out = match parser.push_bytes(&[0xC3]) {
+            Ok(out) => out,
+            Err(err) => panic!("partial code point should be buffered: {err}"),
+        };
+        assert!(out.is_empty());
+
+        let err = match parser.into_inner() {
+            Ok(_) => panic!("buffered partial code point should be rejected"),
+            Err(err) => err,
+        };
+        assert_eq!(err, Utf8StreamParserError::IncompleteUtf8AtEof);
+    }
+
+    #[test]
+    fn utf8_stream_parser_into_inner_lossy_drops_buffered_partial_code_point() {
+        let mut parser = Utf8StreamParser::new(CitationStreamParser::new());
+
+        let out = match parser.push_bytes(&[0xC3]) {
+            Ok(out) => out,
+            Err(err) => panic!("partial code point should be buffered: {err}"),
+        };
+        assert!(out.is_empty());
+
+        let mut inner = parser.into_inner_lossy();
+        let tail = inner.finish();
+        assert!(tail.is_empty());
+    }
+}