,
}
impl DefaultEmitter {
- /// Whether to use [`naive_next_state`] to switch states automatically.
+ /// Whether to use [crate::naive_next_state] to switch states automatically.
///
/// The default is off.
pub fn naively_switch_states(&mut self, yes: bool) {
@@ -86,10 +88,11 @@ impl DefaultEmitter {
}
}
-// opaque type around inner emitter
impl Emitter for DefaultEmitter {
type Token = Token;
+ // opaque type around inner emitter
+
fn set_last_start_tag(&mut self, last_start_tag: Option<&[u8]>) {
self.inner.set_last_start_tag(last_start_tag)
}
@@ -202,7 +205,7 @@ impl Emitter for DefaultEmitter {
/// An HTML start tag, such as `<p>` or `<a>`.
#[derive(Debug, Default, Eq, PartialEq, Clone)]
pub struct StartTag {
- /// Whether this tag is self-closing. If it is self-closing, no following [`EndTag`] should be
+ /// Whether this tag is self-closing. If it is self-closing, no following [EndTag] should be
/// expected.
pub self_closing: bool,
@@ -212,7 +215,7 @@ pub struct StartTag {
/// A mapping for any HTML attributes this start tag may have.
///
/// Duplicate attributes are ignored after the first one as per WHATWG spec. Implement your own
- /// [`Emitter`] to tweak this behavior.
+ /// [crate::Emitter] to tweak this behavior.
pub attributes: BTreeMap,
}
diff --git a/src/emitter.rs b/src/emitters/emitter.rs
similarity index 100%
rename from src/emitter.rs
rename to src/emitters/emitter.rs
diff --git a/src/html5ever_emitter.rs b/src/emitters/html5ever.rs
similarity index 84%
rename from src/html5ever_emitter.rs
rename to src/emitters/html5ever.rs
index 4398427..75ed9bf 100644
--- a/src/html5ever_emitter.rs
+++ b/src/emitters/html5ever.rs
@@ -1,6 +1,7 @@
+//! See `examples/scraper.rs` in the repository for usage.
use std::convert::Infallible;
-use crate::callbacks::{Callback, CallbackEmitter, CallbackEvent};
+use crate::emitters::callback::{Callback, CallbackEmitter, CallbackEvent};
use crate::utils::trace_log;
use crate::{Emitter, Error, State};
@@ -175,27 +176,30 @@ impl<'a, S: TokenSink> Emitter for Html5everEmitter<'a, S> {
}
fn emit_error(&mut self, error: Error) {
- self.emitter_inner.emit_error(error);
+ self.emitter_inner.emit_error(error)
}
- fn pop_token(&mut self) -> Option {
- None
+ fn should_emit_errors(&mut self) -> bool {
+ self.emitter_inner.should_emit_errors()
}
+ fn pop_token(&mut self) -> Option {
+ self.emitter_inner.pop_token()
+ }
fn emit_string(&mut self, c: &[u8]) {
- self.emitter_inner.emit_string(c);
+ self.emitter_inner.emit_string(c)
}
fn init_start_tag(&mut self) {
- self.emitter_inner.init_start_tag();
+ self.emitter_inner.init_start_tag()
}
fn init_end_tag(&mut self) {
- self.emitter_inner.init_end_tag();
+ self.emitter_inner.init_end_tag()
}
fn init_comment(&mut self) {
- self.emitter_inner.init_comment();
+ self.emitter_inner.init_comment()
}
fn emit_current_tag(&mut self) -> Option {
@@ -204,63 +208,63 @@ impl<'a, S: TokenSink> Emitter for Html5everEmitter<'a, S> {
}
fn emit_current_comment(&mut self) {
- self.emitter_inner.emit_current_comment();
+ self.emitter_inner.emit_current_comment()
}
fn emit_current_doctype(&mut self) {
- self.emitter_inner.emit_current_doctype();
+ self.emitter_inner.emit_current_doctype()
}
fn set_self_closing(&mut self) {
- self.emitter_inner.set_self_closing();
+ self.emitter_inner.set_self_closing()
}
fn set_force_quirks(&mut self) {
- self.emitter_inner.set_force_quirks();
+ self.emitter_inner.set_force_quirks()
}
fn push_tag_name(&mut self, s: &[u8]) {
- self.emitter_inner.push_tag_name(s);
+ self.emitter_inner.push_tag_name(s)
}
fn push_comment(&mut self, s: &[u8]) {
- self.emitter_inner.push_comment(s);
+ self.emitter_inner.push_comment(s)
}
fn push_doctype_name(&mut self, s: &[u8]) {
- self.emitter_inner.push_doctype_name(s);
+ self.emitter_inner.push_doctype_name(s)
}
fn init_doctype(&mut self) {
- self.emitter_inner.init_doctype();
+ self.emitter_inner.init_doctype()
}
fn init_attribute(&mut self) {
- self.emitter_inner.init_attribute();
+ self.emitter_inner.init_attribute()
}
fn push_attribute_name(&mut self, s: &[u8]) {
- self.emitter_inner.push_attribute_name(s);
+ self.emitter_inner.push_attribute_name(s)
}
fn push_attribute_value(&mut self, s: &[u8]) {
- self.emitter_inner.push_attribute_value(s);
+ self.emitter_inner.push_attribute_value(s)
}
fn set_doctype_public_identifier(&mut self, value: &[u8]) {
- self.emitter_inner.set_doctype_public_identifier(value);
+ self.emitter_inner.set_doctype_public_identifier(value)
}
fn set_doctype_system_identifier(&mut self, value: &[u8]) {
- self.emitter_inner.set_doctype_system_identifier(value);
+ self.emitter_inner.set_doctype_system_identifier(value)
}
- fn push_doctype_public_identifier(&mut self, value: &[u8]) {
- self.emitter_inner.push_doctype_public_identifier(value);
+ fn push_doctype_public_identifier(&mut self, s: &[u8]) {
+ self.emitter_inner.push_doctype_public_identifier(s)
}
- fn push_doctype_system_identifier(&mut self, value: &[u8]) {
- self.emitter_inner.push_doctype_system_identifier(value);
+ fn push_doctype_system_identifier(&mut self, s: &[u8]) {
+ self.emitter_inner.push_doctype_system_identifier(s)
}
fn current_is_appropriate_end_tag_token(&mut self) -> bool {
diff --git a/src/emitters/mod.rs b/src/emitters/mod.rs
new file mode 100644
index 0000000..2775fc8
--- /dev/null
+++ b/src/emitters/mod.rs
@@ -0,0 +1,25 @@
+//! [Emitter] is a "visitor" on the underlying token stream.
+//!
+//! When html5gum parses HTML, it (more specifically, the [crate::Tokenizer]) calls into emitters to keep
+//! track of state and to produce output.
+//!
+//! Emitters can yield control to the _caller_ of the tokenizer by emitting tokens in
+//! [Emitter::pop_token]. This is what powers the basic API where users just iterate over
+//! [crate::Tokenizer] which is an iterator over [default::Token].
+//!
+//! The most performant implementations don't emit tokens through `pop_token` and instead hold
+//! internal mutable state, or directly produce side effects.
+//!
+//! Emitters are "a way to consume parsing results." The following ways are available:
+//!
+//! * [default::DefaultEmitter], if you don't care about speed and only want convenience.
+//! * [callback::CallbackEmitter], if you can deal with some lifetime problems in exchange for way fewer allocations.
+//! * Implementing your own [Emitter] for maximum performance and maximum pain.
+pub mod callback;
+pub mod default;
+#[cfg(feature = "html5ever")]
+pub mod html5ever;
+
+mod emitter;
+
+pub use emitter::{naive_next_state, Emitter};
diff --git a/src/lib.rs b/src/lib.rs
index d1a1421..51af1e8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,6 +6,8 @@
#![doc = concat!("[LICENSE]: ", blob_url_prefix!(), "LICENSE")]
#![doc = concat!("[examples/tokenize_with_state_switches.rs]: ", blob_url_prefix!(), "examples/tokenize_with_state_switches.rs")]
#![doc = concat!("[examples/custom_emitter.rs]: ", blob_url_prefix!(), "examples/custom_emitter.rs")]
+#![doc = concat!("[examples/callback_emitter.rs]: ", blob_url_prefix!(), "examples/callback_emitter.rs")]
+#![doc = concat!("[examples/scraper.rs]: ", blob_url_prefix!(), "examples/scraper.rs")]
#![doc = include_str!("../README.md")]
//
#![warn(clippy::all)]
@@ -36,14 +38,10 @@ macro_rules! blob_url_prefix {
use blob_url_prefix;
mod arrayvec;
-pub mod callbacks;
mod char_validator;
-mod default_emitter;
-mod emitter;
+pub mod emitters;
mod entities;
mod error;
-#[cfg(feature = "html5ever")]
-mod html5ever_emitter;
mod htmlstring;
mod machine;
mod machine_helper;
@@ -57,13 +55,10 @@ mod utils;
#[doc(hidden)]
pub mod testutils;
-pub use default_emitter::{DefaultEmitter, Doctype, EndTag, StartTag, Token};
-pub use emitter::{naive_next_state, Emitter};
+pub use emitters::default::{DefaultEmitter, Doctype, EndTag, StartTag, Token};
+pub use emitters::{naive_next_state, Emitter};
pub use error::Error;
pub use htmlstring::HtmlString;
pub use reader::{IoReader, Readable, Reader, StringReader};
pub use state::State;
pub use tokenizer::{InfallibleTokenizer, Tokenizer};
-
-#[cfg(feature = "html5ever")]
-pub use html5ever_emitter::Html5everEmitter;
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 0d1908a..b355c1b 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -66,7 +66,7 @@ impl> Tokenizer {
/// use std::convert::Infallible;
///
/// use html5gum::Tokenizer;
- /// use html5gum::callbacks::{CallbackEvent, CallbackEmitter};
+ /// use html5gum::emitters::callback::{CallbackEvent, CallbackEmitter};
///
/// let emitter = CallbackEmitter::new(move |event: CallbackEvent<'_>| -> Option {
/// if let CallbackEvent::String { value } = event {
diff --git a/tests/html5lib_tree_builder.rs b/tests/html5lib_tree_builder.rs
index e5d1c3d..302a01d 100644
--- a/tests/html5lib_tree_builder.rs
+++ b/tests/html5lib_tree_builder.rs
@@ -22,7 +22,8 @@ use html5ever::tokenizer::states::{RawKind, State};
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
use html5ever::{namespace_url, ns};
use html5ever::{LocalName, QualName};
-use html5gum::{testutils::trace_log, Html5everEmitter, Tokenizer};
+use html5gum::emitters::html5ever::Html5everEmitter;
+use html5gum::{testutils::trace_log, Tokenizer};
use markup5ever_rcdom::{Handle, NodeData, RcDom};
use pretty_assertions::assert_eq;