Skip to content

Commit

Permalink
RUST-1992 Minor parsing cleanup (#485)
Browse files Browse the repository at this point in the history
  • Loading branch information
abr-egn authored Jul 23, 2024
1 parent 39d90f6 commit 1c6e65a
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 159 deletions.
6 changes: 6 additions & 0 deletions src/de/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ impl From<string::FromUtf8Error> for Error {
}
}

impl From<crate::raw::Error> for Error {
fn from(value: crate::raw::Error) -> Self {
Self::deserialization(value)
}
}

impl fmt::Display for Error {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
match *self {
Expand Down
34 changes: 5 additions & 29 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use crate::{
};

use ::serde::{
de::{DeserializeOwned, Error as _, Unexpected},
de::{DeserializeOwned, Error as _},
Deserialize,
};

Expand Down Expand Up @@ -68,32 +68,6 @@ enum DeserializerHint {
RawBson,
}

pub(crate) fn read_bool<R: Read>(mut reader: R) -> Result<bool> {
let val = read_u8(&mut reader)?;
if val > 1 {
return Err(Error::invalid_value(
Unexpected::Unsigned(val as u64),
&"boolean must be stored as 0 or 1",
));
}

Ok(val != 0)
}

#[inline]
pub(crate) fn read_u8<R: Read + ?Sized>(reader: &mut R) -> Result<u8> {
let mut buf = [0; 1];
reader.read_exact(&mut buf)?;
Ok(u8::from_le_bytes(buf))
}

#[inline]
pub(crate) fn read_i32<R: Read + ?Sized>(reader: &mut R) -> Result<i32> {
let mut buf = [0; 4];
reader.read_exact(&mut buf)?;
Ok(i32::from_le_bytes(buf))
}

impl Timestamp {
pub(crate) fn from_reader<R: Read>(mut reader: R) -> Result<Self> {
let mut bytes = [0; 8];
Expand Down Expand Up @@ -181,8 +155,10 @@ where
Deserialize::deserialize(de)
}

fn reader_to_vec<R: Read>(mut reader: R) -> Result<Vec<u8>> {
let length = read_i32(&mut reader)?;
pub(crate) fn reader_to_vec<R: Read>(mut reader: R) -> Result<Vec<u8>> {
let mut buf = [0; 4];
reader.read_exact(&mut buf)?;
let length = i32::from_le_bytes(buf);

if length < MIN_BSON_DOCUMENT_SIZE {
return Err(Error::custom("document size too small"));
Expand Down
21 changes: 5 additions & 16 deletions src/de/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct DeserializerOptions {
impl<'de> Deserializer<'de> {
pub(crate) fn new(buf: &'de [u8], utf8_lossy: bool) -> Result<Self> {
Ok(Self {
element: RawElement::toplevel(buf).map_err(Error::deserialization)?,
element: RawElement::toplevel(buf)?,
options: DeserializerOptions {
utf8_lossy,
human_readable: false,
Expand All @@ -60,7 +60,7 @@ impl<'de> Deserializer<'de> {
}

fn value(&self) -> Result<RawBsonRef<'de>> {
self.element.value().map_err(Error::deserialization)
Ok(self.element.value()?)
}

/// Deserialize the element, using the type of the element along with the
Expand All @@ -70,11 +70,7 @@ impl<'de> Deserializer<'de> {
V: serde::de::Visitor<'de>,
{
if self.options.utf8_lossy {
if let Some(lossy) = self
.element
.value_utf8_lossy()
.map_err(Error::deserialization)?
{
if let Some(lossy) = self.element.value_utf8_lossy()? {
return match lossy {
Utf8LossyBson::String(s) => visitor.visit_string(s),
Utf8LossyBson::RegularExpression(re) => {
Expand Down Expand Up @@ -183,10 +179,7 @@ impl<'de> Deserializer<'de> {

fn get_string(&self) -> Result<Cow<'de, str>> {
if self.options.utf8_lossy {
let value = self
.element
.value_utf8_lossy()
.map_err(Error::deserialization)?;
let value = self.element.value_utf8_lossy()?;
let s = match value {
Some(Utf8LossyBson::String(s)) => s,
_ => {
Expand Down Expand Up @@ -335,11 +328,7 @@ impl<'de> DocumentAccess<'de> {
}

fn advance(&mut self) -> Result<()> {
self.elem = self
.iter
.next()
.transpose()
.map_err(Error::deserialization)?;
self.elem = self.iter.next().transpose()?;
Ok(())
}

Expand Down
21 changes: 2 additions & 19 deletions src/document.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! A BSON document represented as an associative HashMap with insertion ordering.

use std::{
convert::TryInto,
error,
fmt::{self, Debug, Display, Formatter},
io::{Read, Write},
Expand All @@ -10,11 +9,10 @@ use std::{

use ahash::RandomState;
use indexmap::IndexMap;
use serde::{de::Error, Deserialize};
use serde::Deserialize;

use crate::{
bson::{Array, Bson, Timestamp},
de::{read_i32, MIN_BSON_DOCUMENT_SIZE},
oid::ObjectId,
spec::BinarySubtype,
Binary,
Expand Down Expand Up @@ -548,22 +546,7 @@ impl Document {
}

fn decode<R: Read + ?Sized>(reader: &mut R, utf_lossy: bool) -> crate::de::Result<Document> {
let length = read_i32(reader)?;
if length < MIN_BSON_DOCUMENT_SIZE {
return Err(crate::de::Error::invalid_length(
length as usize,
&"document length must be at least 5",
));
}
let ulen: usize =
length
.try_into()
.map_err(|e| crate::de::Error::DeserializationError {
message: format!("invalid document length: {}", e),
})?;
let mut buf = vec![0u8; ulen];
buf[0..4].copy_from_slice(&length.to_le_bytes());
reader.read_exact(&mut buf[4..])?;
let buf = crate::de::reader_to_vec(reader)?;
let deserializer = crate::de::RawDeserializer::new(&buf, utf_lossy)?;
Document::deserialize(deserializer)
}
Expand Down
8 changes: 2 additions & 6 deletions src/raw/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,15 +519,11 @@ impl RawDocument {
let buf = &self.as_bytes()[start_at..];

let mut splits = buf.splitn(2, |x| *x == 0);
let value = splits
.next()
.ok_or_else(|| Error::new_without_key(ErrorKind::new_malformed("no value")))?;
let value = splits.next().ok_or_else(|| Error::malformed("no value"))?;
if splits.next().is_some() {
Ok(value)
} else {
Err(Error::new_without_key(ErrorKind::new_malformed(
"expected null terminator",
)))
Err(Error::malformed("expected null terminator"))
}
}

Expand Down
27 changes: 9 additions & 18 deletions src/raw/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,18 @@ pub struct Error {
}

impl Error {
pub(crate) fn new_with_key(key: impl Into<String>, kind: ErrorKind) -> Self {
Self {
kind,
key: Some(key.into()),
}
pub(crate) fn new(kind: ErrorKind) -> Self {
Self { key: None, kind }
}

pub(crate) fn new_without_key(kind: ErrorKind) -> Self {
Self { key: None, kind }
pub(crate) fn malformed(e: impl ToString) -> Self {
Self::new(ErrorKind::MalformedValue {
message: e.to_string(),
})
}

pub(crate) fn with_key(mut self, key: impl AsRef<str>) -> Self {
self.key = Some(key.as_ref().to_string());
pub(crate) fn with_key(mut self, key: impl Into<String>) -> Self {
self.key = Some(key.into());
self
}

Expand All @@ -48,14 +47,6 @@ pub enum ErrorKind {
Utf8EncodingError(Utf8Error),
}

impl ErrorKind {
pub(crate) fn new_malformed(e: impl ToString) -> Self {
ErrorKind::MalformedValue {
message: e.to_string(),
}
}
}

impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let p = self
Expand All @@ -80,7 +71,7 @@ pub type Result<T> = std::result::Result<T, Error>;

/// Execute the provided closure, mapping the key of the returned error (if any) to the provided
/// key.
pub(crate) fn try_with_key<G, F: FnOnce() -> Result<G>>(key: impl AsRef<str>, f: F) -> Result<G> {
pub(crate) fn try_with_key<G, F: FnOnce() -> Result<G>>(key: impl Into<String>, f: F) -> Result<G> {
f().map_err(|e| e.with_key(key))
}

Expand Down
49 changes: 20 additions & 29 deletions src/raw/iter.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::convert::TryInto;

use crate::{
de::{read_bool, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
de::{MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
oid::ObjectId,
raw::{Error, ErrorKind, Result},
raw::{Error, Result},
spec::{BinarySubtype, ElementType},
Bson,
DateTime,
Expand All @@ -18,6 +18,7 @@ use crate::{
};

use super::{
bool_from_slice,
checked_add,
error::try_with_key,
f64_from_slice,
Expand Down Expand Up @@ -81,11 +82,11 @@ impl<'a> RawIter<'a> {
fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> {
let end = checked_add(start, num_bytes)?;
if self.doc.as_bytes().get(start..end).is_none() {
return Err(Error::new_without_key(ErrorKind::new_malformed(format!(
return Err(Error::malformed(format!(
"length exceeds remaining length of buffer: {} vs {}",
num_bytes,
self.doc.as_bytes().len() - start
))));
)));
}
Ok(())
}
Expand All @@ -95,18 +96,16 @@ impl<'a> RawIter<'a> {
let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? as usize;

if size < MIN_BSON_DOCUMENT_SIZE as usize {
return Err(Error::new_without_key(ErrorKind::new_malformed(format!(
return Err(Error::malformed(format!(
"document too small: {} bytes",
size
))));
)));
}

self.verify_enough_bytes(starting_at, size)?;

if self.doc.as_bytes()[starting_at + size - 1] != 0 {
return Err(Error::new_without_key(ErrorKind::new_malformed(
"not null terminated",
)));
return Err(Error::malformed("not null terminated"));
}
Ok(size)
}
Expand Down Expand Up @@ -186,9 +185,9 @@ impl<'a> RawElement<'a> {
ElementType::Array => {
RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes(self.slice())?))
}
ElementType::Boolean => {
RawBsonRef::Boolean(read_bool(self.slice()).map_err(|e| self.malformed_error(e))?)
}
ElementType::Boolean => RawBsonRef::Boolean(
bool_from_slice(self.slice()).map_err(|e| self.malformed_error(e))?,
),
ElementType::DateTime => {
RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice(self.slice())?))
}
Expand Down Expand Up @@ -309,7 +308,7 @@ impl<'a> RawElement<'a> {
}

fn malformed_error(&self, e: impl ToString) -> Error {
Error::new_with_key(self.key, ErrorKind::new_malformed(e))
Error::malformed(e).with_key(self.key)
}

pub(crate) fn slice(&self) -> &'a [u8] {
Expand All @@ -336,7 +335,7 @@ impl<'a> RawElement<'a> {
Ok(ObjectId::from_bytes(
self.doc.as_bytes()[start_at..(start_at + 12)]
.try_into()
.map_err(|e| Error::new_with_key(self.key, ErrorKind::new_malformed(e)))?,
.map_err(|e| Error::malformed(e).with_key(self.key))?,
))
}
}
Expand All @@ -345,9 +344,7 @@ impl<'a> RawIter<'a> {
fn get_next_length_at(&self, start_at: usize) -> Result<usize> {
let len = i32_from_slice(&self.doc.as_bytes()[start_at..])?;
if len < 0 {
Err(Error::new_without_key(ErrorKind::new_malformed(
"lengths can't be negative",
)))
Err(Error::malformed("lengths can't be negative"))
} else {
Ok(len as usize)
}
Expand All @@ -366,15 +363,11 @@ impl<'a> Iterator for RawIter<'a> {
return None;
} else {
self.valid = false;
return Some(Err(Error::new_without_key(ErrorKind::new_malformed(
"document not null terminated",
))));
return Some(Err(Error::malformed("document not null terminated")));
}
} else if self.offset >= self.doc.as_bytes().len() {
self.valid = false;
return Some(Err(Error::new_without_key(ErrorKind::new_malformed(
"iteration overflowed document",
))));
return Some(Err(Error::malformed("iteration overflowed document")));
}

let key = match self.doc.read_cstring_at(self.offset + 1) {
Expand All @@ -390,13 +383,11 @@ impl<'a> Iterator for RawIter<'a> {
let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) {
Some(et) => et,
None => {
return Err(Error::new_with_key(
key,
ErrorKind::new_malformed(format!(
"invalid tag: {}",
self.doc.as_bytes()[self.offset]
)),
return Err(Error::malformed(format!(
"invalid tag: {}",
self.doc.as_bytes()[self.offset]
))
.with_key(key))
}
};

Expand Down
Loading

0 comments on commit 1c6e65a

Please sign in to comment.