peekread-0.1.1/.cargo_vcs_info.json0000644000000001120000000000000126040ustar { "git": { "sha1": "0459a6b23d49008b4a0db8137e7b41384ca19020" } } peekread-0.1.1/.gitignore000064400000000000000000000000250000000000000133450ustar 00000000000000/target Cargo.lock peekread-0.1.1/Cargo.toml0000644000000015470000000000000106170ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "peekread" version = "0.1.1" authors = ["Orson Peters "] description = "Allows peeking ahead in Read streams" readme = "README.md" keywords = ["peek", "read", "stream"] categories = ["filesystem", "parsing"] license = "Zlib" repository = "https://github.com/orlp/peekread" [dependencies] peekread-0.1.1/Cargo.toml.orig000064400000000000000000000006300000000000000142460ustar 00000000000000[package] name = "peekread" version = "0.1.1" # Remember to grep and update version everywhere. authors = ["Orson Peters "] edition = "2018" description = "Allows peeking ahead in Read streams" license = "Zlib" repository = "https://github.com/orlp/peekread" readme = "README.md" keywords = ["peek", "read", "stream"] categories = ["filesystem", "parsing"] [dependencies]peekread-0.1.1/LICENSE000064400000000000000000000016050000000000000123670ustar 00000000000000Copyright (c) 2021 Orson Peters This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. 
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution.peekread-0.1.1/README.md000064400000000000000000000033140000000000000126400ustar 00000000000000# peekread This crate allows you to take an arbitrary `Read` stream and 'peek ahead' into the stream without consuming the original stream. This is done through the `PeekRead` trait which has the method `peek`. When this method is called it returns a new `PeekCursor` object implementing `Read`, `BufRead` and `Seek` that allows you to read from the stream without affecting the original stream. The `PeekRead` trait is directly implemented on a select few types, but for most you will have to wrap your type in a `SeekPeekReader` or `BufPeekReader` that implements the peeking behavior using respectively seeking or buffering. Please refer to the [**the documentation**](https://docs.rs/peekread) for more information. The minimum required stable Rust version for `peekread` is 1.31.0. To start using `peekread` add the following to your `Cargo.toml`: ```toml [dependencies] peekread = "0.1" ``` # Example A short example: ```rust use peekread::{PeekRead, SeekPeekReader}; let mut f = SeekPeekReader::new(File::open("ambiguous")?); // HTML is so permissive its parser never fails, so check for signature. if f.starts_with(" { // Where we store the peeked but not yet read data. buf_storage: VecDeque, // A vec used for temporary storage. 
tmp: Vec, min_read_size: usize, inner: R, } impl BufPeekReader { const MIN_READ_TO_END: usize = 32; /// Creates a new [`BufPeekReader`]. pub fn new(reader: R) -> Self { Self { buf_storage: VecDeque::new(), tmp: Vec::new(), min_read_size: 0, inner: reader, } } /// Pushes the given data into the stream at the front, pushing the read /// cursor back. pub fn unread(&mut self, data: &[u8]) { self.buf_storage.reserve(data.len()); for byte in data.iter().copied().rev() { self.buf_storage.push_front(byte); } } /// Sets the minimum size used when reading from the underlying stream. /// Setting this allows for efficient buffered reads on any stream /// similar to [`BufReader`], but is disabled by default since doing /// bigger reads than requested might unnecessarily block. pub fn set_min_read_size(&mut self, nbytes: usize) { self.min_read_size = nbytes; } /// Gets the minimum read size. See [`Self::set_min_read_size`]. pub fn min_read_size(&self) -> usize { self.min_read_size } /// Returns a reference to the internally buffered data. /// /// Unlike [`BufRead::fill_buf`], this will not attempt to fill the buffer /// if it is empty. pub fn buffer(&self) -> &VecDeque { &self.buf_storage } /// Gets a reference to the underlying reader. /// /// It is inadvisable to directly read from the underlying reader. pub fn get_ref(&self) -> &R { &self.inner } /// Gets a mutable reference to the underlying reader. /// /// It is inadvisable to directly read from the underlying reader. pub fn get_mut(&mut self) -> &mut R { &mut self.inner } /// Unwraps this `BufPeekReader`, returning the underlying reader. pub fn into_inner(self) -> R { self.inner } // Try to fill the buffer so that it's at least nbytes in length // (may fail to do so if EOF is reached - no error is reported then). 
fn request_buffer(&mut self, nbytes: usize) -> Result<()> { let nbytes_needed = nbytes.saturating_sub(self.buf_storage.len()); if nbytes_needed > 0 { let read_size = nbytes_needed.max(self.min_read_size); self.inner .by_ref() .take(read_size as u64) .read_to_end(&mut self.tmp)?; self.buf_storage.reserve(self.tmp.len()); self.buf_storage.extend(self.tmp.drain(..)); } Ok(()) } // The buffered data starting from the peek position as two slices. fn peek_slices(&self, peek_pos: usize) -> (&[u8], &[u8]) { let (a, b) = self.buf_storage.as_slices(); let first = a.get(peek_pos..).unwrap_or_default(); let second = b .get(peek_pos.saturating_sub(a.len())..) .unwrap_or_default(); (first, second) } } impl PeekRead for BufPeekReader { fn peek(&mut self) -> PeekCursor<'_> { PeekCursor::new(self) } } impl PeekReadImpl for BufPeekReader { fn peek_read(&mut self, state: &mut PeekCursorState, buf: &mut [u8]) -> Result { self.request_buffer(state.peek_pos as usize + buf.len())?; let (mut first, mut second) = self.peek_slices(state.peek_pos as usize); let mut written = first.read(buf).unwrap(); // Can't fail. written += second.read(&mut buf[written..]).unwrap(); // Can't fail. state.peek_pos += written as u64; Ok(written) } fn peek_fill_buf(&mut self, state: &mut PeekCursorState) -> Result<&[u8]> { self.request_buffer(state.peek_pos as usize + 1)?; let (first, second) = self.peek_slices(state.peek_pos as usize); if !first.is_empty() { Ok(first) } else { Ok(second) } } fn peek_consume(&mut self, state: &mut PeekCursorState, amt: usize) { state.peek_pos += amt as u64; } fn peek_read_exact(&mut self, state: &mut PeekCursorState, buf: &mut [u8]) -> Result<()> { self.request_buffer(state.peek_pos as usize + buf.len())?; let (mut first, mut second) = self.peek_slices(state.peek_pos as usize); let written = first.read(buf).unwrap(); // Can't fail. 
second.read_exact(&mut buf[written..])?; state.peek_pos += buf.len() as u64; Ok(()) } fn peek_stream_position(&mut self, state: &mut PeekCursorState) -> Result { Ok(state.peek_pos) } fn peek_seek(&mut self, state: &mut PeekCursorState, pos: SeekFrom) -> Result { match pos { SeekFrom::Start(offset) => state.peek_pos = offset, SeekFrom::Current(offset) => { state.peek_pos = seek_add_offset(state.peek_pos, offset)?; } SeekFrom::End(offset) => { let mut requested_buffer_size = self.buf_storage.len(); while self.buf_storage.len() == requested_buffer_size { requested_buffer_size = (requested_buffer_size * 2).max(Self::MIN_READ_TO_END); self.request_buffer(requested_buffer_size)?; } state.peek_pos = seek_add_offset(self.buf_storage.len() as u64, offset)?; } } Ok(state.peek_pos) } } impl Read for BufPeekReader { fn read(&mut self, buf: &mut [u8]) -> Result { let (mut first, mut second) = self.buf_storage.as_slices(); let mut written = first.read(buf).unwrap(); // Can't fail. written += second.read(&mut buf[written..]).unwrap(); // Can't fail. self.inner.read(&mut buf[written..]).map(|inner_written| { self.consume(written); written + inner_written }) } fn read_exact(&mut self, buf: &mut [u8]) -> Result<()> { let (mut first, mut second) = self.buf_storage.as_slices(); let mut written = first.read(buf).unwrap(); // Can't fail. written += second.read(&mut buf[written..])?; // Can't fail. 
self.inner .read_exact(&mut buf[written..]) .map(|_| self.consume(buf.len())) } } impl BufRead for BufPeekReader { fn fill_buf(&mut self) -> Result<&[u8]> { self.request_buffer(self.min_read_size)?; let (first, second) = self.buf_storage.as_slices(); if !first.is_empty() { Ok(first) } else { Ok(second) } } fn consume(&mut self, amt: usize) { for _ in 0..amt.min(self.buf_storage.len()) { self.buf_storage.pop_front(); } } } peekread-0.1.1/src/detail/cursor.rs000064400000000000000000000075050000000000000153030ustar 00000000000000use std::fmt::{Debug, Error as FmtError, Formatter}; use std::io::{BufRead, Read, Result, Seek, SeekFrom}; type FmtResult = std::result::Result<(), FmtError>; use crate::detail::PeekReadImpl; #[cfg(doc)] use crate::PeekRead; /// The internal state of a [`PeekCursor`]. See [`PeekReadImpl`]. /// /// All fields here are just provided to help you make your implementation /// possible, you may use them in any way you see fit. [`PeekCursor::new`] /// initializes these fields to their default value. #[non_exhaustive] #[derive(Debug)] pub struct PeekCursorState { /// The peek position in the stream. pub peek_pos: u64, /// A buffer you can use for BufRead. pub buf: [u8; 1], } /// An object implementing [`BufRead`] and [`Seek`] to peek ahead in a stream /// without affecting the original stream. pub struct PeekCursor<'a> { inner: &'a mut dyn PeekReadImpl, state: PeekCursorState, } impl<'a> PeekCursor<'a> { /// Creates a new [`PeekCursor`]. /// /// Unless you are trying to implement [`PeekRead`] you will never call /// this, you should look at [`PeekRead::peek`] instead. If you are /// trying to implement [`PeekRead`], see [`PeekReadImpl`]. 
// NOTE(review): type arguments such as `Result<u64>` and `Vec<u8>` were
// stripped from the extracted source; they are restored here from the
// signatures of the std traits being implemented.
type FmtResult = std::result::Result<(), FmtError>;

/// The internal state of a [`PeekCursor`]. See [`PeekReadImpl`].
///
/// All fields here are just provided to help you make your implementation
/// possible, you may use them in any way you see fit. [`PeekCursor::new`]
/// initializes these fields to their default value.
#[non_exhaustive]
#[derive(Debug)]
pub struct PeekCursorState {
    /// The peek position in the stream.
    pub peek_pos: u64,

    /// A buffer you can use for BufRead.
    pub buf: [u8; 1],
}

/// An object implementing [`BufRead`] and [`Seek`] to peek ahead in a stream
/// without affecting the original stream.
pub struct PeekCursor<'a> {
    inner: &'a mut dyn PeekReadImpl,
    state: PeekCursorState,
}

impl<'a> PeekCursor<'a> {
    /// Creates a new [`PeekCursor`].
    ///
    /// Unless you are trying to implement [`PeekRead`] you will never call
    /// this, you should look at [`PeekRead::peek`] instead. If you are
    /// trying to implement [`PeekRead`], see [`PeekReadImpl`].
    pub fn new(inner: &'a mut dyn PeekReadImpl) -> Self {
        Self {
            inner,
            state: PeekCursorState {
                peek_pos: 0,
                buf: [0],
            },
        }
    }
}

// Every trait method below forwards to the matching `peek_*` hook of the
// wrapped implementation, passing the cursor's own state along.
impl<'a> Seek for PeekCursor<'a> {
    fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
        self.inner.peek_seek(&mut self.state, pos)
    }

    fn stream_position(&mut self) -> Result<u64> {
        self.inner.peek_stream_position(&mut self.state)
    }
}

impl<'a> Read for PeekCursor<'a> {
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        self.inner.peek_read(&mut self.state, buf)
    }

    fn read_exact(&mut self, buf: &mut [u8]) -> Result<()> {
        self.inner.peek_read_exact(&mut self.state, buf)
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
        self.inner.peek_read_to_end(&mut self.state, buf)
    }

    fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
        self.inner.peek_read_to_string(&mut self.state, buf)
    }
}

impl<'a> BufRead for PeekCursor<'a> {
    fn fill_buf(&mut self) -> Result<&[u8]> {
        self.inner.peek_fill_buf(&mut self.state)
    }

    fn consume(&mut self, amt: usize) {
        self.inner.peek_consume(&mut self.state, amt)
    }
}

impl<'a> Drop for PeekCursor<'a> {
    // Gives the implementation a chance to undo whatever peeking changed.
    fn drop(&mut self) {
        self.inner.peek_drop(&mut self.state)
    }
}

impl<'a> Debug for PeekCursor<'a> {
    // Only the state is printed; the wrapped implementation is a trait object
    // that is not required to implement `Debug`.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        f.debug_struct("PeekCursor")
            .field("state", &self.state)
            .finish()
    }
}

/// A `PeekCursor` that does not override the default implementations.
/// Used to provide our default implementations without being circular.
#[derive(Debug)]
pub(crate) struct DefaultImplPeekCursor<'a, T: ?Sized + PeekReadImpl> {
    inner: &'a mut T,
    state: &'a mut PeekCursorState,
}

impl<'a, T: ?Sized + PeekReadImpl> DefaultImplPeekCursor<'a, T> {
    pub fn new(inner: &'a mut T, state: &'a mut PeekCursorState) -> Self {
        Self { inner, state }
    }
}

impl<'a, T: ?Sized + PeekReadImpl> Seek for DefaultImplPeekCursor<'a, T> {
    fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
        self.inner.peek_seek(self.state, pos)
    }
}

impl<'a, T: ?Sized + PeekReadImpl> Read for DefaultImplPeekCursor<'a, T> {
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        self.inner.peek_read(self.state, buf)
    }
}

impl<'a, T: ?Sized + PeekReadImpl> BufRead for DefaultImplPeekCursor<'a, T> {
    fn fill_buf(&mut self) -> Result<&[u8]> {
        self.inner.peek_fill_buf(self.state)
    }

    fn consume(&mut self, amt: usize) {
        self.inner.peek_consume(self.state, amt)
    }
}

/// A helper trait used to implement [`PeekRead`].
///
/// In order to implement [`PeekRead`] for one of your types you must first
/// implement this trait on your type and then implement [`PeekRead::peek`]
/// returning a [`PeekCursor`] (which you'll find you can only construct for
/// types implementing [`PeekReadImpl`]).
///
/// The [`PeekCursor`] contains a [`PeekCursorState`] object. In this object
/// there is some storage available to aid you in case the object you're
/// implementing [`PeekReadImpl`] on does not have the needed storage available
/// to keep the cursor state (e.g. the [`PeekRead`] implementation for `&[u8]`).
pub trait PeekReadImpl {
    /// Used to implement `self.peek().seek(pos)`. See [`Seek::seek`].
    fn peek_seek<'a>(&'a mut self, state: &'a mut PeekCursorState, pos: SeekFrom) -> Result<u64>;

    /// Used to implement `self.peek().read(buf)`. See [`Read::read`].
    fn peek_read<'a, 'b>(
        &'a mut self,
        state: &'a mut PeekCursorState,
        buf: &'b mut [u8],
    ) -> Result<usize>;

    /// Used to implement `self.peek().fill_buf()`. See [`BufRead::fill_buf`].
    fn peek_fill_buf<'a>(&'a mut self, state: &'a mut PeekCursorState) -> Result<&'a [u8]>;

    /// Used to implement `self.peek().consume()`. See [`BufRead::consume`].
    fn peek_consume<'a>(&'a mut self, state: &'a mut PeekCursorState, amt: usize);

    // Start default methods. Each one runs the std default implementation of
    // the corresponding `Seek`/`Read` method on top of the required methods
    // above, via `DefaultImplPeekCursor`.

    /// Used to implement `self.peek().stream_position()`. See
    /// [`Seek::stream_position`].
    fn peek_stream_position<'a>(&'a mut self, state: &'a mut PeekCursorState) -> Result<u64> {
        DefaultImplPeekCursor::new(self, state).stream_position()
    }

    /// Used to implement `self.peek().read_exact(buf)`. See
    /// [`Read::read_exact`].
    fn peek_read_exact<'a, 'b>(
        &'a mut self,
        state: &'a mut PeekCursorState,
        buf: &'b mut [u8],
    ) -> Result<()> {
        DefaultImplPeekCursor::new(self, state).read_exact(buf)
    }

    /// Used to implement `self.peek().read_to_end(buf)`. See
    /// [`Read::read_to_end`].
    fn peek_read_to_end<'a, 'b>(
        &'a mut self,
        state: &'a mut PeekCursorState,
        buf: &'b mut Vec<u8>,
    ) -> Result<usize> {
        DefaultImplPeekCursor::new(self, state).read_to_end(buf)
    }

    /// Used to implement `self.peek().read_to_string(buf)`. See
    /// [`Read::read_to_string`].
    fn peek_read_to_string<'a, 'b>(
        &'a mut self,
        state: &'a mut PeekCursorState,
        buf: &'b mut String,
    ) -> Result<usize> {
        DefaultImplPeekCursor::new(self, state).read_to_string(buf)
    }

    /// Called when the `PeekCursor` is dropped.
    fn peek_drop<'a>(&'a mut self, _state: &'a mut PeekCursorState) {
        // Do nothing by default.
    }
}
} } peekread-0.1.1/src/foreign_impl.rs000064400000000000000000000147450000000000000152020ustar 00000000000000use crate::detail::{PeekCursorState, PeekReadImpl}; use crate::util::seek_add_offset; use crate::{PeekCursor, PeekRead}; use std::io::{self, Cursor, Empty, Read, Result, Seek, SeekFrom, Take}; impl PeekRead for &mut T { #[inline] fn peek(&mut self) -> PeekCursor<'_> { (**self).peek() } } impl PeekRead for Box { #[inline] fn peek(&mut self) -> PeekCursor<'_> { (**self).peek() } } impl PeekRead for Empty { fn peek(&mut self) -> PeekCursor<'_> { PeekCursor::new(self) } } impl PeekReadImpl for Empty { fn peek_seek(&mut self, _state: &mut PeekCursorState, _pos: SeekFrom) -> Result { Ok(0) } fn peek_read(&mut self, _state: &mut PeekCursorState, _buf: &mut [u8]) -> Result { Ok(0) } fn peek_fill_buf(&mut self, _state: &mut PeekCursorState) -> Result<&[u8]> { Ok(&[]) } fn peek_consume(&mut self, _state: &mut PeekCursorState, _amt: usize) {} } impl PeekRead for &[u8] { fn peek(&mut self) -> PeekCursor<'_> { PeekCursor::new(self) } } impl PeekReadImpl for &[u8] { fn peek_seek(&mut self, state: &mut PeekCursorState, pos: SeekFrom) -> Result { state.peek_pos = match pos { SeekFrom::Start(offset) => offset, SeekFrom::Current(offset) => seek_add_offset(state.peek_pos, offset)?, SeekFrom::End(offset) => seek_add_offset(self.len() as u64, offset)?, }; Ok(state.peek_pos) } fn peek_read(&mut self, state: &mut PeekCursorState, buf: &mut [u8]) -> Result { let written = self .get(state.peek_pos as usize..) .unwrap_or_default() .read(buf)?; state.peek_pos += written as u64; Ok(written) } fn peek_read_exact(&mut self, state: &mut PeekCursorState, buf: &mut [u8]) -> Result<()> { self.get(state.peek_pos as usize..) 
.unwrap_or_default() .read_exact(buf)?; state.peek_pos += buf.len() as u64; Ok(()) } fn peek_fill_buf(&mut self, state: &mut PeekCursorState) -> Result<&[u8]> { Ok(self.get(state.peek_pos as usize..).unwrap_or_default()) } fn peek_consume(&mut self, state: &mut PeekCursorState, amt: usize) { state.peek_pos += amt as u64; } } impl> PeekRead for Cursor { fn peek(&mut self) -> PeekCursor<'_> { PeekCursor::new(self) } } impl> PeekReadImpl for Cursor { fn peek_seek(&mut self, state: &mut PeekCursorState, pos: SeekFrom) -> Result { let start_pos = self.stream_position()? as usize; let slice = self.get_ref().as_ref(); slice .get(start_pos..) .unwrap_or_default() .peek_seek(state, pos) } fn peek_read(&mut self, state: &mut PeekCursorState, buf: &mut [u8]) -> Result { let start_pos = self.stream_position()? as usize; let slice = self.get_ref().as_ref(); slice .get(start_pos..) .unwrap_or_default() .peek_read(state, buf) } fn peek_read_exact(&mut self, state: &mut PeekCursorState, buf: &mut [u8]) -> Result<()> { let start_pos = self.stream_position()? as usize; let slice = self.get_ref().as_ref(); slice .get(start_pos..) .unwrap_or_default() .peek_read_exact(state, buf) } fn peek_fill_buf<'a>(&'a mut self, state: &'a mut PeekCursorState) -> Result<&'a [u8]> { let start_pos = self.stream_position()? as usize; let slice = self.get_ref().as_ref(); Ok(slice .get(start_pos + state.peek_pos as usize..) 
.unwrap_or_default()) } fn peek_consume(&mut self, state: &mut PeekCursorState, amt: usize) { state.peek_pos += amt as u64; } } impl PeekRead for Take { fn peek(&mut self) -> PeekCursor<'_> { PeekCursor::new(self) } } impl PeekReadImpl for Take { fn peek_seek(&mut self, state: &mut PeekCursorState, pos: SeekFrom) -> Result { let limit_from_start = state.peek_pos + self.limit(); state.peek_pos = match pos { SeekFrom::Start(offset) => offset, SeekFrom::Current(offset) => seek_add_offset(state.peek_pos, offset)?, SeekFrom::End(offset) => { // Is there a more efficient way without specialization? let end = { let mut dummy: u8 = 0; let mut peeker = self.peek(); peeker.seek(SeekFrom::Start(limit_from_start))?; let is_eof = peeker.read(std::slice::from_mut(&mut dummy))? == 0; if is_eof { // Have to scan to find real end. peeker.seek(SeekFrom::Start(0))?; io::copy(&mut peeker, &mut io::sink())? } else { limit_from_start } }; seek_add_offset(end, offset)? } }; state.peek_pos = state.peek_pos.min(limit_from_start); self.set_limit(limit_from_start - state.peek_pos); Ok(state.peek_pos) } fn peek_read(&mut self, state: &mut PeekCursorState, buf: &mut [u8]) -> Result { if self.limit() == 0 { return Ok(0); } let limit = self.limit(); let mut peeker = self.peek(); peeker.seek(SeekFrom::Start(state.peek_pos))?; let written = peeker.take(limit).read(buf)? 
as u64; state.peek_pos += written; self.set_limit(limit - written); Ok(written as usize) } fn peek_fill_buf<'a>(&'a mut self, state: &'a mut PeekCursorState) -> Result<&'a [u8]> { if self.limit() == 0 { return Ok(&[]); } let mut peeker = self.peek(); peeker.seek(SeekFrom::Start(state.peek_pos))?; let read = peeker.read(&mut state.buf)?; Ok(&state.buf[..read]) } fn peek_consume(&mut self, state: &mut PeekCursorState, amt: usize) { let limit = self.limit(); let limit_from_start = limit + state.peek_pos; state.peek_pos += amt as u64; state.peek_pos = state.peek_pos.min(limit_from_start); self.set_limit(limit_from_start - state.peek_pos); } } // TODO: Not sure if this is possible, there are then two peek cursors. // impl PeekRead for Chain { } peekread-0.1.1/src/lib.rs000064400000000000000000000122730000000000000132700ustar 00000000000000#![doc(html_root_url = "https://docs.rs/peekread/0.1.1")] #![warn( invalid_html_tags, missing_debug_implementations, trivial_casts, trivial_numeric_casts, unused_lifetimes, unused_import_braces )] #![deny(missing_docs)] //! This crate allows you to take an arbitrary [`Read`] stream and 'peek ahead' //! into the stream without consuming the original stream. //! //! This is done through the [`PeekRead`] trait which has the method //! [`peek`]. When this method is called it returns a new [`PeekCursor`] object //! implementing [`Read`], [`BufRead`] and [`Seek`] that allows you to read from //! the stream without affecting the original stream. //! //! The [`PeekRead`] trait is directly //! implemented on a select few types, but for most you will have to wrap your //! type in a [`SeekPeekReader`] or [`BufPeekReader`] that implements the //! peeking behavior using respectively seeking or buffering. //! //! # Examples //! One could try various different parsers on the same stream until one //! succeeds: //! ```no_run //! # use std::io::{Result, Read, BufRead}; //! # use std::fs::File; //! 
# enum ParseResult { Html(()), Jpg(()), Png(()), Gif(()), Js(()), Unknown } //! # fn parse_as_html(f: T) -> () { () } //! # fn parse_as_jpg(f: T) -> Result<()> { Ok(()) } //! # fn parse_as_gif(f: T) -> Result<()> { Ok(()) } //! # fn parse_as_png(f: T) -> Result<()> { Ok(()) } //! # fn parse_as_javascript(f: T) -> Result<()> { Ok(()) } //! # fn foo() -> Result { //! # use peekread::{PeekRead, SeekPeekReader}; //! let mut f = SeekPeekReader::new(File::open("ambiguous")?); //! //! // HTML is so permissive its parser never fails, so check for signature. //! if f.starts_with("\n") { //! Ok(ParseResult::Html(parse_as_html(f))) //! } else { //! // Can pass PeekCursor to functions accepting T: Read without them //! // having to be aware of peekread. //! parse_as_jpg(f.peek()).map(ParseResult::Jpg) //! .or_else(|_| parse_as_png(f.peek()).map(ParseResult::Png)) //! .or_else(|_| parse_as_gif(f.peek()).map(ParseResult::Gif)) //! .or_else(|_| parse_as_javascript(f.peek()).map(ParseResult::Js)) //! } //! # } //! ``` //! //! [`peek`]: [`PeekRead::peek`] /// Details for those wishing to implement [`PeekRead`]. pub mod detail; mod bufreader; mod foreign_impl; mod seekreader; mod util; pub use bufreader::BufPeekReader; pub use detail::cursor::PeekCursor; pub use seekreader::SeekPeekReader; #[cfg(doc)] use std::io::{BufRead, BufReader, Seek}; use std::io::{Read, Result}; /// A trait for a [`Read`] stream that supports peeking ahead in the stream. /// /// In addition to a normal read cursor it can create a separate 'peek cursor' /// which can go ahead of the regular read cursor, but never behind it. Reading /// from the peek cursor does not affect the read cursor in any way. pub trait PeekRead: Read { /// Returns a [`PeekCursor`] which implements [`BufRead`] + [`Seek`], /// allowing you to peek ahead in a stream of data. Reading from this or /// seeking on it won't affect the read cursor, only the peek cursor. 
/// /// You can't seek before the read cursor, `peek().seek(SeekFrom::Start(0))` /// is defined to be the read cursor position. /// /// Despite implementing [`BufRead`] for convenience, by default reads from /// the [`PeekCursor`] are unbuffered where possible and will only read /// as much as necessary from the underlying stream, if reading can /// block or otherwise invoke a cost. This can be circumvented by /// buffering the underlying stream (e.g. with /// [`BufPeekReader::set_min_read_size`], or for [`SeekPeekReader`] by /// wrapping the inner stream in a [`BufReader`]), or one can wrap the /// peek cursor itself in [`BufReader`], although this will only buffer /// reads from this particular peek cursor. fn peek(&mut self) -> PeekCursor<'_>; /// Convenience method to check if the upcoming bytes in a stream equal the /// given string of bytes, without advancing the stream. fn starts_with>(&mut self, bytes: B) -> Result { let bytes = bytes.as_ref(); let mut buf = [0u8; 32]; // Prevent allocation, check 32 bytes at a time. let mut peeker = self.peek(); for chunk in bytes.chunks(32) { let partial_buf = &mut buf[..chunk.len()]; if let Err(e) = peeker.read_exact(partial_buf) { return match e.kind() { std::io::ErrorKind::UnexpectedEof => Ok(false), _ => Err(e), }; } if partial_buf != chunk { return Ok(false); } } Ok(true) } /// Convenience method to consume a specific string of bytes if they are /// next up in the stream, leaving the stream unchanged otherwise. Returns /// whether the string was found and removed. 
fn consume_prefix>(&mut self, bytes: B) -> Result { let bytes = bytes.as_ref(); let should_strip = self.starts_with(bytes)?; if should_strip { std::io::copy(&mut self.take(bytes.len() as u64), &mut std::io::sink())?; } Ok(should_strip) } } peekread-0.1.1/src/seekreader.rs000064400000000000000000000117030000000000000146310ustar 00000000000000use crate::util::seek_add_offset; use crate::{ detail::{PeekCursorState, PeekReadImpl}, PeekCursor, PeekRead, }; use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom}; /// A wrapper for a [`Read`] + [`Seek`] stream that implements [`PeekRead`] /// using seeking. #[derive(Debug)] pub struct SeekPeekReader { inner: R, start_pos: Option, } impl SeekPeekReader { /// Creates a new [`SeekPeekReader`]. /// /// When calling `.peek()` on this object the stream is restored to /// its original position when the [`PeekCursor`] is dropped using a seek. pub fn new(reader: R) -> Self { Self { inner: reader, start_pos: None, } } /// Gets a reference to the underlying reader. /// /// It is inadvisable to directly read from the underlying reader. pub fn get_ref(&self) -> &R { &self.inner } /// Gets a mutable reference to the underlying reader. /// /// It is inadvisable to directly read from the underlying reader. pub fn get_mut(&mut self) -> &mut R { &mut self.inner } /// Unwraps this `SeekPeekReader`, returning the underlying reader. 
pub fn into_inner(self) -> R { self.inner } fn init_start_pos(&mut self) -> Result { let start_pos = self .start_pos .map(Ok) .unwrap_or_else(|| self.inner.stream_position())?; self.start_pos = Some(start_pos); Ok(start_pos) } } impl Seek for SeekPeekReader { fn seek(&mut self, pos: SeekFrom) -> Result { self.inner.seek(pos) } fn stream_position(&mut self) -> Result { self.inner.stream_position() } } impl Read for SeekPeekReader { fn read(&mut self, buf: &mut [u8]) -> Result { self.inner.read(buf) } fn read_exact(&mut self, buf: &mut [u8]) -> Result<()> { self.inner.read_exact(buf) } fn read_to_end(&mut self, buf: &mut Vec) -> Result { self.inner.read_to_end(buf) } fn read_to_string(&mut self, buf: &mut String) -> Result { self.inner.read_to_string(buf) } } impl PeekRead for SeekPeekReader { fn peek(&mut self) -> PeekCursor<'_> { self.start_pos = None; PeekCursor::new(self) } } impl PeekReadImpl for SeekPeekReader { fn peek_seek(&mut self, _state: &mut PeekCursorState, pos: SeekFrom) -> Result { let start_pos = self.init_start_pos()?; let cur_pos = self.stream_position()?; let new_pos = match pos { SeekFrom::Start(offset) => self.inner.seek(SeekFrom::Start(start_pos + offset))?, SeekFrom::Current(offset) => self.inner.seek(SeekFrom::Current(offset))?, SeekFrom::End(offset) => { // TODO: can this be more efficient? let end_pos = self.inner.seek(SeekFrom::End(0))?.max(start_pos); match seek_add_offset(end_pos, offset) { Ok(o) => self.inner.seek(SeekFrom::Start(o))?, Err(e) => { // Restore position. 
/// Applies a signed seek `offset` to an unsigned stream position.
///
/// Returns the new position, or an [`ErrorKind::InvalidInput`] error if the
/// result would be negative or would not fit in a `u64`.
///
/// The sum is computed in `i128`, so every `current` in the `u64` range is
/// accepted (e.g. `u64::MAX` with an offset of `-1`), not just positions
/// that fit in an `i64`.
pub fn seek_add_offset(current: u64, offset: i64) -> Result<u64> {
    // Cannot overflow: |u64::MAX| + |i64::MIN| is far below `i128::MAX`.
    let wide = i128::from(current) + i128::from(offset);
    let target: Option<u64> = wide.try_into().ok();
    target.ok_or_else(|| {
        Error::new(
            ErrorKind::InvalidInput,
            "invalid seek to a negative or overflowing position",
        )
    })
}