bytelines-2.2.2/.gitignore010064400007660000024000000000231341473346400137270ustar0000000000000000/target **/*.rs.bk bytelines-2.2.2/.travis.yml010064400007660000024000000004301341473346400140520ustar0000000000000000# Language flags. language: rust # OS matrix. os: - linux - osx - windows # Version matrix. rust: - stable - beta - nightly # Matrix modifications. matrix: # Failures to allow. allow_failures: - rust: nightly # Cargo package caching. cache: cargo: true bytelines-2.2.2/Cargo.toml.orig010064400007660000024000000006631341570562400146360ustar0000000000000000[package] name = "bytelines" version = "2.2.2" # remember to update html_root_url authors = ["Isaac Whitfield "] description = "Read input lines as byte slices for high efficiency" repository = "https://github.com/whitfin/bytelines" keywords = ["lines", "bytes"] categories = ["parsing", "text-processing"] readme = "README.md" edition = "2018" license = "MIT" [badges] travis-ci = { repository = "whitfin/bytelines" } bytelines-2.2.2/Cargo.toml0000644000000016250000000000000110760ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "bytelines" version = "2.2.2" authors = ["Isaac Whitfield "] description = "Read input lines as byte slices for high efficiency" readme = "README.md" keywords = ["lines", "bytes"] categories = ["parsing", "text-processing"] license = "MIT" repository = "https://github.com/whitfin/bytelines" [badges.travis-ci] repository = "whitfin/bytelines" bytelines-2.2.2/README.md010064400007660000024000000047731341570561500132340ustar0000000000000000# bytelines [![Crates.io](https://img.shields.io/crates/v/bytelines.svg)](https://crates.io/crates/bytelines) [![Build Status](https://img.shields.io/travis/whitfin/bytelines.svg?)](https://travis-ci.org/whitfin/bytelines) This library provides an easy way to read in input lines as byte slices for high efficiency. It's basically [lines](https://doc.rust-lang.org/std/io/trait.BufRead.html#method.lines) from the standard library, but it reads each line as a byte slice (`&[u8]`). This performs significantly faster than `lines()` in the case you don't particularly care about unicode, and basically as fast as writing the loops out by hand. Although the code itself is somewhat trivial, I've had to roll this in at least 4 tools I've written recently and so I figured it was time to have a convenience crate for it. ### Installation This tool will be available via [Crates.io](https://crates.io/crates/bytelines), so you can add it as a dependency in your `Cargo.toml`: ```toml [dependencies] bytelines = "2.2" ``` ### Usage It's quite simple; in the place you would typically call `lines` on a `BufRead` implementor, you can now call `byte_lines` to retrieve a structure used to walk over lines as `&[u8]` (and thus avoid allocations). 
There are two ways to use the API, and both are shown below: ```rust // our input file we're going to walk over lines of, and our reader let file = File::open("./my-input.txt").expect("able to open file"); let reader = BufReader::new(file); let mut lines = reader.byte_lines(); // Option 1: Walk using a `while` loop. // // This is the most performant option, as it avoids an allocation by // simply referencing bytes inside the reading structure. This means // that there's no copying at all, until the developer chooses to. while let Some(line) = lines.next() { // do something with the line } // Option 2: Use the `Iterator` trait. // // This is more idiomatic, but requires allocating each line into // an owned `Vec<u8>` to avoid potential memory safety issues. Although // there is an allocation here, the overhead should be negligible // except in cases where performance is paramount. for line in lines.into_iter() { // do something with the line } ``` This interface was introduced in the v2.x lineage of `bytelines`. The `Iterator` trait was previously implemented in v1.x, but required an `unsafe` contract in trying to be too idiomatic. This has since been fixed, and all unsafe code has been removed whilst providing `IntoIterator` implementations for those who prefer the cleaner syntax. bytelines-2.2.2/res/empty.txt010064400007660000024000000000011341570553700144240ustar0000000000000000 bytelines-2.2.2/res/numbers.txt010064400007660000024000000000241341473346400147450ustar00000000000000000 1 2 3 4 5 6 7 8 9 bytelines-2.2.2/src/lib.rs010064400007660000024000000157761341570563000136620ustar0000000000000000//! `Bytelines` is a simple library crate which offers line iteration for //! `BufRead` via `&[u8]` rather than `String`. //! //! Due to the removal of checking for `String` validity, this is typically //! much faster for reading in raw data and much more flexible. The APIs //! offered in this crate are intended to function exactly the same as the //! 
//! `lines` function inside the `BufRead` trait, except that the bytes which
//! precede the line delimiter are not validated.
//!
//! Performance of [ByteLines](struct.ByteLines.html) is practically identical
//! to that of writing a `loop` manually, due to the avoidance of allocations.
#![doc(html_root_url = "https://docs.rs/bytelines/2.2.2")]
use std::io::BufRead;

/// Represents anything which can provide iterators of byte lines.
pub trait ByteLinesReader<B>
where
    B: BufRead,
{
    /// Returns a structure used to iterate the lines of this reader as `&[u8]`.
    fn byte_lines(self) -> ByteLines<B>;

    /// Returns an iterator over the lines of this reader as `Vec<u8>`.
    fn byte_lines_iter(self) -> ByteLinesIter<B>;
}

/// Blanket implementation for all `BufRead`.
impl<B> ByteLinesReader<B> for B
where
    B: BufRead,
{
    /// Returns a structure used to iterate the lines of this reader as `&[u8]`.
    #[inline]
    fn byte_lines(self) -> ByteLines<B> {
        ByteLines::new(self)
    }

    /// Returns an iterator over the lines of this reader (as `Vec<u8>`).
    #[inline]
    fn byte_lines_iter(self) -> ByteLinesIter<B> {
        self.byte_lines().into_iter()
    }
}

/// Provides iteration over bytes of input, split by line.
///
/// Unlike the implementation in the standard library, this requires
/// no allocations and simply references the input lines from the
/// internal buffer. In order to do this safely, we must sacrifice
/// the `Iterator` API, and operate using `while` syntax:
///
/// ```rust
/// use bytelines::*;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // construct our iterator from our file input
/// let file = File::open("./res/numbers.txt").unwrap();
/// let mut lines = BufReader::new(file).byte_lines();
///
/// // walk our lines using `while` syntax
/// while let Some(line) = lines.next() {
///     // do something with the line, which is a `Result` wrapping `&[u8]`
/// }
/// ```
///
/// For those who prefer the `Iterator` API, this structure implements
/// the `IntoIterator` trait to provide it. This comes at the cost of
/// an allocation of a `Vec<u8>` for each line in the `Iterator`. This is
/// negligible in many cases, so often it comes down to which syntax
/// is preferred:
///
/// ```rust
/// use bytelines::*;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // construct our iterator from our file input
/// let file = File::open("./res/numbers.txt").unwrap();
/// let lines = BufReader::new(file).byte_lines();
///
/// // walk our lines using `for` syntax
/// for line in lines.into_iter() {
///     // do something with the line, which is a `Result` wrapping `Vec<u8>`
/// }
/// ```
pub struct ByteLines<B>
where
    B: BufRead,
{
    /// Holds the bytes of the most recently read line; reused for every call.
    buffer: Vec<u8>,
    /// The underlying source the lines are read from.
    reader: B,
}

impl<B> ByteLines<B>
where
    B: BufRead,
{
    /// Constructs a new `ByteLines` from an input `BufRead`.
    pub fn new(buf: B) -> Self {
        Self {
            buffer: Vec::new(),
            reader: buf,
        }
    }

    /// Retrieves a reference to the next line of bytes in the reader (if any).
    ///
    /// The returned slice excludes the trailing `\n` (and a `\r` directly
    /// before it). It borrows the internal buffer, so it is only valid until
    /// the next call to `next`.
    ///
    /// Returns `None` once the reader yields no more bytes. A final line
    /// without a trailing `\n` is still returned, unmodified.
    ///
    /// # Errors
    ///
    /// Any error reported by the underlying reader is passed back as
    /// `Some(Err(_))`.
    pub fn next(&mut self) -> Option<Result<&[u8], std::io::Error>> {
        // discard the previous line so the buffer only ever holds one line
        self.buffer.clear();

        // read up to (and including) the next line feed, as raw bytes
        match self.reader.read_until(b'\n', &mut self.buffer) {
            // short circuit on error
            Err(e) => Some(Err(e)),

            // no input, done
            Ok(0) => None,

            // bytes!
            Ok(_) => {
                // "pop" the delimiter; it is absent only on an unterminated last line
                if self.buffer.ends_with(b"\n") {
                    self.buffer.pop();

                    // also "pop" a potential trailing \r (CRLF line endings)
                    if self.buffer.ends_with(b"\r") {
                        self.buffer.pop();
                    }
                }

                // pass back the byte slice
                Some(Ok(&self.buffer[..]))
            }
        }
    }
}

/// `IntoIterator` conversion for `ByteLines` to provide `Iterator` APIs.
impl<B> IntoIterator for ByteLines<B>
where
    B: BufRead,
{
    type Item = Result<Vec<u8>, std::io::Error>;
    type IntoIter = ByteLinesIter<B>;

    /// Constructs a `ByteLinesIter` to provide an `Iterator` API.
    #[inline]
    fn into_iter(self) -> ByteLinesIter<B> {
        ByteLinesIter { inner: self }
    }
}

/// `Iterator` implementation of `ByteLines` to provide `Iterator` APIs.
///
/// This structure enables developers the use of the `Iterator` API in
/// their code, at the cost of an allocation per input line:
///
/// ```rust
/// use bytelines::*;
/// use std::fs::File;
/// use std::io::BufReader;
///
/// // construct our iterator from our file input
/// let file = File::open("./res/numbers.txt").unwrap();
/// let lines = BufReader::new(file).byte_lines();
///
/// // walk our lines using `for` syntax
/// for line in lines.into_iter() {
///     // do something with the line, which is a `Result` wrapping `Vec<u8>`
/// }
/// ```
pub struct ByteLinesIter<B>
where
    B: BufRead,
{
    /// The borrowing line reader each owned line is copied out of.
    inner: ByteLines<B>,
}

impl<B> Iterator for ByteLinesIter<B>
where
    B: BufRead,
{
    type Item = Result<Vec<u8>, std::io::Error>;

    /// Retrieves the next line in the iterator (if any).
    ///
    /// Each successfully read line is copied into a freshly allocated
    /// `Vec<u8>`; read errors are passed through unchanged.
    #[inline]
    fn next(&mut self) -> Option<Result<Vec<u8>, std::io::Error>> {
        self.inner
            .next()
            .map(|line| line.map(|bytes| bytes.to_vec()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::BufReader;

    /// Same bytes as `res/numbers.txt`: the digits 0 through 9, one per line.
    /// Kept in memory so the tests do not depend on the working directory.
    const NUMBERS: &[u8] = b"0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n";

    /// Same bytes as `res/empty.txt`: a single line feed.
    const EMPTY_LINE: &[u8] = b"\n";

    /// The lines expected when reading `NUMBERS`.
    fn expected_numbers() -> Vec<String> {
        (0..10).map(|i| i.to_string()).collect()
    }

    #[test]
    fn test_basic_loop() {
        let mut brdr = BufReader::new(NUMBERS).byte_lines();
        let mut lines = Vec::new();

        while let Some(line) = brdr.next() {
            let line = line.unwrap().to_vec();
            let line = String::from_utf8(line).unwrap();

            lines.push(line);
        }

        // compares every line as well as the line count
        assert_eq!(lines, expected_numbers());
    }

    #[test]
    fn test_basic_iterator() {
        let mut lines = Vec::new();

        for line in BufReader::new(NUMBERS).byte_lines().into_iter() {
            let line = line.unwrap();
            let line = String::from_utf8(line).unwrap();

            lines.push(line);
        }

        // compares every line as well as the line count
        assert_eq!(lines, expected_numbers());
    }

    #[test]
    fn test_empty_line() {
        let mut lines = Vec::new();

        for line in BufReader::new(EMPTY_LINE).byte_lines().into_iter() {
            let line = line.unwrap();
            let line = String::from_utf8(line).unwrap();

            lines.push(line);
        }

        assert_eq!(lines.len(), 1);
        assert_eq!(lines[0], "");
    }
}
bytelines-2.2.2/.cargo_vcs_info.json0000644000000001120000000000000130660ustar00{ "git": { "sha1": "048e113dc33a1b0b7ae24cdfa2f525e040153a3b" } }