gix-chunk-0.4.5/.cargo_vcs_info.json0000644000000001470000000000100127410ustar { "git": { "sha1": "55d386a2448aba1dd22c73fb63b3fd5b3a8401c9" }, "path_in_vcs": "gix-chunk" }gix-chunk-0.4.5/Cargo.toml0000644000000017730000000000100107450ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-chunk" version = "0.4.5" authors = ["Sebastian Thiel "] include = [ "src/**/*", "LICENSE-*", ] description = "Interact with the git chunk file format used in multi-pack index and commit-graph files" documentation = "https://github.com/git/git/blob/seen/Documentation/technical/chunk-format.txt" license = "MIT OR Apache-2.0" repository = "https://github.com/Byron/gitoxide" [lib] test = false doctest = false [dependencies.thiserror] version = "1.0.34" gix-chunk-0.4.5/Cargo.toml.orig000064400000000000000000000010251046102023000144140ustar 00000000000000[package] name = "gix-chunk" version = "0.4.5" description = "Interact with the git chunk file format used in multi-pack index and commit-graph files" authors = ["Sebastian Thiel "] repository = "https://github.com/Byron/gitoxide" documentation = "https://github.com/git/git/blob/seen/Documentation/technical/chunk-format.txt" license = "MIT OR Apache-2.0" edition = "2021" include = ["src/**/*", "LICENSE-*"] rust-version = "1.65" [lib] doctest = false test = false [dependencies] thiserror = "1.0.34" gix-chunk-0.4.5/LICENSE-APACHE000064400000000000000000000251221046102023000134550ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2018-2021 Sebastian Thiel, and [contributors](https://github.com/byron/gitoxide/contributors) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. gix-chunk-0.4.5/LICENSE-MIT000064400000000000000000000021551046102023000131660ustar 00000000000000Copyright (c) 2018-2021 Sebastian Thiel, and [contributors](https://github.com/byron/gitoxide/contributors). Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-chunk-0.4.5/src/file/decode.rs000064400000000000000000000074031046102023000150320ustar 00000000000000use std::{convert::TryInto, ops::Range}; mod error { /// The value returned by [`crate::file::Index::from_bytes()`] #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Sentinel value encountered while still processing chunks.")] EarlySentinelValue, #[error("Sentinel value wasn't found, saw {:?}", std::str::from_utf8(actual.as_ref()).unwrap_or(""))] MissingSentinelValue { actual: crate::Id }, #[error("The chunk offset {offset} went past the file of length {file_length} - was it truncated?")] ChunkSizeOutOfBounds { offset: crate::file::Offset, file_length: u64, }, #[error("All chunk offsets must be incrementing.")] NonIncrementalChunkOffsets, #[error("The chunk of kind {:?} was encountered more than once", std::str::from_utf8(kind.as_ref()).unwrap_or(""))] DuplicateChunk { kind: crate::Id }, #[error("The table of contents would be {expected} bytes, but got only {actual}")] TocTooSmall { actual: usize, expected: usize }, #[error("Empty chunk indices are not allowed as the point of chunked files is to have chunks.")] Empty, } } pub use error::Error; use crate::{file, file::index}; impl file::Index { /// Provided a mapped file at the beginning via `data`, starting at `toc_offset` decode all chunk information to return /// an index with `num_chunks` chunks. pub fn from_bytes(data: &[u8], toc_offset: usize, num_chunks: u32) -> Result { if num_chunks == 0 { return Err(Error::Empty); } let data_len: u64 = data.len() as u64; let mut chunks = Vec::with_capacity(num_chunks as usize); let mut toc_entry = &data[toc_offset..]; let expected_min_size = (num_chunks as usize + 1) * file::Index::ENTRY_SIZE; if toc_entry.len() < expected_min_size { return Err(Error::TocTooSmall { expected: expected_min_size, actual: toc_entry.len(), }); } for _ in 0..num_chunks { let (kind, offset) = toc_entry.split_at(4); let kind = to_kind(kind); if kind == crate::SENTINEL { return Err(Error::EarlySentinelValue); } if chunks.iter().any(|c: &index::Entry| c.kind == kind) { return Err(Error::DuplicateChunk { kind }); } let offset = be_u64(offset); if offset > data_len { return Err(Error::ChunkSizeOutOfBounds { offset, file_length: data_len, }); } toc_entry = &toc_entry[file::Index::ENTRY_SIZE..]; let next_offset = be_u64(&toc_entry[4..]); if next_offset > data_len { return Err(Error::ChunkSizeOutOfBounds { offset: next_offset, file_length: data_len, }); } if next_offset <= offset { return Err(Error::NonIncrementalChunkOffsets); } chunks.push(index::Entry { kind, offset: Range { start: offset, end: next_offset, }, }) } let sentinel = to_kind(&toc_entry[..4]); if sentinel != crate::SENTINEL { return Err(Error::MissingSentinelValue { actual: sentinel }); } Ok(file::Index { chunks, will_write: false, }) } } fn to_kind(data: &[u8]) -> crate::Id { data[..4].try_into().unwrap() } fn be_u64(data: &[u8]) -> u64 { u64::from_be_bytes(data[..8].try_into().unwrap()) } gix-chunk-0.4.5/src/file/index.rs000064400000000000000000000075611046102023000147230ustar 00000000000000use std::ops::Range; use crate::file::Index; /// pub mod offset_by_kind { use std::fmt::{Display, Formatter}; /// The error returned by [`Index::offset_by_id()`][super::Index::offset_by_id()]. #[allow(missing_docs)] #[derive(Debug)] pub struct Error { pub kind: crate::Id, } impl Display for Error { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, "Chunk named {:?} was not found in chunk file index", std::str::from_utf8(&self.kind).unwrap_or("") ) } } impl std::error::Error for Error {} } /// pub mod data_by_kind { /// The error returned by [`Index::data_by_id()`][super::Index::data_by_id()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("The chunk wasn't found in the file index")] NotFound(#[from] super::offset_by_kind::Error), #[error("The offsets into the file couldn't be represented by usize")] FileTooLarge, } } /// An entry of a chunk file index pub struct Entry { /// The kind of the chunk file pub kind: crate::Id, /// The offset, relative to the beginning of the file, at which to find the chunk and its end. pub offset: Range, } impl Index { /// The size of a single index entry in bytes pub const ENTRY_SIZE: usize = std::mem::size_of::() + std::mem::size_of::(); /// The smallest possible size of an index, consisting only of the sentinel value pointing past itself. pub const EMPTY_SIZE: usize = Index::ENTRY_SIZE; /// Returns the size in bytes an index with `num_entries` would take. pub const fn size_for_entries(num_entries: usize) -> usize { Self::ENTRY_SIZE * (num_entries + 1/*sentinel*/) } /// Find a chunk of `kind` and return its offset into the data if found pub fn offset_by_id(&self, kind: crate::Id) -> Result, offset_by_kind::Error> { self.chunks .iter() .find_map(|c| (c.kind == kind).then(|| c.offset.clone())) .ok_or(offset_by_kind::Error { kind }) } /// Find a chunk of `kind` and return its offset as usize range into the data if found. /// /// /// # Panics /// /// - if the usize conversion fails, which isn't expected as memory maps can't be created if files are too large /// to require such offsets. pub fn usize_offset_by_id(&self, kind: crate::Id) -> Result, offset_by_kind::Error> { self.chunks .iter() .find_map(|c| (c.kind == kind).then(|| crate::range::into_usize_or_panic(c.offset.clone()))) .ok_or(offset_by_kind::Error { kind }) } /// Like [`Index::usize_offset_by_id()`] but with support for validation and transformation using a function. pub fn validated_usize_offset_by_id( &self, kind: crate::Id, validate: impl FnOnce(Range) -> T, ) -> Result { self.chunks .iter() .find_map(|c| (c.kind == kind).then(|| crate::range::into_usize_or_panic(c.offset.clone()))) .map(validate) .ok_or(offset_by_kind::Error { kind }) } /// Find a chunk of `kind` and return its data slice based on its offset. pub fn data_by_id<'a>(&self, data: &'a [u8], kind: crate::Id) -> Result<&'a [u8], data_by_kind::Error> { let offset = self.offset_by_id(kind)?; Ok(&data[crate::range::into_usize(offset).ok_or(data_by_kind::Error::FileTooLarge)?]) } /// Return the end offset lf the last chunk, which is the highest offset as well. /// It's definitely available as we have one or more chunks. pub fn highest_offset(&self) -> crate::file::Offset { self.chunks.last().expect("at least one chunk").offset.end } } gix-chunk-0.4.5/src/file/mod.rs000064400000000000000000000007501046102023000143640ustar 00000000000000/// pub mod decode; /// pub mod index; /// pub mod write; /// The offset to a chunk as seen relative to the beginning of the file containing it. pub type Offset = u64; /// A chunk file providing a table into the parent data. pub struct Index { /// If true, we use `chunks` in a way that facilitates writing them. will_write: bool, /// Validated chunks as defined by their index entries. /// /// Note that this list cannot be empty. chunks: Vec, } gix-chunk-0.4.5/src/file/write.rs000064400000000000000000000107251046102023000147420ustar 00000000000000use crate::file::{index::Entry, Index}; mod write_chunk { use std::collections::VecDeque; use crate::file::index; /// A [`Write`][std::io::Write] implementation that validates chunk sizes while allowing the user to know /// which chunk is to be written next. pub struct Chunk { chunks_to_write: VecDeque, inner: W, next_chunk: Option, written_bytes: usize, } impl Chunk where W: std::io::Write, { pub(crate) fn new(out: W, chunks: VecDeque) -> Chunk where W: std::io::Write, { Chunk { chunks_to_write: chunks, inner: out, next_chunk: None, written_bytes: 0, } } } impl std::io::Write for Chunk where W: std::io::Write, { fn write(&mut self, buf: &[u8]) -> std::io::Result { let written = self.inner.write(buf)?; self.written_bytes += written; Ok(written) } fn flush(&mut self) -> std::io::Result<()> { self.inner.flush() } } impl Chunk { /// Return the inner writer - should only be called once there is no more chunk to write. pub fn into_inner(self) -> W { self.inner } /// Return the next chunk-id to write, if there is one. pub fn next_chunk(&mut self) -> Option { if let Some(entry) = self.next_chunk.take() { assert_eq!( entry.offset.end, self.written_bytes as u64, "BUG: expected to write {} bytes, but only wrote {} for chunk {:?}", entry.offset.end, self.written_bytes, std::str::from_utf8(&entry.kind) ) } self.written_bytes = 0; self.next_chunk = self.chunks_to_write.pop_front(); self.next_chunk.as_ref().map(|e| e.kind) } } } pub use write_chunk::Chunk; /// Writing impl Index { /// Create a new index whose sole purpose is to be receiving chunks using [`plan_chunk()`][Index::plan_chunk()] and to be written to /// an output using [`into_write()`][Index::into_write()] pub fn for_writing() -> Self { Index { will_write: true, chunks: Vec::new(), } } /// Plan to write a new chunk as part of the index when [`into_write()`][Index::into_write()] is called. pub fn plan_chunk(&mut self, chunk: crate::Id, exact_size_on_disk: u64) { assert!(self.will_write, "BUG: create the index with `for_writing()`"); assert!( !self.chunks.iter().any(|e| e.kind == chunk), "BUG: must not add chunk of same kind twice: {:?}", std::str::from_utf8(&chunk) ); self.chunks.push(Entry { kind: chunk, offset: 0..exact_size_on_disk, }) } /// Return the total size of all planned chunks thus far. pub fn planned_storage_size(&self) -> u64 { assert!(self.will_write, "BUG: create the index with `for_writing()`"); self.chunks.iter().map(|e| e.offset.end).sum() } /// Return the amount of chunks we currently know. pub fn num_chunks(&self) -> usize { self.chunks.len() } /// After [planning all chunks][Index::plan_chunk()] call this method with the destination to write the chunks to. /// Use the [Chunk] writer to write each chunk in order. /// `current_offset` is the byte position at which `out` will continue writing. pub fn into_write(self, mut out: W, current_offset: usize) -> std::io::Result> where W: std::io::Write, { assert!( self.will_write, "BUG: create the index with `for_writing()`, cannot write decoded indices" ); // First chunk starts past the table of contents let mut current_offset = (current_offset + Self::size_for_entries(self.num_chunks())) as u64; for entry in &self.chunks { out.write_all(&entry.kind)?; out.write_all(¤t_offset.to_be_bytes())?; current_offset += entry.offset.end; } // sentinel to mark end of chunks out.write_all(&0u32.to_be_bytes())?; out.write_all(¤t_offset.to_be_bytes())?; Ok(Chunk::new(out, self.chunks.into())) } } gix-chunk-0.4.5/src/lib.rs000064400000000000000000000026231046102023000134350ustar 00000000000000//! Low-level access to reading and writing chunk file based formats. //! //! See the [git documentation](https://github.com/git/git/blob/seen/Documentation/technical/chunk-format.txt) for details. #![deny(missing_docs, rust_2018_idioms, unsafe_code)] /// An identifier to describe the kind of chunk, unique within a chunk file, typically in ASCII pub type Id = [u8; 4]; /// A special value denoting the end of the chunk file table of contents. pub const SENTINEL: Id = [0u8; 4]; /// pub mod range { use std::{convert::TryInto, ops::Range}; use crate::file; /// Turn a u64 Range into a usize range safely, to make chunk ranges useful in memory mapped files. pub fn into_usize(Range { start, end }: Range) -> Option> { let start = start.try_into().ok()?; let end = end.try_into().ok()?; Some(Range { start, end }) } /// Similar to [`into_usize()`], but panics assuming that the memory map couldn't be created if offsets /// stored are too high. /// /// This is only true for correctly formed files, as it's entirely possible to provide out of bounds offsets /// which are checked for separately - we wouldn't be here if that was the case. pub fn into_usize_or_panic(range: Range) -> Range { into_usize(range).expect("memory maps can't be created if files are too large") } } /// pub mod file;