gix-object-0.46.1/.cargo_vcs_info.json0000644000000001500000000000100131530ustar { "git": { "sha1": "c1ba5719132227410abefeb54e3032b015233e94" }, "path_in_vcs": "gix-object" }gix-object-0.46.1/Cargo.toml0000644000000104560000000000100111630ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-object" version = "0.46.1" authors = ["Sebastian Thiel "] build = false include = [ "src/**/*", "LICENSE-*", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Immutable and mutable git objects with decoding and encoding support" readme = false license = "MIT OR Apache-2.0" repository = "https://github.com/GitoxideLabs/gitoxide" [package.metadata.docs.rs] all-features = true features = ["document-features"] [lib] name = "gix_object" path = "src/lib.rs" doctest = false [dependencies.bstr] version = "1.3.0" features = [ "std", "unicode", ] default-features = false [dependencies.document-features] version = "0.2.0" optional = true [dependencies.gix-actor] version = "^0.33.1" [dependencies.gix-date] version = "^0.9.3" [dependencies.gix-features] version = "^0.39.1" features = [ "rustsha1", "progress", ] [dependencies.gix-hash] version = "^0.15.1" [dependencies.gix-hashtable] version = "^0.6.0" [dependencies.gix-path] version = "^0.10.12" [dependencies.gix-utils] version = "^0.1.13" [dependencies.gix-validate] version = "^0.9.2" [dependencies.itoa] version = "1.0.1" [dependencies.serde] version = "1.0.114" features = ["derive"] optional = true 
default-features = false [dependencies.smallvec] version = "1.4.0" features = ["write"] [dependencies.thiserror] version = "2.0.0" [dependencies.winnow] version = "0.6.18" features = ["simd"] [dev-dependencies.criterion] version = "0.5.1" [dev-dependencies.pretty_assertions] version = "1.0.0" [dev-dependencies.termtree] version = "0.5.1" [features] serde = [ "dep:serde", "bstr/serde", "smallvec/serde", "gix-hash/serde", "gix-actor/serde", ] verbose-object-parsing-errors = ["winnow/std"] [lints.clippy] bool_to_int_with_if = "allow" borrow_as_ptr = "allow" cast_lossless = "allow" cast_possible_truncation = "allow" cast_possible_wrap = "allow" cast_precision_loss = "allow" cast_sign_loss = "allow" checked_conversions = "allow" copy_iterator = "allow" default_trait_access = "allow" doc_markdown = "allow" empty_docs = "allow" enum_glob_use = "allow" explicit_deref_methods = "allow" explicit_into_iter_loop = "allow" explicit_iter_loop = "allow" filter_map_next = "allow" fn_params_excessive_bools = "allow" from_iter_instead_of_collect = "allow" if_not_else = "allow" ignored_unit_patterns = "allow" implicit_clone = "allow" inconsistent_struct_constructor = "allow" inefficient_to_string = "allow" inline_always = "allow" items_after_statements = "allow" iter_not_returning_iterator = "allow" iter_without_into_iter = "allow" manual_assert = "allow" manual_is_variant_and = "allow" manual_let_else = "allow" manual_string_new = "allow" many_single_char_names = "allow" match_bool = "allow" match_same_arms = "allow" match_wild_err_arm = "allow" match_wildcard_for_single_variants = "allow" missing_errors_doc = "allow" missing_panics_doc = "allow" module_name_repetitions = "allow" must_use_candidate = "allow" mut_mut = "allow" naive_bytecount = "allow" needless_for_each = "allow" needless_pass_by_value = "allow" needless_raw_string_hashes = "allow" no_effect_underscore_binding = "allow" option_option = "allow" range_plus_one = "allow" redundant_else = "allow" return_self_not_must_use 
= "allow" should_panic_without_expect = "allow" similar_names = "allow" single_match_else = "allow" stable_sort_primitive = "allow" struct_excessive_bools = "allow" struct_field_names = "allow" too_long_first_doc_paragraph = "allow" too_many_lines = "allow" transmute_ptr_to_ptr = "allow" trivially_copy_pass_by_ref = "allow" unnecessary_join = "allow" unnecessary_wraps = "allow" unreadable_literal = "allow" unused_self = "allow" used_underscore_binding = "allow" wildcard_imports = "allow" [lints.clippy.pedantic] level = "warn" priority = -1 [lints.rust] gix-object-0.46.1/Cargo.toml.orig000064400000000000000000000045121046102023000146400ustar 00000000000000lints.workspace = true [package] name = "gix-object" version = "0.46.1" description = "Immutable and mutable git objects with decoding and encoding support" authors = ["Sebastian Thiel "] repository = "https://github.com/GitoxideLabs/gitoxide" license = "MIT OR Apache-2.0" edition = "2021" include = ["src/**/*", "LICENSE-*"] rust-version = "1.65" [lib] doctest = false [[bench]] name = "decode-objects" harness = false path = "./benches/decode_objects.rs" [[bench]] name = "edit-tree" harness = false path = "./benches/edit_tree.rs" [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde = [ "dep:serde", "bstr/serde", "smallvec/serde", "gix-hash/serde", "gix-actor/serde", ] ## When parsing objects by default errors will only be available on the granularity of success or failure, and with the above flag enabled ## details information about the error location will be collected. ## Use it in applications which expect broken or invalid objects or for debugging purposes. Incorrectly formatted objects aren't at all ## common otherwise. 
verbose-object-parsing-errors = ["winnow/std"] [dependencies] gix-features = { version = "^0.39.1", path = "../gix-features", features = [ "rustsha1", "progress", ] } gix-hash = { version = "^0.15.1", path = "../gix-hash" } gix-hashtable = { version = "^0.6.0", path = "../gix-hashtable" } gix-validate = { version = "^0.9.2", path = "../gix-validate" } gix-actor = { version = "^0.33.1", path = "../gix-actor" } gix-date = { version = "^0.9.3", path = "../gix-date" } gix-path = { version = "^0.10.12", path = "../gix-path" } gix-utils = { version = "^0.1.13", path = "../gix-utils" } itoa = "1.0.1" thiserror = "2.0.0" bstr = { version = "1.3.0", default-features = false, features = [ "std", "unicode", ] } winnow = { version = "0.6.18", features = ["simd"] } smallvec = { version = "1.4.0", features = ["write"] } serde = { version = "1.0.114", optional = true, default-features = false, features = [ "derive", ] } document-features = { version = "0.2.0", optional = true } [dev-dependencies] criterion = "0.5.1" pretty_assertions = "1.0.0" gix-testtools = { path = "../tests/tools" } gix-odb = { path = "../gix-odb" } termtree = "0.5.1" [package.metadata.docs.rs] all-features = true features = ["document-features"] gix-object-0.46.1/LICENSE-APACHE000064400000000000000000000247461046102023000137100ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
gix-object-0.46.1/LICENSE-MIT000064400000000000000000000017771046102023000134170ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-object-0.46.1/src/blob.rs000064400000000000000000000020301046102023000140150ustar 00000000000000use std::{convert::Infallible, io}; use crate::{Blob, BlobRef, Kind}; impl crate::WriteTo for BlobRef<'_> { /// Write the blobs data to `out` verbatim. fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> { out.write_all(self.data) } fn kind(&self) -> Kind { Kind::Blob } fn size(&self) -> u64 { self.data.len() as u64 } } impl crate::WriteTo for Blob { /// Write the blobs data to `out` verbatim. 
fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> { self.to_ref().write_to(out) } fn kind(&self) -> Kind { Kind::Blob } fn size(&self) -> u64 { self.to_ref().size() } } impl Blob { /// Provide a `BlobRef` to this owned blob pub fn to_ref(&self) -> BlobRef<'_> { BlobRef { data: &self.data } } } impl BlobRef<'_> { /// Instantiate a `Blob` from the given `data`, which is used as-is. pub fn from_bytes(data: &[u8]) -> Result, Infallible> { Ok(BlobRef { data }) } } gix-object-0.46.1/src/commit/decode.rs000064400000000000000000000054621046102023000156260ustar 00000000000000use std::borrow::Cow; use smallvec::SmallVec; use winnow::{ combinator::{alt, eof, opt, preceded, repeat, rest, terminated}, error::{AddContext, ParserError, StrContext}, prelude::*, stream::Stream as _, token::take_till, }; use crate::{parse, parse::NL, BStr, ByteSlice, CommitRef}; pub fn message<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( i: &mut &'a [u8], ) -> PResult<&'a BStr, E> { if i.is_empty() { // newline + [message] let start = i.checkpoint(); return Err( winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Eof).add_context( i, &start, StrContext::Expected("newline + ".into()), ), ); } preceded(NL, rest.map(ByteSlice::as_bstr)) .context(StrContext::Expected( "a newline separates headers from the message".into(), )) .parse_next(i) } pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( i: &mut &'a [u8], ) -> PResult, E> { ( (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash)) .context(StrContext::Expected("tree <40 lowercase hex char>".into())), repeat(0.., |i: &mut _| parse::header_field(i, b"parent", parse::hex_hash)) .map(|p: Vec<_>| p) .context(StrContext::Expected( "zero or more 'parent <40 lowercase hex char>'".into(), )), (|i: &mut _| parse::header_field(i, b"author", parse::signature)) .context(StrContext::Expected("author ".into())), (|i: &mut _| parse::header_field(i, b"committer", parse::signature)) 
.context(StrContext::Expected("committer ".into())), opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(1.., NL))) .context(StrContext::Expected("encoding ".into())), repeat( 0.., alt(( parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), |i: &mut _| { parse::any_header_field(i, take_till(1.., NL)) .map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr()))) }, )), ) .context(StrContext::Expected(" ".into())), terminated(message, eof), ) .map( |(tree, parents, author, committer, encoding, extra_headers, message)| CommitRef { tree, parents: SmallVec::from(parents), author, committer, encoding: encoding.map(ByteSlice::as_bstr), message, extra_headers, }, ) .parse_next(i) } gix-object-0.46.1/src/commit/message/body.rs000064400000000000000000000111071046102023000167550ustar 00000000000000use std::ops::Deref; use winnow::{ combinator::{eof, rest, separated_pair, terminated}, error::{ErrorKind, ParserError}, prelude::*, token::take_until, }; use crate::{ bstr::{BStr, ByteSlice}, commit::message::BodyRef, }; /// An iterator over trailers as parsed from a commit message body. /// /// lines with parsing failures will be skipped pub struct Trailers<'a> { pub(crate) cursor: &'a [u8], } /// A trailer as parsed from the commit message body. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct TrailerRef<'a> { /// The name of the trailer, like "Signed-off-by", up to the separator ": " #[cfg_attr(feature = "serde", serde(borrow))] pub token: &'a BStr, /// The value right after the separator ": ", with leading and trailing whitespace trimmed. /// Note that multi-line values aren't currently supported. 
pub value: &'a BStr, } fn parse_single_line_trailer<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<(&'a BStr, &'a BStr), E> { *i = i.trim_end(); let (token, value) = separated_pair(take_until(1.., b":".as_ref()), b": ", rest).parse_next(i)?; if token.trim_end().len() != token.len() || value.trim_start().len() != value.len() { Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail).cut()) } else { Ok((token.as_bstr(), value.as_bstr())) } } impl<'a> Iterator for Trailers<'a> { type Item = TrailerRef<'a>; fn next(&mut self) -> Option { if self.cursor.is_empty() { return None; } for mut line in self.cursor.lines_with_terminator() { self.cursor = &self.cursor[line.len()..]; if let Some(trailer) = terminated(parse_single_line_trailer::<()>, eof) .parse_next(&mut line) .ok() .map(|(token, value)| TrailerRef { token: token.trim().as_bstr(), value: value.trim().as_bstr(), }) { return Some(trailer); } } None } } impl<'a> BodyRef<'a> { /// Parse `body` bytes into the trailer and the actual body. pub fn from_bytes(body: &'a [u8]) -> Self { body.rfind(b"\n\n") .map(|pos| (2, pos)) .or_else(|| body.rfind(b"\r\n\r\n").map(|pos| (4, pos))) .and_then(|(sep_len, pos)| { let trailer = &body[pos + sep_len..]; let body = &body[..pos]; Trailers { cursor: trailer }.next().map(|_| BodyRef { body_without_trailer: body.as_bstr(), start_of_trailer: trailer, }) }) .unwrap_or_else(|| BodyRef { body_without_trailer: body.as_bstr(), start_of_trailer: &[], }) } /// Returns the body with the trailers stripped. /// /// You can iterate trailers with the [`trailers()`][BodyRef::trailers()] method. pub fn without_trailer(&self) -> &'a BStr { self.body_without_trailer } /// Return an iterator over the trailers parsed from the last paragraph of the body. May be empty. 
pub fn trailers(&self) -> Trailers<'a> { Trailers { cursor: self.start_of_trailer, } } } impl AsRef for BodyRef<'_> { fn as_ref(&self) -> &BStr { self.body_without_trailer } } impl Deref for BodyRef<'_> { type Target = BStr; fn deref(&self) -> &Self::Target { self.body_without_trailer } } #[cfg(test)] mod test_parse_trailer { use super::*; fn parse(input: &str) -> (&BStr, &BStr) { parse_single_line_trailer::<()>.parse_peek(input.as_bytes()).unwrap().1 } #[test] fn simple_newline() { assert_eq!(parse("foo: bar\n"), ("foo".into(), "bar".into())); } #[test] fn simple_non_ascii_no_newline() { assert_eq!(parse("🤗: 🎉"), ("🤗".into(), "🎉".into())); } #[test] fn with_lots_of_whitespace_newline() { assert_eq!( parse("hello foo: bar there \n"), ("hello foo".into(), "bar there".into()) ); } #[test] fn extra_whitespace_before_token_or_value_is_error() { assert!(parse_single_line_trailer::<()>.parse_peek(b"foo : bar").is_err()); assert!(parse_single_line_trailer::<()>.parse_peek(b"foo: bar").is_err()); } #[test] fn simple_newline_windows() { assert_eq!(parse("foo: bar\r\n"), ("foo".into(), "bar".into())); } } gix-object-0.46.1/src/commit/message/decode.rs000064400000000000000000000031371046102023000172470ustar 00000000000000use winnow::{ combinator::{alt, eof, preceded, rest, terminated}, error::ParserError, prelude::*, stream::{Offset, Stream}, token::take_till, }; use crate::bstr::{BStr, ByteSlice}; pub(crate) fn newline<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<&'a [u8], E> { alt((b"\n", b"\r\n")).parse_next(i) } fn subject_and_body<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<(&'a BStr, Option<&'a BStr>), E> { let start_i = *i; let start = i.checkpoint(); while !i.is_empty() { match take_till::<_, _, E>(1.., |c| c == b'\n' || c == b'\r').parse_next(i) { Ok(_) => { let consumed_bytes = i.offset_from(&start); match preceded((newline::, newline::), rest).parse_next(i) { Ok(body) => { let body = (!body.is_empty()).then(|| body.as_bstr()); return 
Ok((start_i[0usize..consumed_bytes].as_bstr(), body)); } Err(_) => match i.next_token() { Some(_) => {} None => break, }, } } Err(_) => match i.next_token() { Some(_) => {} None => break, }, } } i.reset(&start); rest.map(|r: &[u8]| (r.as_bstr(), None)).parse_next(i) } /// Returns title and body, without separator pub fn message(mut input: &[u8]) -> (&BStr, Option<&BStr>) { terminated(subject_and_body::<()>, eof) .parse_next(&mut input) .expect("cannot fail") } gix-object-0.46.1/src/commit/message/mod.rs000064400000000000000000000071401046102023000166010ustar 00000000000000use std::borrow::Cow; use crate::{ bstr::{BStr, BString, ByteSlice, ByteVec}, commit::MessageRef, CommitRef, }; /// pub mod body; mod decode; impl<'a> CommitRef<'a> { /// Return exactly the same message as [`MessageRef::summary()`]. pub fn message_summary(&self) -> Cow<'a, BStr> { summary(self.message) } /// Return an iterator over message trailers as obtained from the last paragraph of the commit message. /// May be empty. pub fn message_trailers(&self) -> body::Trailers<'a> { BodyRef::from_bytes(self.message).trailers() } } impl<'a> MessageRef<'a> { /// Parse the given `input` as message. /// /// Note that this cannot fail as everything will be interpreted as title if there is no body separator. pub fn from_bytes(input: &'a [u8]) -> Self { let (title, body) = decode::message(input); MessageRef { title, body } } /// Produce a short commit summary for the message title. /// /// This means the following /// /// * Take the subject line which is delimited by two newlines (\n\n) /// * transform intermediate consecutive whitespace including \r into one space /// /// The resulting summary will have folded whitespace before a newline into spaces and stopped that process /// once two consecutive newlines are encountered. pub fn summary(&self) -> Cow<'a, BStr> { summary(self.title) } /// Further parse the body into into non-trailer and trailers, which can be iterated from the returned [`BodyRef`]. 
pub fn body(&self) -> Option> { self.body.map(|b| BodyRef::from_bytes(b)) } } pub(crate) fn summary(message: &BStr) -> Cow<'_, BStr> { let message = message.trim(); match message.find_byte(b'\n') { Some(mut pos) => { let mut out = BString::default(); let mut previous_pos = None; loop { if let Some(previous_pos) = previous_pos { if previous_pos + 1 == pos { let len_after_trim = out.trim_end().len(); out.resize(len_after_trim, 0); break out.into(); } } let message_to_newline = &message[previous_pos.map_or(0, |p| p + 1)..pos]; if let Some(pos_before_whitespace) = message_to_newline.rfind_not_byteset(b"\t\n\x0C\r ") { out.extend_from_slice(&message_to_newline[..=pos_before_whitespace]); } out.push_byte(b' '); previous_pos = Some(pos); match message.get(pos + 1..).and_then(|i| i.find_byte(b'\n')) { Some(next_nl_pos) => pos += next_nl_pos + 1, None => { if let Some(slice) = message.get((pos + 1)..) { out.extend_from_slice(slice); } break out.into(); } } } } None => message.as_bstr().into(), } } /// A reference to a message body, further parsed to only contain the non-trailer parts. /// /// See [git-interpret-trailers](https://git-scm.com/docs/git-interpret-trailers) for more information /// on what constitutes trailers and not that this implementation is only good for typical sign-off footer or key-value parsing. /// /// Note that we only parse trailers from the bottom of the body. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] pub struct BodyRef<'a> { body_without_trailer: &'a BStr, start_of_trailer: &'a [u8], } gix-object-0.46.1/src/commit/mod.rs000064400000000000000000000132051046102023000151540ustar 00000000000000use std::ops::Range; use bstr::{BStr, BString, ByteSlice}; use winnow::prelude::*; use crate::{Commit, CommitRef, TagRef}; mod decode; /// pub mod message; /// A parsed commit message that assumes a title separated from the body by two consecutive newlines. 
/// /// Titles can have any amount of whitespace #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MessageRef<'a> { /// The title of the commit, as separated from the body with two consecutive newlines. The newlines are not included. #[cfg_attr(feature = "serde", serde(borrow))] pub title: &'a BStr, /// All bytes not consumed by the title, excluding the separating newlines. /// /// The body is `None` if there was now title separation or the body was empty after the separator. pub body: Option<&'a BStr>, } /// The raw commit data, parseable by [`CommitRef`] or [`Commit`], which was fed into a program to produce a signature. /// /// See [`extract_signature()`](crate::CommitRefIter::signature()) for how to obtain it. // TODO: implement `std::io::Read` to avoid allocations #[derive(PartialEq, Eq, Debug, Hash, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SignedData<'a> { /// The raw commit data that includes the signature. data: &'a [u8], /// The byte range at which we find the signature. All but the signature is the data that was signed. signature_range: Range, } impl SignedData<'_> { /// Convenience method to obtain a copy of the signed data. pub fn to_bstring(&self) -> BString { let mut buf = BString::from(&self.data[..self.signature_range.start]); buf.extend_from_slice(&self.data[self.signature_range.end..]); buf } } impl From> for BString { fn from(value: SignedData<'_>) -> Self { value.to_bstring() } } /// pub mod ref_iter; mod write; /// Lifecycle impl<'a> CommitRef<'a> { /// Deserialize a commit from the given `data` bytes while avoiding most allocations. 
pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { let input = &mut data; match decode::commit.parse_next(input) { Ok(tag) => Ok(tag), Err(err) => Err(crate::decode::Error::with_err(err, input)), } } } /// Access impl<'a> CommitRef<'a> { /// Return the `tree` fields hash digest. pub fn tree(&self) -> gix_hash::ObjectId { gix_hash::ObjectId::from_hex(self.tree).expect("prior validation of tree hash during parsing") } /// Returns an iterator of parent object ids pub fn parents(&self) -> impl Iterator + '_ { self.parents .iter() .map(|hex_hash| gix_hash::ObjectId::from_hex(hex_hash).expect("prior validation of hashes during parsing")) } /// Returns a convenient iterator over all extra headers. pub fn extra_headers(&self) -> crate::commit::ExtraHeaders> { ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref()))) } /// Return the author, with whitespace trimmed. /// /// This is different from the `author` field which may contain whitespace. pub fn author(&self) -> gix_actor::SignatureRef<'a> { self.author.trim() } /// Return the committer, with whitespace trimmed. /// /// This is different from the `committer` field which may contain whitespace. pub fn committer(&self) -> gix_actor::SignatureRef<'a> { self.committer.trim() } /// Returns a partially parsed message from which more information can be derived. pub fn message(&self) -> MessageRef<'a> { MessageRef::from_bytes(self.message) } /// Returns the time at which this commit was created. pub fn time(&self) -> gix_date::Time { self.committer.time } } impl Commit { /// Returns a convenient iterator over all extra headers. pub fn extra_headers(&self) -> ExtraHeaders> { ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (k.as_bstr(), v.as_bstr()))) } } /// An iterator over extra headers in [owned][crate::Commit] and [borrowed][crate::CommitRef] commits. pub struct ExtraHeaders { inner: I, } /// Instantiation and convenience. 
impl<'a, I> ExtraHeaders where I: Iterator, { /// Create a new instance from an iterator over tuples of (name, value) pairs. pub fn new(iter: I) -> Self { ExtraHeaders { inner: iter } } /// Find the _value_ of the _first_ header with the given `name`. pub fn find(mut self, name: &str) -> Option<&'a BStr> { self.inner .find_map(move |(k, v)| if k == name.as_bytes().as_bstr() { Some(v) } else { None }) } /// Return an iterator over all _values_ of headers with the given `name`. pub fn find_all(self, name: &'a str) -> impl Iterator { self.inner .filter_map(move |(k, v)| if k == name.as_bytes().as_bstr() { Some(v) } else { None }) } /// Return an iterator over all git mergetags. /// /// A merge tag is a tag object embedded within the respective header field of a commit, making /// it a child object of sorts. pub fn mergetags(self) -> impl Iterator, crate::decode::Error>> { self.find_all("mergetag").map(|b| TagRef::from_bytes(b)) } /// Return the cryptographic signature provided by gpg/pgp verbatim. pub fn pgp_signature(self) -> Option<&'a BStr> { self.find("gpgsig") } } gix-object-0.46.1/src/commit/ref_iter.rs000064400000000000000000000303271046102023000162000ustar 00000000000000use std::{borrow::Cow, ops::Range}; use bstr::BStr; use gix_hash::{oid, ObjectId}; use winnow::{ combinator::{alt, eof, opt, terminated}, error::StrContext, prelude::*, token::take_till, }; use crate::{ bstr::ByteSlice, commit::{decode, SignedData}, parse, parse::NL, CommitRefIter, }; #[derive(Copy, Clone)] pub(crate) enum SignatureKind { Author, Committer, } #[derive(Default, Copy, Clone)] pub(crate) enum State { #[default] Tree, Parents, Signature { of: SignatureKind, }, Encoding, ExtraHeaders, Message, } /// Lifecycle impl<'a> CommitRefIter<'a> { /// Create a commit iterator from data. 
pub fn from_bytes(data: &'a [u8]) -> CommitRefIter<'a> { CommitRefIter { data, state: State::default(), } } } /// Access impl<'a> CommitRefIter<'a> { /// Parse `data` as commit and return its PGP signature, along with *all non-signature* data as [`SignedData`], or `None` /// if the commit isn't signed. /// /// This allows the caller to validate the signature by passing the signed data along with the signature back to the program /// that created it. pub fn signature(data: &'a [u8]) -> Result, SignedData<'a>)>, crate::decode::Error> { let mut signature_and_range = None; let raw_tokens = CommitRefIterRaw { data, state: State::default(), offset: 0, }; for token in raw_tokens { let token = token?; if let Token::ExtraHeader((name, value)) = &token.token { if *name == "gpgsig" { // keep track of the signature range alongside the signature data, // because all but the signature is the signed data. signature_and_range = Some((value.clone(), token.token_range)); break; } } } Ok(signature_and_range.map(|(sig, signature_range)| (sig, SignedData { data, signature_range }))) } /// Returns the object id of this commits tree if it is the first function called and if there is no error in decoding /// the data. /// /// Note that this method must only be called once or else will always return None while consuming a single token. /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again. /// `next()`. pub fn tree_id(&mut self) -> Result { let tree_id = self.next().ok_or_else(missing_field)??; Token::try_into_id(tree_id).ok_or_else(missing_field) } /// Return all `parent_ids` as iterator. /// /// Parsing errors are ignored quietly. 
pub fn parent_ids(self) -> impl Iterator + 'a { self.filter_map(|t| match t { Ok(Token::Parent { id }) => Some(id), _ => None, }) } /// Returns all signatures, first the author, then the committer, if there is no decoding error. /// /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not /// if not exactly two signatures were iterable. /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator. pub fn signatures(self) -> impl Iterator> + 'a { self.filter_map(|t| match t { Ok(Token::Author { signature } | Token::Committer { signature }) => Some(signature), _ => None, }) } /// Returns the committer signature if there is no decoding error. pub fn committer(mut self) -> Result, crate::decode::Error> { self.find_map(|t| match t { Ok(Token::Committer { signature }) => Some(Ok(signature)), Err(err) => Some(Err(err)), _ => None, }) .ok_or_else(missing_field)? } /// Returns the author signature if there is no decoding error. /// /// It may contain white space surrounding it, and is exactly as parsed. pub fn author(mut self) -> Result, crate::decode::Error> { self.find_map(|t| match t { Ok(Token::Author { signature }) => Some(Ok(signature)), Err(err) => Some(Err(err)), _ => None, }) .ok_or_else(missing_field)? } /// Returns the message if there is no decoding error. /// /// It may contain white space surrounding it, and is exactly as // parsed. 
pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> { self.find_map(|t| match t { Ok(Token::Message(msg)) => Some(Ok(msg)), Err(err) => Some(Err(err)), _ => None, }) .transpose() .map(Option::unwrap_or_default) } } fn missing_field() -> crate::decode::Error { crate::decode::empty_error() } impl<'a> CommitRefIter<'a> { #[inline] fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { let input = &mut i; match Self::next_inner_(input, state) { Ok(token) => Ok((*input, token)), Err(err) => Err(crate::decode::Error::with_err(err, input)), } } fn next_inner_( input: &mut &'a [u8], state: &mut State, ) -> Result, winnow::error::ErrMode> { use State::*; Ok(match state { Tree => { let tree = (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash)) .context(StrContext::Expected("tree <40 lowercase hex char>".into())) .parse_next(input)?; *state = State::Parents; Token::Tree { id: ObjectId::from_hex(tree).expect("parsing validation"), } } Parents => { let parent = opt(|i: &mut _| parse::header_field(i, b"parent", parse::hex_hash)) .context(StrContext::Expected("commit <40 lowercase hex char>".into())) .parse_next(input)?; match parent { Some(parent) => Token::Parent { id: ObjectId::from_hex(parent).expect("parsing validation"), }, None => { *state = State::Signature { of: SignatureKind::Author, }; Self::next_inner_(input, state)? 
} } } Signature { ref mut of } => { let who = *of; let (field_name, err_msg) = match of { SignatureKind::Author => { *of = SignatureKind::Committer; (&b"author"[..], "author ") } SignatureKind::Committer => { *state = State::Encoding; (&b"committer"[..], "committer ") } }; let signature = (|i: &mut _| parse::header_field(i, field_name, parse::signature)) .context(StrContext::Expected(err_msg.into())) .parse_next(input)?; match who { SignatureKind::Author => Token::Author { signature }, SignatureKind::Committer => Token::Committer { signature }, } } Encoding => { let encoding = opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(1.., NL))) .context(StrContext::Expected("encoding ".into())) .parse_next(input)?; *state = State::ExtraHeaders; match encoding { Some(encoding) => Token::Encoding(encoding.as_bstr()), None => Self::next_inner_(input, state)?, } } ExtraHeaders => { let extra_header = opt(alt(( |i: &mut _| parse::any_header_field_multi_line(i).map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), |i: &mut _| { parse::any_header_field(i, take_till(1.., NL)) .map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr()))) }, ))) .context(StrContext::Expected(" ".into())) .parse_next(input)?; match extra_header { Some(extra_header) => Token::ExtraHeader(extra_header), None => { *state = State::Message; Self::next_inner_(input, state)? } } } Message => { let message = terminated(decode::message, eof).parse_next(input)?; debug_assert!( input.is_empty(), "we should have consumed all data - otherwise iter may go forever" ); Token::Message(message) } }) } } impl<'a> Iterator for CommitRefIter<'a> { type Item = Result, crate::decode::Error>; fn next(&mut self) -> Option { if self.data.is_empty() { return None; } match Self::next_inner(self.data, &mut self.state) { Ok((data, token)) => { self.data = data; Some(Ok(token)) } Err(err) => { self.data = &[]; Some(Err(err)) } } } } /// A variation of [`CommitRefIter`] that return's [`RawToken`]s instead. 
struct CommitRefIterRaw<'a> { data: &'a [u8], state: State, offset: usize, } impl<'a> Iterator for CommitRefIterRaw<'a> { type Item = Result, crate::decode::Error>; fn next(&mut self) -> Option { if self.data.is_empty() { return None; } match CommitRefIter::next_inner(self.data, &mut self.state) { Ok((remaining, token)) => { let consumed = self.data.len() - remaining.len(); let start = self.offset; let end = start + consumed; self.offset = end; self.data = remaining; Some(Ok(RawToken { token, token_range: start..end, })) } Err(err) => { self.data = &[]; Some(Err(err)) } } } } /// A combination of a parsed [`Token`] as well as the range of bytes that were consumed to parse it. struct RawToken<'a> { /// The parsed token. token: Token<'a>, token_range: Range, } /// A token returned by the [commit iterator][CommitRefIter]. #[allow(missing_docs)] #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] pub enum Token<'a> { Tree { id: ObjectId, }, Parent { id: ObjectId, }, /// A person who authored the content of the commit. Author { signature: gix_actor::SignatureRef<'a>, }, /// A person who committed the authors work to the repository. Committer { signature: gix_actor::SignatureRef<'a>, }, Encoding(&'a BStr), ExtraHeader((&'a BStr, Cow<'a, BStr>)), Message(&'a BStr), } impl Token<'_> { /// Return the object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent]. pub fn id(&self) -> Option<&oid> { match self { Token::Tree { id } | Token::Parent { id } => Some(id.as_ref()), _ => None, } } /// Return the owned object id of this token if it's a [tree][Token::Tree] or a [parent commit][Token::Parent]. 
pub fn try_into_id(self) -> Option { match self { Token::Tree { id } | Token::Parent { id } => Some(id), _ => None, } } } gix-object-0.46.1/src/commit/write.rs000064400000000000000000000077451046102023000155430ustar 00000000000000use std::io; use bstr::ByteSlice; use crate::{encode, encode::NL, Commit, CommitRef, Kind}; impl crate::WriteTo for Commit { /// Serializes this instance to `out` in the git serialization format. fn write_to(&self, mut out: &mut dyn io::Write) -> io::Result<()> { encode::trusted_header_id(b"tree", &self.tree, &mut out)?; for parent in &self.parents { encode::trusted_header_id(b"parent", parent, &mut out)?; } encode::trusted_header_signature(b"author", &self.author.to_ref(), &mut out)?; encode::trusted_header_signature(b"committer", &self.committer.to_ref(), &mut out)?; if let Some(encoding) = self.encoding.as_ref() { encode::header_field(b"encoding", encoding, &mut out)?; } for (name, value) in &self.extra_headers { encode::header_field_multi_line(name, value, &mut out)?; } out.write_all(NL)?; out.write_all(&self.message) } fn kind(&self) -> Kind { Kind::Commit } fn size(&self) -> u64 { let hash_in_hex = self.tree.kind().len_in_hex(); (b"tree".len() + 1 /*space*/ + hash_in_hex + 1 /* nl */ + self.parents.iter().count() * (b"parent".len() + 1 + hash_in_hex + 1) + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ + self .encoding .as_ref() .map_or(0, |e| b"encoding".len() + 1 /* space */ + e.len() + 1 /* nl */) + self .extra_headers .iter() .map(|(name, value)| { // each header *value* is preceded by a space, and it starts right after the name. name.len() + value.lines_with_terminator().map(|s| s.len() + 1).sum::() + usize::from(!value.ends_with_str(b"\n")) }) .sum::() + 1 /* nl */ + self.message.len()) as u64 } } impl crate::WriteTo for CommitRef<'_> { /// Serializes this instance to `out` in the git serialization format. 
fn write_to(&self, mut out: &mut dyn io::Write) -> io::Result<()> { encode::trusted_header_id(b"tree", &self.tree(), &mut out)?; for parent in self.parents() { encode::trusted_header_id(b"parent", &parent, &mut out)?; } encode::trusted_header_signature(b"author", &self.author, &mut out)?; encode::trusted_header_signature(b"committer", &self.committer, &mut out)?; if let Some(encoding) = self.encoding.as_ref() { encode::header_field(b"encoding", encoding, &mut out)?; } for (name, value) in &self.extra_headers { encode::header_field_multi_line(name, value, &mut out)?; } out.write_all(NL)?; out.write_all(self.message) } fn kind(&self) -> Kind { Kind::Commit } fn size(&self) -> u64 { let hash_in_hex = self.tree().kind().len_in_hex(); (b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */ + self.parents.iter().count() * (b"parent".len() + 1 /* space */ + hash_in_hex + 1 /* nl */) + b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */ + b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */ + self .encoding .as_ref() .map_or(0, |e| b"encoding".len() + 1 /* space */ + e.len() + 1 /* nl */) + self .extra_headers .iter() .map(|(name, value)| { // each header *value* is preceded by a space, and it starts right after the name. name.len() + value.lines_with_terminator().map(|s| s.len() + 1).sum::() + usize::from(!value.ends_with_str(b"\n")) }) .sum::() + 1 /* nl */ + self.message.len()) as u64 } } gix-object-0.46.1/src/data.rs000064400000000000000000000067721046102023000140310ustar 00000000000000//! Contains a borrowed Object bound to a buffer holding its decompressed data. use crate::{BlobRef, CommitRef, CommitRefIter, Data, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter}; impl<'a> Data<'a> { /// Constructs a new data object from `kind` and `data`. 
pub fn new(kind: Kind, data: &'a [u8]) -> Data<'a> { Data { kind, data } } /// Decodes the data in the backing slice into a [`ObjectRef`], allowing to access all of its data /// conveniently. The cost of parsing an object is negligible. /// /// **Note** that [mutable, decoded objects][crate::Object] can be created from [`Data`] /// using [`crate::ObjectRef::into_owned()`]. pub fn decode(&self) -> Result, crate::decode::Error> { Ok(match self.kind { Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(self.data)?), Kind::Blob => ObjectRef::Blob(BlobRef { data: self.data }), Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(self.data)?), Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(self.data)?), }) } /// Returns this object as tree iterator to parse entries one at a time to avoid allocations, or /// `None` if this is not a tree object. pub fn try_into_tree_iter(self) -> Option> { match self.kind { Kind::Tree => Some(TreeRefIter::from_bytes(self.data)), _ => None, } } /// Returns this object as commit iterator to parse tokens one at a time to avoid allocations, or /// `None` if this is not a commit object. pub fn try_into_commit_iter(self) -> Option> { match self.kind { Kind::Commit => Some(CommitRefIter::from_bytes(self.data)), _ => None, } } /// Returns this object as tag iterator to parse tokens one at a time to avoid allocations, or /// `None` if this is not a tag object. pub fn try_into_tag_iter(self) -> Option> { match self.kind { Kind::Tag => Some(TagRefIter::from_bytes(self.data)), _ => None, } } } /// Types supporting object hash verification pub mod verify { /// Returned by [`crate::Data::verify_checksum()`] #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Object expected to have id {desired}, but actual id was {actual}")] ChecksumMismatch { desired: gix_hash::ObjectId, actual: gix_hash::ObjectId, }, } impl crate::Data<'_> { /// Compute the checksum of `self` and compare it with the `desired` hash. 
/// If the hashes do not match, an [`Error`] is returned, containing the actual /// hash of `self`. pub fn verify_checksum(&self, desired: &gix_hash::oid) -> Result<(), Error> { let actual_id = crate::compute_hash(desired.kind(), self.kind, self.data); if desired != actual_id { return Err(Error::ChecksumMismatch { desired: desired.into(), actual: actual_id, }); } Ok(()) } } } #[cfg(test)] mod tests { use super::*; #[test] fn size_of_object() { #[cfg(target_pointer_width = "64")] assert_eq!(std::mem::size_of::>(), 24, "this shouldn't change unnoticed"); #[cfg(target_pointer_width = "32")] assert_eq!(std::mem::size_of::>(), 12, "this shouldn't change unnoticed"); } } gix-object-0.46.1/src/encode.rs000064400000000000000000000050231046102023000143410ustar 00000000000000//! Encoding utilities use std::io::{self, Write}; use bstr::{BString, ByteSlice}; /// An error returned when object encoding fails. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Newlines are not allowed in header values: {value:?}")] NewlineInHeaderValue { value: BString }, #[error("Header values must not be empty")] EmptyValue, } macro_rules! 
check { ($e: expr) => { $e.expect("Writing to a Vec should never fail.") }; } /// Generates a loose header buffer pub fn loose_header(kind: crate::Kind, size: u64) -> smallvec::SmallVec<[u8; 28]> { let mut v = smallvec::SmallVec::new(); check!(v.write_all(kind.as_bytes())); check!(v.write_all(SPACE)); check!(v.write_all(itoa::Buffer::new().format(size).as_bytes())); check!(v.write_all(b"\0")); v } impl From for io::Error { fn from(other: Error) -> io::Error { io::Error::new(io::ErrorKind::Other, other) } } pub(crate) fn header_field_multi_line(name: &[u8], value: &[u8], out: &mut dyn io::Write) -> io::Result<()> { let mut lines = value.as_bstr().lines_with_terminator(); out.write_all(name)?; out.write_all(SPACE)?; out.write_all(lines.next().ok_or(Error::EmptyValue)?)?; for line in lines { out.write_all(SPACE)?; out.write_all(line)?; } if !value.ends_with_str(b"\n") { out.write_all(NL)?; } Ok(()) } pub(crate) fn trusted_header_field(name: &[u8], value: &[u8], out: &mut dyn io::Write) -> io::Result<()> { out.write_all(name)?; out.write_all(SPACE)?; out.write_all(value)?; out.write_all(NL) } pub(crate) fn trusted_header_signature( name: &[u8], value: &gix_actor::SignatureRef<'_>, out: &mut dyn io::Write, ) -> io::Result<()> { out.write_all(name)?; out.write_all(SPACE)?; value.write_to(out)?; out.write_all(NL) } pub(crate) fn trusted_header_id( name: &[u8], value: &gix_hash::ObjectId, mut out: &mut dyn io::Write, ) -> io::Result<()> { out.write_all(name)?; out.write_all(SPACE)?; value.write_hex_to(&mut out)?; out.write_all(NL) } pub(crate) fn header_field(name: &[u8], value: &[u8], out: &mut dyn io::Write) -> io::Result<()> { if value.is_empty() { return Err(Error::EmptyValue.into()); } if value.find(NL).is_some() { return Err(Error::NewlineInHeaderValue { value: value.into() }.into()); } trusted_header_field(name, value, out) } pub(crate) const NL: &[u8; 1] = b"\n"; pub(crate) const SPACE: &[u8; 1] = b" "; 
gix-object-0.46.1/src/find.rs000064400000000000000000000047131046102023000140310ustar 00000000000000/// The error type returned by the [`Find`](crate::Find) trait. pub type Error = Box; /// pub mod existing { use gix_hash::ObjectId; /// The error returned by the [`find(…)`][crate::FindExt::find()] trait methods. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Find(crate::find::Error), #[error("An object with id {} could not be found", .oid)] NotFound { oid: ObjectId }, } } /// pub mod existing_object { use gix_hash::ObjectId; /// The error returned by the various [`find_*()`][crate::FindExt::find_commit()] trait methods. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Find(crate::find::Error), #[error("Could not decode object at {oid}")] Decode { oid: ObjectId, source: crate::decode::Error, }, #[error("An object with id {oid} could not be found")] NotFound { oid: ObjectId }, #[error("Expected object of kind {expected} but got {actual} at {oid}")] ObjectKind { oid: ObjectId, actual: crate::Kind, expected: crate::Kind, }, } } /// pub mod existing_iter { use gix_hash::ObjectId; /// The error returned by the various [`find_*_iter()`][crate::FindExt::find_commit_iter()] trait methods. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Find(crate::find::Error), #[error("An object with id {oid} could not be found")] NotFound { oid: ObjectId }, #[error("Expected object of kind {expected} but got {actual} at {oid}")] ObjectKind { oid: ObjectId, actual: crate::Kind, expected: crate::Kind, }, } } /// An implementation of all traits that never fails, but also never finds anything. 
#[derive(Debug, Copy, Clone)] pub struct Never; impl super::FindHeader for Never { fn try_header(&self, _id: &gix_hash::oid) -> Result, Error> { Ok(None) } } impl super::Find for Never { fn try_find<'a>(&self, _id: &gix_hash::oid, _buffer: &'a mut Vec) -> Result>, Error> { Ok(None) } } impl super::Exists for Never { fn exists(&self, _id: &gix_hash::oid) -> bool { false } } gix-object-0.46.1/src/kind.rs000064400000000000000000000033261046102023000140350ustar 00000000000000use std::fmt; use crate::Kind; /// The Error used in [`Kind::from_bytes()`]. #[derive(Debug, Clone, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Unknown object kind: {kind:?}")] InvalidObjectKind { kind: bstr::BString }, } /// Initialization impl Kind { /// Parse a `Kind` from its serialized loose git objects. pub fn from_bytes(s: &[u8]) -> Result { Ok(match s { b"tree" => Kind::Tree, b"blob" => Kind::Blob, b"commit" => Kind::Commit, b"tag" => Kind::Tag, _ => return Err(Error::InvalidObjectKind { kind: s.into() }), }) } } /// Access impl Kind { /// Return the name of `self` for use in serialized loose git objects. pub fn as_bytes(&self) -> &[u8] { match self { Kind::Tree => b"tree", Kind::Commit => b"commit", Kind::Blob => b"blob", Kind::Tag => b"tag", } } /// Returns `true` if this instance is representing a commit. pub fn is_commit(&self) -> bool { matches!(self, Kind::Commit) } /// Returns `true` if this instance is representing a tree. pub fn is_tree(&self) -> bool { matches!(self, Kind::Tree) } /// Returns `true` if this instance is representing a tag. pub fn is_tag(&self) -> bool { matches!(self, Kind::Tag) } /// Returns `true` if this instance is representing a blob. 
pub fn is_blob(&self) -> bool { matches!(self, Kind::Blob) } } impl fmt::Display for Kind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(std::str::from_utf8(self.as_bytes()).expect("Converting Kind name to utf8")) } } gix-object-0.46.1/src/lib.rs000064400000000000000000000416001046102023000136530ustar 00000000000000//! This crate provides types for [read-only git objects][crate::ObjectRef] backed by bytes provided in git's serialization format //! as well as [mutable versions][Object] of these. Both types of objects can be encoded. //! ## Feature Flags #![cfg_attr( all(doc, feature = "document-features"), doc = ::document_features::document_features!() )] #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms)] #![forbid(unsafe_code)] use std::borrow::Cow; /// For convenience to allow using `bstr` without adding it to own cargo manifest. pub use bstr; use bstr::{BStr, BString, ByteSlice}; /// For convenience to allow using `gix-date` without adding it to own cargo manifest. pub use gix_date as date; use smallvec::SmallVec; /// pub mod commit; mod object; /// pub mod tag; /// pub mod tree; mod blob; /// pub mod data; /// pub mod find; /// pub mod write { /// The error type returned by the [`Write`](crate::Write) trait. pub type Error = Box; } mod traits; pub use traits::{Exists, Find, FindExt, FindObjectOrHeader, Header as FindHeader, HeaderExt, Write, WriteTo}; pub mod encode; pub(crate) mod parse; /// pub mod kind; /// The four types of objects that git differentiates. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[allow(missing_docs)] pub enum Kind { Tree, Blob, Commit, Tag, } /// A chunk of any [`data`][BlobRef::data]. 
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BlobRef<'a> { /// The bytes themselves. pub data: &'a [u8], } /// A mutable chunk of any [`data`][Blob::data]. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Blob { /// The data itself. pub data: Vec, } /// A git commit parsed using [`from_bytes()`][CommitRef::from_bytes()]. /// /// A commit encapsulates information about a point in time at which the state of the repository is recorded, usually after a /// change which is documented in the commit `message`. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct CommitRef<'a> { /// HEX hash of tree object we point to. Usually 40 bytes long. /// /// Use [`tree()`][CommitRef::tree()] to obtain a decoded version of it. #[cfg_attr(feature = "serde", serde(borrow))] pub tree: &'a BStr, /// HEX hash of each parent commit. Empty for first commit in repository. pub parents: SmallVec<[&'a BStr; 1]>, /// Who wrote this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping. /// /// Use the [`author()`][CommitRef::author()] method to received a trimmed version of it. pub author: gix_actor::SignatureRef<'a>, /// Who committed this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping. /// /// Use the [`committer()`][CommitRef::committer()] method to received a trimmed version of it. /// /// This may be different from the `author` in case the author couldn't write to the repository themselves and /// is commonly encountered with contributed commits. 
pub committer: gix_actor::SignatureRef<'a>, /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493). pub encoding: Option<&'a BStr>, /// The commit message documenting the change. pub message: &'a BStr, /// Extra header fields, in order of them being encountered, made accessible with the iterator returned by [`extra_headers()`][CommitRef::extra_headers()]. pub extra_headers: Vec<(&'a BStr, Cow<'a, BStr>)>, } /// Like [`CommitRef`], but as `Iterator` to support (up to) entirely allocation free parsing. /// It's particularly useful to traverse the commit graph without ever allocating arrays for parents. #[derive(Copy, Clone)] pub struct CommitRefIter<'a> { data: &'a [u8], state: commit::ref_iter::State, } /// A mutable git commit, representing an annotated state of a working tree along with a reference to its historical commits. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Commit { /// The hash of recorded working tree state. pub tree: gix_hash::ObjectId, /// Hash of each parent commit. Empty for the first commit in repository. pub parents: SmallVec<[gix_hash::ObjectId; 1]>, /// Who wrote this commit. pub author: gix_actor::Signature, /// Who committed this commit. /// /// This may be different from the `author` in case the author couldn't write to the repository themselves and /// is commonly encountered with contributed commits. pub committer: gix_actor::Signature, /// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493). pub encoding: Option, /// The commit message documenting the change. 
pub message: BString, /// Extra header fields, in order of them being encountered, made accessible with the iterator returned /// by [`extra_headers()`][Commit::extra_headers()]. pub extra_headers: Vec<(BString, BString)>, } /// Represents a git tag, commonly indicating a software release. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct TagRef<'a> { /// The hash in hexadecimal being the object this tag points to. Use [`target()`][TagRef::target()] to obtain a byte representation. #[cfg_attr(feature = "serde", serde(borrow))] pub target: &'a BStr, /// The kind of object that `target` points to. pub target_kind: Kind, /// The name of the tag, e.g. "v1.0". pub name: &'a BStr, /// The author of the tag. pub tagger: Option>, /// The message describing this release. pub message: &'a BStr, /// A cryptographic signature over the entire content of the serialized tag object thus far. pub pgp_signature: Option<&'a BStr>, } /// Like [`TagRef`], but as `Iterator` to support entirely allocation free parsing. /// It's particularly useful to dereference only the target chain. #[derive(Copy, Clone)] pub struct TagRefIter<'a> { data: &'a [u8], state: tag::ref_iter::State, } /// A mutable git tag. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Tag { /// The hash this tag is pointing to. pub target: gix_hash::ObjectId, /// The kind of object this tag is pointing to. pub target_kind: Kind, /// The name of the tag, e.g. "v1.0". pub name: BString, /// The tags author. pub tagger: Option, /// The message describing the tag. pub message: BString, /// A pgp signature over all bytes of the encoded tag, excluding the pgp signature itself. pub pgp_signature: Option, } /// Immutable objects are read-only structures referencing most data from [a byte slice][crate::ObjectRef::from_bytes()]. 
/// /// Immutable objects are expected to be deserialized from bytes that acts as backing store, and they /// cannot be mutated or serialized. Instead, one will [convert][crate::ObjectRef::into_owned()] them into their [`mutable`][Object] counterparts /// which support mutation and serialization. /// /// An `ObjectRef` is representing [`Trees`][TreeRef], [`Blobs`][BlobRef], [`Commits`][CommitRef], or [`Tags`][TagRef]. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[allow(missing_docs)] pub enum ObjectRef<'a> { #[cfg_attr(feature = "serde", serde(borrow))] Tree(TreeRef<'a>), Blob(BlobRef<'a>), Commit(CommitRef<'a>), Tag(TagRef<'a>), } /// Mutable objects with each field being separately allocated and changeable. /// /// Mutable objects are Commits, Trees, Blobs and Tags that can be changed and serialized. /// /// They either created using object [construction][Object] or by [deserializing existing objects][ObjectRef::from_bytes()] /// and converting these [into mutable copies][ObjectRef::into_owned()] for adjustments. /// /// An `Object` is representing [`Trees`][Tree], [`Blobs`][Blob], [`Commits`][Commit] or [`Tags`][Tag]. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[allow(clippy::large_enum_variant, missing_docs)] pub enum Object { Tree(Tree), Blob(Blob), Commit(Commit), Tag(Tag), } /// A directory snapshot containing files (blobs), directories (trees) and submodules (commits). #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct TreeRef<'a> { /// The directories and files contained in this tree. /// /// Beware that the sort order isn't *quite* by name, so one may bisect only with a [`tree::EntryRef`] to handle ordering correctly. 
#[cfg_attr(feature = "serde", serde(borrow))] pub entries: Vec>, } /// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated. #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] pub struct TreeRefIter<'a> { /// The directories and files contained in this tree. data: &'a [u8], } /// A mutable Tree, containing other trees, blobs or commits. #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Tree { /// The directories and files contained in this tree. They must be and remain sorted by [`filename`][tree::Entry::filename]. /// /// Beware that the sort order isn't *quite* by name, so one may bisect only with a [`tree::Entry`] to handle ordering correctly. pub entries: Vec, } impl Tree { /// Return an empty tree which serializes to a well-known hash pub fn empty() -> Self { Tree { entries: Vec::new() } } } /// A borrowed object using a slice as backing buffer, or in other words a bytes buffer that knows the kind of object it represents. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] pub struct Data<'a> { /// kind of object pub kind: Kind, /// decoded, decompressed data, owned by a backing store. pub data: &'a [u8], } /// Information about an object, which includes its kind and the amount of bytes it would have when obtained. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] pub struct Header { /// The kind of object. pub kind: Kind, /// The object's size in bytes, or the size of the buffer when it's retrieved in full. pub size: u64, } /// pub mod decode { #[cfg(feature = "verbose-object-parsing-errors")] mod _decode { /// The type to be used for parse errors. 
pub type ParseError = winnow::error::ContextError; pub(crate) fn empty_error() -> Error { Error { inner: winnow::error::ContextError::new(), remaining: Default::default(), } } /// A type to indicate errors during parsing and to abstract away details related to `nom`. #[derive(Debug, Clone)] pub struct Error { /// The actual error pub inner: ParseError, /// Where the error occurred pub remaining: Vec, } impl Error { pub(crate) fn with_err(err: winnow::error::ErrMode, remaining: &[u8]) -> Self { Self { inner: err.into_inner().expect("we don't have streaming parsers"), remaining: remaining.to_owned(), } } } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "object parsing failed at `{}`", bstr::BStr::new(&self.remaining))?; if self.inner.context().next().is_some() { writeln!(f)?; self.inner.fmt(f)?; } Ok(()) } } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { self.inner.cause().map(|v| v as &(dyn std::error::Error + 'static)) } } } /// #[cfg(not(feature = "verbose-object-parsing-errors"))] mod _decode { /// The type to be used for parse errors, discards everything and is zero size pub type ParseError = (); pub(crate) fn empty_error() -> Error { Error { inner: () } } /// A type to indicate errors during parsing and to abstract away details related to `nom`. 
#[derive(Debug, Clone)] pub struct Error { /// The actual error pub inner: ParseError, } impl Error { pub(crate) fn with_err(err: winnow::error::ErrMode, _remaining: &[u8]) -> Self { Self { inner: err.into_inner().expect("we don't have streaming parsers"), } } } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("object parsing failed") } } impl std::error::Error for Error {} } pub(crate) use _decode::empty_error; pub use _decode::{Error, ParseError}; /// Returned by [`loose_header()`] #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum LooseHeaderDecodeError { #[error("{message}: {number:?}")] ParseIntegerError { source: gix_utils::btoi::ParseIntegerError, message: &'static str, number: bstr::BString, }, #[error("{message}")] InvalidHeader { message: &'static str }, #[error("The object header contained an unknown object kind.")] ObjectHeader(#[from] super::kind::Error), } use bstr::ByteSlice; /// Decode a loose object header, being ` \0`, returns /// ([`kind`](super::Kind), `size`, `consumed bytes`). /// /// `size` is the uncompressed size of the payload in bytes. pub fn loose_header(input: &[u8]) -> Result<(super::Kind, u64, usize), LooseHeaderDecodeError> { use LooseHeaderDecodeError::*; let kind_end = input.find_byte(0x20).ok_or(InvalidHeader { message: "Expected ' '", })?; let kind = super::Kind::from_bytes(&input[..kind_end])?; let size_end = input.find_byte(0x0).ok_or(InvalidHeader { message: "Did not find 0 byte in header", })?; let size_bytes = &input[kind_end + 1..size_end]; let size = gix_utils::btoi::to_signed(size_bytes).map_err(|source| ParseIntegerError { source, message: "Object size in header could not be parsed", number: size_bytes.into(), })?; Ok((kind, size, size_end + 1)) } } /// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`. 
#[doc(alias = "hash_object", alias = "git2")] pub fn compute_hash(hash_kind: gix_hash::Kind, object_kind: Kind, data: &[u8]) -> gix_hash::ObjectId { let header = encode::loose_header(object_kind, data.len() as u64); let mut hasher = gix_features::hash::hasher(hash_kind); hasher.update(&header); hasher.update(data); hasher.digest().into() } /// A function to compute a hash of kind `hash_kind` for an object of `object_kind` and its data read from `stream` /// which has to yield exactly `stream_len` bytes. /// Use `progress` to learn about progress in bytes processed and `should_interrupt` to be able to abort the operation /// if set to `true`. #[doc(alias = "hash_file", alias = "git2")] pub fn compute_stream_hash( hash_kind: gix_hash::Kind, object_kind: Kind, stream: &mut dyn std::io::Read, stream_len: u64, progress: &mut dyn gix_features::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, ) -> std::io::Result { let header = encode::loose_header(object_kind, stream_len); let mut hasher = gix_features::hash::hasher(hash_kind); hasher.update(&header); gix_features::hash::bytes_with_hasher(stream, stream_len, hasher, progress, should_interrupt) } gix-object-0.46.1/src/object/convert.rs000064400000000000000000000125761046102023000160450ustar 00000000000000use crate::{tree, Blob, BlobRef, Commit, CommitRef, Object, ObjectRef, Tag, TagRef, Tree, TreeRef}; impl From> for Tag { fn from(other: TagRef<'_>) -> Tag { let TagRef { target, name, target_kind, message, tagger: signature, pgp_signature, } = other; Tag { target: gix_hash::ObjectId::from_hex(target).expect("prior parser validation"), name: name.to_owned(), target_kind, message: message.to_owned(), tagger: signature.map(Into::into), pgp_signature: pgp_signature.map(ToOwned::to_owned), } } } impl From> for Commit { fn from(other: CommitRef<'_>) -> Commit { let CommitRef { tree, parents, author, committer, encoding, message, extra_headers, } = other; Commit { tree: 
gix_hash::ObjectId::from_hex(tree).expect("prior parser validation"), parents: parents .iter() .map(|parent| gix_hash::ObjectId::from_hex(parent).expect("prior parser validation")) .collect(), author: author.into(), committer: committer.into(), encoding: encoding.map(ToOwned::to_owned), message: message.to_owned(), extra_headers: extra_headers .into_iter() .map(|(k, v)| (k.into(), v.into_owned())) .collect(), } } } impl<'a> From> for Blob { fn from(v: BlobRef<'a>) -> Self { Blob { data: v.data.to_owned(), } } } impl From> for Tree { fn from(other: TreeRef<'_>) -> Tree { let TreeRef { entries } = other; Tree { entries: entries.into_iter().map(Into::into).collect(), } } } impl From> for tree::Entry { fn from(other: tree::EntryRef<'_>) -> tree::Entry { let tree::EntryRef { mode, filename, oid } = other; tree::Entry { mode, filename: filename.to_owned(), oid: oid.into(), } } } impl From> for Object { fn from(v: ObjectRef<'_>) -> Self { match v { ObjectRef::Tree(v) => Object::Tree(v.into()), ObjectRef::Blob(v) => Object::Blob(v.into()), ObjectRef::Commit(v) => Object::Commit(v.into()), ObjectRef::Tag(v) => Object::Tag(v.into()), } } } impl From for Object { fn from(v: Tag) -> Self { Object::Tag(v) } } impl From for Object { fn from(v: Commit) -> Self { Object::Commit(v) } } impl From for Object { fn from(v: Tree) -> Self { Object::Tree(v) } } impl From for Object { fn from(v: Blob) -> Self { Object::Blob(v) } } impl TryFrom for Tag { type Error = Object; fn try_from(value: Object) -> Result { Ok(match value { Object::Tag(v) => v, _ => return Err(value), }) } } impl TryFrom for Commit { type Error = Object; fn try_from(value: Object) -> Result { Ok(match value { Object::Commit(v) => v, _ => return Err(value), }) } } impl TryFrom for Tree { type Error = Object; fn try_from(value: Object) -> Result { Ok(match value { Object::Tree(v) => v, _ => return Err(value), }) } } impl TryFrom for Blob { type Error = Object; fn try_from(value: Object) -> Result { Ok(match value { 
Object::Blob(v) => v, _ => return Err(value), }) } } impl<'a> From> for ObjectRef<'a> { fn from(v: TagRef<'a>) -> Self { ObjectRef::Tag(v) } } impl<'a> From> for ObjectRef<'a> { fn from(v: CommitRef<'a>) -> Self { ObjectRef::Commit(v) } } impl<'a> From> for ObjectRef<'a> { fn from(v: TreeRef<'a>) -> Self { ObjectRef::Tree(v) } } impl<'a> From> for ObjectRef<'a> { fn from(v: BlobRef<'a>) -> Self { ObjectRef::Blob(v) } } impl<'a> TryFrom> for TagRef<'a> { type Error = ObjectRef<'a>; fn try_from(value: ObjectRef<'a>) -> Result { Ok(match value { ObjectRef::Tag(v) => v, _ => return Err(value), }) } } impl<'a> TryFrom> for CommitRef<'a> { type Error = ObjectRef<'a>; fn try_from(value: ObjectRef<'a>) -> Result { Ok(match value { ObjectRef::Commit(v) => v, _ => return Err(value), }) } } impl<'a> TryFrom> for TreeRef<'a> { type Error = ObjectRef<'a>; fn try_from(value: ObjectRef<'a>) -> Result { Ok(match value { ObjectRef::Tree(v) => v, _ => return Err(value), }) } } impl<'a> TryFrom> for BlobRef<'a> { type Error = ObjectRef<'a>; fn try_from(value: ObjectRef<'a>) -> Result { Ok(match value { ObjectRef::Blob(v) => v, _ => return Err(value), }) } } gix-object-0.46.1/src/object/mod.rs000064400000000000000000000214161046102023000151350ustar 00000000000000use crate::{Blob, Commit, Object, Tag, Tree}; mod convert; mod write { use std::io; use crate::{Kind, Object, ObjectRef, WriteTo}; /// Serialization impl WriteTo for ObjectRef<'_> { /// Write the contained object to `out` in the git serialization format. 
fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> { use crate::ObjectRef::*; match self { Tree(v) => v.write_to(out), Blob(v) => v.write_to(out), Commit(v) => v.write_to(out), Tag(v) => v.write_to(out), } } fn kind(&self) -> Kind { self.kind() } fn size(&self) -> u64 { use crate::ObjectRef::*; match self { Tree(v) => v.size(), Blob(v) => v.size(), Commit(v) => v.size(), Tag(v) => v.size(), } } } /// Serialization impl WriteTo for Object { /// Write the contained object to `out` in the git serialization format. fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> { use crate::Object::*; match self { Tree(v) => v.write_to(out), Blob(v) => v.write_to(out), Commit(v) => v.write_to(out), Tag(v) => v.write_to(out), } } fn kind(&self) -> Kind { self.kind() } fn size(&self) -> u64 { use crate::Object::*; match self { Tree(v) => v.size(), Blob(v) => v.size(), Commit(v) => v.size(), Tag(v) => v.size(), } } } } /// Convenient extraction of typed object. impl Object { /// Turns this instance into a [`Blob`], panic otherwise. pub fn into_blob(self) -> Blob { match self { Object::Blob(v) => v, _ => panic!("BUG: not a blob"), } } /// Turns this instance into a [`Commit`] panic otherwise. pub fn into_commit(self) -> Commit { match self { Object::Commit(v) => v, _ => panic!("BUG: not a commit"), } } /// Turns this instance into a [`Tree`] panic otherwise. pub fn into_tree(self) -> Tree { match self { Object::Tree(v) => v, _ => panic!("BUG: not a tree"), } } /// Turns this instance into a [`Tag`] panic otherwise. pub fn into_tag(self) -> Tag { match self { Object::Tag(v) => v, _ => panic!("BUG: not a tag"), } } /// Turns this instance into a [`Blob`] if it is one. #[allow(clippy::result_large_err)] pub fn try_into_blob(self) -> Result { match self { Object::Blob(v) => Ok(v), _ => Err(self), } } /// Turns this instance into a [`BlobRef`] if it is a blob. 
pub fn try_into_blob_ref(&self) -> Option> { match self { Object::Blob(v) => Some(v.to_ref()), _ => None, } } /// Turns this instance into a [`Commit`] if it is one. #[allow(clippy::result_large_err)] pub fn try_into_commit(self) -> Result { match self { Object::Commit(v) => Ok(v), _ => Err(self), } } /// Turns this instance into a [`Tree`] if it is one. #[allow(clippy::result_large_err)] pub fn try_into_tree(self) -> Result { match self { Object::Tree(v) => Ok(v), _ => Err(self), } } /// Turns this instance into a [`Tag`] if it is one. #[allow(clippy::result_large_err)] pub fn try_into_tag(self) -> Result { match self { Object::Tag(v) => Ok(v), _ => Err(self), } } /// Returns a [`Blob`] if it is one. pub fn as_blob(&self) -> Option<&Blob> { match self { Object::Blob(v) => Some(v), _ => None, } } /// Returns a [`Commit`] if it is one. pub fn as_commit(&self) -> Option<&Commit> { match self { Object::Commit(v) => Some(v), _ => None, } } /// Returns a [`Tree`] if it is one. pub fn as_tree(&self) -> Option<&Tree> { match self { Object::Tree(v) => Some(v), _ => None, } } /// Returns a [`Tag`] if it is one. pub fn as_tag(&self) -> Option<&Tag> { match self { Object::Tag(v) => Some(v), _ => None, } } /// Returns the kind of object stored in this instance. 
pub fn kind(&self) -> crate::Kind { match self { Object::Tree(_) => crate::Kind::Tree, Object::Blob(_) => crate::Kind::Blob, Object::Commit(_) => crate::Kind::Commit, Object::Tag(_) => crate::Kind::Tag, } } } use crate::{ decode::{loose_header, Error as DecodeError, LooseHeaderDecodeError}, BlobRef, CommitRef, Kind, ObjectRef, TagRef, TreeRef, }; #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum LooseDecodeError { #[error(transparent)] InvalidHeader(#[from] LooseHeaderDecodeError), #[error(transparent)] InvalidContent(#[from] DecodeError), #[error("Object sized {size} does not fit into memory - this can happen on 32 bit systems")] OutOfMemory { size: u64 }, } impl<'a> ObjectRef<'a> { /// Deserialize an object from a loose serialisation pub fn from_loose(data: &'a [u8]) -> Result, LooseDecodeError> { let (kind, size, offset) = loose_header(data)?; let body = &data[offset..] .get(..size.try_into().map_err(|_| LooseDecodeError::OutOfMemory { size })?) .ok_or(LooseHeaderDecodeError::InvalidHeader { message: "object data was shorter than its size declared in the header", })?; Ok(Self::from_bytes(kind, body)?) } /// Deserialize an object of `kind` from the given `data`. pub fn from_bytes(kind: Kind, data: &'a [u8]) -> Result, crate::decode::Error> { Ok(match kind { Kind::Tree => ObjectRef::Tree(TreeRef::from_bytes(data)?), Kind::Blob => ObjectRef::Blob(BlobRef { data }), Kind::Commit => ObjectRef::Commit(CommitRef::from_bytes(data)?), Kind::Tag => ObjectRef::Tag(TagRef::from_bytes(data)?), }) } /// Convert the immutable object into a mutable version, consuming the source in the process. /// /// Note that this is an expensive operation. pub fn into_owned(self) -> Object { self.into() } /// Convert this immutable object into its mutable counterpart. /// /// Note that this is an expensive operation. pub fn to_owned(&self) -> Object { self.clone().into() } } /// Convenient access to contained objects. 
impl<'a> ObjectRef<'a> { /// Interpret this object as blob. pub fn as_blob(&self) -> Option<&BlobRef<'a>> { match self { ObjectRef::Blob(v) => Some(v), _ => None, } } /// Interpret this object as blob, chainable. pub fn into_blob(self) -> Option> { match self { ObjectRef::Blob(v) => Some(v), _ => None, } } /// Interpret this object as commit. pub fn as_commit(&self) -> Option<&CommitRef<'a>> { match self { ObjectRef::Commit(v) => Some(v), _ => None, } } /// Interpret this object as commit, chainable. pub fn into_commit(self) -> Option> { match self { ObjectRef::Commit(v) => Some(v), _ => None, } } /// Interpret this object as tree. pub fn as_tree(&self) -> Option<&TreeRef<'a>> { match self { ObjectRef::Tree(v) => Some(v), _ => None, } } /// Interpret this object as tree, chainable pub fn into_tree(self) -> Option> { match self { ObjectRef::Tree(v) => Some(v), _ => None, } } /// Interpret this object as tag. pub fn as_tag(&self) -> Option<&TagRef<'a>> { match self { ObjectRef::Tag(v) => Some(v), _ => None, } } /// Interpret this object as tag, chainable. pub fn into_tag(self) -> Option> { match self { ObjectRef::Tag(v) => Some(v), _ => None, } } /// Return the kind of object. 
pub fn kind(&self) -> Kind { match self { ObjectRef::Tree(_) => Kind::Tree, ObjectRef::Blob(_) => Kind::Blob, ObjectRef::Commit(_) => Kind::Commit, ObjectRef::Tag(_) => Kind::Tag, } } } gix-object-0.46.1/src/parse.rs000064400000000000000000000045041046102023000142210ustar 00000000000000use bstr::{BStr, BString, ByteVec}; use winnow::{ combinator::{preceded, repeat, terminated}, error::{AddContext, ParserError, StrContext}, prelude::*, token::{take_till, take_until, take_while}, }; use crate::ByteSlice; pub(crate) const NL: &[u8] = b"\n"; pub(crate) const SPACE: &[u8] = b" "; const SPACE_OR_NL: &[u8] = b" \n"; pub(crate) fn any_header_field_multi_line<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( i: &mut &'a [u8], ) -> PResult<(&'a [u8], BString), E> { ( terminated(take_till(1.., SPACE_OR_NL), SPACE), ( take_till(1.., NL), NL, repeat(1.., terminated((SPACE, take_until(0.., NL)), NL)).map(|()| ()), ) .take() .map(|o: &[u8]| { let bytes = o.as_bstr(); let mut out = BString::from(Vec::with_capacity(bytes.len())); let mut lines = bytes.lines_with_terminator(); out.push_str(lines.next().expect("first line")); for line in lines { out.push_str(&line[1..]); // cut leading space } out }), ) .context(StrContext::Expected("name ".into())) .parse_next(i) } pub(crate) fn header_field<'a, T, E: ParserError<&'a [u8]>>( i: &mut &'a [u8], name: &'static [u8], parse_value: impl Parser<&'a [u8], T, E>, ) -> PResult { terminated(preceded(terminated(name, SPACE), parse_value), NL).parse_next(i) } pub(crate) fn any_header_field<'a, T, E: ParserError<&'a [u8]>>( i: &mut &'a [u8], parse_value: impl Parser<&'a [u8], T, E>, ) -> PResult<(&'a [u8], T), E> { terminated((terminated(take_till(1.., SPACE_OR_NL), SPACE), parse_value), NL).parse_next(i) } fn is_hex_digit_lc(b: u8) -> bool { matches!(b, b'0'..=b'9' | b'a'..=b'f') } pub fn hex_hash<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<&'a BStr, E> { take_while( 
gix_hash::Kind::shortest().len_in_hex()..=gix_hash::Kind::longest().len_in_hex(), is_hex_digit_lc, ) .map(ByteSlice::as_bstr) .parse_next(i) } pub(crate) fn signature<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( i: &mut &'a [u8], ) -> PResult, E> { gix_actor::signature::decode(i) } gix-object-0.46.1/src/tag/decode.rs000064400000000000000000000052511046102023000151050ustar 00000000000000use winnow::{ combinator::{alt, delimited, eof, opt, preceded, rest, terminated}, error::{AddContext, ParserError, StrContext}, prelude::*, stream::AsChar, token::{take_until, take_while}, }; use crate::{parse, parse::NL, BStr, ByteSlice, TagRef}; pub fn git_tag<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( i: &mut &'a [u8], ) -> PResult, E> { ( (|i: &mut _| parse::header_field(i, b"object", parse::hex_hash)) .context(StrContext::Expected("object <40 lowercase hex char>".into())), (|i: &mut _| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) .verify_map(|kind| crate::Kind::from_bytes(kind).ok()) .context(StrContext::Expected("type ".into())), (|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))) .context(StrContext::Expected("tag ".into())), opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature)) .context(StrContext::Expected("tagger ".into())), terminated(message, eof), ) .map( |(target, kind, tag_version, signature, (message, pgp_signature))| TagRef { target, name: tag_version.as_bstr(), target_kind: kind, message, tagger: signature, pgp_signature, }, ) .parse_next(i) } pub fn message<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<(&'a BStr, Option<&'a BStr>), E> { const PGP_SIGNATURE_BEGIN: &[u8] = b"\n-----BEGIN PGP SIGNATURE-----"; const PGP_SIGNATURE_END: &[u8] = b"-----END PGP SIGNATURE-----"; if i.is_empty() { return Ok((b"".as_bstr(), None)); } delimited( NL, alt(( ( take_until(0.., PGP_SIGNATURE_BEGIN), preceded( NL, ( &PGP_SIGNATURE_BEGIN[1..], take_until(0.., 
PGP_SIGNATURE_END), PGP_SIGNATURE_END, rest, ) .take() .map(|signature: &[u8]| { if signature.is_empty() { None } else { Some(signature.as_bstr()) } }), ), ), rest.map(|rest: &[u8]| (rest, None)), )), opt(NL), ) .map(|(message, signature)| (message.as_bstr(), signature)) .parse_next(i) } gix-object-0.46.1/src/tag/mod.rs000064400000000000000000000011711046102023000144360ustar 00000000000000use winnow::prelude::*; use crate::TagRef; mod decode; /// pub mod write; /// pub mod ref_iter; impl<'a> TagRef<'a> { /// Deserialize a tag from `data`. pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { let input = &mut data; match decode::git_tag.parse_next(input) { Ok(tag) => Ok(tag), Err(err) => Err(crate::decode::Error::with_err(err, input)), } } /// The object this tag points to as `Id`. pub fn target(&self) -> gix_hash::ObjectId { gix_hash::ObjectId::from_hex(self.target).expect("prior validation") } } gix-object-0.46.1/src/tag/ref_iter.rs000064400000000000000000000135121046102023000154600ustar 00000000000000use bstr::BStr; use gix_hash::{oid, ObjectId}; use winnow::{ combinator::{eof, opt, terminated}, error::{ParserError, StrContext}, prelude::*, stream::AsChar, token::take_while, }; use crate::{bstr::ByteSlice, parse, parse::NL, tag::decode, Kind, TagRefIter}; #[derive(Default, Copy, Clone)] pub(crate) enum State { #[default] Target, TargetKind, Name, Tagger, Message, } impl<'a> TagRefIter<'a> { /// Create a tag iterator from data. pub fn from_bytes(data: &'a [u8]) -> TagRefIter<'a> { TagRefIter { data, state: State::default(), } } /// Returns the target id of this tag if it is the first function called and if there is no error in decoding /// the data. /// /// Note that this method must only be called once or else will always return None while consuming a single token. /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they /// call the method as intended. 
Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again. /// `next()`. pub fn target_id(mut self) -> Result { let token = self.next().ok_or_else(missing_field)??; Token::into_id(token).ok_or_else(missing_field) } /// Returns the taggers signature if there is no decoding error, and if this field exists. /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not. pub fn tagger(mut self) -> Result>, crate::decode::Error> { self.find_map(|t| match t { Ok(Token::Tagger(signature)) => Some(Ok(signature)), Err(err) => Some(Err(err)), _ => None, }) .ok_or_else(missing_field)? } } fn missing_field() -> crate::decode::Error { crate::decode::empty_error() } impl<'a> TagRefIter<'a> { #[inline] fn next_inner(mut i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { let input = &mut i; match Self::next_inner_(input, state) { Ok(token) => Ok((*input, token)), Err(err) => Err(crate::decode::Error::with_err(err, input)), } } fn next_inner_( input: &mut &'a [u8], state: &mut State, ) -> Result, winnow::error::ErrMode> { use State::*; Ok(match state { Target => { let target = (|i: &mut _| parse::header_field(i, b"object", parse::hex_hash)) .context(StrContext::Expected("object <40 lowercase hex char>".into())) .parse_next(input)?; *state = TargetKind; Token::Target { id: ObjectId::from_hex(target).expect("parsing validation"), } } TargetKind => { let kind = (|i: &mut _| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) .context(StrContext::Expected("type ".into())) .parse_next(input)?; let kind = Kind::from_bytes(kind) .map_err(|_| winnow::error::ErrMode::from_error_kind(input, winnow::error::ErrorKind::Verify))?; *state = Name; Token::TargetKind(kind) } Name => { let tag_version = (|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))) .context(StrContext::Expected("tag ".into())) .parse_next(input)?; 
*state = Tagger; Token::Name(tag_version.as_bstr()) } Tagger => { let signature = opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature)) .context(StrContext::Expected("tagger ".into())) .parse_next(input)?; *state = Message; Token::Tagger(signature) } Message => { let (message, pgp_signature) = terminated(decode::message, eof).parse_next(input)?; debug_assert!( input.is_empty(), "we should have consumed all data - otherwise iter may go forever" ); Token::Body { message, pgp_signature } } }) } } impl<'a> Iterator for TagRefIter<'a> { type Item = Result, crate::decode::Error>; fn next(&mut self) -> Option { if self.data.is_empty() { return None; } match Self::next_inner(self.data, &mut self.state) { Ok((data, token)) => { self.data = data; Some(Ok(token)) } Err(err) => { self.data = &[]; Some(Err(err)) } } } } /// A token returned by the [tag iterator][TagRefIter]. #[allow(missing_docs)] #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] pub enum Token<'a> { Target { id: ObjectId, }, TargetKind(Kind), Name(&'a BStr), Tagger(Option>), Body { message: &'a BStr, pgp_signature: Option<&'a BStr>, }, } impl Token<'_> { /// Return the object id of this token if its a [Target][Token::Target]. pub fn id(&self) -> Option<&oid> { match self { Token::Target { id } => Some(id.as_ref()), _ => None, } } /// Return the owned object id of this token if its a [Target][Token::Target]. 
pub fn into_id(self) -> Option { match self { Token::Target { id } => Some(id), _ => None, } } } gix-object-0.46.1/src/tag/write/tests.rs000064400000000000000000000012021046102023000161460ustar 00000000000000mod validated_name { mod invalid { use bstr::ByteSlice; use super::super::super::*; #[test] fn only_dash() { assert!(validated_name(b"-".as_bstr()).is_err()); } #[test] fn leading_dash() { assert!(validated_name(b"-hello".as_bstr()).is_err()); } } mod valid { use bstr::ByteSlice; use super::super::super::*; #[test] fn version() { for version in &["v1.0.0", "0.2.1", "0-alpha1"] { assert!(validated_name(version.as_bytes().as_bstr()).is_ok()); } } } } gix-object-0.46.1/src/tag/write.rs000064400000000000000000000066761046102023000150300ustar 00000000000000use std::io; use bstr::BStr; use crate::{encode, encode::NL, Kind, Tag, TagRef}; /// An Error used in [`Tag::write_to()`][crate::WriteTo::write_to()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Tags must not start with a dash: '-'")] StartsWithDash, #[error("The tag name was no valid reference name")] InvalidRefName(#[from] gix_validate::tag::name::Error), } impl From for io::Error { fn from(err: Error) -> Self { io::Error::new(io::ErrorKind::Other, err) } } impl crate::WriteTo for Tag { fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> { encode::trusted_header_id(b"object", &self.target, out)?; encode::trusted_header_field(b"type", self.target_kind.as_bytes(), out)?; encode::header_field(b"tag", validated_name(self.name.as_ref())?, out)?; if let Some(tagger) = &self.tagger { encode::trusted_header_signature(b"tagger", &tagger.to_ref(), out)?; } out.write_all(NL)?; if !self.message.is_empty() { out.write_all(self.message.as_ref())?; } if let Some(message) = &self.pgp_signature { out.write_all(NL)?; out.write_all(message.as_ref())?; } Ok(()) } fn kind(&self) -> Kind { Kind::Tag } fn size(&self) -> u64 { (b"object".len() + 1 /* space */ + 
self.target.kind().len_in_hex() + 1 /* nl */ + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + self .tagger .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + 1 /* nl */ + self.message.len() + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } impl crate::WriteTo for TagRef<'_> { fn write_to(&self, mut out: &mut dyn io::Write) -> io::Result<()> { encode::trusted_header_field(b"object", self.target, &mut out)?; encode::trusted_header_field(b"type", self.target_kind.as_bytes(), &mut out)?; encode::header_field(b"tag", validated_name(self.name)?, &mut out)?; if let Some(tagger) = &self.tagger { encode::trusted_header_signature(b"tagger", tagger, &mut out)?; } out.write_all(NL)?; if !self.message.is_empty() { out.write_all(self.message)?; } if let Some(message) = self.pgp_signature { out.write_all(NL)?; out.write_all(message)?; } Ok(()) } fn kind(&self) -> Kind { Kind::Tag } fn size(&self) -> u64 { (b"object".len() + 1 /* space */ + self.target().kind().len_in_hex() + 1 /* nl */ + b"type".len() + 1 /* space */ + self.target_kind.as_bytes().len() + 1 /* nl */ + b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */ + self .tagger .as_ref() .map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */) + 1 /* nl */ + self.message.len() + self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64 } } fn validated_name(name: &BStr) -> Result<&BStr, Error> { gix_validate::tag::name(name)?; if name[0] == b'-' { return Err(Error::StartsWithDash); } Ok(name) } #[cfg(test)] mod tests; gix-object-0.46.1/src/traits/_impls.rs000064400000000000000000000036431046102023000157030ustar 00000000000000use crate::{Kind, WriteTo}; use gix_hash::ObjectId; use std::io::Read; use std::ops::Deref; use std::rc::Rc; use std::sync::Arc; impl crate::Write for &T where T: crate::Write, { fn write(&self, object: &dyn 
WriteTo) -> Result { (*self).write(object) } fn write_buf(&self, object: Kind, from: &[u8]) -> Result { (*self).write_buf(object, from) } fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { (*self).write_stream(kind, size, from) } } impl crate::Write for Arc where T: crate::Write, { fn write(&self, object: &dyn WriteTo) -> Result { self.deref().write(object) } fn write_buf(&self, object: Kind, from: &[u8]) -> Result { self.deref().write_buf(object, from) } fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { self.deref().write_stream(kind, size, from) } } impl crate::Write for Rc where T: crate::Write, { fn write(&self, object: &dyn WriteTo) -> Result { self.deref().write(object) } fn write_buf(&self, object: Kind, from: &[u8]) -> Result { self.deref().write_buf(object, from) } fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { self.deref().write_stream(kind, size, from) } } impl WriteTo for &T where T: WriteTo, { fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> { ::write_to(self, out) } fn kind(&self) -> Kind { ::kind(self) } fn size(&self) -> u64 { ::size(self) } } gix-object-0.46.1/src/traits/find.rs000064400000000000000000000263621046102023000153430ustar 00000000000000use crate::find; /// Check if an object is present in an object store. pub trait Exists { /// Returns `true` if the object exists in the database. fn exists(&self, id: &gix_hash::oid) -> bool; } /// Find an object in the object store. /// /// ## Notes /// /// Find effectively needs [generic associated types][issue] to allow a trait for the returned object type. /// Until then, we will have to make due with explicit types and give them the potentially added features we want. /// /// [issue]: https://github.com/rust-lang/rust/issues/44265 pub trait Find { /// Find an object matching `id` in the database while placing its raw, possibly encoded data into `buffer`. 
/// /// Returns `Some` object if it was present in the database, or the error that occurred during lookup or object /// retrieval. fn try_find<'a>(&self, id: &gix_hash::oid, buffer: &'a mut Vec) -> Result>, find::Error>; } /// Find the header of an object in the object store. pub trait Header { /// Find the header of the object matching `id` in the database. /// /// Returns `Some` header if it was present, or the error that occurred during lookup. fn try_header(&self, id: &gix_hash::oid) -> Result, find::Error>; } /// A combination of [`Find`] and [`Header`] traits to help with `dyn` trait objects. pub trait FindObjectOrHeader: Find + Header {} mod _impls { use std::{ops::Deref, rc::Rc, sync::Arc}; use gix_hash::oid; use crate::Data; impl crate::Exists for &T where T: crate::Exists, { fn exists(&self, id: &oid) -> bool { (*self).exists(id) } } impl crate::FindObjectOrHeader for T where T: crate::Find + crate::FindHeader {} impl crate::Find for &T where T: crate::Find, { fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { (*self).try_find(id, buffer) } } impl crate::FindHeader for &T where T: crate::FindHeader, { fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { (*self).try_header(id) } } impl crate::Exists for Box where T: crate::Exists, { fn exists(&self, id: &oid) -> bool { self.deref().exists(id) } } impl crate::Exists for Rc where T: crate::Exists, { fn exists(&self, id: &oid) -> bool { self.deref().exists(id) } } impl crate::Find for Rc where T: crate::Find, { fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { self.deref().try_find(id, buffer) } } impl crate::FindHeader for Rc where T: crate::FindHeader, { fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { self.deref().try_header(id) } } impl crate::Find for Box where T: crate::Find, { fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { 
self.deref().try_find(id, buffer) } } impl crate::FindHeader for Box where T: crate::FindHeader, { fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { self.deref().try_header(id) } } impl crate::Exists for Arc where T: crate::Exists, { fn exists(&self, id: &oid) -> bool { self.deref().exists(id) } } impl crate::Find for Arc where T: crate::Find, { fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, crate::find::Error> { self.deref().try_find(id, buffer) } } impl crate::FindHeader for Arc where T: crate::FindHeader, { fn try_header(&self, id: &gix_hash::oid) -> Result, crate::find::Error> { self.deref().try_header(id) } } } mod ext { use crate::{find, BlobRef, CommitRef, CommitRefIter, Kind, ObjectRef, TagRef, TagRefIter, TreeRef, TreeRefIter}; macro_rules! make_obj_lookup { ($method:ident, $object_variant:path, $object_kind:path, $object_type:ty) => { /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error /// while returning the desired object type. fn $method<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, ) -> Result<$object_type, find::existing_object::Error> { self.try_find(id, buffer) .map_err(find::existing_object::Error::Find)? .ok_or_else(|| find::existing_object::Error::NotFound { oid: id.as_ref().to_owned(), }) .and_then(|o| { o.decode().map_err(|err| find::existing_object::Error::Decode { source: err, oid: id.as_ref().to_owned(), }) }) .and_then(|o| match o { $object_variant(o) => return Ok(o), o => Err(find::existing_object::Error::ObjectKind { oid: id.as_ref().to_owned(), actual: o.kind(), expected: $object_kind, }), }) } }; } macro_rules! make_iter_lookup { ($method:ident, $object_kind:path, $object_type:ty, $into_iter:tt) => { /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error /// while returning the desired iterator type. 
fn $method<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, ) -> Result<$object_type, find::existing_iter::Error> { self.try_find(id, buffer) .map_err(find::existing_iter::Error::Find)? .ok_or_else(|| find::existing_iter::Error::NotFound { oid: id.as_ref().to_owned(), }) .and_then(|o| { o.$into_iter() .ok_or_else(|| find::existing_iter::Error::ObjectKind { oid: id.as_ref().to_owned(), actual: o.kind, expected: $object_kind, }) }) } }; } /// An extension trait with convenience functions. pub trait HeaderExt: super::Header { /// Like [`try_header(…)`](super::Header::try_header()), but flattens the `Result>` into a single `Result` making a non-existing header an error. fn header(&self, id: &gix_hash::oid) -> Result { self.try_header(id) .map_err(find::existing::Error::Find)? .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) } } /// An extension trait with convenience functions. pub trait FindExt: super::Find { /// Like [`try_find(…)`](super::Find::try_find()), but flattens the `Result>` into a single `Result` making a non-existing object an error. fn find<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, ) -> Result, find::existing::Error> { self.try_find(id, buffer) .map_err(find::existing::Error::Find)? .ok_or_else(|| find::existing::Error::NotFound { oid: id.to_owned() }) } /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error /// while returning the desired object type. fn find_blob<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, ) -> Result, find::existing_object::Error> { if id == gix_hash::ObjectId::empty_blob(id.kind()) { return Ok(BlobRef { data: &[] }); } self.try_find(id, buffer) .map_err(find::existing_object::Error::Find)? 
.ok_or_else(|| find::existing_object::Error::NotFound { oid: id.as_ref().to_owned(), }) .and_then(|o| { o.decode().map_err(|err| find::existing_object::Error::Decode { source: err, oid: id.as_ref().to_owned(), }) }) .and_then(|o| match o { ObjectRef::Blob(o) => Ok(o), o => Err(find::existing_object::Error::ObjectKind { oid: id.as_ref().to_owned(), actual: o.kind(), expected: Kind::Blob, }), }) } /// Like [`find(…)`][Self::find()], but flattens the `Result>` into a single `Result` making a non-existing object an error /// while returning the desired object type. fn find_tree<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, ) -> Result, find::existing_object::Error> { if id == gix_hash::ObjectId::empty_tree(id.kind()) { return Ok(TreeRef { entries: Vec::new() }); } self.try_find(id, buffer) .map_err(find::existing_object::Error::Find)? .ok_or_else(|| find::existing_object::Error::NotFound { oid: id.as_ref().to_owned(), }) .and_then(|o| { o.decode().map_err(|err| find::existing_object::Error::Decode { source: err, oid: id.as_ref().to_owned(), }) }) .and_then(|o| match o { ObjectRef::Tree(o) => Ok(o), o => Err(find::existing_object::Error::ObjectKind { oid: id.as_ref().to_owned(), actual: o.kind(), expected: Kind::Tree, }), }) } make_obj_lookup!(find_commit, ObjectRef::Commit, Kind::Commit, CommitRef<'a>); make_obj_lookup!(find_tag, ObjectRef::Tag, Kind::Tag, TagRef<'a>); make_iter_lookup!(find_commit_iter, Kind::Commit, CommitRefIter<'a>, try_into_commit_iter); make_iter_lookup!(find_tree_iter, Kind::Tree, TreeRefIter<'a>, try_into_tree_iter); make_iter_lookup!(find_tag_iter, Kind::Tag, TagRefIter<'a>, try_into_tag_iter); } impl FindExt for T {} } pub use ext::{FindExt, HeaderExt}; gix-object-0.46.1/src/traits/mod.rs000064400000000000000000000041171046102023000151740ustar 00000000000000use std::io; use crate::Kind; /// Describe the capability to write git objects into an object store. 
pub trait Write { /// Write objects using the intrinsic kind of [`hash`](gix_hash::Kind) into the database, /// returning id to reference it in subsequent reads. fn write(&self, object: &dyn WriteTo) -> Result { let mut buf = Vec::with_capacity(2048); object.write_to(&mut buf)?; self.write_stream(object.kind(), buf.len() as u64, &mut buf.as_slice()) } /// As [`write`](Write::write), but takes an [`object` kind](Kind) along with its encoded bytes. fn write_buf(&self, object: crate::Kind, mut from: &[u8]) -> Result { self.write_stream(object, from.len() as u64, &mut from) } /// As [`write`](Write::write), but takes an input stream. /// This is commonly used for writing blobs directly without reading them to memory first. fn write_stream( &self, kind: crate::Kind, size: u64, from: &mut dyn io::Read, ) -> Result; } /// Writing of objects to a `Write` implementation pub trait WriteTo { /// Write a representation of this instance to `out`. fn write_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()>; /// Returns the type of this object. fn kind(&self) -> Kind; /// Returns the size of this object's representation (the amount /// of data which would be written by [`write_to`](Self::write_to)). /// /// [`size`](Self::size)'s value has no bearing on the validity of /// the object, as such it's possible for [`size`](Self::size) to /// return a sensible value but [`write_to`](Self::write_to) to /// fail because the object was not actually valid in some way. 
fn size(&self) -> u64; /// Returns a loose object header based on the object's data fn loose_header(&self) -> smallvec::SmallVec<[u8; 28]> { crate::encode::loose_header(self.kind(), self.size()) } } mod _impls; mod find; pub use find::*; gix-object-0.46.1/src/tree/editor.rs000064400000000000000000000511651046102023000153410ustar 00000000000000use crate::tree::{Editor, EntryKind}; use crate::{tree, Tree}; use bstr::{BStr, BString, ByteSlice, ByteVec}; use gix_hash::ObjectId; use std::cmp::Ordering; use std::collections::{hash_map, HashMap}; use std::fmt::Formatter; /// A way to constrain all [tree-edits](Editor) to a given subtree. pub struct Cursor<'a, 'find> { /// The underlying editor parent: &'a mut Editor<'find>, /// Our own location, used as prefix for all operations. /// Note that it's assumed to always contain a tree. prefix: BString, } impl std::fmt::Debug for Editor<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("Editor") .field("object_hash", &self.object_hash) .field("path_buf", &self.path_buf) .field("trees", &self.trees) .finish() } } /// The error returned by [Editor] or [Cursor] edit operation. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Empty path components are not allowed")] EmptyPathComponent, #[error(transparent)] FindExistingObject(#[from] crate::find::existing_object::Error), } /// Lifecycle impl<'a> Editor<'a> { /// Create a new editor that uses `root` as base for all edits. Use `find` to lookup existing /// trees when edits are made. Each tree will only be looked-up once and then edited in place from /// that point on. /// `object_hash` denotes the kind of hash to create. 
pub fn new(root: Tree, find: &'a dyn crate::FindExt, object_hash: gix_hash::Kind) -> Self { Editor { find, object_hash, trees: HashMap::from_iter(Some((empty_path(), root))), path_buf: BString::from(Vec::with_capacity(256)).into(), tree_buf: Vec::with_capacity(512), } } } /// Operations impl Editor<'_> { /// Write the entire in-memory state of all changed trees (and only changed trees) to `out`, and remove /// written portions from our state except for the root tree, which affects [`get()`](Editor::get()). /// Note that the returned object id *can* be the empty tree if everything was removed or if nothing /// was added to the tree. /// /// The last call to `out` will be the changed root tree, whose object-id will also be returned. /// `out` is free to do any kind of additional validation, like to assure that all entries in the tree exist. /// We don't assure that as there is no validation that inserted entries are valid object ids. /// /// Future calls to [`upsert`](Self::upsert) or similar will keep working on the last seen state of the /// just-written root-tree. /// If this is not desired, use [set_root()](Self::set_root()). /// /// ### Validation /// /// Note that no additional validation is performed to assure correctness of entry-names. /// It is absolutely and intentionally possible to write out invalid trees with this method. /// Higher layers are expected to perform detailed validation. pub fn write(&mut self, out: impl FnMut(&Tree) -> Result) -> Result { self.path_buf.borrow_mut().clear(); self.write_at_pathbuf(out, WriteMode::Normal) } /// Remove the entry at `rela_path`, loading all trees on the path accordingly. /// It's no error if the entry doesn't exist, or if `rela_path` doesn't lead to an existing entry at all. /// /// Note that trying to remove a path with an empty component is also forbidden. 
pub fn remove(&mut self, rela_path: I) -> Result<&mut Self, Error> where I: IntoIterator, C: AsRef, { self.path_buf.borrow_mut().clear(); self.upsert_or_remove_at_pathbuf(rela_path, None) } /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written /// to that point. /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed. /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory. pub fn get(&self, rela_path: I) -> Option<&tree::Entry> where I: IntoIterator, C: AsRef, { self.path_buf.borrow_mut().clear(); self.get_inner(rela_path) } /// Insert a new entry of `kind` with `id` at `rela_path`, an iterator over each path component in the tree, /// like `a/b/c`. Names are matched case-sensitively. /// /// Existing leaf-entries will be overwritten unconditionally, and it is assumed that `id` is available in the object database /// or will be made available at a later point to assure the integrity of the produced tree. /// /// Intermediate trees will be created if they don't exist in the object database, otherwise they will be loaded and entries /// will be inserted into them instead. /// /// Note that `id` can be [null](ObjectId::null()) to create a placeholder. These will not be written, and paths leading /// through them will not be considered a problem. /// /// `id` can also be an empty tree, along with [the respective `kind`](EntryKind::Tree), even though that's normally not allowed /// in Git trees. 
pub fn upsert(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, Error> where I: IntoIterator, C: AsRef, { self.path_buf.borrow_mut().clear(); self.upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal))) } fn get_inner(&self, rela_path: I) -> Option<&tree::Entry> where I: IntoIterator, C: AsRef, { let mut path_buf = self.path_buf.borrow_mut(); let mut cursor = self.trees.get(path_buf.as_bstr()).expect("root is always present"); let mut rela_path = rela_path.into_iter().peekable(); while let Some(name) = rela_path.next() { let name = name.as_ref(); let is_last = rela_path.peek().is_none(); match cursor .entries .binary_search_by(|e| cmp_entry_with_name(e, name, true)) .or_else(|_| cursor.entries.binary_search_by(|e| cmp_entry_with_name(e, name, false))) { Ok(idx) if is_last => return Some(&cursor.entries[idx]), Ok(idx) => { if cursor.entries[idx].mode.is_tree() { push_path_component(&mut path_buf, name); cursor = self.trees.get(path_buf.as_bstr())?; } else { break; } } Err(_) => break, }; } None } fn write_at_pathbuf( &mut self, mut out: impl FnMut(&Tree) -> Result, mode: WriteMode, ) -> Result { assert_ne!(self.trees.len(), 0, "there is at least the root tree"); // back is for children, front is for parents. 
let path_buf = self.path_buf.borrow_mut(); let mut parents = vec![( None::, path_buf.clone(), self.trees .remove(path_buf.as_bstr()) .expect("root tree is always present"), )]; let mut children = Vec::new(); while let Some((parent_idx, mut rela_path, mut tree)) = children.pop().or_else(|| parents.pop()) { let mut all_entries_unchanged_or_written = true; for entry in &tree.entries { if entry.mode.is_tree() { let prev_len = push_path_component(&mut rela_path, &entry.filename); if let Some(sub_tree) = self.trees.remove(&rela_path) { all_entries_unchanged_or_written = false; let next_parent_idx = parents.len(); children.push((Some(next_parent_idx), rela_path.clone(), sub_tree)); } rela_path.truncate(prev_len); } } if all_entries_unchanged_or_written { tree.entries.retain(|e| !e.oid.is_null()); if let Some((_, _, parent_to_adjust)) = parent_idx.map(|idx| parents.get_mut(idx).expect("always present, pointing towards zero")) { let name = filename(rela_path.as_bstr()); let entry_idx = parent_to_adjust .entries .binary_search_by(|e| cmp_entry_with_name(e, name, true)) .expect("the parent always knows us by name"); if tree.entries.is_empty() { parent_to_adjust.entries.remove(entry_idx); } else { match out(&tree) { Ok(id) => { parent_to_adjust.entries[entry_idx].oid = id; } Err(err) => { let root_tree = parents.into_iter().next().expect("root wasn't consumed yet"); self.trees.insert(root_tree.1, root_tree.2); return Err(err); } } } } else if parents.is_empty() { debug_assert!(children.is_empty(), "we consume children before parents"); debug_assert_eq!(rela_path, **path_buf, "this should always be the root tree"); // There may be left-over trees if they are replaced with blobs for example. 
match out(&tree) { Ok(id) => { let root_tree_id = id; match mode { WriteMode::Normal => { self.trees.clear(); } WriteMode::FromCursor => {} } self.trees.insert(rela_path, tree); return Ok(root_tree_id); } Err(err) => { self.trees.insert(rela_path, tree); return Err(err); } } } else if !tree.entries.is_empty() { out(&tree)?; } } else { parents.push((parent_idx, rela_path, tree)); } } unreachable!("we exit as soon as everything is consumed") } fn upsert_or_remove_at_pathbuf( &mut self, rela_path: I, kind_and_id: Option<(EntryKind, ObjectId, UpsertMode)>, ) -> Result<&mut Self, Error> where I: IntoIterator, C: AsRef, { let mut path_buf = self.path_buf.borrow_mut(); let mut cursor = self.trees.get_mut(path_buf.as_bstr()).expect("root is always present"); let mut rela_path = rela_path.into_iter().peekable(); let new_kind_is_tree = kind_and_id.map_or(false, |(kind, _, _)| kind == EntryKind::Tree); while let Some(name) = rela_path.next() { let name = name.as_ref(); if name.is_empty() { return Err(Error::EmptyPathComponent); } let is_last = rela_path.peek().is_none(); let mut needs_sorting = false; let current_level_must_be_tree = !is_last || new_kind_is_tree; let check_type_change = |entry: &tree::Entry| entry.mode.is_tree() != current_level_must_be_tree; let tree_to_lookup = match cursor .entries .binary_search_by(|e| cmp_entry_with_name(e, name, false)) .or_else(|file_insertion_idx| { cursor .entries .binary_search_by(|e| cmp_entry_with_name(e, name, true)) .map_err(|dir_insertion_index| { if current_level_must_be_tree { dir_insertion_index } else { file_insertion_idx } }) }) { Ok(idx) => { match kind_and_id { None => { if is_last { cursor.entries.remove(idx); break; } else { let entry = &cursor.entries[idx]; if entry.mode.is_tree() { Some(entry.oid) } else { break; } } } Some((kind, id, _mode)) => { let entry = &mut cursor.entries[idx]; if is_last { // unconditionally overwrite what's there. 
entry.oid = id; needs_sorting = check_type_change(entry); entry.mode = kind.into(); None } else if entry.mode.is_tree() { // Possibly lookup the existing tree on our way down the path. Some(entry.oid) } else { // it is no tree, but we are traversing a path, so turn it into one. entry.oid = id.kind().null(); needs_sorting = check_type_change(entry); entry.mode = EntryKind::Tree.into(); None } } } } Err(insertion_idx) => match kind_and_id { None => break, Some((kind, id, _mode)) => { cursor.entries.insert( insertion_idx, tree::Entry { filename: name.into(), mode: if is_last { kind.into() } else { EntryKind::Tree.into() }, oid: if is_last { id } else { id.kind().null() }, }, ); None } }, }; if needs_sorting { cursor.entries.sort(); } if is_last && kind_and_id.map_or(false, |(_, _, mode)| mode == UpsertMode::Normal) { break; } push_path_component(&mut path_buf, name); cursor = match self.trees.entry(path_buf.clone()) { hash_map::Entry::Occupied(e) => e.into_mut(), hash_map::Entry::Vacant(e) => e.insert( if let Some(tree_id) = tree_to_lookup.filter(|tree_id| !tree_id.is_empty_tree()) { self.find.find_tree(&tree_id, &mut self.tree_buf)?.into() } else { Tree::default() }, ), }; } drop(path_buf); Ok(self) } /// Set the root tree of the modification to `root`, assuring it has a well-known state. /// /// Note that this erases all previous edits. /// /// This is useful if the same editor is re-used for various trees. pub fn set_root(&mut self, root: Tree) -> &mut Self { self.trees.clear(); self.trees.insert(empty_path(), root); self } } mod cursor { use crate::tree::editor::{Cursor, UpsertMode, WriteMode}; use crate::tree::{Editor, EntryKind}; use crate::{tree, Tree}; use bstr::{BStr, BString}; use gix_hash::ObjectId; /// Cursor handling impl<'a> Editor<'a> { /// Turn ourselves as a cursor, which points to the same tree as the editor. /// /// This is useful if a method takes a [`Cursor`], not an [`Editor`]. 
pub fn to_cursor(&mut self) -> Cursor<'_, 'a> { Cursor { parent: self, prefix: BString::default(), } } /// Create a cursor at the given `rela_path`, which must be a tree or is turned into a tree as its own edit. /// /// The returned cursor will then allow applying edits to the tree at `rela_path` as root. /// If `rela_path` is a single empty string, it is equivalent to using the current instance itself. pub fn cursor_at(&mut self, rela_path: I) -> Result, super::Error> where I: IntoIterator, C: AsRef, { self.path_buf.borrow_mut().clear(); self.upsert_or_remove_at_pathbuf( rela_path, Some((EntryKind::Tree, self.object_hash.null(), UpsertMode::AssureTreeOnly)), )?; let prefix = self.path_buf.borrow_mut().clone(); Ok(Cursor { prefix, /* set during the upsert call */ parent: self, }) } } impl Cursor<'_, '_> { /// Obtain the entry at `rela_path` or return `None` if none was found, or the tree wasn't yet written /// to that point. /// Note that after [writing](Self::write) only the root path remains, all other intermediate trees are removed. /// The entry can be anything that can be stored in a tree, but may have a null-id if it's a newly /// inserted tree. Also, ids of trees might not be accurate as they may have been changed in memory. pub fn get(&self, rela_path: I) -> Option<&tree::Entry> where I: IntoIterator, C: AsRef, { self.parent.path_buf.borrow_mut().clone_from(&self.prefix); self.parent.get_inner(rela_path) } /// Like [`Editor::upsert()`], but with the constraint of only editing in this cursor's tree. pub fn upsert(&mut self, rela_path: I, kind: EntryKind, id: ObjectId) -> Result<&mut Self, super::Error> where I: IntoIterator, C: AsRef, { self.parent.path_buf.borrow_mut().clone_from(&self.prefix); self.parent .upsert_or_remove_at_pathbuf(rela_path, Some((kind, id, UpsertMode::Normal)))?; Ok(self) } /// Like [`Editor::remove()`], but with the constraint of only editing in this cursor's tree. 
pub fn remove(&mut self, rela_path: I) -> Result<&mut Self, super::Error> where I: IntoIterator, C: AsRef, { self.parent.path_buf.borrow_mut().clone_from(&self.prefix); self.parent.upsert_or_remove_at_pathbuf(rela_path, None)?; Ok(self) } /// Like [`Editor::write()`], but will write only the subtree of the cursor. pub fn write(&mut self, out: impl FnMut(&Tree) -> Result) -> Result { self.parent.path_buf.borrow_mut().clone_from(&self.prefix); self.parent.write_at_pathbuf(out, WriteMode::FromCursor) } } } #[derive(Copy, Clone, Eq, PartialEq)] enum UpsertMode { Normal, /// Only make sure there is a tree at the given location (requires kind tree and null-id) AssureTreeOnly, } enum WriteMode { Normal, /// Perform less cleanup to assure parent-editor still stays intact FromCursor, } fn cmp_entry_with_name(a: &tree::Entry, filename: &BStr, is_tree: bool) -> Ordering { let common = a.filename.len().min(filename.len()); a.filename[..common].cmp(&filename[..common]).then_with(|| { let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/')); let b = filename.get(common).or_else(|| is_tree.then_some(&b'/')); a.cmp(&b) }) } fn filename(path: &BStr) -> &BStr { path.rfind_byte(b'/').map_or(path, |pos| &path[pos + 1..]) } fn empty_path() -> BString { BString::default() } fn push_path_component(base: &mut BString, component: &[u8]) -> usize { let prev_len = base.len(); debug_assert!(base.last() != Some(&b'/')); if !base.is_empty() { base.push_byte(b'/'); } base.push_str(component); prev_len } gix-object-0.46.1/src/tree/mod.rs000064400000000000000000000215561046102023000146330ustar 00000000000000use crate::{ bstr::{BStr, BString}, tree, Tree, TreeRef, }; use std::cell::RefCell; use std::cmp::Ordering; /// pub mod editor; mod ref_iter; /// pub mod write; /// The state needed to apply edits instantly to in-memory trees. 
/// /// It's made so that each tree is looked at in the object database at most once, and held in memory for /// all edits until everything is flushed to write all changed trees. /// /// The editor is optimized to edit existing trees, but can deal with building entirely new trees as well /// with some penalties. #[doc(alias = "TreeUpdateBuilder", alias = "git2")] #[derive(Clone)] pub struct Editor<'a> { /// A way to lookup trees. find: &'a dyn crate::FindExt, /// The kind of hashes to produce object_hash: gix_hash::Kind, /// All trees we currently hold in memory. Each of these may change while adding and removing entries. /// null-object-ids mark tree-entries whose value we don't know yet, they are placeholders that will be /// dropped when writing at the latest. trees: std::collections::HashMap, /// A buffer to build up paths when finding the tree to edit. path_buf: RefCell, /// Our buffer for storing tree-data in, right before decoding it. tree_buf: Vec, } /// The mode of items storable in a tree, similar to the file mode on a unix file system. /// /// Used in [`mutable::Entry`][crate::tree::Entry] and [`EntryRef`]. /// /// Note that even though it can be created from any `u16`, it should be preferable to /// create it by converting [`EntryKind`] into `EntryMode`. #[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct EntryMode(pub u16); impl std::fmt::Debug for EntryMode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "EntryMode({:#o})", self.0) } } /// A discretized version of ideal and valid values for entry modes. /// /// Note that even though it can represent every valid [mode](EntryMode), it might /// loose information due to that as well. 
#[derive(Clone, Copy, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)] #[repr(u16)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum EntryKind { /// A tree, or directory Tree = 0o040000u16, /// A file that is not executable Blob = 0o100644, /// A file that is executable BlobExecutable = 0o100755, /// A symbolic link Link = 0o120000, /// A commit of a git submodule Commit = 0o160000, } impl From for EntryMode { fn from(value: EntryKind) -> Self { EntryMode(value as u16) } } impl From for EntryKind { fn from(value: EntryMode) -> Self { value.kind() } } /// Serialization impl EntryKind { /// Return the representation as used in the git internal format. pub fn as_octal_str(&self) -> &'static BStr { use EntryKind::*; let bytes: &[u8] = match self { Tree => b"40000", Blob => b"100644", BlobExecutable => b"100755", Link => b"120000", Commit => b"160000", }; bytes.into() } } impl std::ops::Deref for EntryMode { type Target = u16; fn deref(&self) -> &Self::Target { &self.0 } } const IFMT: u16 = 0o170000; impl EntryMode { /// Discretize the raw mode into an enum with well-known state while dropping unnecessary details. pub const fn kind(&self) -> EntryKind { let etype = self.0 & IFMT; if etype == 0o100000 { if self.0 & 0o000100 == 0o000100 { EntryKind::BlobExecutable } else { EntryKind::Blob } } else if etype == EntryKind::Link as u16 { EntryKind::Link } else if etype == EntryKind::Tree as u16 { EntryKind::Tree } else { EntryKind::Commit } } /// Return true if this entry mode represents a Tree/directory pub const fn is_tree(&self) -> bool { self.0 & IFMT == EntryKind::Tree as u16 } /// Return true if this entry mode represents the commit of a submodule. 
pub const fn is_commit(&self) -> bool { self.0 & IFMT == EntryKind::Commit as u16 } /// Return true if this entry mode represents a symbolic link pub const fn is_link(&self) -> bool { self.0 & IFMT == EntryKind::Link as u16 } /// Return true if this entry mode represents anything BUT Tree/directory pub const fn is_no_tree(&self) -> bool { self.0 & IFMT != EntryKind::Tree as u16 } /// Return true if the entry is any kind of blob. pub const fn is_blob(&self) -> bool { self.0 & IFMT == 0o100000 } /// Return true if the entry is an executable blob. pub const fn is_executable(&self) -> bool { matches!(self.kind(), EntryKind::BlobExecutable) } /// Return true if the entry is any kind of blob or symlink. pub const fn is_blob_or_symlink(&self) -> bool { matches!( self.kind(), EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link ) } /// Represent the mode as descriptive string. pub const fn as_str(&self) -> &'static str { use EntryKind::*; match self.kind() { Tree => "tree", Blob => "blob", BlobExecutable => "exe", Link => "link", Commit => "commit", } } /// Return the representation as used in the git internal format, which is octal and written /// to the `backing` buffer. The respective sub-slice that was written to is returned. pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr { if self.0 == 0 { std::slice::from_ref(&b'0') } else { let mut nb = 0; let mut n = self.0; while n > 0 { let remainder = (n % 8) as u8; backing[nb] = b'0' + remainder; n /= 8; nb += 1; } let res = &mut backing[..nb]; res.reverse(); res } .into() } } impl TreeRef<'_> { /// Convert this instance into its own version, creating a copy of all data. /// /// This will temporarily allocate an extra copy in memory, so at worst three copies of the tree exist /// at some intermediate point in time. Use [`Self::into_owned()`] to avoid this. pub fn to_owned(&self) -> Tree { self.clone().into() } /// Convert this instance into its own version, creating a copy of all data. 
// NOTE(review): this chunk is a collapsed crate archive. Angle-bracketed generic
// argument lists were stripped by extraction (e.g. `Option { .. }` for `Option<Ordering>`);
// they are restored below from usage — verify against upstream gix-object 0.46.1.
// The first fragment is the tail of an `impl` block whose header precedes this view.
pub fn into_owned(self) -> Tree {
    self.into()
}
}

/// An element of a [`TreeRef`][crate::TreeRef::entries].
#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EntryRef<'a> {
    /// The kind of object to which `oid` is pointing.
    pub mode: tree::EntryMode,
    /// The name of the file in the parent tree.
    pub filename: &'a BStr,
    /// The id of the object representing the entry.
    // TODO: figure out how these should be called. id or oid? It's inconsistent around the codebase.
    // Answer: make it 'id', as in `git2`
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub oid: &'a gix_hash::oid,
}

impl PartialOrd for EntryRef<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for EntryRef<'_> {
    // Git tree sort order: a directory sorts as if its name had a trailing `/`.
    fn cmp(&self, b: &Self) -> Ordering {
        let a = self;
        let common = a.filename.len().min(b.filename.len());
        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
            // When one name is a prefix of the other, compare the first byte past the
            // common prefix, substituting `/` for trees to match git's on-disk order.
            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
            a.cmp(&b)
        })
    }
}

/// An entry in a [`Tree`], similar to an entry in a directory.
#[derive(PartialEq, Eq, Debug, Hash, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Entry {
    /// The kind of object to which `oid` is pointing to.
    pub mode: EntryMode,
    /// The name of the file in the parent tree.
    pub filename: BString,
    /// The id of the object representing the entry.
    pub oid: gix_hash::ObjectId,
}

impl PartialOrd for Entry {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Entry {
    // Same special ordering as `EntryRef::cmp()`, for owned entries.
    fn cmp(&self, b: &Self) -> Ordering {
        let a = self;
        let common = a.filename.len().min(b.filename.len());
        a.filename[..common].cmp(&b.filename[..common]).then_with(|| {
            let a = a.filename.get(common).or_else(|| a.mode.is_tree().then_some(&b'/'));
            let b = b.filename.get(common).or_else(|| b.mode.is_tree().then_some(&b'/'));
            a.cmp(&b)
        })
    }
}
gix-object-0.46.1/src/tree/ref_iter.rs000064400000000000000000000175541046102023000156540ustar 00000000000000
use bstr::BStr;
use winnow::{error::ParserError, prelude::*};

use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter};

impl<'a> TreeRefIter<'a> {
    /// Instantiate an iterator from the given tree data.
    pub fn from_bytes(data: &'a [u8]) -> TreeRefIter<'a> {
        TreeRefIter { data }
    }

    /// Follow a sequence of `path` components starting from this instance, and look them up in `odb` one by one using `buffer`
    /// until the last component is looked up and its tree entry is returned.
    ///
    /// # Performance Notes
    ///
    /// Searching tree entries is currently done in sequence, which allows the search to be allocation free. It would be possible
    /// to reuse a vector and use a binary search instead, which might be able to improve performance over all.
    /// However, a benchmark should be created first to have some data and see which trade-off to choose here.
    // NOTE(review): generics `<I, P>`, `Vec<u8>` and the return type were stripped in
    // extraction and restored from the call sites below — confirm against upstream.
    pub fn lookup_entry<I, P>(
        &self,
        odb: impl crate::Find,
        buffer: &'a mut Vec<u8>,
        path: I,
    ) -> Result<Option<tree::Entry>, crate::find::Error>
    where
        I: IntoIterator<Item = P>,
        P: PartialEq<BStr>,
    {
        buffer.clear();

        let mut path = path.into_iter().peekable();
        // Start the walk from our own tree data; `buffer` is re-filled with each child tree.
        buffer.extend_from_slice(self.data);
        while let Some(component) = path.next() {
            match TreeRefIter::from_bytes(buffer)
                .filter_map(Result::ok)
                .find(|entry| component.eq(entry.filename))
            {
                Some(entry) => {
                    if path.peek().is_none() {
                        // Last component — return an owned copy as `buffer` is reused.
                        return Ok(Some(entry.into()));
                    } else {
                        let next_id = entry.oid.to_owned();
                        let obj = odb.try_find(&next_id, buffer)?;
                        let Some(obj) = obj else { return Ok(None) };
                        if !obj.kind.is_tree() {
                            // An intermediate component resolved to a non-tree: path can't continue.
                            return Ok(None);
                        }
                    }
                }
                None => return Ok(None),
            }
        }
        Ok(None)
    }

    /// Like [`Self::lookup_entry()`], but takes any [`AsRef<Path>`](`std::path::Path`) directly via `relative_path`,
    /// a path relative to this tree.
    /// `odb` and `buffer` are used to lookup intermediate trees.
    ///
    /// # Note
    ///
    /// If any path component contains illformed UTF-8 and thus can't be converted to bytes on platforms which can't do so natively,
    /// the returned component will be empty which makes the lookup fail.
    pub fn lookup_entry_by_path(
        &self,
        odb: impl crate::Find,
        buffer: &'a mut Vec<u8>,
        relative_path: impl AsRef<std::path::Path>,
    ) -> Result<Option<tree::Entry>, crate::find::Error> {
        use crate::bstr::ByteSlice;
        self.lookup_entry(
            odb,
            buffer,
            relative_path.as_ref().components().map(|c: std::path::Component<'_>| {
                // Ill-formed UTF-8 becomes an empty component, which can never match.
                gix_path::os_str_into_bstr(c.as_os_str())
                    .unwrap_or_else(|_| "".into())
                    .as_bytes()
            }),
        )
    }
}

impl<'a> TreeRef<'a> {
    /// Deserialize a Tree from `data`.
    pub fn from_bytes(mut data: &'a [u8]) -> Result<TreeRef<'a>, crate::decode::Error> {
        let input = &mut data;
        match decode::tree.parse_next(input) {
            Ok(tag) => Ok(tag),
            Err(err) => Err(crate::decode::Error::with_err(err, input)),
        }
    }

    /// Find an entry named `name` knowing if the entry is a directory or not, using a binary search.
    ///
    /// Note that it's impossible to binary search by name alone as the sort order is special.
// NOTE(review): stripped generic argument lists restored from usage throughout
// (`Option<EntryRef<'a>>`, `Result<Self, Self::Error>`, `TryFrom<u32>`, etc.) —
// verify against upstream gix-object 0.46.1.
pub fn bisect_entry(&self, name: &BStr, is_dir: bool) -> Option<EntryRef<'a>> {
    // Sentinel id: only `mode` and `filename` participate in the ordering.
    static NULL_HASH: gix_hash::ObjectId = gix_hash::Kind::shortest().null();

    let search = EntryRef {
        mode: if is_dir {
            tree::EntryKind::Tree
        } else {
            tree::EntryKind::Blob
        }
        .into(),
        filename: name,
        oid: &NULL_HASH,
    };
    self.entries
        .binary_search_by(|e| e.cmp(&search))
        .ok()
        .map(|idx| self.entries[idx])
}

/// Create an instance of the empty tree.
///
/// It's particularly useful as static part of a program.
pub const fn empty() -> TreeRef<'static> {
    TreeRef { entries: Vec::new() }
}
}

impl<'a> TreeRefIter<'a> {
    /// Consume self and return all parsed entries.
    pub fn entries(self) -> Result<Vec<EntryRef<'a>>, crate::decode::Error> {
        self.collect()
    }
}

impl<'a> Iterator for TreeRefIter<'a> {
    type Item = Result<EntryRef<'a>, crate::decode::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.data.is_empty() {
            return None;
        }
        match decode::fast_entry(self.data) {
            Some((data_left, entry)) => {
                self.data = data_left;
                Some(Ok(entry))
            }
            None => {
                // Parsing failed: emit one error and terminate the iteration.
                let failing = self.data;
                self.data = &[];
                #[allow(clippy::unit_arg)]
                Some(Err(crate::decode::Error::with_err(
                    winnow::error::ErrMode::from_error_kind(&failing, winnow::error::ErrorKind::Verify),
                    failing,
                )))
            }
        }
    }
}

impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
    type Error = &'a [u8];

    fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
        mode_from_decimal(mode)
            .map(|(mode, _rest)| tree::EntryMode(mode as u16))
            .ok_or(mode)
    }
}

/// Parse an octal mode up to the space separator, returning the mode and the remainder past the space.
fn mode_from_decimal(i: &[u8]) -> Option<(u32, &[u8])> {
    let mut mode = 0u32;
    let mut spacer_pos = 1;
    for b in i.iter().take_while(|b| **b != b' ') {
        if *b < b'0' || *b > b'7' {
            return None;
        }
        mode = (mode << 3) + u32::from(b - b'0');
        spacer_pos += 1;
    }
    if i.len() < spacer_pos {
        return None;
    }
    let (_, i) = i.split_at(spacer_pos);
    Some((mode, i))
}

impl TryFrom<u32> for tree::EntryMode {
    type Error = u32;

    fn try_from(mode: u32) -> Result<Self, Self::Error> {
        Ok(match mode {
            0o40000 | 0o120000 | 0o160000 => tree::EntryMode(mode as u16),
            // Any mode with the regular-file bit set counts as a blob (e.g. 0o100644, 0o100755).
            blob_mode if blob_mode & 0o100000 == 0o100000 => tree::EntryMode(mode as u16),
            _ => return Err(mode),
        })
    }
}

mod decode {
    use bstr::ByteSlice;
    use winnow::{error::ParserError, prelude::*};

    use crate::{
        tree,
        tree::{ref_iter::mode_from_decimal, EntryRef},
        TreeRef,
    };

    /// Parse a single `mode SP filename NUL oid` record, returning the remainder and the entry.
    pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
        let (mode, i) = mode_from_decimal(i)?;
        let mode = tree::EntryMode::try_from(mode).ok()?;
        let (filename, i) = i.split_at(i.find_byte(0)?);
        let i = &i[1..];
        const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot
        let (oid, i) = match i.len() {
            len if len < HASH_LEN_FIXME => return None,
            _ => i.split_at(20),
        };
        Some((
            i,
            EntryRef {
                mode,
                filename: filename.as_bstr(),
                oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"),
            },
        ))
    }

    /// Parse a whole tree object into its entries.
    pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<TreeRef<'a>, E> {
        let mut out = Vec::new();
        let mut i = &**i;
        while !i.is_empty() {
            let Some((rest, entry)) = fast_entry(i) else {
                #[allow(clippy::unit_arg)]
                return Err(winnow::error::ErrMode::from_error_kind(
                    &i,
                    winnow::error::ErrorKind::Verify,
                ));
            };
            i = rest;
            out.push(entry);
        }
        Ok(TreeRef { entries: out })
    }
}
gix-object-0.46.1/src/tree/write.rs000064400000000000000000000064121046102023000152000ustar 00000000000000
use std::io;

use bstr::{BString, ByteSlice};

use crate::{
    encode::SPACE,
    tree::{Entry, EntryRef},
    Kind, Tree, TreeRef,
};

/// The Error used in [`Tree::write_to()`][crate::WriteTo::write_to()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    #[error("Nullbytes are invalid in file paths as they are separators: {name:?}")]
    NullbyteInFilename { name: BString },
}

impl From<Error> for io::Error {
    fn from(err: Error) -> Self {
        io::Error::new(io::ErrorKind::Other, err)
    }
}

/// Serialization
impl crate::WriteTo for Tree {
    /// Serialize this tree to `out` in the git internal format.
    fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> {
        // Writing an unsorted tree would produce an object other git tooling rejects.
        debug_assert_eq!(
            &self.entries,
            &{
                let mut entries_sorted = self.entries.clone();
                entries_sorted.sort();
                entries_sorted
            },
            "entries for serialization must be sorted by filename"
        );
        let mut buf = Default::default();
        for Entry { mode, filename, oid } in &self.entries {
            out.write_all(mode.as_bytes(&mut buf))?;
            out.write_all(SPACE)?;

            if filename.find_byte(0).is_some() {
                return Err(Error::NullbyteInFilename {
                    name: (*filename).to_owned(),
                }
                .into());
            }
            out.write_all(filename)?;
            out.write_all(b"\0")?;

            out.write_all(oid.as_bytes())?;
        }
        Ok(())
    }

    fn kind(&self) -> Kind {
        Kind::Tree
    }

    fn size(&self) -> u64 {
        let mut buf = Default::default();
        self.entries
            .iter()
            .map(|Entry { mode, filename, oid }| {
                (mode.as_bytes(&mut buf).len() + 1 + filename.len() + 1 + oid.as_bytes().len()) as u64
            })
            .sum()
    }
}

/// Serialization
impl crate::WriteTo for TreeRef<'_> {
    /// Serialize this tree to `out` in the git internal format.
    fn write_to(&self, out: &mut dyn io::Write) -> io::Result<()> {
        debug_assert_eq!(
            &{
                let mut entries_sorted = self.entries.clone();
                entries_sorted.sort();
                entries_sorted
            },
            &self.entries,
            "entries for serialization must be sorted by filename"
        );
        let mut buf = Default::default();
        for EntryRef { mode, filename, oid } in &self.entries {
            out.write_all(mode.as_bytes(&mut buf))?;
            out.write_all(SPACE)?;

            if filename.find_byte(0).is_some() {
                return Err(Error::NullbyteInFilename {
                    name: (*filename).to_owned(),
                }
                .into());
            }
            out.write_all(filename)?;
            out.write_all(b"\0")?;

            out.write_all(oid.as_bytes())?;
        }
        Ok(())
    }

    fn kind(&self) -> Kind {
        Kind::Tree
    }

    fn size(&self) -> u64 {
        let mut buf = Default::default();
        self.entries
            .iter()
            .map(|EntryRef { mode, filename, oid }| {
                (mode.as_bytes(&mut buf).len() + 1 + filename.len() + 1 + oid.as_bytes().len()) as u64
            })
            .sum()
    }
}