pdf-0.9.0/Cargo.lock0000644000000450530000000000100075750ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "adler32" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "aes" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "433cfd6710c9986c576a25ca913c39d66a6474107b406f34f91d4a8923395241" dependencies = [ "cfg-if", "cipher", "cpufeatures", ] [[package]] name = "ahash" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if", "once_cell", "version_check", ] [[package]] name = "async-trait" version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "block-buffer" version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" dependencies = [ "generic-array", ] [[package]] name = "block-padding" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a90ec2df9600c28a01c56c4784c9207a96d2451833aeceb8cc97e4c9548bb78" dependencies = [ "generic-array", ] [[package]] name = "cbc" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" dependencies = [ "cipher", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cipher" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1873270f8f7942c191139cb8a40fd228da6c3fd2fc376d7e92d47aa14aeb59e" dependencies = [ "crypto-common", "inout", ] [[package]] name = "core2" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" dependencies = [ "memchr", ] [[package]] name = "cpufeatures" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" dependencies = [ "libc", ] [[package]] name = "crc32fast" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if", ] [[package]] name = "crossbeam-channel" version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" version = "0.8.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "lazy_static", "memoffset", "scopeguard", ] [[package]] name = "crossbeam-utils" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" dependencies = [ "cfg-if", "lazy_static", ] [[package]] name = "crypto-common" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" dependencies = [ "generic-array", "typenum", ] [[package]] name = "dary_heap" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" [[package]] name = "datasize" version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3319c13ed12c1ce89494db62541bc66759c8870c3562bdf7b25b930420a00432" dependencies = [ "datasize_derive", ] [[package]] name = "datasize_derive" version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4abd50b37ab87677c31190aad6b4186be9993a618ff753c4b007551de6841ee8" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "deflate" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c86f7e25f518f4b81808a2cf1c50996a61f5c2eb394b2393bd87f2a4780a432f" dependencies = [ "adler32", ] [[package]] name = "digest" version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" dependencies = [ "block-buffer", "crypto-common", ] [[package]] name = "doc-comment" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "equivalent" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "euclid" version = "0.22.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b52c2ef4a78da0ba68fbe1fd920627411096d2ac478f7f4c9f3a54ba6705bade" dependencies = [ "num-traits", ] [[package]] name = "fastrand" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" dependencies = [ "instant", ] [[package]] name = "fax" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2cec1797683c06c2f3de5edb3fde4d99c70e96f3204f6aaff944078353e5c55" dependencies = [ "fax_derive", ] [[package]] name = "fax_derive" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c1d7ffc9f2dc8316348c75281a99c8fdc60c1ddf4f82a366d117bf1b74d5a39" 
dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "generic-array" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" dependencies = [ "typenum", "version_check", ] [[package]] name = "glob" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "globalcache" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469dba5c15b33d67400508ff1f640e8906fa6c8d5ee80540203eb9029ce475df" dependencies = [ "async-trait", ] [[package]] name = "hashbrown" version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ "ahash", ] [[package]] name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" [[package]] name = "heck" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "indexmap" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ "equivalent", "hashbrown 0.14.3", ] [[package]] name = "inout" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" dependencies = [ "block-padding", "generic-array", ] [[package]] name = "instant" version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ "cfg-if", ] [[package]] name = "istring" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80fa5ca8bf0d6cc95e94ac834a7c27da3daacd089dc6284d85df18db23510664" dependencies = [ "datasize", ] [[package]] name = "itertools" version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" dependencies = [ "either", ] [[package]] name = "jpeg-decoder" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e" dependencies = [ "rayon", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" [[package]] name = "libflate" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7d5654ae1795afc7ff76f4365c2c8791b0feb18e8996a96adad8ffd7c3b2bf" dependencies = [ "adler32", "core2", "crc32fast", "dary_heap", 
"libflate_lz77", ] [[package]] name = "libflate_lz77" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be5f52fb8c451576ec6b79d3f4deb327398bc05bbdbd99021a6e77a4c855d524" dependencies = [ "core2", "hashbrown 0.13.2", "rle-decode-fast", ] [[package]] name = "log" version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] [[package]] name = "md5" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memmap2" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" dependencies = [ "libc", ] [[package]] name = "memoffset" version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" dependencies = [ "autocfg", ] [[package]] name = "num-traits" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi", "libc", ] [[package]] name = "once_cell" version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "pdf" version = "0.9.0" dependencies = [ "aes", "bitflags", "cbc", "datasize", "deflate", "euclid", "fax", "glob", "globalcache", "indexmap", "istring", "itertools", "jpeg-decoder", "libflate", "log", "md5", "memmap2", "once_cell", "pdf_derive", "sha2", "snafu", "stringprep", "tempfile", "weezl", ] [[package]] name = "pdf_derive" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1038b9cb38dec35eeee9f23eacfb2480087982f9b7e9221efa8034eea9ca2360" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "proc-macro2" version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" dependencies = [ "autocfg", "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", "num_cpus", ] [[package]] name = 
"redox_syscall" version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" dependencies = [ "bitflags", ] [[package]] name = "remove_dir_all" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" dependencies = [ "winapi", ] [[package]] name = "rle-decode-fast" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "sha2" version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" dependencies = [ "cfg-if", "cpufeatures", "digest", ] [[package]] name = "snafu" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5177903bf45656592d9eb5c0e22f408fc023aae51dbe2088889b71633ba451f2" dependencies = [ "doc-comment", "snafu-derive", ] [[package]] name = "snafu-derive" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "410b26ed97440d90ced3e2488c868d56a86e2064f5d7d6f417909b286afe25e5" dependencies = [ "heck", "proc-macro2", "quote", "syn", ] [[package]] name = "stringprep" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1" dependencies = [ "unicode-bidi", "unicode-normalization", ] [[package]] name = "syn" version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tempfile" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" dependencies = [ "cfg-if", "fastrand", "libc", "redox_syscall", "remove_dir_all", "winapi", ] [[package]] name = "tinyvec" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" dependencies = [ "tinyvec_macros", ] [[package]] name = "tinyvec_macros" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "typenum" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" [[package]] name = "unicode-bidi" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" [[package]] name = "unicode-ident" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" [[package]] name = "unicode-normalization" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" dependencies = [ "tinyvec", ] [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "weezl" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c97e489d8f836838d497091de568cf16b117486d529ec5579233521065bd5e4" [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" pdf-0.9.0/Cargo.toml0000644000000043520000000000100076150ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "pdf" version = "0.9.0" authors = [ "Erlend Langseth <3rlendhl@gmail.com>", "Sebastian Köln ", ] description = "PDF reader" documentation = "https://docs.rs/pdf" readme = "README.md" keywords = ["pdf"] license = "MIT" repository = "https://github.com/pdf-rs/pdf" [lib] doctest = false [[example]] name = "content" [[example]] name = "metadata" [[example]] name = "names" [[example]] name = "read" [[example]] name = "other_page_content" [dependencies.aes] version = "0.8.2" [dependencies.bitflags] version = "1.3" [dependencies.cbc] version = "0.1" [dependencies.datasize] version = "0.2.13" [dependencies.deflate] version = "1.0.0" [dependencies.euclid] version = "0.22.7" optional = true [dependencies.fax] version = "0.2.0" [dependencies.globalcache] version = "0.2" features = ["sync"] optional = true [dependencies.indexmap] version = "2.1.0" [dependencies.istring] version = "0.3.3" features = [ "std", "size", ] [dependencies.itertools] version = "0.10.0" [dependencies.jpeg-decoder] version = "0.3.0" default-features = false [dependencies.libflate] version = "2.0.0" [dependencies.log] version = "0.4.14" [dependencies.md5] version = "0.7" [dependencies.memmap2] version = "0.5.0" optional = true [dependencies.once_cell] version = "1.5.2" [dependencies.pdf_derive] version = "0.2.0" [dependencies.sha2] version = "0.10.2" [dependencies.snafu] version = "0.7.1" [dependencies.stringprep] version = "0.1.2" [dependencies.tempfile] version = "3.2.0" optional = true [dependencies.weezl] version = "0.1.4" [dev-dependencies.glob] version = "0.3.0" [features] cache = ["globalcache"] default = [ "sync", "cache", ] dump = ["tempfile"] mmap = ["memmap2"] sync = [] threads = ["jpeg-decoder/default"] pdf-0.9.0/Cargo.toml.orig000064400000000000000000000025321046102023000132740ustar 00000000000000[package] name = 
"pdf" version = "0.9.0" authors = ["Erlend Langseth <3rlendhl@gmail.com>", "Sebastian Köln "] repository = "https://github.com/pdf-rs/pdf" readme = "../README.md" keywords = ["pdf"] license = "MIT" documentation = "https://docs.rs/pdf" edition = "2018" description = "PDF reader" [features] mmap = ["memmap2"] dump = ["tempfile"] threads = ["jpeg-decoder/default"] sync = [] cache = ["globalcache"] default = ["sync", "cache"] [dependencies] pdf_derive = { version = "0.2.0", path = "../pdf_derive" } snafu = "0.7.1" libflate = "2.0.0" deflate = "1.0.0" itertools = "0.10.0" memmap2 = { version = "0.5.0", optional = true } weezl = "0.1.4" once_cell = "1.5.2" log = "0.4.14" tempfile = { version = "3.2.0", optional = true } md5 = "0.7" jpeg-decoder = { version = "0.3.0", default-features = false } aes = "0.8.2" cbc = "0.1" stringprep = "0.1.2" sha2 = "0.10.2" fax = "0.2.0" euclid = { version = "0.22.7", optional = true } bitflags = "1.3" istring = { version = "0.3.3", features = ["std", "size"] } datasize = "0.2.13" globalcache = { version = "0.2", features = ["sync"], optional = true } indexmap = "2.1.0" [dev-dependencies] glob = "0.3.0" [lib] doctest = false [[example]] name = "content" [[example]] name = "metadata" [[example]] name = "names" [[example]] name = "read" [[example]] name = "other_page_content" pdf-0.9.0/README.md000064400000000000000000000026561046102023000116730ustar 00000000000000# pdf-rs [![test](https://github.com/pdf-rs/pdf/actions/workflows/test.yml/badge.svg)](https://github.com/pdf-rs/pdf/actions/workflows/test.yml) [![clippy&fmt](https://github.com/pdf-rs/pdf/actions/workflows/lint.yml/badge.svg)](https://github.com/pdf-rs/pdf/actions/workflows/lint.yml) Read, alter and write PDF files. Modifying and writing PDFs is still experimental. One easy way you can contribute is to add different PDF files to `tests/files` and see if they pass the tests (`cargo test`). Feel free to contribute with ideas, issues or code! Please join [us on Zulip](https://type.zulipchat.com/#narrow/stream/209232-pdf) if you have any questions or problems. # Workspace This repository uses a Cargo Workspace and default members. This means by default only the `pdf` library is build. To build additional parts, pass `--package=read` to build the subcrate you are interested in (here the `read` example). # Examples Examples are located in `pdf/examples/` and can be executed using: ``` cargo run --example {content,metadata,names,read,text} -- ``` # Renderer and Viewer A library for rendering PDFs via [Pathfinder](https://github.com/servo/pathfinder) and minimal viewer can be found [here](https://github.com/pdf-rs/pdf_render). # Inspect There is a tool for visualizing a PDF file as an interactive hierarchy of primitives at [inspect-prim](https://github.com/pdf-rs/inspect-prim). Just clone and `cargo run`. pdf-0.9.0/examples/content.rs000064400000000000000000000030051046102023000142370ustar 00000000000000use std::env; use std::path::PathBuf; use pdf::error::PdfError; use pdf::content::*; use pdf::file::FileOptions; use pdf::object::*; use pdf::build::*; use pdf::primitive::PdfString; fn main() -> Result<(), PdfError> { let path = PathBuf::from(env::args_os().nth(1).expect("no file given")); let mut builder = PdfBuilder::new(FileOptions::cached()); let mut pages = Vec::new(); let content = Content::from_ops(vec![ Op::MoveTo { p: Point { x: 100., y: 100. } }, Op::LineTo { p: Point { x: 100., y: 200. } }, Op::LineTo { p: Point { x: 200., y: 200. } }, Op::LineTo { p: Point { x: 200., y: 100. 
        Op::Close,
        Op::Stroke,
    ]);
    let mut new_page = PageBuilder::from_content(content, &NoResolve)?;
    new_page.media_box = Some(pdf::object::Rect {
        left: 0.0,
        top: 0.0,
        bottom: 400.0,
        right: 400.0
    });
    let resources = Resources::default();
    /*
    let font = Font {
        name: Some("Test".into()),
        subtype: pdf::font::FontType::TrueType,
        data: FontData::TrueType(TFont {
            base_font: None,
        })
    }
    resources.fonts.insert("f1", font);
    */
    new_page.resources = resources;
    pages.push(new_page);
    let catalog = CatalogBuilder::from_pages(pages);

    let mut info = InfoDict::default();
    info.title = Some(PdfString::from("test"));

    let data = builder.info(info).build(catalog)?;
    std::fs::write(path, data)?;
    Ok(())
}

pdf-0.9.0/examples/metadata.rs
use std::env::args;
use pdf::error::PdfError;
use pdf::file::{FileOptions};
use pdf::object::{FieldDictionary, FieldType, Resolve};

/// extract and print a PDF's metadata
fn main() -> Result<(), PdfError> {
    let path = args()
        .nth(1)
        .expect("Please provide a file path to the PDF you want to explore.");
    let file = FileOptions::cached().open(&path).unwrap();
    let resolver = file.resolver();

    if let Some(ref info) = file.trailer.info_dict {
        dbg!(info);
    }

    if let Some(ref forms) = file.get_root().forms {
        for field in forms.fields.iter() {
            print_field(field, &resolver);
        }
    }
    Ok(())
}

fn print_field(field: &FieldDictionary, resolve: &impl Resolve) {
    if field.typ == Some(FieldType::Signature) {
        println!("{:?}", field);
    }
    for &kid in field.kids.iter() {
        let child = resolve.get(kid).unwrap();
        print_field(&child, resolve);
    }
}
{:?}", indent, page_nr); } if let Some(ref a) = node.se { println!("{} -> {:?}", indent, a); } if let Some(entry_ref) = node.first { let entry = r.get(entry_ref).unwrap(); walk_outline(r, entry, name_map, page_map, depth + 1); } if let Some(entry_ref) = node.next { node = r.get(entry_ref).unwrap(); continue; } break; } } fn main() { let path = args().nth(1).expect("no file given"); println!("read: {}", path); let file = FileOptions::cached().open(&path).unwrap(); let resolver = file.resolver(); let catalog = file.get_root(); let mut pages_map: HashMap = HashMap::new(); let mut count = 0; let mut dests_cb = |key: &PdfString, val: &Option| { //println!("{:?} {:?}", key, val); if let Some(Dest { page: Some(page), ..}) = val { pages_map.insert(key.to_string_lossy(), page.get_inner()); } count += 1; }; if let Some(ref names) = catalog.names { if let Some(ref dests) = names.dests { dests.walk(&resolver, &mut dests_cb).unwrap(); } } let mut pages = HashMap::new(); fn add_tree(r: &impl Resolve, pages: &mut HashMap, tree: &PageTree, current_page: &mut usize) { for &node_ref in &tree.kids { let node = r.get(node_ref).unwrap(); match *node { PagesNode::Tree(ref tree) => { add_tree(r, pages, tree, current_page); } PagesNode::Leaf(ref _page) => { pages.insert(node_ref.get_inner(), *current_page); *current_page += 1; } } } } add_tree(&resolver, &mut pages, &catalog.pages, &mut 0); let get_page_nr = |name: &str| -> usize { let page = pages_map[name]; pages[&page] }; let page_nr = |r: PlainRef| -> usize { pages[&r] }; if let Some(ref outlines) = catalog.outlines { if let Some(entry_ref) = outlines.first { let entry = resolver.get(entry_ref).unwrap(); walk_outline(&resolver, entry, &get_page_nr, &page_nr, 0); } } println!("{} items", count); } pdf-0.9.0/examples/other_page_content.rs000064400000000000000000000111121046102023000164320ustar 00000000000000use pdf::content::Rect; use pdf::error::PdfError; use pdf::file::FileOptions; use pdf::object::Resolve; use pdf::primitive::{Dictionary, Primitive}; use std::env::args; /// Extract data from a page entry that is under "other". /// This example looks for stikethroughs in the annotations entry /// and returns a Vec for the bounds of the struckthrough text. 
fn main() -> Result<(), PdfError> {
    let path = args()
        .nth(1)
        .expect("Please provide a file path to the PDF you want to explore.");
    let file = FileOptions::cached().open(&path).unwrap();
    let resolver = file.resolver();
    for (i, page) in file.pages().enumerate() {
        let page = page.unwrap();
        let strikethroughs = annotation_strikethrough(&page.other, &resolver)?;
        println!(
            "Found {} strikethrough annotations on page {}.",
            strikethroughs.len(),
            i + 1
        );
        for strikethrough in strikethroughs {
            println!();
            println!("Struck text:");
            println!("{:#?}", strikethrough.0);
            println!();
            println!("Text spans {} lines", strikethrough.1.len());
            println!();
            println!("Strikethrough bounding boxes:");
            for rect in strikethrough.1 {
                println!("{:#?}", rect);
                println!();
            }
            println!();
            println!();
        }
    }
    Ok(())
}

fn annotation_strikethrough(
    other_dict: &Dictionary,
    resolver: &impl Resolve,
) -> Result<Vec<(String, Vec<Rect>)>, PdfError> {
    let mut strikethroughs: Vec<(String, Vec<Rect>)> = Vec::new();
    if !other_dict.is_empty() {
        let annotations = other_dict.get("Annots".into());
        if let Some(annotations) = annotations {
            let annotations_resolved = annotations.clone().resolve(resolver)?;
            let annotations_array = annotations_resolved.into_array()?;
            for annotation in annotations_array.iter() {
                let mut paths: Vec<Rect> = Vec::new();
                let annotation_resolved = annotation.clone().resolve(resolver)?;
                let annotation_dict = annotation_resolved.into_dictionary()?;
                // If you have a multiline strikethrough, "Rect" will be the bounding
                // box around all the strikethrough lines.
                // "QuadPoints" gives 8 numbers (four x,y corners) for each line that
                // is struck through, so if a single annotation involves text on two
                // lines, QuadPoints should have 16 values in it. It starts with
                // bottom left and runs counter-clockwise.
                let subtype = annotation_dict.get("Subtype".into());
                if let Some(subtype) = subtype {
                    let subtype = subtype.clone().into_name()?;
                    if subtype.as_str() == "StrikeOut" {
                        let rects = annotation_dict.get("QuadPoints".into());
                        let text = annotation_dict.get("Contents".into());
                        if let (Some(rects), Some(text)) = (rects, text) {
                            let text = text.to_string()?;
                            // Check multiples of 8.
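                            // For example (hypothetical values), a single-line strikeout
                            // following the layout described above could carry
                            //   /QuadPoints [ 10 100 200 100 200 110 10 110 ]
                            // i.e. (10,100) bottom-left, then counter-clockwise.
                            // The code below only relies on indices 0, 1, 2 and 7:
                            // x = 10, y = 100, width = 200-10, height = 110-100.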
                            let rects_array = rects.clone().into_array()?;
                            if rects_array.len() % 8 == 0 {
                                let rects: Vec<Vec<Primitive>> =
                                    rects_array.chunks(8).map(|chunk| chunk.to_vec()).collect();
                                for rect in rects {
                                    let mut quad_points: Vec<f32> = Vec::new();
                                    for num in rect {
                                        let number = num.as_number()?;
                                        quad_points.push(number);
                                    }
                                    if quad_points.len() == 8 {
                                        paths.push(Rect {
                                            x: quad_points[0],
                                            y: quad_points[1],
                                            width: quad_points[2] - quad_points[0],
                                            height: quad_points[7] - quad_points[1],
                                        });
                                    }
                                }
                                strikethroughs.push((text, paths))
                            }
                        }
                    }
                }
            }
        }
    }
    Ok(strikethroughs)
}

pdf-0.9.0/examples/read.rs
extern crate pdf;

use std::env::args;
use std::time::SystemTime;
use std::fs;
use std::collections::HashMap;
use pdf::file::{FileOptions, Log};
use pdf::object::*;
use pdf::primitive::Primitive;
use pdf::error::PdfError;
use pdf::enc::StreamFilter;

struct VerboseLog;
impl Log for VerboseLog {
    fn load_object(&self, r: PlainRef) {
        println!("load {r:?}");
    }
    fn log_get(&self, r: PlainRef) {
        println!("get {r:?}");
    }
}

fn main() -> Result<(), PdfError> {
    let path = args().nth(1).expect("no file given");
    println!("read: {}", path);

    let now = SystemTime::now();

    let file = FileOptions::cached().log(VerboseLog).open(&path).unwrap();
    let resolver = file.resolver();

    if let Some(ref info) = file.trailer.info_dict {
        let title = info.title.as_ref().map(|p| p.to_string_lossy());
        let author = info.author.as_ref().map(|p| p.to_string_lossy());

        let descr = match (title, author) {
            (Some(title), None) => title,
            (None, Some(author)) => format!("[no title] – {}", author),
            (Some(title), Some(author)) => format!("{} – {}", title, author),
            _ => "PDF".into()
        };
        println!("{}", descr);
    }

    let mut images: Vec<_> = vec![];
    let mut fonts = HashMap::new();

    for page in file.pages() {
        let page = page.unwrap();
        let resources = page.resources().unwrap();
        for (i, font) in resources.fonts.values().enumerate() {
            let name = match &font.name {
                Some(name) => name.as_str().into(),
                None => i.to_string(),
            };
            fonts.insert(name, font.clone());
        }
        images.extend(resources.xobjects.iter().map(|(_name, &r)| resolver.get(r).unwrap())
            .filter(|o| matches!(**o, XObject::Image(_)))
        );
    }

    for (i, o) in images.iter().enumerate() {
        let img = match **o {
            XObject::Image(ref im) => im,
            _ => continue
        };
        let (data, filter) = img.raw_image_data(&resolver)?;
        let ext = match filter {
            Some(StreamFilter::DCTDecode(_)) => "jpeg",
            Some(StreamFilter::JBIG2Decode(_)) => "jbig2",
            Some(StreamFilter::JPXDecode) => "jp2k",
            Some(StreamFilter::FlateDecode(_)) => "png",
            _ => continue,
        };

        let fname = format!("extracted_image_{}.{}", i, ext);

        fs::write(fname.as_str(), data).unwrap();
        println!("Wrote file {}", fname);
    }
    println!("Found {} image(s).", images.len());

    for (name, font) in fonts.iter() {
        let fname = format!("font_{}", name);
        if let Some(Ok(data)) = font.embedded_data(&resolver) {
            fs::write(fname.as_str(), data).unwrap();
            println!("Wrote file {}", fname);
        }
    }
    println!("Found {} font(s).", fonts.len());

    if let Some(ref forms) = file.get_root().forms {
        println!("Forms:");
        for field in forms.fields.iter() {
            print!("  {:?} = ", field.name);
            match field.value {
                Primitive::String(ref s) => println!("{}", s.to_string_lossy()),
                Primitive::Integer(i) => println!("{}", i),
                Primitive::Name(ref s) => println!("{}", s),
                ref p => println!("{:?}", p),
            }
        }
    }

    if let Ok(elapsed) = now.elapsed() {
        println!("Time: {}s", elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 * 1e-9);
    }
    Ok(())
}
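// Usage sketch (the path is illustrative):
//   cargo run --example read -- tests/files/example.pdf
// This prints title/author, writes any embedded images and fonts to the
// current directory, dumps form field values, and reports the time taken.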
pdf-0.9.0/src/any.rs
use std::any::TypeId;
use std::rc::Rc;
use std::sync::Arc;
use datasize::DataSize;
use crate::object::{Object};
use crate::error::{Result, PdfError};

pub trait AnyObject {
    fn type_name(&self) -> &'static str;
    fn type_id(&self) -> TypeId;
    fn size(&self) -> usize;
}

#[repr(transparent)]
pub struct NoSize<T>(T);

impl<T: 'static> AnyObject for NoSize<T> {
    fn size(&self) -> usize {
        0
    }
    fn type_id(&self) -> TypeId {
        TypeId::of::<T>()
    }
    fn type_name(&self) -> &'static str {
        std::any::type_name::<T>()
    }
}

#[repr(transparent)]
pub struct WithSize<T>(T);

impl<T: DataSize + 'static> AnyObject for WithSize<T> {
    fn size(&self) -> usize {
        datasize::data_size(&self.0)
    }
    fn type_id(&self) -> TypeId {
        TypeId::of::<T>()
    }
    fn type_name(&self) -> &'static str {
        std::any::type_name::<T>()
    }
}

#[derive(DataSize)]
pub struct Any(Rc<dyn AnyObject>);
impl Any {
    pub fn downcast<T>(self) -> Result<Rc<T>>
        where T: AnyObject + 'static
    {
        if TypeId::of::<T>() == self.0.type_id() {
            unsafe {
                let raw: *const dyn AnyObject = Rc::into_raw(self.0);
                Ok(Rc::from_raw(raw as *const T))
            }
        } else {
            Err(type_mismatch::<T>(self.0.type_name()))
        }
    }
    pub fn new<T>(rc: Rc<T>) -> Any
        where WithSize<T>: AnyObject, T: 'static
    {
        Any(unsafe { std::mem::transmute::<Rc<T>, Rc<WithSize<T>>>(rc) } as _)
    }
    pub fn new_without_size<T>(rc: Rc<T>) -> Any
        where NoSize<T>: AnyObject, T: 'static
    {
        Any(unsafe { std::mem::transmute::<Rc<T>, Rc<NoSize<T>>>(rc) } as _)
    }
    pub fn type_name(&self) -> &'static str {
        self.0.type_name()
    }
}

#[derive(Clone, DataSize)]
pub struct AnySync(Arc<dyn AnyObject + Sync + Send>);

#[cfg(feature="cache")]
impl globalcache::ValueSize for AnySync {
    #[inline]
    fn size(&self) -> usize {
        self.0.size()
    }
}

impl AnySync {
    pub fn downcast<T>(self) -> Result<Arc<T>>
        where T: 'static
    {
        if TypeId::of::<T>() == self.0.type_id() {
            unsafe {
                let raw: *const (dyn AnyObject+Sync+Send) = Arc::into_raw(self.0);
                Ok(Arc::from_raw(raw as *const T))
            }
        } else {
            Err(type_mismatch::<T>(self.0.type_name()))
        }
    }
    pub fn new<T>(arc: Arc<T>) -> AnySync
        where WithSize<T>: AnyObject, T: Sync + Send + 'static
    {
        AnySync(unsafe { std::mem::transmute::<Arc<T>, Arc<WithSize<T>>>(arc) } as _)
    }
    pub fn new_without_size<T>(arc: Arc<T>) -> AnySync
        where NoSize<T>: AnyObject, T: Sync + Send + 'static
    {
        AnySync(unsafe { std::mem::transmute::<Arc<T>, Arc<NoSize<T>>>(arc) } as _)
    }
    pub fn type_name(&self) -> &'static str {
        self.0.type_name()
    }
}

fn type_mismatch<T>(name: &str) -> PdfError {
    PdfError::Other { msg: format!("expected {}, found {}", std::any::type_name::<T>(), name) }
}

pdf-0.9.0/src/backend.rs
use crate::error::*;
use crate::parser::Lexer;
use crate::parser::read_xref_and_trailer_at;
use crate::xref::XRefTable;
use crate::primitive::Dictionary;
use crate::object::*;
use std::ops::Deref;
use std::ops::{RangeFull, RangeFrom, RangeTo, Range};

pub const MAX_ID: u32 = 1_000_000;

pub trait Backend: Sized {
    fn read<T: IndexRange>(&self, range: T) -> Result<&[u8]>;
    //fn write<T: IndexRange>(&mut self, range: T) -> Result<&mut [u8]>;
    fn len(&self) -> usize;
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the offset of the beginning of the file, i.e., where the `%PDF-1.5` header is.
    /// (currently only used internally!)
    fn locate_start_offset(&self) -> Result<usize> {
        // Read from the beginning of the file, and look for the header.
        // Implementation note 13 in version 1.7 of the PDF reference says that Acrobat viewers
        // expect the header to be within the first 1KB of the file, so we do the same here.
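        // (Some real-world files prepend junk — e.g. an HTTP response or a
        // printer preamble — before "%PDF-". The offset found here is later
        // added to every byte position derived from the xref machinery, see
        // read_xref_table_and_trailer below.)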
const HEADER: &[u8] = b"%PDF-"; let buf = t!(self.read(..std::cmp::min(1024, self.len()))); buf .windows(HEADER.len()) .position(|window| window == HEADER) .ok_or_else(|| PdfError::Other{ msg: "file header is missing".to_string() }) } /// Returns the value of startxref (currently only used internally!) fn locate_xref_offset(&self) -> Result { // locate the xref offset at the end of the file // `\nPOS\n%%EOF` where POS is the position encoded as base 10 integer. // u64::MAX has 20 digits + \n\n(2) + %%EOF(5) = 27 bytes max. let mut lexer = Lexer::new(t!(self.read(..))); lexer.set_pos_from_end(0); t!(lexer.seek_substr_back(b"startxref")); t!(lexer.next()).to::() } /// Used internally by File, but could also be useful for applications that want to look at the raw PDF objects. fn read_xref_table_and_trailer(&self, start_offset: usize, resolve: &impl Resolve) -> Result<(XRefTable, Dictionary)> { let xref_offset = t!(self.locate_xref_offset()); let pos = t!(start_offset.checked_add(xref_offset).ok_or(PdfError::Invalid)); if pos >= self.len() { bail!("XRef offset outside file bounds"); } let mut lexer = Lexer::with_offset(t!(self.read(pos ..)), pos); let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve)); let highest_id = t!(trailer.get("Size") .ok_or_else(|| PdfError::MissingEntry {field: "Size".into(), typ: "XRefTable"})? .as_u32()); if highest_id > MAX_ID { bail!("too many objects"); } let mut refs = XRefTable::new(highest_id as ObjNr); for section in xref_sections { refs.add_entries_from(section)?; } let mut prev_trailer = { match trailer.get("Prev") { Some(p) => Some(t!(p.as_usize())), None => None } }; trace!("READ XREF AND TABLE"); let mut seen = vec![]; while let Some(prev_xref_offset) = prev_trailer { if seen.contains(&prev_xref_offset) { bail!("xref offsets loop"); } seen.push(prev_xref_offset); let pos = t!(start_offset.checked_add(prev_xref_offset).ok_or(PdfError::Invalid)); let mut lexer = Lexer::with_offset(t!(self.read(pos..)), pos); let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve)); for section in xref_sections { refs.add_entries_from(section)?; } prev_trailer = { match trailer.get("Prev") { Some(p) => { let prev = t!(p.as_usize()); Some(prev) } None => None } }; } Ok((refs, trailer)) } } impl Backend for T where T: Deref { //+ DerefMut { fn read(&self, range: R) -> Result<&[u8]> { let r = t!(range.to_range(self.len())); Ok(&self[r]) } /* fn write(&mut self, range: R) -> Result<&mut [u8]> { let r = range.to_range(self.len())?; Ok(&mut self[r]) } */ fn len(&self) -> usize { (**self).len() } } /// `IndexRange` is implemented by Rust's built-in range types, produced /// by range syntax like `..`, `a..`, `..b` or `c..d`. pub trait IndexRange { /// Start index (inclusive) fn start(&self) -> Option; /// End index (exclusive) fn end(&self) -> Option; /// `len`: the size of whatever container that is being indexed fn to_range(&self, len: usize) -> Result> { match (self.start(), self.end()) { (None, None) => Ok(0 .. len), (Some(start), None) if start <= len => Ok(start .. len), (None, Some(end)) if end <= len => Ok(0 .. end), (Some(start), Some(end)) if start <= end && end <= len => Ok(start .. 
impl<T> Backend for T where T: Deref<Target=[u8]> { //+ DerefMut
    fn read<R: IndexRange>(&self, range: R) -> Result<&[u8]> {
        let r = t!(range.to_range(self.len()));
        Ok(&self[r])
    }
    /*
    fn write<R: IndexRange>(&mut self, range: R) -> Result<&mut [u8]> {
        let r = range.to_range(self.len())?;
        Ok(&mut self[r])
    }
    */
    fn len(&self) -> usize {
        (**self).len()
    }
}

/// `IndexRange` is implemented by Rust's built-in range types, produced
/// by range syntax like `..`, `a..`, `..b` or `c..d`.
pub trait IndexRange {
    /// Start index (inclusive)
    fn start(&self) -> Option<usize>;
    /// End index (exclusive)
    fn end(&self) -> Option<usize>;

    /// `len`: the size of whatever container that is being indexed
    fn to_range(&self, len: usize) -> Result<Range<usize>> {
        match (self.start(), self.end()) {
            (None, None) => Ok(0 .. len),
            (Some(start), None) if start <= len => Ok(start .. len),
            (None, Some(end)) if end <= len => Ok(0 .. end),
            (Some(start), Some(end)) if start <= end && end <= len => Ok(start .. end),
            _ => Err(PdfError::ContentReadPastBoundary)
        }
    }
}

impl IndexRange for RangeFull {
    #[inline]
    fn start(&self) -> Option<usize> { None }
    #[inline]
    fn end(&self) -> Option<usize> { None }
}

impl IndexRange for RangeFrom<usize> {
    #[inline]
    fn start(&self) -> Option<usize> { Some(self.start) }
    #[inline]
    fn end(&self) -> Option<usize> { None }
}

impl IndexRange for RangeTo<usize> {
    #[inline]
    fn start(&self) -> Option<usize> { None }
    #[inline]
    fn end(&self) -> Option<usize> { Some(self.end) }
}

impl IndexRange for Range<usize> {
    #[inline]
    fn start(&self) -> Option<usize> { Some(self.start) }
    #[inline]
    fn end(&self) -> Option<usize> { Some(self.end) }
}

pdf-0.9.0/src/build.rs
use std::collections::HashMap;
use std::collections::HashSet;
use std::ops::Range;
use std::sync::Arc;
use datasize::DataSize;
use crate::PdfError;
use crate::any::AnySync;
use crate::enc::StreamFilter;
use crate::file::Cache;
use crate::file::FileOptions;
use crate::file::Log;
use crate::file::Storage;
use crate::file::Trailer;
use crate::object::*;
use crate::content::*;
use crate::error::Result;
use crate::parser::ParseFlags;
use crate::primitive::Dictionary;
use crate::primitive::Primitive;

#[derive(Default)]
pub struct PageBuilder {
    pub ops: Vec<Op>,
    pub media_box: Option<Rect>,
    pub crop_box: Option<Rect>,
    pub trim_box: Option<Rect>,
    pub resources: Resources,
    pub rotate: i32,
    pub metadata: Option<Primitive>,
    pub lgi: Option<Primitive>,
    pub vp: Option<Primitive>,
    pub other: Dictionary,
}
impl PageBuilder {
    pub fn from_content(content: Content, resolve: &impl Resolve) -> Result<PageBuilder> {
        Ok(PageBuilder {
            ops: content.operations(resolve)?,
            .. PageBuilder::default()
        })
    }
    pub fn from_page(page: &Page, resolve: &impl Resolve) -> Result<PageBuilder> {
        Ok(PageBuilder {
            ops: page.contents.as_ref().map(|c| c.operations(resolve)).transpose()?.unwrap_or_default(),
            media_box: Some(page.media_box()?),
            crop_box: Some(page.crop_box()?),
            trim_box: page.trim_box,
            resources: (**page.resources()?.data()).clone(),
            rotate: page.rotate,
            metadata: page.metadata.clone(),
            lgi: page.lgi.clone(),
            vp: page.vp.clone(),
            other: page.other.clone(),
        })
    }
    pub fn clone_page(page: &Page, cloner: &mut impl Cloner) -> Result<PageBuilder> {
        let old_resources = &**page.resources()?.data();
        let mut resources = Resources::default();
        let ops = page.contents.as_ref()
            .map(|content| content.operations(cloner)).transpose()?
            .map(|ops| {
                ops.into_iter().map(|op| -> Result<Op> {
                    deep_clone_op(&op, cloner, old_resources, &mut resources)
                }).collect()
            })
            .transpose()?
            .unwrap_or_default();
        Ok(PageBuilder {
            ops,
            media_box: Some(page.media_box()?),
            crop_box: Some(page.crop_box()?),
            trim_box: page.trim_box,
            resources,
            rotate: page.rotate,
            metadata: page.metadata.deep_clone(cloner)?,
            lgi: page.lgi.deep_clone(cloner)?,
            vp: page.vp.deep_clone(cloner)?,
            other: page.other.deep_clone(cloner)?,
        })
    }
    pub fn size(&mut self, width: f32, height: f32) {
        self.media_box = Some(Rect {
            top: 0.,
            left: 0.,
            bottom: height,
            right: width,
        });
    }
}

pub struct CatalogBuilder {
    pages: Vec<PageBuilder>
}
impl CatalogBuilder {
    pub fn from_pages(pages: Vec<PageBuilder>) -> CatalogBuilder {
        CatalogBuilder {
            pages
        }
    }
    pub fn build(self, update: &mut impl Updater) -> Result<Catalog> {
        let kids_promise: Vec<_> = self.pages.iter()
            .map(|_page| update.promise::<PagesNode>())
            .collect();
        let kids: Vec<_> = kids_promise.iter()
            .map(|p| Ref::new(p.get_inner()))
            .collect();

        let tree = PagesRc::create(PageTree {
            parent: None,
            count: kids.len() as _,
            kids,
            resources: None,
            media_box: None,
            crop_box: None
        }, update)?;

        for (page, promise) in self.pages.into_iter().zip(kids_promise) {
            let content = Content::from_ops(page.ops);
            let resources = update.create(page.resources)?.into();
            let page = Page {
                parent: tree.clone(),
                contents: Some(content),
                media_box: page.media_box,
                crop_box: page.crop_box,
                trim_box: page.trim_box,
                resources: Some(resources),
                rotate: page.rotate,
                metadata: page.metadata,
                lgi: page.lgi,
                vp: page.vp,
                other: page.other,
            };
            update.fulfill(promise, PagesNode::Leaf(page))?;
        }

        Ok(Catalog {
            version: Some("1.7".into()),
            pages: tree,
            names: None,
            dests: None,
            metadata: None,
            outlines: None,
            struct_tree_root: None,
            forms: None,
        })
    }
}

pub struct PdfBuilder<SC, OC, L> {
    pub storage: Storage<Vec<u8>, SC, OC, L>,
    pub info: Option<InfoDict>,
    pub id: Option<[String; 2]>,
}
impl<SC, OC, L> PdfBuilder<SC, OC, L>
    where
        SC: Cache<Result<AnySync, Arc<PdfError>>>,
        OC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
        L: Log,
{
    pub fn new(fileoptions: FileOptions<'_, SC, OC, L>) -> Self {
        let storage = fileoptions.storage();
        PdfBuilder {
            storage,
            info: None,
            id: None
        }
    }
    pub fn info(mut self, info: InfoDict) -> Self {
        self.info = Some(info);
        self
    }
    pub fn id(mut self, a: String, b: String) -> Self {
        self.id = Some([a, b]);
        self
    }
    pub fn build(mut self, catalog: CatalogBuilder) -> Result<Vec<u8>> {
        let catalog = catalog.build(&mut self.storage)?;

        let mut trailer = Trailer {
            root: self.storage.create(catalog)?,
            encrypt_dict: None,
            size: 0,
            id: vec!["foo".into(), "bar".into()],
            info_dict: self.info,
            prev_trailer_pos: None,
        };
        self.storage.save(&mut trailer)?;
        Ok(self.storage.into_inner())
    }
}

pub struct Importer<'a, R, U> {
    resolver: R,
    map: HashMap<PlainRef, PlainRef>,
    updater: &'a mut U,
    rcrefs: HashMap<PlainRef, AnySync>,
    // ptr of old -> (old, new)
    shared: HashMap<usize, (AnySync, AnySync)>,
}
pub struct ImporterMap<R> {
    resolver: R,
    map: HashMap<PlainRef, PlainRef>,
}
impl<'a, R, U> Importer<'a, R, U> {
    pub fn new(resolver: R, updater: &'a mut U) -> Self {
        Importer {
            resolver,
            updater,
            map: Default::default(),
            rcrefs: Default::default(),
            shared: Default::default(),
        }
    }
}
impl<'a, R: Resolve, U> Importer<'a, R, U> {
    pub fn finish(self) -> ImporterMap<R> {
        ImporterMap { resolver: self.resolver, map: self.map }
    }
}
impl<R: Resolve> ImporterMap<R> {
    fn compare_dict(&self, a_dict: &Dictionary, b_dict: &Dictionary, new_resolve: &impl Resolve) -> Result<bool> {
        let mut same = true;
        let mut b_unvisited: HashSet<_> = b_dict.keys().collect();
        for (a_key, a_val) in a_dict.iter() {
            if let Some(b_val) = b_dict.get(a_key) {
                if !self.compare_prim(a_val, b_val, new_resolve)? {
                    println!("value for key {a_key} mismatch.");
                    same = false;
                }
                b_unvisited.remove(a_key);
            } else {
                println!("missing key {a_key} in b.");
                same = false;
            }
        }
        for b_key in b_unvisited.iter() {
            println!("missing key {b_key} in a.");
        }
        Ok(same && b_unvisited.is_empty())
    }
{ println!("value for key {a_key} mismatch."); same = false; } b_unvisited.remove(a_key); } else { println!("missing key {a_key} in b."); same = false; } } for b_key in b_unvisited.iter() { println!("missing key {b_key} in a."); } Ok(same && !b_unvisited.is_empty()) } fn compare_prim(&self, a: &Primitive, b: &Primitive, new_resolve: &impl Resolve) -> Result { match (a, b) { (Primitive::Array(a_parts), Primitive::Array(b_parts)) => { if a_parts.len() != b_parts.len() { dbg!(a_parts, b_parts); println!("different length {} vs. {}", a_parts.len(), b_parts.len()); println!("a = {a_parts:?}"); println!("b = {b_parts:?}"); return Ok(false); } for (a, b) in a_parts.iter().zip(b_parts.iter()) { if !self.compare_prim(a, b, new_resolve)? { return Ok(false); } } Ok(true) } (Primitive::Dictionary(a_dict), Primitive::Dictionary(b_dict)) => { self.compare_dict(a_dict, b_dict, new_resolve) } (Primitive::Reference(r1), Primitive::Reference(r2)) => { match self.map.get(&r1) { Some(r) if r == r2 => Ok(true), _ => Ok(false) } } (Primitive::Stream(a_s), Primitive::Stream(b_s)) => { if !self.compare_dict(&a_s.info, &b_s.info, new_resolve)? { println!("stream dicts differ"); return Ok(false) } let a_data = a_s.raw_data(&self.resolver)?; let b_data = b_s.raw_data(new_resolve)?; if a_data != b_data { println!("data differs."); return Ok(false) } Ok(true) } (Primitive::Integer(a), Primitive::Number(b)) => Ok(*a as f32 == *b), (Primitive::Number(a), Primitive::Integer(b)) => Ok(*a == *b as f32), (Primitive::Reference(a_ref), b) => { let a = self.resolver.resolve(*a_ref)?; self.compare_prim(&a, b, new_resolve) } (a, Primitive::Reference(b_ref)) => { let b = new_resolve.resolve(*b_ref)?; self.compare_prim(a, &b, new_resolve) } (ref a, ref b) => { if a == b { Ok(true) } else { println!("{a:?} != {b:?}"); Ok(false) } } } } pub fn verify(&self, new_resolve: &impl Resolve) -> Result { let mut same = true; for (&old_ref, &new_ref) in self.map.iter() { let old = self.resolver.resolve(old_ref)?; let new = new_resolve.resolve(new_ref)?; if !self.compare_prim(&old, &new, new_resolve)? 
impl<'a, R: Resolve, U> Resolve for Importer<'a, R, U> {
    fn get<T: Object + DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
        self.resolver.get(r)
    }
    fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
        self.resolver.get_data_or_decode(id, range, filters)
    }
    fn options(&self) -> &ParseOptions {
        self.resolver.options()
    }
    fn resolve(&self, r: PlainRef) -> Result<Primitive> {
        self.resolver.resolve(r)
    }
    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result<Primitive> {
        self.resolver.resolve_flags(r, flags, depth)
    }
    fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>> {
        self.resolver.stream_data(id, range)
    }
}
impl<'a, R, U: Updater> Updater for Importer<'a, R, U> {
    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>> {
        self.updater.create(obj)
    }
    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>> {
        self.updater.fulfill(promise, obj)
    }
    fn promise<T: Object>(&mut self) -> PromisedRef<T> {
        self.updater.promise()
    }
    fn update<T: ObjectWrite>(&mut self, old: PlainRef, obj: T) -> Result<RcRef<T>> {
        self.updater.update(old, obj)
    }
}
impl<'a, R: Resolve, U: Updater> Cloner for Importer<'a, R, U> {
    fn clone_ref<T: DeepClone + Object + DataSize + ObjectWrite>(&mut self, old: Ref<T>) -> Result<Ref<T>> {
        if let Some(&new_ref) = self.map.get(&old.get_inner()) {
            return Ok(Ref::new(new_ref));
        }
        let obj = self.resolver.get(old)?;
        let clone = obj.deep_clone(self)?;
        let r = self.updater.create(clone)?;
        self.map.insert(old.get_inner(), r.get_ref().get_inner());
        Ok(r.get_ref())
    }
    fn clone_plainref(&mut self, old: PlainRef) -> Result<PlainRef> {
        if let Some(&new_ref) = self.map.get(&old) {
            return Ok(new_ref);
        }
        let obj = self.resolver.resolve(old)?;
        let clone = obj.deep_clone(self)?;
        let new = self.updater.create(clone)?
            .get_ref().get_inner();
        self.map.insert(old, new);
        Ok(new)
    }
    fn clone_rcref<T: DeepClone + ObjectWrite + DataSize + Sync + Send + 'static>(&mut self, old: &RcRef<T>) -> Result<RcRef<T>> {
        let old_ref = old.get_ref().get_inner();
        if let Some(&new_ref) = self.map.get(&old_ref) {
            let arc = self.rcrefs.get(&new_ref).unwrap().clone().downcast()?;
            return Ok(RcRef::new(new_ref, arc));
        }
        let new = old.data().deep_clone(self)?;
        let new = self.updater.create::<T>(new)?;
        self.rcrefs.insert(new.get_ref().get_inner(), AnySync::new(new.data().clone()));
        self.map.insert(old_ref, new.get_ref().get_inner());
        Ok(new)
    }
    fn clone_shared<T: DeepClone + Sync + Send + 'static>(&mut self, old: &Shared<T>) -> Result<Shared<T>> {
        let key = &**old as *const T as usize;
        if let Some((old, new)) = self.shared.get(&key) {
            return new.clone().downcast();
        }
        let new = Shared::new(old.as_ref().deep_clone(self)?);
        self.shared.insert(key, (AnySync::new_without_size(old.clone()), AnySync::new_without_size(new.clone())));
        Ok(new)
    }
}

pdf-0.9.0/src/content.rs
/// PDF content streams.
use std::fmt::{self, Display};
use std::cmp::Ordering;
use itertools::Itertools;
use istring::SmallString;
use datasize::DataSize;
use std::sync::Arc;
use crate::error::*;
use crate::object::*;
use crate::parser::{Lexer, parse_with_lexer, ParseFlags};
use crate::primitive::*;
use crate::enc::StreamFilter;
use crate as pdf;

/// Represents a PDF content stream - a `Vec` of `Operator`s
#[derive(Debug, Clone, DataSize)]
pub struct Content {
    /// The raw content stream parts; usually one, but could be any number.
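    /// (A page's `/Contents` entry may be a single stream or an array of
    /// streams; the PDF spec treats the concatenation of the parts as one
    /// stream, which is why `operations` below joins them before parsing.)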
    pub parts: Vec<Stream<()>>,
}

impl Content {
    pub fn operations(&self, resolve: &impl Resolve) -> Result<Vec<Op>> {
        let mut data = vec![];
        for part in self.parts.iter() {
            data.extend_from_slice(&t!(part.data(resolve)));
        }
        parse_ops(&data, resolve)
    }
}
pub fn parse_ops(data: &[u8], resolve: &impl Resolve) -> Result<Vec<Op>> {
    let mut ops = OpBuilder::new();
    ops.parse(data, resolve)?;
    Ok(ops.ops)
}

macro_rules! names {
    ($args:ident, $($x:ident),*) => (
        $(
            let $x = name(&mut $args)?;
        )*
    )
}
macro_rules! numbers {
    ($args:ident, $($x:ident),*) => (
        $(
            let $x = number(&mut $args)?;
        )*
    )
}
macro_rules! points {
    ($args:ident, $($point:ident),*) => (
        $(
            let $point = point(&mut $args)?;
        )*
    )
}

fn name(args: &mut impl Iterator<Item=Primitive>) -> Result<SmallString> {
    args.next().ok_or(PdfError::NoOpArg)?.into_name()
}
fn number(args: &mut impl Iterator<Item=Primitive>) -> Result<f32> {
    args.next().ok_or(PdfError::NoOpArg)?.as_number()
}
fn string(args: &mut impl Iterator<Item=Primitive>) -> Result<PdfString> {
    args.next().ok_or(PdfError::NoOpArg)?.into_string()
}
fn point(args: &mut impl Iterator<Item=Primitive>) -> Result<Point> {
    let x = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let y = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    Ok(Point { x, y })
}
fn rect(args: &mut impl Iterator<Item=Primitive>) -> Result<Rect> {
    let x = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let y = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let width = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let height = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    Ok(Rect { x, y, width, height })
}
fn rgb(args: &mut impl Iterator<Item=Primitive>) -> Result<Rgb> {
    let red = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let green = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let blue = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    Ok(Rgb { red, green, blue })
}
fn cmyk(args: &mut impl Iterator<Item=Primitive>) -> Result<Cmyk> {
    let cyan = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let magenta = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let yellow = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    let key = args.next().ok_or(PdfError::NoOpArg)?.as_number()?;
    Ok(Cmyk { cyan, magenta, yellow, key })
}
fn matrix(args: &mut impl Iterator<Item=Primitive>) -> Result<Matrix> {
    Ok(Matrix {
        a: number(args)?,
        b: number(args)?,
        c: number(args)?,
        d: number(args)?,
        e: number(args)?,
        f: number(args)?,
    })
}
fn array(args: &mut impl Iterator<Item=Primitive>) -> Result<Vec<Primitive>> {
    match args.next() {
        Some(Primitive::Array(arr)) => Ok(arr),
        None => Ok(vec![]),
        _ => Err(PdfError::NoOpArg)
    }
}
fn expand_abbr_name(name: SmallString, alt: &[(&str, &str)]) -> SmallString {
    for &(p, r) in alt {
        if name == p {
            return r.into();
        }
    }
    name
}
fn expand_abbr(p: Primitive, alt: &[(&str, &str)]) -> Primitive {
    match p {
        Primitive::Name(name) => Primitive::Name(expand_abbr_name(name, alt)),
        Primitive::Array(items) => Primitive::Array(items.into_iter().map(|p| expand_abbr(p, alt)).collect()),
        p => p
    }
}
fn inline_image(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<Arc<ImageXObject>> {
    let mut dict = Dictionary::new();
    loop {
        let backup_pos = lexer.get_pos();
        let obj = parse_with_lexer(lexer, &NoResolve, ParseFlags::ANY);
        let key = match obj {
            Ok(Primitive::Name(key)) => key,
            Err(e) if e.is_eof() => return Err(e),
            Err(_) => {
                lexer.set_pos(backup_pos);
                break;
            }
            Ok(_) => bail!("invalid key type")
        };
        let key = expand_abbr_name(key, &[
            ("BPC", "BitsPerComponent"),
            ("CS", "ColorSpace"),
            ("D", "Decode"),
            ("DP", "DecodeParms"),
            ("F", "Filter"),
            ("H", "Height"),
            ("IM", "ImageMask"),
            ("I", "Interpolate"),
            ("W", "Width"),
        ]);
        let val = parse_with_lexer(lexer, &NoResolve, ParseFlags::ANY)?;
        dict.insert(key, val);
    }
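    // An inline image in a content stream has the shape
    //   BI <key/value pairs> ID <binary data> EI
    // so at this point the abbreviated dictionary is complete and the raw
    // image data follows the "ID" token.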
lexer.next_expect("ID")?; let data_start = lexer.get_pos() + 1; // find the end before try parsing. if lexer.seek_substr("\nEI").is_none() { bail!("inline image exceeds expected data range"); } let data_end = lexer.get_pos() - 3; // ugh let bits_per_component = dict.get("BitsPerComponent").map(|p| p.as_integer()).transpose()?; let color_space = dict.get("ColorSpace").map(|p| ColorSpace::from_primitive(expand_abbr(p.clone(), &[ ("G", "DeviceGray"), ("RGB", "DeviceRGB"), ("CMYK", "DeviceCMYK"), ("I", "Indexed") ] ), resolve)).transpose()?; let decode = dict.get("Decode").map(|p| Object::from_primitive(p.clone(), resolve)).transpose()?; let decode_parms = dict.get("DecodeParms").map(|p| p.clone().resolve(resolve)?.into_dictionary()).transpose()?.unwrap_or_default(); let filter = dict.remove("Filter").map(|p| expand_abbr(p, &[ ("AHx", "ASCIIHexDecode"), ("A85", "ASCII85Decode"), ("LZW", "LZWDecode"), ("Fl", "FlateDecode"), ("RL", "RunLengthDecode"), ("CCF", "CCITTFaxDecode"), ("DCT", "DCTDecode"), ] )); let filters = match filter { Some(Primitive::Array(parts)) => parts.into_iter() .map(|p| p.as_name().and_then(|kind| StreamFilter::from_kind_and_params(kind, decode_parms.clone(), resolve))) .collect::>()?, Some(Primitive::Name(kind)) => vec![StreamFilter::from_kind_and_params(&kind, decode_parms, resolve)?], None => vec![], _ => bail!("invalid filter") }; let height = dict.require("InlineImage", "Height")?.as_u32()?; let image_mask = dict.get("ImageMask").map(|p| p.as_bool()).transpose()?.unwrap_or(false); let intent = dict.remove("Intent").map(|p| RenderingIntent::from_primitive(p, &NoResolve)).transpose()?; let interpolate = dict.get("Interpolate").map(|p| p.as_bool()).transpose()?.unwrap_or(false); let width = dict.require("InlineImage", "Width")?.as_u32()?; let image_dict = ImageDict { width, height, color_space, bits_per_component, intent, image_mask, mask: None, decode, interpolate, struct_parent: None, id: None, smask: None, other: dict, }; let data = lexer.new_substr(data_start .. data_end).to_vec(); Ok(Arc::new(ImageXObject { inner: Stream::from_compressed(image_dict, data, filters) })) } struct OpBuilder { last: Point, compability_section: bool, ops: Vec } impl OpBuilder { fn new() -> Self { OpBuilder { last: Point { x: 0., y: 0. }, compability_section: false, ops: Vec::new() } } fn parse(&mut self, data: &[u8], resolve: &impl Resolve) -> Result<()> { let mut lexer = Lexer::new(data); let mut buffer = Vec::with_capacity(5); loop { let backup_pos = lexer.get_pos(); let obj = parse_with_lexer(&mut lexer, resolve, ParseFlags::ANY); match obj { Ok(obj) => { // Operand buffer.push(obj) } Err(e) => { if e.is_eof() { break; } // It's not an object/operand - treat it as an operator. 
lexer.set_pos(backup_pos); let op = t!(lexer.next()); let operator = t!(op.as_str(), op); match self.add(operator, buffer.drain(..), &mut lexer, resolve) { Ok(()) => {}, Err(e) if resolve.options().allow_invalid_ops => { warn!("OP Err: {:?}", e); }, Err(e) => return Err(e), } } } match lexer.get_pos().cmp(&data.len()) { Ordering::Greater => err!(PdfError::ContentReadPastBoundary), Ordering::Less => (), Ordering::Equal => break } } Ok(()) } fn add(&mut self, op: &str, mut args: impl Iterator, lexer: &mut Lexer, resolve: &impl Resolve) -> Result<()> { use Winding::*; let ops = &mut self.ops; let mut push = move |op| ops.push(op); match op { "b" => { push(Op::Close); push(Op::FillAndStroke { winding: NonZero }); }, "B" => push(Op::FillAndStroke { winding: NonZero }), "b*" => { push(Op::Close); push(Op::FillAndStroke { winding: EvenOdd }); } "B*" => push(Op::FillAndStroke { winding: EvenOdd }), "BDC" => push(Op::BeginMarkedContent { tag: name(&mut args)?, properties: Some(args.next().ok_or(PdfError::NoOpArg)?) }), "BI" => push(Op::InlineImage { image: inline_image(lexer, resolve)? }), "BMC" => push(Op::BeginMarkedContent { tag: name(&mut args)?, properties: None }), "BT" => push(Op::BeginText), "BX" => self.compability_section = true, "c" => { points!(args, c1, c2, p); push(Op::CurveTo { c1, c2, p }); self.last = p; } "cm" => { numbers!(args, a, b, c, d, e, f); push(Op::Transform { matrix: Matrix { a, b, c, d, e, f }}); } "CS" => { names!(args, name); push(Op::StrokeColorSpace { name }); } "cs" => { names!(args, name); push(Op::FillColorSpace { name }); } "d" => { let p = args.next().ok_or(PdfError::NoOpArg)?; let pattern = p.as_array()?.iter().map(|p| p.as_number()).collect::, PdfError>>()?; let phase = args.next().ok_or(PdfError::NoOpArg)?.as_number()?; push(Op::Dash { pattern, phase }); } "d0" => {} "d1" => {} "Do" | "Do0" => { names!(args, name); push(Op::XObject { name }); } "DP" => push(Op::MarkedContentPoint { tag: name(&mut args)?, properties: Some(args.next().ok_or(PdfError::NoOpArg)?) }), "EI" => bail!("Parse Error. Unexpected 'EI'"), "EMC" => push(Op::EndMarkedContent), "ET" => push(Op::EndText), "EX" => self.compability_section = false, "f" | "F" => push(Op::Fill { winding: NonZero }), "f*" => push(Op::Fill { winding: EvenOdd }), "G" => push(Op::StrokeColor { color: Color::Gray(number(&mut args)?) }), "g" => push(Op::FillColor { color: Color::Gray(number(&mut args)?) }), "gs" => push(Op::GraphicsState { name: name(&mut args)? }), "h" => push(Op::Close), "i" => push(Op::Flatness { tolerance: number(&mut args)? }), "ID" => bail!("Parse Error. Unexpected 'ID'"), "j" => { let n = args.next().ok_or(PdfError::NoOpArg)?.as_integer()?; let join = match n { 0 => LineJoin::Miter, 1 => LineJoin::Round, 2 => LineJoin::Bevel, _ => bail!("invalid line join {}", n) }; push(Op::LineJoin { join }); } "J" => { let n = args.next().ok_or(PdfError::NoOpArg)?.as_integer()?; let cap = match n { 0 => LineCap::Butt, 1 => LineCap::Round, 2 => LineCap::Square, _ => bail!("invalid line cap {}", n) }; push(Op::LineCap { cap }); } "K" => { let color = Color::Cmyk(cmyk(&mut args)?); push(Op::StrokeColor { color }); } "k" => { let color = Color::Cmyk(cmyk(&mut args)?); push(Op::FillColor { color }); } "l" => { let p = point(&mut args)?; push(Op::LineTo { p }); self.last = p; } "m" => { let p = point(&mut args)?; push(Op::MoveTo { p }); self.last = p; } "M" => push(Op::MiterLimit { limit: number(&mut args)? 
}), "MP" => push(Op::MarkedContentPoint { tag: name(&mut args)?, properties: None }), "n" => push(Op::EndPath), "q" => push(Op::Save), "Q" => push(Op::Restore), "re" => push(Op::Rect { rect: rect(&mut args)? }), "RG" => push(Op::StrokeColor { color: Color::Rgb(rgb(&mut args)?) }), "rg" => push(Op::FillColor { color: Color::Rgb(rgb(&mut args)?) }), "ri" => { let s = name(&mut args)?; let intent = RenderingIntent::from_str(&s) .ok_or_else(|| PdfError::Other { msg: format!("invalid rendering intent {}", s) })?; push(Op::RenderingIntent { intent }); }, "s" => { push(Op::Close); push(Op::Stroke); } "S" => push(Op::Stroke), "SC" | "SCN" => { push(Op::StrokeColor { color: Color::Other(args.collect()) }); } "sc" | "scn" => { push(Op::FillColor { color: Color::Other(args.collect()) }); } "sh" => { } "T*" => push(Op::TextNewline), "Tc" => push(Op::CharSpacing { char_space: number(&mut args)? }), "Td" => push(Op::MoveTextPosition { translation: point(&mut args)? }), "TD" => { let translation = point(&mut args)?; push(Op::Leading { leading: -translation.y }); push(Op::MoveTextPosition { translation }); } "Tf" => push(Op::TextFont { name: name(&mut args)?, size: number(&mut args)? }), "Tj" => push(Op::TextDraw { text: string(&mut args)? }), "TJ" => { let mut result = Vec::::new(); for spacing_or_text in array(&mut args)?.into_iter() { let spacing_or_text = match spacing_or_text { Primitive::Integer(i) => TextDrawAdjusted::Spacing(i as f32), Primitive::Number(f) => TextDrawAdjusted::Spacing(f), Primitive::String(text) => TextDrawAdjusted::Text(text), p => bail!("invalid primitive in TJ operator: {:?}", p) }; result.push(spacing_or_text); } push(Op::TextDrawAdjusted { array: result }) } "TL" => push(Op::Leading { leading: number(&mut args)? }), "Tm" => push(Op::SetTextMatrix { matrix: matrix(&mut args)? }), "Tr" => { use TextMode::*; let n = args.next().ok_or(PdfError::NoOpArg)?.as_integer()?; let mode = match n { 0 => Fill, 1 => Stroke, 2 => FillThenStroke, 3 => Invisible, 4 => FillAndClip, 5 => StrokeAndClip, _ => { bail!("Invalid text render mode: {}", n); } }; push(Op::TextRenderMode { mode }); } "Ts" => push(Op::TextRise { rise: number(&mut args)? }), "Tw" => push(Op::WordSpacing { word_space: number(&mut args)? }), "Tz" => push(Op::TextScaling { horiz_scale: number(&mut args)? }), "v" => { points!(args, c2, p); push(Op::CurveTo { c1: self.last, c2, p }); self.last = p; } "w" => push(Op::LineWidth { width: number(&mut args)? }), "W" => push(Op::Clip { winding: NonZero }), "W*" => push(Op::Clip { winding: EvenOdd }), "y" => { points!(args, c1, p); push(Op::CurveTo { c1, c2: p, p }); self.last = p; } "'" => { push(Op::TextNewline); push(Op::TextDraw { text: string(&mut args)? }); } "\"" => { push(Op::WordSpacing { word_space: number(&mut args)? }); push(Op::CharSpacing { char_space: number(&mut args)? }); push(Op::TextNewline); push(Op::TextDraw { text: string(&mut args)? 
}); } o if !self.compability_section => { bail!("invalid operator {}", o) }, _ => {} } Ok(()) } } impl Object for Content { /// Convert primitive to Self fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { type ContentStream = Stream<()>; let mut parts: Vec = vec![]; match p { Primitive::Array(arr) => { for p in arr { let part = t!(ContentStream::from_primitive(p, resolve)); parts.push(part); } } Primitive::Reference(r) => return Self::from_primitive(t!(resolve.resolve(r)), resolve), p => { let part = t!(ContentStream::from_primitive(p, resolve)); parts.push(part); } } Ok(Content { parts }) } } #[derive(Debug, DataSize, DeepClone)] pub struct FormXObject { pub stream: Stream, } impl FormXObject { pub fn dict(&self) -> &FormDict { &self.stream.info.info } pub fn operations(&self, resolve: &impl Resolve) -> Result> { let mut ops = OpBuilder::new(); let data = self.stream.data(resolve)?; t!(ops.parse(&data, resolve)); Ok(ops.ops) } } impl Object for FormXObject { /// Convert primitive to Self fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let stream = t!(Stream::::from_primitive(p, resolve)); Ok(FormXObject { stream, }) } } #[allow(clippy::float_cmp)] // TODO pub fn serialize_ops(mut ops: &[Op]) -> Result> { use std::io::Write; let mut data = Vec::new(); let mut current_point = None; let f = &mut data; while ops.len() > 0 { let mut advance = 1; match ops[0] { Op::BeginMarkedContent { ref tag, properties: Some(ref name) } => { serialize_name(tag, f)?; write!(f, " ")?; name.serialize(f)?; writeln!(f, " BDC")?; } Op::BeginMarkedContent { ref tag, properties: None } => { serialize_name(tag, f)?; writeln!(f, " BMC")?; } Op::MarkedContentPoint { ref tag, properties: Some(ref name) } => { serialize_name(tag, f)?; write!(f, " ")?; name.serialize(f)?; writeln!(f, " DP")?; } Op::MarkedContentPoint { ref tag, properties: None } => { serialize_name(tag, f)?; writeln!(f, " MP")?; } Op::EndMarkedContent => writeln!(f, "EMC")?, Op::Close => match ops.get(1) { Some(Op::Stroke) => { writeln!(f, "s")?; advance += 1; } Some(Op::FillAndStroke { winding: Winding::NonZero }) => { writeln!(f, "b")?; advance += 1; } Some(Op::FillAndStroke { winding: Winding::EvenOdd }) => { writeln!(f, "b*")?; advance += 1; } _ => writeln!(f, "h")?, } Op::MoveTo { p } => { writeln!(f, "{} m", p)?; current_point = Some(p); } Op::LineTo { p } => { writeln!(f, "{} l", p)?; current_point = Some(p); }, Op::CurveTo { c1, c2, p } => { if Some(c1) == current_point { writeln!(f, "{} {} v", c2, p)?; } else if c2 == p { writeln!(f, "{} {} y", c1, p)?; } else { writeln!(f, "{} {} {} c", c1, c2, p)?; } current_point = Some(p); }, Op::Rect { rect } => writeln!(f, "{} re", rect)?, Op::EndPath => writeln!(f, "n")?, Op::Stroke => writeln!(f, "S")?, Op::FillAndStroke { winding: Winding::NonZero } => writeln!(f, "B")?, Op::FillAndStroke { winding: Winding::EvenOdd } => writeln!(f, "B*")?, Op::Fill { winding: Winding::NonZero } => writeln!(f, "f")?, Op::Fill { winding: Winding::EvenOdd } => writeln!(f, "f*")?, Op::Shade { ref name } => { serialize_name(name, f)?; writeln!(f, " sh")?; }, Op::Clip { winding: Winding::NonZero } => writeln!(f, "W")?, Op::Clip { winding: Winding::EvenOdd } => writeln!(f, "W*")?, Op::Save => writeln!(f, "q")?, Op::Restore => writeln!(f, "Q")?, Op::Transform { matrix } => writeln!(f, "{} cm", matrix)?, Op::LineWidth { width } => writeln!(f, "{} w", width)?, Op::Dash { ref pattern, phase } => write!(f, "[{}] {} d", pattern.iter().format(" "), phase)?, Op::LineJoin { join } => writeln!(f, 
"{} j", join as u8)?, Op::LineCap { cap } => writeln!(f, "{} J", cap as u8)?, Op::MiterLimit { limit } => writeln!(f, "{} M", limit)?, Op::Flatness { tolerance } => writeln!(f, "{} i", tolerance)?, Op::GraphicsState { ref name } => { serialize_name(name, f)?; writeln!(f, " gs")?; }, Op::StrokeColor { color: Color::Gray(g) } => writeln!(f, "{} G", g)?, Op::StrokeColor { color: Color::Rgb(rgb) } => writeln!(f, "{} RG", rgb)?, Op::StrokeColor { color: Color::Cmyk(cmyk) } => writeln!(f, "{} K", cmyk)?, Op::StrokeColor { color: Color::Other(ref args) } => { for p in args { p.serialize(f)?; write!(f, " ")?; } writeln!(f, "SCN")?; } Op::FillColor { color: Color::Gray(g) } => writeln!(f, "{} g", g)?, Op::FillColor { color: Color::Rgb(rgb) } => writeln!(f, "{} rg", rgb)?, Op::FillColor { color: Color::Cmyk(cmyk) } => writeln!(f, "{} k", cmyk)?, Op::FillColor { color: Color::Other(ref args) } => { for p in args { p.serialize(f)?; write!(f, " ")?; } writeln!(f, "scn")?; } Op::FillColorSpace { ref name } => { serialize_name(name, f)?; writeln!(f, " cs")?; }, Op::StrokeColorSpace { ref name } => { serialize_name(name, f)?; writeln!(f, " CS")?; }, Op::RenderingIntent { intent } => writeln!(f, "{} ri", intent.to_str())?, Op::BeginText => writeln!(f, "BT")?, Op::EndText => writeln!(f, "ET")?, Op::CharSpacing { char_space } => writeln!(f, "{} Tc", char_space)?, Op::WordSpacing { word_space } => { if let [ Op::CharSpacing { char_space }, Op::TextNewline, Op::TextDraw { ref text }, .. ] = ops[1..] { write!(f, "{} {} ", word_space, char_space)?; text.serialize(f)?; writeln!(f, " \"")?; advance += 3; } else { writeln!(f, "{} Tw", word_space)?; } } Op::TextScaling { horiz_scale } => writeln!(f, "{} Tz", horiz_scale)?, Op::Leading { leading } => match ops[1..] { [Op::MoveTextPosition { translation }, ..] if leading == -translation.x => { writeln!(f, "{} {} TD", translation.x, translation.y)?; advance += 1; } _ => { writeln!(f, "{} TL", leading)?; } } Op::TextFont { ref name, ref size } => { serialize_name(name, f)?; writeln!(f, " {} Tf", size)?; }, Op::TextRenderMode { mode } => writeln!(f, "{} Tr", mode as u8)?, Op::TextRise { rise } => writeln!(f, "{} Ts", rise)?, Op::MoveTextPosition { translation } => writeln!(f, "{} {} Td", translation.x, translation.y)?, Op::SetTextMatrix { matrix } => writeln!(f, "{} Tm", matrix)?, Op::TextNewline => { if let [Op::TextDraw { ref text }, ..] = ops[1..] 
{ text.serialize(f)?; writeln!(f, " '")?; advance += 1; } else { writeln!(f, "T*")?; } }, Op::TextDraw { ref text } => { text.serialize(f)?; writeln!(f, " Tj")?; }, Op::TextDrawAdjusted { ref array } => { write!(f, "[")?; for (i, val) in array.iter().enumerate() { if i > 0 { write!(f, " ")?; } match val { TextDrawAdjusted::Spacing(s) => write!(f, "{s}")?, TextDrawAdjusted::Text(data) => data.serialize(f)?, } } writeln!(f, "] TJ")?; }, Op::InlineImage { image: _ } => unimplemented!(), Op::XObject { ref name } => { serialize_name(name, f)?; writeln!(f, " Do")?; }, } ops = &ops[advance..]; } Ok(data) } impl Content { pub fn from_ops(operations: Vec) -> Self { let data = serialize_ops(&operations).unwrap(); Content { parts: vec![Stream::new((), data)] } } } impl ObjectWrite for Content { fn to_primitive(&self, update: &mut impl Updater) -> Result { if self.parts.len() == 1 { let obj = self.parts[0].to_primitive(update)?; update.create(obj)?.to_primitive(update) } else { self.parts.to_primitive(update) } } } #[derive(Debug, Copy, Clone, PartialEq, DataSize)] pub enum Winding { EvenOdd, NonZero } #[derive(Debug, Copy, Clone, PartialEq, DataSize)] pub enum LineCap { Butt = 0, Round = 1, Square = 2, } #[derive(Debug, Copy, Clone, PartialEq, DataSize)] pub enum LineJoin { Miter = 0, Round = 1, Bevel = 2, } #[cfg(feature = "euclid")] pub struct PdfSpace(); #[derive(Debug, Copy, Clone, PartialEq, Default, DataSize)] #[repr(C, align(8))] pub struct Point { pub x: f32, pub y: f32 } impl Display for Point { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {}", self.x, self.y) } } #[cfg(feature = "euclid")] impl Into> for Point { fn into(self) -> euclid::Point2D { let Point { x, y } = self; euclid::Point2D::new(x, y) } } #[cfg(feature = "euclid")] impl From> for Point { fn from(from: euclid::Point2D) -> Self { let euclid::Point2D { x, y, .. } = from; Point { x, y } } } #[cfg(feature = "euclid")] impl Into> for Point { fn into(self) -> euclid::Vector2D { let Point { x, y } = self; euclid::Vector2D::new(x, y) } } #[cfg(feature = "euclid")] impl From> for Point { fn from(from: euclid::Vector2D) -> Self { let euclid::Vector2D { x, y, .. } = from; Point { x, y } } } #[derive(Debug, Copy, Clone, PartialEq, DataSize)] #[repr(C, align(8))] pub struct Rect { pub x: f32, pub y: f32, pub width: f32, pub height: f32, } impl Display for Rect { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {} {} {}", self.x, self.y, self.width, self.height) } } #[cfg(feature = "euclid")] impl Into> for Rect { fn into(self) -> euclid::Box2D { let Rect { x, y, width, height } = self; assert!(width > 0.0); assert!(height > 0.0); euclid::Box2D::new(euclid::Point2D::new(x, y), euclid::Point2D::new(x + width, y + height)) } } #[cfg(feature = "euclid")] impl From> for Rect { fn from(from: euclid::Box2D) -> Self { let euclid::Box2D { min: euclid::Point2D { x, y, .. }, max: euclid::Point2D { x: x2, y: y2, .. }, .. 
} = from; assert!(x < x2); assert!(y < y2); Rect { x, y, width: x2 - x, height: y2 - y } } } #[derive(Debug, Copy, Clone, PartialEq, DataSize, DeepClone)] #[repr(C, align(8))] pub struct Matrix { pub a: f32, pub b: f32, pub c: f32, pub d: f32, pub e: f32, pub f: f32, } impl Display for Matrix { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {} {} {} {} {}", self.a, self.b, self.c, self.d, self.e, self.f) } } impl Default for Matrix { fn default() -> Self { Matrix { a: 1.0, b: 0.0, c: 0.0, d: 1.0, e: 0.0, f: 0.0, } } } impl Object for Matrix { fn from_primitive(p: Primitive, _resolve: &impl Resolve) -> Result { matrix(&mut p.into_array()?.into_iter()) } } impl ObjectWrite for Matrix { fn to_primitive(&self, update: &mut impl Updater) -> Result { let Matrix { a, b, c, d, e, f } = *self; Primitive::array::([a, b, c, d, e, f].iter(), update) } } #[cfg(feature = "euclid")] impl Into> for Matrix { fn into(self) -> euclid::Transform2D { let Matrix { a, b, c, d, e, f} = self; euclid::Transform2D::new(a, b, c, d, e, f) } } #[cfg(feature = "euclid")] impl From> for Matrix { fn from(from: euclid::Transform2D) -> Self { let euclid::Transform2D { m11: a, m12: b, m21: c, m22: d, m31: e, m32: f, .. } = from; Matrix { a, b, c, d, e, f } } } #[derive(Debug, Clone, DataSize)] pub enum Color { Gray(f32), Rgb(Rgb), Cmyk(Cmyk), Other(Vec), } #[derive(Debug, Copy, Clone, PartialEq, DataSize)] pub enum TextMode { Fill, Stroke, FillThenStroke, Invisible, FillAndClip, StrokeAndClip } #[derive(Debug, Copy, Clone, PartialEq, DataSize)] pub struct Rgb { pub red: f32, pub green: f32, pub blue: f32, } impl Display for Rgb { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {} {}", self.red, self.green, self.blue) } } #[derive(Debug, Copy, Clone, PartialEq, DataSize)] pub struct Cmyk { pub cyan: f32, pub magenta: f32, pub yellow: f32, pub key: f32, } impl Display for Cmyk { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {} {} {}", self.cyan, self.magenta, self.yellow, self.key) } } #[derive(Debug, Clone, DataSize)] pub enum TextDrawAdjusted { Text(PdfString), Spacing(f32), } impl Display for TextDrawAdjusted { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Text(text) => write!(f, "{:?}", text), Self::Spacing(spacing) => spacing.fmt(f), } } } /// Graphics Operator /// /// See PDF32000 A.2 #[derive(Debug, Clone, DataSize)] pub enum Op { /// Begin a marked comtent sequence /// /// Pairs with the following EndMarkedContent. /// /// generated by operators `BMC` and `BDC` BeginMarkedContent { tag: Name, properties: Option }, /// End a marked content sequence. /// /// Pairs with the previous BeginMarkedContent. /// /// generated by operator `EMC` EndMarkedContent, /// A marked content point. /// /// generated by operators `MP` and `DP`. 
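    // (Editor's sketch, not from the original source) A concrete example of
    // the mapping implemented by this enum: the content-stream fragment
    //
    //     q 1 0 0 1 50 700 cm BT (Hi) Tj ET Q
    //
    // parses into [Save, Transform, BeginText, TextDraw, EndText, Restore].
    // The mapping is not one-to-one: shorthands expand (`'` becomes
    // TextNewline + TextDraw) and serialize_ops re-fuses them where possible.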
MarkedContentPoint { tag: Name, properties: Option }, Close, MoveTo { p: Point }, LineTo { p: Point }, CurveTo { c1: Point, c2: Point, p: Point }, Rect { rect: Rect }, EndPath, Stroke, /// Fill and Stroke operation /// /// generated by operators `b`, `B`, `b*`, `B*` /// `close` indicates whether the path should be closed first FillAndStroke { winding: Winding }, Fill { winding: Winding }, /// Fill using the named shading pattern /// /// operator: `sh` Shade { name: Name }, Clip { winding: Winding }, Save, Restore, Transform { matrix: Matrix }, LineWidth { width: f32 }, Dash { pattern: Vec, phase: f32 }, LineJoin { join: LineJoin }, LineCap { cap: LineCap }, MiterLimit { limit: f32 }, Flatness { tolerance: f32 }, GraphicsState { name: Name }, StrokeColor { color: Color }, FillColor { color: Color }, FillColorSpace { name: Name }, StrokeColorSpace { name: Name }, RenderingIntent { intent: RenderingIntent }, BeginText, EndText, CharSpacing { char_space: f32 }, WordSpacing { word_space: f32 }, TextScaling { horiz_scale: f32 }, Leading { leading: f32 }, TextFont { name: Name, size: f32 }, TextRenderMode { mode: TextMode }, /// `Ts` TextRise { rise: f32 }, /// `Td`, `TD` MoveTextPosition { translation: Point }, /// `Tm` SetTextMatrix { matrix: Matrix }, /// `T*` TextNewline, /// `Tj` TextDraw { text: PdfString }, TextDrawAdjusted { array: Vec }, XObject { name: Name }, InlineImage { image: Arc }, } pub fn deep_clone_op(op: &Op, cloner: &mut impl Cloner, old_resources: &Resources, resources: &mut Resources) -> Result { match *op { Op::GraphicsState { ref name } => { if !resources.graphics_states.contains_key(name) { if let Some(gs) = old_resources.graphics_states.get(name) { resources.graphics_states.insert(name.clone(), gs.deep_clone(cloner)?); } } Ok(Op::GraphicsState { name: name.clone() }) } Op::MarkedContentPoint { ref tag, ref properties } => { Ok(Op::MarkedContentPoint { tag: tag.clone(), properties: properties.deep_clone(cloner)? }) } Op::BeginMarkedContent { ref tag, ref properties } => { Ok(Op::BeginMarkedContent { tag: tag.clone(), properties: properties.deep_clone(cloner)? }) } Op::TextFont { ref name, size } => { if !resources.fonts.contains_key(name) { if let Some(f) = old_resources.fonts.get(name) { resources.fonts.insert(name.clone(), f.deep_clone(cloner)?); } } Ok(Op::TextFont { name: name.clone(), size }) } Op::XObject { ref name } => { if !resources.xobjects.contains_key(name) { if let Some(xo) = old_resources.xobjects.get(name) { resources.xobjects.insert(name.clone(), xo.deep_clone(cloner)?); } } Ok(Op::XObject { name: name.clone() }) } ref op => Ok(op.clone()) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_inline_image() { let data = br###" /W 768 /H 150 /BPC 1 /IM true /F [/A85 /Fl] ID Gb"0F_%"1Ö"#B1qiGGG^V6GZ#ZkijB5'RjB4S^5I61&$Ni:Xh=4S_9KYN;c9MUZPn/h,c]oCLUmg*Fo?0Hs0nQHp41KkO\Ls5+g0aoD*btT?l]lq0YAucfaoqHp4 1KkO\Ls5+g0aoD*btT?l^#mD&ORf[0~> EI "###; let mut lexer = Lexer::new(data); assert!(inline_image(&mut lexer, &NoResolve).is_ok()); } } pdf-0.9.0/src/crypt.rs000064400000000000000000000561751046102023000127170ustar 00000000000000/// PDF "cryptography" – This is why you don't write your own crypto. 
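// (Editor's overview, not from the original source) This module implements
// the standard security handler of PDF 32000-1, 7.6: RC4 for revisions 2-4
// and AES-128/AES-256 in CBC mode for revisions 4-6. A file key is derived
// from the user or owner password (MD5-based for R <= 4, SHA-2-based for
// R 5/6). RC4 and AESV2 then mix the object number and generation into a
// per-object key, while AESV3 uses the file key directly.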
use crate as pdf;
use aes::cipher::generic_array::{sequence::Split, GenericArray};
use aes::cipher::{BlockDecryptMut, BlockEncryptMut, KeyIvInit};
use aes::cipher::block_padding::{NoPadding, Pkcs7};
use sha2::{Digest, Sha256, Sha384, Sha512};
use std::fmt;
use std::collections::HashMap;
use datasize::DataSize;
use crate::object::PlainRef;
use crate::primitive::{Dictionary, PdfString, Name};
use crate::error::{PdfError, Result};

type Aes128CbcEnc = cbc::Encryptor<aes::Aes128>;
type Aes128CbcDec = cbc::Decryptor<aes::Aes128>;
type Aes256CbcDec = cbc::Decryptor<aes::Aes256>;

const PADDING: [u8; 32] = [
    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,
    0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,
    0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
];

#[derive(Copy)]
pub struct Rc4 {
    i: u8,
    j: u8,
    state: [u8; 256]
}
impl Clone for Rc4 {
    fn clone(&self) -> Rc4 { *self }
}
impl Rc4 {
    pub fn new(key: &[u8]) -> Rc4 {
        assert!(!key.is_empty() && key.len() <= 256);
        let mut rc4 = Rc4 { i: 0, j: 0, state: [0; 256] };
        for (i, x) in rc4.state.iter_mut().enumerate() {
            *x = i as u8;
        }
        let mut j: u8 = 0;
        for i in 0..256 {
            j = j.wrapping_add(rc4.state[i]).wrapping_add(key[i % key.len()]);
            rc4.state.swap(i, j as usize);
        }
        rc4
    }
    fn next(&mut self) -> u8 {
        self.i = self.i.wrapping_add(1);
        self.j = self.j.wrapping_add(self.state[self.i as usize]);
        self.state.swap(self.i as usize, self.j as usize);
        self.state[(self.state[self.i as usize].wrapping_add(self.state[self.j as usize])) as usize]
    }
    pub fn encrypt(key: &[u8], data: &mut [u8]) {
        let mut rc4 = Rc4::new(key);
        for b in data.iter_mut() {
            *b ^= rc4.next();
        }
    }
}

/// 7.6.1 Table 20 + 7.6.3.2 Table 21
#[derive(Object, Debug, Clone, DataSize)]
pub struct CryptDict {
    #[pdf(key="O")]
    o: PdfString,
    #[pdf(key="U")]
    u: PdfString,
    #[pdf(key="R")]
    r: u32,
    #[pdf(key="P")]
    p: i32,
    #[pdf(key="V")]
    v: i32,
    #[pdf(key="Length", default="40")]
    bits: u32,
    #[pdf(key="CF")]
    crypt_filters: HashMap<Name, CryptFilter>,
    #[pdf(key="StmF")]
    default_crypt_filter: Option<Name>,
    #[pdf(key="EncryptMetadata", default="true")]
    encrypt_metadata: bool,
    #[pdf(key = "OE")]
    oe: Option<PdfString>,
    #[pdf(key = "UE")]
    ue: Option<PdfString>,
    #[pdf(other)]
    _other: Dictionary
}

#[derive(Object, Debug, Clone, Copy, DataSize)]
pub enum CryptMethod {
    None,
    V2,
    AESV2,
    AESV3,
}

#[derive(Object, Debug, Clone, Copy, DataSize)]
pub enum AuthEvent {
    DocOpen,
    EFOpen
}

#[derive(Object, Debug, Clone, DataSize)]
#[pdf(Type="CryptFilter?")]
pub struct CryptFilter {
    #[pdf(key="CFM", default="CryptMethod::None")]
    pub method: CryptMethod,
    #[pdf(key="AuthEvent", default="AuthEvent::DocOpen")]
    pub auth_event: AuthEvent,
    #[pdf(key="Length")]
    pub length: Option<u32>,
    #[pdf(other)]
    _other: Dictionary
}

pub struct Decoder {
    key_size: usize,
    key: Vec<u8>, // maximum length
    method: CryptMethod,
    /// A reference to the /Encrypt dictionary, if it is in an indirect
    /// object. The strings in this dictionary are not encrypted, so
    /// decryption must be skipped when accessing them.
    pub(crate) encrypt_indirect_object: Option<PlainRef>,
    /// A reference to the /Metadata dictionary, if it is an indirect
    /// object. If /EncryptMetadata is set to false in the /Encrypt dictionary,
    /// then the strings in the /Metadata dictionary are not encrypted, so
    /// decryption must be skipped when accessing them.
    pub(crate) metadata_indirect_object: Option<PlainRef>,
    /// Whether the metadata is encrypted, as indicated by /EncryptMetadata
    /// in the /Encrypt dictionary.
encrypt_metadata: bool, } impl Decoder { pub fn default(dict: &CryptDict, id: &[u8]) -> Result { Decoder::from_password(dict, id, b"") } fn key(&self) -> &[u8] { &self.key[.. std::cmp::min(self.key_size, 16)] } pub fn new(key: Vec, key_size: usize, method: CryptMethod, encrypt_metadata: bool) -> Decoder { Decoder { key_size, key, method, encrypt_indirect_object: None, metadata_indirect_object: None, encrypt_metadata, } } pub fn from_password(dict: &CryptDict, id: &[u8], pass: &[u8]) -> Result { fn compute_u_rev_2(key: &[u8]) -> Vec { // algorithm 4 let mut data = PADDING.to_vec(); Rc4::encrypt(key, &mut data); data } fn check_password_rev_2(document_u: &[u8], key: &[u8]) -> bool { compute_u_rev_2(key) == document_u } fn compute_u_rev_3_4(id: &[u8], key: &[u8]) -> [u8; 16] { // algorithm 5 // a) we derived the key already. // b) let mut hash = md5::Context::new(); hash.consume(PADDING); // c) hash.consume(id); // d) let mut data = *hash.compute(); Rc4::encrypt(key, &mut data); // e) for i in 1u8..=19 { let mut key = key.to_owned(); for b in &mut key { *b ^= i; } Rc4::encrypt(&key, &mut data); } // f) data } fn check_password_rev_3_4(document_u: &[u8], id: &[u8], key: &[u8]) -> bool { document_u.starts_with(&compute_u_rev_3_4(id, key)) } fn check_password_rc4(revision: u32, document_u: &[u8], id: &[u8], key: &[u8]) -> bool { if revision == 2 { check_password_rev_2(document_u, key) } else { check_password_rev_3_4(document_u, id, key) } } fn key_derivation_user_password_rc4( revision: u32, key_size: usize, dict: &CryptDict, id: &[u8], pass: &[u8], ) -> Vec { let o = dict.o.as_bytes(); let p = dict.p; // 7.6.3.3 - Algorithm 2 // a) and b) let mut hash = md5::Context::new(); if pass.len() < 32 { hash.consume(pass); hash.consume(&PADDING[..32 - pass.len()]); } else { hash.consume(&pass[..32]); } // c) hash.consume(o); // d) hash.consume(p.to_le_bytes()); // e) hash.consume(id); // f) if revision >= 4 && !dict.encrypt_metadata { hash.consume([0xff, 0xff, 0xff, 0xff]); } // g) let mut data = *hash.compute(); // h) if revision >= 3 { for _ in 0..50 { data = *md5::compute(&data[..std::cmp::min(key_size, 16)]); } } let mut key = vec![0u8; key_size.max(16)]; key[..16].copy_from_slice(&data); key } fn key_derivation_owner_password_rc4( revision: u32, key_size: usize, pass: &[u8], ) -> Result> { if key_size > 16 { bail!("key size > 16"); } let mut hash = md5::Context::new(); if pass.len() < 32 { hash.consume(pass); hash.consume(&PADDING[..32 - pass.len()]); } else { hash.consume(&pass[..32]); } if revision >= 3 { for _ in 0..50 { let digest = *std::mem::replace(&mut hash, md5::Context::new()).compute(); hash.consume(digest); } } let digest = &hash.compute()[..key_size]; Ok(digest.to_vec()) } let (key_bits, method) = match dict.v { 1 => (40, CryptMethod::V2), 2 => (dict.bits, CryptMethod::V2), 4 ..= 6 => { let default = dict .crypt_filters .get(try_opt!(dict.default_crypt_filter.as_ref()).as_str()) .ok_or_else(|| other!("missing crypt filter entry {:?}", dict.default_crypt_filter.as_ref()))?; match default.method { CryptMethod::V2 | CryptMethod::AESV2 => ( default.length.map(|n| 8 * n).unwrap_or(dict.bits), default.method, ), CryptMethod::AESV3 if dict.v == 5 => ( default.length.map(|n| 8 * n).unwrap_or(dict.bits), default.method, ), m => err!(other!("unimplemented crypt method {:?}", m)), } } v => err!(other!("unsupported V value {}", v)), }; let level = dict.r; if !(2..=6).contains(&level) { err!(other!("unsupported standard security handler revision {}", level)) }; if level <= 4 { let key_size = 
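// (Editor's note on this branch) For revisions 2-4 the file key is
// key_bits/8 bytes, capped at 16. The user password is tried first; if its
// check fails, the owner password is used to RC4-unwrap /O (one round for
// revision 2, twenty XOR-varied rounds otherwise) and the result is
// re-checked as a user password.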
key_bits as usize / 8; let key = key_derivation_user_password_rc4(level, key_size, dict, id, pass); if check_password_rc4(level, dict.u.as_bytes(), id, &key[..std::cmp::min(key_size, 16)]) { let decoder = Decoder::new(key, key_size, method, dict.encrypt_metadata); Ok(decoder) } else { let password_wrap_key = key_derivation_owner_password_rc4(level, key_size, pass)?; let mut data = dict.o.as_bytes().to_vec(); let rounds = if level == 2 { 1u8 } else { 20u8 }; for round in 0..rounds { let mut round_key = password_wrap_key.clone(); for byte in round_key.iter_mut() { *byte ^= round; } Rc4::encrypt(&round_key, &mut data); } let unwrapped_user_password = data; let key = key_derivation_user_password_rc4( level, key_size, dict, id, &unwrapped_user_password, ); if check_password_rc4(level, dict.u.as_bytes(), id, &key[..key_size]) { let decoder = Decoder::new(key, key_size, method, dict.encrypt_metadata); Ok(decoder) } else { Err(PdfError::InvalidPassword) } } } else if level == 5 || level == 6 { let u = dict.u.as_bytes(); if u.len() != 48 { err!(format!( "U in Encrypt dictionary should have a length of 48 bytes, not {}", u.len(), ) .into()); } let user_hash = &u[0..32]; let user_validation_salt = &u[32..40]; let user_key_salt = &u[40..48]; let o = dict.o.as_bytes(); if o.len() != 48 { err!(format!( "O in Encrypt dictionary should have a length of 48 bytes, not {}", o.len(), ) .into()); } let owner_hash = &o[0..32]; let owner_validation_salt = &o[32..40]; let owner_key_salt = &o[40..48]; let password_unicode = t!(String::from_utf8(pass.to_vec()).map_err(|_| PdfError::InvalidPassword)); let password_prepped = t!(stringprep::saslprep(&password_unicode).map_err(|_| PdfError::InvalidPassword)); let mut password_encoded = password_prepped.as_bytes(); if password_encoded.len() > 127 { password_encoded = &password_encoded[..127]; } let ue = t!(dict.ue.as_ref().ok_or_else(|| PdfError::MissingEntry { typ: "Encrypt", field: "UE".into(), })) .as_bytes() .to_vec(); let oe = t!(dict.oe.as_ref().ok_or_else(|| PdfError::MissingEntry { typ: "Encrypt", field: "OE".into(), })) .as_bytes() .to_vec(); let (intermediate_key, mut wrapped_key) = if level == 6 { let user_hash_computed = Self::revision_6_kdf(password_encoded, user_validation_salt, b""); if user_hash_computed == user_hash { ( Self::revision_6_kdf(password_encoded, user_key_salt, b"").into(), ue, ) } else { let owner_hash_computed = Self::revision_6_kdf(password_encoded, owner_validation_salt, u); if owner_hash_computed == owner_hash { ( Self::revision_6_kdf(password_encoded, owner_key_salt, u).into(), oe, ) } else { err!(PdfError::InvalidPassword); } } } else { // level == 5 let mut user_check_hash = Sha256::new(); user_check_hash.update(password_encoded); user_check_hash.update(user_validation_salt); let user_hash_computed = user_check_hash.finalize(); #[allow(clippy::branches_sharing_code)] if user_hash_computed.as_slice() == user_hash { let mut intermediate_kdf_hash = Sha256::new(); intermediate_kdf_hash.update(password_encoded); intermediate_kdf_hash.update(user_key_salt); (intermediate_kdf_hash.finalize(), ue) } else { let mut owner_check_hash = Sha256::new(); owner_check_hash.update(password_encoded); owner_check_hash.update(owner_validation_salt); owner_check_hash.update(u); let owner_hash_computed = owner_check_hash.finalize(); if owner_hash_computed.as_slice() == owner_hash { let mut intermediate_kdf_hash = Sha256::new(); intermediate_kdf_hash.update(password_encoded); intermediate_kdf_hash.update(owner_key_salt); intermediate_kdf_hash.update(u); 
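// (Editor's note) The SHA-256 over (password || key salt || U) computed
// above is only an intermediate key: below it serves as an AES-256-CBC key
// with a zero IV and no padding to unwrap the actual 32-byte file key from
// /OE (owner path) or /UE (user path).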
(intermediate_kdf_hash.finalize(), oe) } else { err!(PdfError::InvalidPassword); } } }; let zero_iv = GenericArray::from_slice(&[0u8; 16]); let key_slice = t!(Aes256CbcDec::new(&intermediate_key, zero_iv) .decrypt_padded_mut::(&mut wrapped_key) .map_err(|_| PdfError::InvalidPassword)); let decoder = Decoder::new(key_slice.into(), 32, method, dict.encrypt_metadata); Ok(decoder) } else { err!(format!("unsupported V value {}", level).into()) } } fn revision_6_kdf(password: &[u8], salt: &[u8], u: &[u8]) -> [u8; 32] { let mut data = [0u8; (128 + 64 + 48) * 64]; let mut data_total_len = 0; let mut sha256 = Sha256::new(); let mut sha384 = Sha384::new(); let mut sha512 = Sha512::new(); let mut input_sha256 = Sha256::new(); input_sha256.update(password); input_sha256.update(salt); input_sha256.update(u); let input = input_sha256.finalize(); let (mut key, mut iv) = input.split(); let mut block = [0u8; 64]; let mut block_size = 32; (block[..block_size]).copy_from_slice(&input[..block_size]); let mut i = 0; while i < 64 || i < data[data_total_len - 1] as usize + 32 { let aes = Aes128CbcEnc::new(&key, &iv); let data_repeat_len = password.len() + block_size + u.len(); data[..password.len()].copy_from_slice(password); data[password.len()..password.len() + block_size].copy_from_slice(&block[..block_size]); data[password.len() + block_size..data_repeat_len].copy_from_slice(u); for j in 1..64 { data.copy_within(..data_repeat_len, j * data_repeat_len); } data_total_len = data_repeat_len * 64; // The plaintext length will always be a multiple of the block size, unwrap is okay let encrypted = aes .encrypt_padded_mut::(&mut data[..data_total_len], data_total_len) .unwrap(); let sum: usize = encrypted[..16].iter().map(|byte| *byte as usize).sum(); block_size = sum % 3 * 16 + 32; match block_size { 32 => { sha256.update(encrypted); (block[..block_size]).copy_from_slice(&sha256.finalize_reset()); } 48 => { sha384.update(encrypted); (block[..block_size]).copy_from_slice(&sha384.finalize_reset()); } 64 => { sha512.update(encrypted); (block[..block_size]).copy_from_slice(&sha512.finalize_reset()); } _ => unreachable!(), } key.copy_from_slice(&block[..16]); iv.copy_from_slice(&block[16..32]); i += 1; } let mut hash = [0u8; 32]; hash.copy_from_slice(&block[..32]); hash } pub fn decrypt<'buf>(&self, id: PlainRef, data: &'buf mut [u8]) -> Result<&'buf [u8]> { if self.encrypt_indirect_object == Some(id) { // Strings inside the /Encrypt dictionary are not encrypted return Ok(data); } if !self.encrypt_metadata && self.metadata_indirect_object == Some(id) { // Strings inside the /Metadata dictionary are not encrypted when /EncryptMetadata is // false return Ok(data); } if data.is_empty() { return Ok(data); } // Algorithm 1 // a) we have those already match self.method { CryptMethod::None => unreachable!(), CryptMethod::V2 => { // b) let mut key = [0; 16 + 5]; let n = self.key_size; key[..n].copy_from_slice(self.key()); key[n..n + 3].copy_from_slice(&id.id.to_le_bytes()[..3]); key[n + 3..n + 5].copy_from_slice(&id.gen.to_le_bytes()[..2]); // c) let key = *md5::compute(&key[..n + 5]); // d) Rc4::encrypt(&key[..(n + 5).min(16)], data); Ok(data) } CryptMethod::AESV2 => { // b) let mut key = [0; 32 + 5 + 4]; let n = std::cmp::min(self.key_size, 16); key[..n].copy_from_slice(self.key()); key[n..n + 3].copy_from_slice(&id.id.to_le_bytes()[..3]); key[n + 3..n + 5].copy_from_slice(&id.gen.to_le_bytes()[..2]); key[n + 5..n + 9].copy_from_slice(b"sAlT"); // c) let key = *md5::compute(&key[..n + 9]); // d) let key = &key[..(n + 
5).min(16)]; if data.len() < 16 { return Err(PdfError::DecryptionFailure); } let (iv, ciphertext) = data.split_at_mut(16); let cipher = t!(Aes128CbcDec::new_from_slices(key, iv).map_err(|_| PdfError::DecryptionFailure)); Ok(t!(cipher .decrypt_padded_mut::(ciphertext) .map_err(|_| PdfError::DecryptionFailure))) } CryptMethod::AESV3 => { if data.len() < 16 { return Err(PdfError::DecryptionFailure); } let (iv, ciphertext) = data.split_at_mut(16); let cipher = t!(Aes256CbcDec::new_from_slices(self.key(), iv).map_err(|_| PdfError::DecryptionFailure)); Ok(t!(cipher .decrypt_padded_mut::(ciphertext) .map_err(|_| PdfError::DecryptionFailure))) } } } } impl fmt::Debug for Decoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("Decoder") .field("key", &self.key()) .field("method", &self.method) .finish() } } #[cfg(test)] mod tests { #[test] fn unencrypted_strings() { let data_prefix = b"%PDF-1.5\n\ 1 0 obj\n\ << /Type /Catalog /Pages 2 0 R >>\n\ endobj\n\ 2 0 obj\n\ << /Type /Pages /Kids [3 0 R] /Count 1 >>\n\ endobj\n\ 3 0 obj\n\ << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>\n\ endobj\n\ 4 0 obj\n\ << /Length 0 >>\n\ stream\n\ endstream\n\ endobj\n\ 5 0 obj\n\ <<\n\ /V 4\n\ /CF <<\n\ /StdCF << /Type /CryptFilter /CFM /V2 >>\n\ >>\n\ /StmF /StdCF\n\ /StrF /StdCF\n\ /R 4\n\ /O (owner pwd hash!!)\n\ /U \n\ /P -4\n\ >>\n\ endobj\n\ xref\n\ 1 5\n"; let mut data = data_prefix.to_vec(); for obj_nr in 1..=5 { let needle = format!("\n{} 0 obj\n", obj_nr).into_bytes(); let offset = data_prefix .windows(needle.len()) .position(|w| w == needle) .unwrap() + 1; let mut line = format!("{:010} {:05} n\r\n", offset, 0).into_bytes(); assert_eq!(line.len(), 20); data.append(&mut line); } let trailer_snippet = b"trailer\n\ <<\n\ /Size 6\n\ /Root 1 0 R\n\ /Encrypt 5 0 R\n\ /ID [ ]\n\ >>\n\ startxref\n"; data.extend_from_slice(trailer_snippet); let xref_offset = data_prefix .windows("xref".len()) .rposition(|w| w == b"xref") .unwrap(); data.append(&mut format!("{}\n%%EOF", xref_offset).into_bytes()); let file = crate::file::FileOptions::uncached().load(data).unwrap(); // PDF reference says strings in the encryption dictionary are "not // encrypted by the usual methods." 
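        // (Editor's note) If string decryption were wrongly applied to the
        // /Encrypt dictionary itself, the raw /O bytes written into the
        // document above would come back mangled and the assertion below
        // would fail.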
assert_eq!( file.trailer.encrypt_dict.unwrap().o.as_ref(), b"owner pwd hash!!", ); } } pdf-0.9.0/src/data/t01_lzw+base85.txt000064400000000000000000000010251046102023000152250ustar 00000000000000J..)6T`?p&c!Jnl@ RM]WM;jjH6Gnc75idkL5]+cPZKEBPWdR>FF(kj1_R%W_d &/jS!;iuad7h?[L-F$+]]0A3Ck*$I0KZ?;<)CJtqi65Xb Vc3\n5ua:Q/=0$W<#N3U;H,MQKqfg1?:lUpR;6oN[C2E4 ZNr8Udn.'p+?#X+1>0Kuk$bCDF/(3fL5]Oq)^kJZ!C2H1 'TO]Rl?Q:&'<5&iP!$Rq;BXRecDN[IJB`,)o8XJOSJ9sD S]hQ;Rj@!ND)bD_q&C\g:inYC%)&u#:u,M6Bm%IY!Kb1+ ":aAa'S`ViJglLb8iG1p&i;eVoK&juJHs9%;Xomop"5KatWRT"JQ#qYuL, JD?M$0QP)lKn06l1apKDC@\qJ4B!!(5m+j.7F790m(Vj8 8l8Q:_CZ(Gm1%X\N1&u!FKHMB~>pdf-0.9.0/src/data/t01_plain.txt000064400000000000000000000000001046102023000144210ustar 00000000000000pdf-0.9.0/src/enc.rs000064400000000000000000000471361046102023000123200ustar 00000000000000#![allow(clippy::many_single_char_names)] #![allow(dead_code)] // TODO use itertools::Itertools; use crate as pdf; use crate::error::*; use crate::object::{Object, Resolve, Stream}; use crate::primitive::{Primitive, Dictionary}; use std::convert::TryInto; use std::io::{Read, Write}; use once_cell::sync::OnceCell; use datasize::DataSize; #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] pub struct LZWFlateParams { #[pdf(key="Predictor", default="1")] pub predictor: i32, #[pdf(key="Colors", default="1")] pub n_components: i32, #[pdf(key="BitsPerComponent", default="8")] pub bits_per_component: i32, #[pdf(key="Columns", default="1")] pub columns: i32, #[pdf(key="EarlyChange", default="1")] pub early_change: i32, } impl Default for LZWFlateParams { fn default() -> LZWFlateParams { LZWFlateParams { predictor: 1, n_components: 1, bits_per_component: 8, columns: 1, early_change: 1 } } } #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] pub struct DCTDecodeParams { // TODO The default value of ColorTransform is 1 if the image has three components and 0 otherwise. // 0: No transformation. // 1: If the image has three color components, transform RGB values to YUV before encoding and from YUV to RGB after decoding. // If the image has four components, transform CMYK values to YUVK before encoding and from YUVK to CMYK after decoding. // This option is ignored if the image has one or two color components. 
#[pdf(key="ColorTransform")] pub color_transform: Option, } #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] pub struct CCITTFaxDecodeParams { #[pdf(key="K", default="0")] pub k: i32, #[pdf(key="EndOfLine", default="false")] pub end_of_line: bool, #[pdf(key="EncodedByteAlign", default="false")] pub encoded_byte_align: bool, #[pdf(key="Columns", default="1728")] pub columns: u32, #[pdf(key="Rows", default="0")] pub rows: u32, #[pdf(key="EndOfBlock", default="true")] pub end_of_block: bool, #[pdf(key="BlackIs1", default="false")] pub black_is_1: bool, #[pdf(key="DamagedRowsBeforeError", default="0")] pub damaged_rows_before_error: u32, } #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] pub struct JBIG2DecodeParams { #[pdf(key="JBIG2Globals")] pub globals: Option> } #[derive(Debug, Clone, DataSize, DeepClone)] pub enum StreamFilter { ASCIIHexDecode, ASCII85Decode, LZWDecode (LZWFlateParams), FlateDecode (LZWFlateParams), JPXDecode, //Jpeg2k DCTDecode (DCTDecodeParams), CCITTFaxDecode (CCITTFaxDecodeParams), JBIG2Decode(JBIG2DecodeParams), Crypt, RunLengthDecode } impl StreamFilter { pub fn from_kind_and_params(kind: &str, params: Dictionary, r: &impl Resolve) -> Result { let params = Primitive::Dictionary (params); Ok( match kind { "ASCIIHexDecode" => StreamFilter::ASCIIHexDecode, "ASCII85Decode" => StreamFilter::ASCII85Decode, "LZWDecode" => StreamFilter::LZWDecode (LZWFlateParams::from_primitive(params, r)?), "FlateDecode" => StreamFilter::FlateDecode (LZWFlateParams::from_primitive(params, r)?), "JPXDecode" => StreamFilter::JPXDecode, "DCTDecode" => StreamFilter::DCTDecode (DCTDecodeParams::from_primitive(params, r)?), "CCITTFaxDecode" => StreamFilter::CCITTFaxDecode (CCITTFaxDecodeParams::from_primitive(params, r)?), "JBIG2Decode" => StreamFilter::JBIG2Decode(JBIG2DecodeParams::from_primitive(params, r)?), "Crypt" => StreamFilter::Crypt, "RunLengthDecode" => StreamFilter::RunLengthDecode, ty => bail!("Unrecognized filter type {:?}", ty), } ) } } #[inline] pub fn decode_nibble(c: u8) -> Option { match c { n @ b'0' ..= b'9' => Some(n - b'0'), a @ b'a' ..= b'h' => Some(a - b'a' + 0xa), a @ b'A' ..= b'H' => Some(a - b'A' + 0xA), _ => None } } #[inline] fn encode_nibble(c: u8) -> u8 { match c { 0 ..= 9 => b'0'+ c, 10 ..= 15 => b'a' - 10 + c, _ => unreachable!() } } pub fn decode_hex(data: &[u8]) -> Result> { let mut out = Vec::with_capacity(data.len() / 2); let pairs = data.iter().cloned() .take_while(|&b| b != b'>') .filter(|&b| !matches!(b, 0 | 9 | 10 | 12 | 13 | 32)) .tuples(); for (i, (high, low)) in pairs.enumerate() { if let (Some(low), Some(high)) = (decode_nibble(low), decode_nibble(high)) { out.push(high << 4 | low); } else { return Err(PdfError::HexDecode {pos: i * 2, bytes: [high, low]}) } } Ok(out) } pub fn encode_hex(data: &[u8]) -> Vec { let mut buf = Vec::with_capacity(data.len() * 2); for &b in data { buf.push(encode_nibble(b >> 4)); buf.push(encode_nibble(b & 0xf)); } buf } #[inline] fn sym_85(byte: u8) -> Option { match byte { b @ 0x21 ..= 0x75 => Some(b - 0x21), _ => None } } fn word_85([a, b, c, d, e]: [u8; 5]) -> Option<[u8; 4]> { fn s(b: u8) -> Option { sym_85(b).map(|n| n as u32) } let (a, b, c, d, e) = (s(a)?, s(b)?, s(c)?, s(d)?, s(e)?); let q = (((a * 85 + b) * 85 + c) * 85 + d) * 85 + e; Some(q.to_be_bytes()) } pub fn decode_85(data: &[u8]) -> Result> { let mut out = Vec::with_capacity((data.len() + 4) / 5 * 4); let mut stream = data.iter().cloned() .filter(|&b| !matches!(b, b' ' | b'\n' | b'\r' | b'\t')); let mut symbols = 
stream.by_ref() .take_while(|&b| b != b'~'); let (tail_len, tail) = loop { match symbols.next() { Some(b'z') => out.extend_from_slice(&[0; 4]), Some(a) => { let (b, c, d, e) = match (symbols.next(), symbols.next(), symbols.next(), symbols.next()) { (Some(b), Some(c), Some(d), Some(e)) => (b, c, d, e), (None, _, _, _) => break (1, [a, b'u', b'u', b'u', b'u']), (Some(b), None, _, _) => break (2, [a, b, b'u', b'u', b'u']), (Some(b), Some(c), None, _) => break (3, [a, b, c, b'u', b'u']), (Some(b), Some(c), Some(d), None) => break (4, [a, b, c, d, b'u']), }; out.extend_from_slice(&word_85([a, b, c, d, e]).ok_or(PdfError::Ascii85TailError)?); } None => break (0, [b'u'; 5]) } }; if tail_len > 0 { let last = word_85(tail).ok_or(PdfError::Ascii85TailError)?; out.extend_from_slice(&last[.. tail_len-1]); } match (stream.next(), stream.next()) { (Some(b'>'), None) => Ok(out), _ => Err(PdfError::Ascii85TailError) } } #[inline] fn divmod(n: u32, m: u32) -> (u32, u32) { (n / m, n % m) } #[inline] fn a85(n: u32) -> u8 { n as u8 + 0x21 } #[inline] fn base85_chunk(c: [u8; 4]) -> [u8; 5] { let n = u32::from_be_bytes(c); let (n, e) = divmod(n, 85); let (n, d) = divmod(n, 85); let (n, c) = divmod(n, 85); let (a, b) = divmod(n, 85); [a85(a), a85(b), a85(c), a85(d), a85(e)] } fn encode_85(data: &[u8]) -> Vec { let mut buf = Vec::with_capacity((data.len() / 4) * 5 + 10); let mut chunks = data.chunks_exact(4); for chunk in chunks.by_ref() { let c: [u8; 4] = chunk.try_into().unwrap(); if c == [0; 4] { buf.push(b'z'); } else { buf.extend_from_slice(&base85_chunk(c)); } } let r = chunks.remainder(); if r.len() > 0 { let mut c = [0; 4]; c[.. r.len()].copy_from_slice(r); let out = base85_chunk(c); buf.extend_from_slice(&out[.. r.len() + 1]); } buf.extend_from_slice(b"~>"); buf } fn inflate_bytes_zlib(data: &[u8]) -> Result> { use libflate::zlib::Decoder; let mut decoder = Decoder::new(data)?; let mut decoded = Vec::new(); decoder.read_to_end(&mut decoded)?; Ok(decoded) } fn inflate_bytes(data: &[u8]) -> Result> { use libflate::deflate::Decoder; let mut decoder = Decoder::new(data); let mut decoded = Vec::new(); decoder.read_to_end(&mut decoded)?; Ok(decoded) } pub fn flate_decode(data: &[u8], params: &LZWFlateParams) -> Result> { let predictor = params.predictor as usize; let n_components = params.n_components as usize; let columns = params.columns as usize; let stride = columns * n_components; // First flate decode let decoded = { if let Ok(data) = inflate_bytes_zlib(data) { data } else if let Ok(data) = inflate_bytes(data) { data } else { dump_data(data); bail!("can't inflate"); } }; // Then unfilter (PNG) // For this, take the old out as input, and write output to out if predictor > 10 { let inp = decoded; // input buffer let rows = inp.len() / (stride+1); // output buffer let mut out = vec![0; rows * stride]; // Apply inverse predictor let null_vec = vec![0; stride]; let mut in_off = 0; // offset into input buffer let mut out_off = 0; // offset into output buffer let mut last_out_off = 0; // last offset to output buffer while in_off + stride < inp.len() { let predictor = PredictorType::from_u8(inp[in_off])?; in_off += 1; // +1 because the first byte on each row is predictor let row_in = &inp[in_off .. in_off + stride]; let (prev_row, row_out) = if out_off == 0 { (&null_vec[..], &mut out[out_off .. out_off+stride]) } else { let (prev, curr) = out.split_at_mut(out_off); (&prev[last_out_off ..], &mut curr[.. 
stride]) }; unfilter(predictor, n_components, prev_row, row_in, row_out); last_out_off = out_off; in_off += stride; out_off += stride; } Ok(out) } else { Ok(decoded) } } fn flate_encode(data: &[u8]) -> Vec { use libflate::deflate::Encoder; let mut encoded = Vec::new(); let mut encoder = Encoder::new(&mut encoded); encoder.write_all(data).unwrap(); encoded } pub fn dct_decode(data: &[u8], _params: &DCTDecodeParams) -> Result> { use jpeg_decoder::Decoder; let mut decoder = Decoder::new(data); let pixels = decoder.decode()?; Ok(pixels) } pub fn lzw_decode(data: &[u8], params: &LZWFlateParams) -> Result> { use weezl::{BitOrder, decode::Decoder}; let mut out = vec![]; let mut decoder = if params.early_change != 0 { Decoder::with_tiff_size_switch(BitOrder::Msb, 9) } else { Decoder::new(BitOrder::Msb, 9) }; decoder .into_stream(&mut out) .decode_all(data).status?; Ok(out) } fn lzw_encode(data: &[u8], params: &LZWFlateParams) -> Result> { use weezl::{BitOrder, encode::Encoder}; if params.early_change != 0 { bail!("encoding early_change != 0 is not supported"); } let mut compressed = vec![]; Encoder::new(BitOrder::Msb, 9) .into_stream(&mut compressed) .encode_all(data).status?; Ok(compressed) } pub fn fax_decode(data: &[u8], params: &CCITTFaxDecodeParams) -> Result> { use fax::{Color, decoder::{pels, decode_g4}}; if params.k < 0 { let columns = params.columns as usize; let rows = params.rows as usize; let height = if params.rows == 0 { None } else { Some(params.rows as u16)}; let mut buf = Vec::with_capacity(columns * rows); decode_g4(data.iter().cloned(), columns as u16, height, |line| { buf.extend(pels(line, columns as u16).map(|c| match c { Color::Black => 0, Color::White => 255 })); assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns); }).ok_or(PdfError::Other { msg: "faxdecode failed".into() })?; assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns); if rows != 0 && buf.len() != columns * rows { bail!("decoded length does not match (expected {rows}∙{columns}, got {})", buf.len()); } Ok(buf) } else { unimplemented!() } } pub fn run_length_decode(data: &[u8]) -> Result> { // Used as specification let mut buf = Vec::new(); let d = data; let mut c = 0; while c < data.len() { let length = d[c]; // length is first byte if length < 128 { let start = c + 1; let end = start + length as usize + 1; // copy _following_ length + 1 bytes literally buf.extend_from_slice(&d[start..end]); c = end; // move cursor to next run } else if length >= 129 { let copy = 257 - length as usize; // copy 2 - 128 times let b = d[c + 1]; // copied byte buf.extend(std::iter::repeat(b).take(copy)); c += 2; // move cursor to next run } else { break; // EOD } } Ok(buf) } pub type DecodeFn = dyn Fn(&[u8]) -> Result> + Sync + Send + 'static; static JPX_DECODER: OnceCell> = OnceCell::new(); static JBIG2_DECODER: OnceCell> = OnceCell::new(); pub fn set_jpx_decoder(f: Box) { let _ = JPX_DECODER.set(f); } pub fn set_jbig2_decoder(f: Box) { let _ = JBIG2_DECODER.set(f); } pub fn jpx_decode(data: &[u8]) -> Result> { JPX_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jp2k decoder not set".into()})?(data) } pub fn jbig2_decode(data: &[u8], globals: &[u8]) -> Result> { let data = [ // file header // &[0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x01], globals, data, // end of page &[0x00, 0x00, 0x00, 0x03, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00], // end of stream &[0x00, 0x00, 0x00, 0x04, 0x33, 0x01, 0x00, 0x00, 0x00, 0x00], ].concat(); 
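    // (Editor's note) PDF stores JBIG2 in embedded form, i.e. bare segments
    // with the file header, end-of-page and end-of-file segments stripped.
    // The concatenation above rebuilds a stand-alone stream for a generic
    // decoder: the flag bytes 0x31 and 0x33 encode segment types 49 (end of
    // page) and 51 (end of file).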
JBIG2_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jbig2 decoder not set".into()})?(&data) } pub fn decode(data: &[u8], filter: &StreamFilter) -> Result> { match *filter { StreamFilter::ASCIIHexDecode => decode_hex(data), StreamFilter::ASCII85Decode => decode_85(data), StreamFilter::LZWDecode(ref params) => lzw_decode(data, params), StreamFilter::FlateDecode(ref params) => flate_decode(data, params), StreamFilter::RunLengthDecode => run_length_decode(data), StreamFilter::DCTDecode(ref params) => dct_decode(data, params), _ => bail!("unimplemented {filter:?}"), } } pub fn encode(data: &[u8], filter: &StreamFilter) -> Result> { match *filter { StreamFilter::ASCIIHexDecode => Ok(encode_hex(data)), StreamFilter::ASCII85Decode => Ok(encode_85(data)), StreamFilter::LZWDecode(ref params) => lzw_encode(data, params), StreamFilter::FlateDecode (ref _params) => Ok(flate_encode(data)), _ => unimplemented!(), } } /* * Predictor - copied and adapted from PNG crate.. */ #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)] #[allow(dead_code)] pub enum PredictorType { NoFilter = 0, Sub = 1, Up = 2, Avg = 3, Paeth = 4 } impl PredictorType { /// u8 -> Self. Temporary solution until Rust provides a canonical one. pub fn from_u8(n: u8) -> Result { match n { 0 => Ok(PredictorType::NoFilter), 1 => Ok(PredictorType::Sub), 2 => Ok(PredictorType::Up), 3 => Ok(PredictorType::Avg), 4 => Ok(PredictorType::Paeth), n => Err(PdfError::IncorrectPredictorType {n}) } } } fn filter_paeth(a: u8, b: u8, c: u8) -> u8 { let ia = a as i16; let ib = b as i16; let ic = c as i16; let p = ia + ib - ic; let pa = (p - ia).abs(); let pb = (p - ib).abs(); let pc = (p - ic).abs(); if pa <= pb && pa <= pc { a } else if pb <= pc { b } else { c } } pub fn unfilter(filter: PredictorType, bpp: usize, prev: &[u8], inp: &[u8], out: &mut [u8]) { use self::PredictorType::*; let len = inp.len(); assert_eq!(len, out.len()); assert_eq!(len, prev.len()); if bpp > len { return; } match filter { NoFilter => { out[..len].copy_from_slice(&inp[..len]); } Sub => { out[..bpp].copy_from_slice(&inp[..bpp]); for i in bpp..len { out[i] = inp[i].wrapping_add(out[i - bpp]); } } Up => { for i in 0..len { out[i] = inp[i].wrapping_add(prev[i]); } } Avg => { for i in 0..bpp { out[i] = inp[i].wrapping_add(prev[i] / 2); } for i in bpp..len { out[i] = inp[i].wrapping_add( ((out[i - bpp] as i16 + prev[i] as i16) / 2) as u8 ); } } Paeth => { for i in 0..bpp { out[i] = inp[i].wrapping_add( filter_paeth(0, prev[i], 0) ); } for i in bpp..len { out[i] = inp[i].wrapping_add( filter_paeth(out[i - bpp], prev[i], prev[i - bpp]) ); } } } } #[allow(unused)] pub fn filter(method: PredictorType, bpp: usize, previous: &[u8], current: &mut [u8]) { use self::PredictorType::*; let len = current.len(); match method { NoFilter => (), Sub => { for i in (bpp..len).rev() { current[i] = current[i].wrapping_sub(current[i - bpp]); } } Up => { for i in 0..len { current[i] = current[i].wrapping_sub(previous[i]); } } Avg => { for i in (bpp..len).rev() { current[i] = current[i].wrapping_sub(current[i - bpp].wrapping_add(previous[i]) / 2); } for i in 0..bpp { current[i] = current[i].wrapping_sub(previous[i] / 2); } } Paeth => { for i in (bpp..len).rev() { current[i] = current[i].wrapping_sub(filter_paeth(current[i - bpp], previous[i], previous[i - bpp])); } for i in 0..bpp { current[i] = current[i].wrapping_sub(filter_paeth(0, previous[i], 0)); } } } } #[cfg(test)] mod tests { use super::*; #[test] fn base_85() { fn s(b: &[u8]) -> &str { std::str::from_utf8(b).unwrap() } let case = 
&b"hello world!"[..]; let encoded = encode_85(case); assert_eq!(s(&encoded), "BOu!rD]j7BEbo80~>"); let decoded = decode_85(&encoded).unwrap(); assert_eq!(case, &*decoded); /* assert_eq!( s(&decode_85( &lzw_decode( &decode_85(&include_bytes!("data/t01_lzw+base85.txt")[..]).unwrap(), &LZWFlateParams::default() ).unwrap() ).unwrap()), include_str!("data/t01_plain.txt") ); */ } #[test] fn run_length_decode_test() { let x = run_length_decode(&[254, b'a', 255, b'b', 2, b'c', b'b', b'c', 254, b'a', 128]).unwrap(); assert_eq!(b"aaabbcbcaaa", x.as_slice()); } } pdf-0.9.0/src/encoding.rs000064400000000000000000000070311046102023000133270ustar 00000000000000use std::collections::HashMap; use istring::SmallString; use crate as pdf; use crate::object::{Object, Resolve, ObjectWrite, DeepClone}; use crate::primitive::{Primitive, Dictionary}; use crate::error::{Result}; use datasize::DataSize; #[derive(Debug, Clone, DataSize)] pub struct Encoding { pub base: BaseEncoding, pub differences: HashMap, } #[derive(Object, ObjectWrite, Debug, Clone, Eq, PartialEq, DataSize)] pub enum BaseEncoding { StandardEncoding, SymbolEncoding, MacRomanEncoding, WinAnsiEncoding, MacExpertEncoding, #[pdf(name = "Identity-H")] IdentityH, None, #[pdf(other)] Other(String), } impl Object for Encoding { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { match p { name @ Primitive::Name(_) => { Ok(Encoding { base: BaseEncoding::from_primitive(name, resolve)?, differences: HashMap::new(), }) } Primitive::Dictionary(mut dict) => { let base = match dict.remove("BaseEncoding") { Some(p) => BaseEncoding::from_primitive(p, resolve)?, None => BaseEncoding::None }; let mut gid = 0; let mut differences = HashMap::new(); if let Some(p) = dict.remove("Differences") { for part in p.resolve(resolve)?.into_array()? 
{ match part { Primitive::Integer(code) => { gid = code as u32; } Primitive::Name(name) => { differences.insert(gid, name); gid += 1; } _ => bail!("Unknown part primitive in dictionary: {:?}", part), } } } Ok(Encoding { base, differences }) } Primitive::Reference(r) => Self::from_primitive(resolve.resolve(r)?, resolve), Primitive::Stream(s) => Self::from_primitive(Primitive::Dictionary(s.info), resolve), _ => bail!("Unknown element: {:?}", p), } } } impl ObjectWrite for Encoding { fn to_primitive(&self, update: &mut impl pdf::object::Updater) -> Result { let base = self.base.to_primitive(update)?; if self.differences.len() == 0 { Ok(base) } else { let mut list = vec![]; let mut diff_list: Vec<_> = self.differences.iter().collect(); diff_list.sort(); let mut last = None; for &(&gid, name) in diff_list.iter() { if !last.map(|n| n + 1 == gid).unwrap_or(false) { list.push(Primitive::Integer(gid as i32)); } list.push(Primitive::Name(name.clone())); last = Some(gid); } let mut dict = Dictionary::new(); dict.insert("BaseEncoding", base); dict.insert("Differences", Primitive::Array(list)); Ok(Primitive::Dictionary(dict)) } } } impl Encoding { pub fn standard() -> Encoding { Encoding { base: BaseEncoding::StandardEncoding, differences: HashMap::new() } } } impl DeepClone for Encoding { fn deep_clone(&self, cloner: &mut impl pdf::object::Cloner) -> Result { Ok(self.clone()) } }pdf-0.9.0/src/error.rs000064400000000000000000000226601046102023000126770ustar 00000000000000use crate::object::ObjNr; use std::io; use std::error::Error; use crate::parser::ParseFlags; use std::sync::Arc; use datasize::{DataSize, data_size}; #[derive(Debug, Snafu)] pub enum PdfError { // Syntax / parsing #[snafu(display("Unexpected end of file"))] EOF, #[snafu(display("Shared"))] Shared { source: Arc }, #[snafu(display("Not enough Operator arguments"))] NoOpArg, #[snafu(display("Error parsing from string: {}", source))] Parse { source: Box }, #[snafu(display("Invalid encoding: {}", source))] Encoding { source: Box }, #[snafu(display("Out of bounds: index {}, but len is {}", index, len))] Bounds { index: usize, len: usize }, #[snafu(display("Unexpected token '{}' at {} - expected '{}'", lexeme, pos, expected))] UnexpectedLexeme {pos: usize, lexeme: String, expected: &'static str}, #[snafu(display("Expecting an object, encountered {} at pos {}. Rest:\n{}\n\n((end rest))", first_lexeme, pos, rest))] UnknownType {pos: usize, first_lexeme: String, rest: String}, #[snafu(display("Unknown variant '{}' for enum {}", name, id))] UnknownVariant { id: &'static str, name: String }, #[snafu(display("'{}' not found.", word))] NotFound { word: String }, #[snafu(display("Cannot follow reference during parsing - no resolve fn given (most likely /Length of Stream)."))] Reference, // TODO: which one? #[snafu(display("Erroneous 'type' field in xref stream - expected 0, 1 or 2, found {}", found))] XRefStreamType { found: u64 }, #[snafu(display("Parsing read past boundary of Contents."))] ContentReadPastBoundary, #[snafu(display("Primitive not allowed"))] PrimitiveNotAllowed { allowed: ParseFlags, found: ParseFlags }, ////////////////// // Encode/decode #[snafu(display("Hex decode error. 
Position {}, bytes {:?}", pos, bytes))] HexDecode {pos: usize, bytes: [u8; 2]}, #[snafu(display("Ascii85 tail error"))] Ascii85TailError, #[snafu(display("Failed to convert '{}' into PredictorType", n))] IncorrectPredictorType {n: u8}, ////////////////// // Dictionary #[snafu(display("Can't parse field {} of struct {}.", field, typ))] FromPrimitive { typ: &'static str, field: &'static str, source: Box }, #[snafu(display("Field /{} is missing in dictionary for type {}.", field, typ))] MissingEntry { typ: &'static str, field: String }, #[snafu(display("Expected to find value {} for key {}. Found {} instead.", value, key, found))] KeyValueMismatch { key: String, value: String, found: String, }, #[snafu(display("Expected dictionary /Type = {}. Found /Type = {}.", expected, found))] WrongDictionaryType {expected: String, found: String}, ////////////////// // Misc #[snafu(display("Tried to dereference free object nr {}.", obj_nr))] FreeObject {obj_nr: u64}, #[snafu(display("Tried to dereference non-existing object nr {}.", obj_nr))] NullRef {obj_nr: u64}, #[snafu(display("Expected primitive {}, found primitive {} instead.", expected, found))] UnexpectedPrimitive {expected: &'static str, found: &'static str}, /* WrongObjectType {expected: &'static str, found: &'static str} { description("Function called on object of wrong type.") display("Expected {}, found {}.", expected, found) } */ #[snafu(display("Object stream index out of bounds ({}/{}).", index, max))] ObjStmOutOfBounds {index: usize, max: usize}, #[snafu(display("Page out of bounds ({}/{}).", page_nr, max))] PageOutOfBounds {page_nr: u32, max: u32}, #[snafu(display("Page {} could not be found in the page tree.", page_nr))] PageNotFound {page_nr: u32}, #[snafu(display("Entry {} in xref table unspecified", id))] UnspecifiedXRefEntry {id: ObjNr}, #[snafu(display("Invalid password"))] InvalidPassword, #[snafu(display("Decryption failure"))] DecryptionFailure, #[snafu(display("JPEG"))] Jpeg { source: jpeg_decoder::Error }, #[snafu(display("IO Error"))] Io { source: io::Error }, #[snafu(display("{}", msg))] Other { msg: String }, #[snafu(display("NoneError at {}:{}:{}:{}", file, line, column, context))] NoneError { file: &'static str, line: u32, column: u32, context: Context }, #[snafu(display("Try at {}:{}:{}:{}", file, line, column, context))] Try { file: &'static str, line: u32, column: u32, context: Context, source: Box }, #[snafu(display("PostScriptParseError"))] PostScriptParse, #[snafu(display("PostScriptExecError"))] PostScriptExec, #[snafu(display("UTF16 decode error"))] Utf16Decode, #[snafu(display("UTF8 decode error"))] Utf8Decode, #[snafu(display("CID decode error"))] CidDecode, #[snafu(display("Max nesting depth reached"))] MaxDepth, #[snafu(display("Invalid"))] Invalid, } impl PdfError { pub fn trace(&self) { trace(self, 0); } pub fn is_eof(&self) -> bool { match self { PdfError::EOF => true, PdfError::Try { ref source, .. 
} => source.is_eof(), _ => false } } datasize::non_dynamic_const_heap_size!(PdfError, 0); #[cfg(feature="cache")] impl globalcache::ValueSize for PdfError { #[inline] fn size(&self) -> usize { data_size(self) } } fn trace(err: &dyn Error, depth: usize) { println!("{}: {}", depth, err); if let Some(source) = err.source() { trace(source, depth+1); } } #[derive(Debug)] pub struct Context(pub Vec<(&'static str, String)>); impl std::fmt::Display for Context { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { for (i, &(key, ref val)) in self.0.iter().enumerate() { if i == 0 { writeln!(f)?; } writeln!(f, " {} = {}", key, val)?; } Ok(()) } } pub type Result<T, E = PdfError> = std::result::Result<T, E>; impl From<io::Error> for PdfError { fn from(source: io::Error) -> PdfError { PdfError::Io { source } } } impl From<String> for PdfError { fn from(msg: String) -> PdfError { PdfError::Other { msg } } } impl From<Arc<PdfError>> for PdfError { fn from(source: Arc<PdfError>) -> PdfError { PdfError::Shared { source } } } #[macro_export] macro_rules! try_opt { ($e:expr $(,$c:expr)*) => ( match $e { Some(v) => v, None => { let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]); return Err($crate::PdfError::NoneError { file: file!(), line: line!(), column: column!(), context, }); } } ); } #[macro_export] macro_rules! t { ($e:expr $(,$c:expr)*) => { match $e { Ok(v) => v, Err(e) => { let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]); return Err($crate::PdfError::Try { file: file!(), line: line!(), column: column!(), context, source: e.into() }) } } }; } #[macro_export] macro_rules! ctx { ($e:expr, $($c:expr),*) => { match $e { Ok(v) => Ok(v), Err(e) => { let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]); Err($crate::PdfError::Try { file: file!(), line: line!(), column: column!(), context, source: e.into() }) } } }; } macro_rules! err_from { ($($st:ty),* => $variant:ident) => ( $( impl From<$st> for PdfError { fn from(e: $st) -> PdfError { PdfError::$variant { source: e.into() } } } )* ) } err_from!(std::str::Utf8Error, std::string::FromUtf8Error, std::string::FromUtf16Error, istring::FromUtf8Error<istring::IString>, istring::FromUtf8Error<istring::SmallString> => Encoding); err_from!(std::num::ParseIntError, std::string::ParseError => Parse); err_from!(jpeg_decoder::Error => Jpeg); macro_rules! other { ($($t:tt)*) => ($crate::PdfError::Other { msg: format!($($t)*) }) } macro_rules! err { ($e: expr) => ({ return Err($e); }) } macro_rules! bail { ($($t:tt)*) => { err!($crate::PdfError::Other { msg: format!($($t)*) }) } } macro_rules! unimplemented { () => (bail!("Unimplemented @ {}:{}", file!(), line!())) } #[cfg(not(feature = "dump"))] pub fn dump_data(_data: &[u8]) {} #[cfg(feature = "dump")] pub fn dump_data(data: &[u8]) { use std::io::Write; if let Some(path) = ::std::env::var_os("PDF_OUT") { let (mut file, path) = tempfile::Builder::new() .prefix("") .tempfile_in(path).unwrap() .keep().unwrap(); file.write_all(&data).unwrap(); info!("data written to {:?}", path); } else { info!("set PDF_OUT to an existing directory to dump stream data"); } } #[cfg(test)] mod tests { use super::PdfError; fn assert_send<T: Send>() {} fn assert_sync<T: Sync>() {} #[test] fn error_is_send_and_sync() { // note that these checks happen at compile time, not when the test is run assert_send::<PdfError>(); assert_sync::<PdfError>(); } } pdf-0.9.0/src/file.rs000064400000000000000000000520761046102023000124670ustar 00000000000000//! This is kind of the entry-point of the type-safe PDF functionality.
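//!
//! A minimal usage sketch (assumes the `cache` feature for `FileOptions::cached`;
//! `example.pdf` is a placeholder path):
//!
//! ```no_run
//! use pdf::file::FileOptions;
//!
//! fn main() -> Result<(), pdf::PdfError> {
//!     // `open` reads the file, locates the xref table and parses the trailer.
//!     let file = FileOptions::cached().open("example.pdf")?;
//!     // Pages are resolved on demand through the xref table.
//!     for page in file.pages() {
//!         let _page = page?;
//!     }
//!     Ok(())
//! }
//! ```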
use std::marker::PhantomData; use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::path::Path; use std::io::Write; use crate as pdf; use crate::error::*; use crate::object::*; use crate::primitive::{Primitive, Dictionary, PdfString}; use crate::backend::Backend; use crate::any::*; use crate::parser::{Lexer, parse_with_lexer}; use crate::parser::{parse_indirect_object, parse, ParseFlags}; use crate::xref::{XRef, XRefTable, XRefInfo}; use crate::crypt::Decoder; use crate::crypt::CryptDict; use crate::enc::{StreamFilter, decode}; use std::ops::Range; use datasize::DataSize; #[cfg(feature="cache")] pub use globalcache::{ValueSize, sync::SyncCache}; #[must_use] pub struct PromisedRef { inner: PlainRef, _marker: PhantomData } impl PromisedRef { pub fn get_inner(&self) -> PlainRef { self.inner } pub fn get_ref(&self) -> Ref { Ref::new(self.inner) } } pub trait Cache { fn get_or_compute(&self, key: PlainRef, compute: impl FnOnce() -> T) -> T; } pub struct NoCache; impl Cache for NoCache { fn get_or_compute(&self, _key: PlainRef, compute: impl FnOnce() -> T) -> T { compute() } } #[cfg(feature="cache")] impl Cache for Arc> { fn get_or_compute(&self, key: PlainRef, compute: impl FnOnce() -> T) -> T { self.get(key, compute) } } pub trait Log { fn load_object(&self, _r: PlainRef) {} fn log_get(&self, _r: PlainRef) {} } pub struct NoLog; impl Log for NoLog {} pub struct Storage { // objects identical to those in the backend cache: OC, stream_cache: SC, // objects that differ from the backend changes: HashMap, refs: XRefTable, decoder: Option, options: ParseOptions, backend: B, // Position of the PDF header in the file. start_offset: usize, log: L } impl Storage, OC, SC, L> where OC: Cache>>, SC: Cache, Arc>>, L: Log, { pub fn empty(object_cache: OC, stream_cache: SC, log: L) -> Self { Storage { cache: object_cache, stream_cache, changes: HashMap::new(), refs: XRefTable::new(0), decoder: None, options: ParseOptions::strict(), backend: Vec::from(&b"%PDF-1.7\n"[..]), start_offset: 0, log } } } impl Storage where B: Backend, OC: Cache>>, SC: Cache, Arc>>, L: Log, { pub fn into_inner(self) -> B { self.backend } pub fn resolver(&self) -> impl Resolve + '_ { StorageResolver::new(self) } pub fn with_cache(backend: B, options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result { Ok(Storage { start_offset: backend.locate_start_offset()?, backend, refs: XRefTable::new(0), cache: object_cache, stream_cache, changes: HashMap::new(), decoder: None, options, log }) } fn decode(&self, id: PlainRef, range: Range, filters: &[StreamFilter]) -> Result> { let data = self.backend.read(range)?; let mut data = Vec::from(data); if let Some(ref decoder) = self.decoder { data = Vec::from(t!(decoder.decrypt(id, &mut data))); } for filter in filters { data = t!(decode(&data, filter), filter); } Ok(data.into()) } pub fn load_storage_and_trailer(&mut self) -> Result { self.load_storage_and_trailer_password(b"") } pub fn load_storage_and_trailer_password(&mut self, password: &[u8]) -> Result { let resolver = StorageResolver::new(self); let (refs, trailer) = t!(self.backend.read_xref_table_and_trailer(self.start_offset, &resolver)); self.refs = refs; if let Some(crypt) = trailer.get("Encrypt") { let key = trailer .get("ID") .ok_or(PdfError::MissingEntry { typ: "Trailer", field: "ID".into(), })? .as_array()?[0] .as_string()? 
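// Per the PDF spec, the first string in the trailer's /ID array feeds into
// the decryption key derivation for the standard security handler.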
.as_bytes(); let resolver = StorageResolver::new(self); let dict = CryptDict::from_primitive(crypt.clone(), &resolver)?; self.decoder = Some(t!(Decoder::from_password(&dict, key, password))); if let Primitive::Reference(reference) = crypt { self.decoder.as_mut().unwrap().encrypt_indirect_object = Some(*reference); } if let Some(Primitive::Reference(catalog_ref)) = trailer.get("Root") { let resolver = StorageResolver::new(self); let catalog = t!(t!(resolver.resolve(*catalog_ref)).resolve(&resolver)?.into_dictionary()); if let Some(Primitive::Reference(metadata_ref)) = catalog.get("Metadata") { self.decoder.as_mut().unwrap().metadata_indirect_object = Some(*metadata_ref); } } } Ok(trailer) } pub fn scan(&self) -> impl Iterator> + '_ { let xref_offset = self.backend.locate_xref_offset().unwrap(); let slice = self.backend.read(self.start_offset .. xref_offset).unwrap(); let mut lexer = Lexer::with_offset(slice, 0); fn skip_xref(lexer: &mut Lexer) -> Result<()> { while lexer.next()? != "trailer" { } Ok(()) } let resolver = StorageResolver::new(self); std::iter::from_fn(move || { loop { let pos = lexer.get_pos(); match parse_indirect_object(&mut lexer, &resolver, self.decoder.as_ref(), ParseFlags::all()) { Ok((r, p)) => return Some(Ok(ScanItem::Object(r, p))), Err(e) if e.is_eof() => return None, Err(e) => { lexer.set_pos(pos); if let Ok(s) = lexer.next() { debug!("next: {:?}", String::from_utf8_lossy(s.as_slice())); match &*s { b"xref" => { if let Err(e) = skip_xref(&mut lexer) { return Some(Err(e)); } if let Ok(trailer) = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::DICT).and_then(|p| p.into_dictionary()) { return Some(Ok(ScanItem::Trailer(trailer))); } } b"startxref" if lexer.next().is_ok() => { continue; } _ => {} } } return Some(Err(e)); } } } }) } fn resolve_ref(&self, r: PlainRef, flags: ParseFlags, resolve: &impl Resolve) -> Result { match self.changes.get(&r.id) { Some(p) => Ok((*p).clone()), None => match t!(self.refs.get(r.id)) { XRef::Raw {pos, ..} => { let mut lexer = Lexer::with_offset(t!(self.backend.read(self.start_offset + pos ..)), self.start_offset + pos); let p = t!(parse_indirect_object(&mut lexer, resolve, self.decoder.as_ref(), flags)).1; Ok(p) } XRef::Stream {stream_id, index} => { if !flags.contains(ParseFlags::STREAM) { return Err(PdfError::PrimitiveNotAllowed { found: ParseFlags::STREAM, allowed: flags }); } // use get to cache the object stream let obj_stream = resolve.get::(Ref::from_id(stream_id))?; let (data, range) = t!(obj_stream.get_object_slice(index, resolve)); let slice = data.get(range.clone()).ok_or_else(|| other!("invalid range {:?}, but only have {} bytes", range, data.len()))?; parse(slice, resolve, flags) } XRef::Free {..} => err!(PdfError::FreeObject {obj_nr: r.id}), XRef::Promised => unimplemented!(), XRef::Invalid => err!(PdfError::NullRef {obj_nr: r.id}), } } } } pub enum ScanItem { Object(PlainRef, Primitive), Trailer(Dictionary) } struct StorageResolver<'a, B, OC, SC, L> { storage: &'a Storage, chain: Mutex>, } impl<'a, B, OC, SC, L> StorageResolver<'a, B, OC, SC, L> { pub fn new(storage: &'a Storage) -> Self { StorageResolver { storage, chain: Mutex::new(vec![]) } } } struct Defer(F); impl Drop for Defer { fn drop(&mut self) { (self.0)(); } } impl<'a, B, OC, SC, L> Resolve for StorageResolver<'a, B, OC, SC, L> where B: Backend, OC: Cache>>, SC: Cache, Arc>>, L: Log { fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, _depth: usize) -> Result { let storage = self.storage; storage.log.load_object(r); storage.resolve_ref(r, flags, self) 
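// Note: resolve_ref checks the in-memory `changes` map before falling back
// to the xref table, so uncommitted updates win over the backend.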
} fn get(&self, r: Ref) -> Result> { let key = r.get_inner(); self.storage.log.log_get(key); { debug!("get {key:?} as {}", std::any::type_name::()); let mut chain = self.chain.lock().unwrap(); if chain.contains(&key) { bail!("Recursive reference"); } chain.push(key); } let _defer = Defer(|| { let mut chain = self.chain.lock().unwrap(); assert_eq!(chain.pop(), Some(key)); }); let res = self.storage.cache.get_or_compute(key, || { match self.resolve(key).and_then(|p| T::from_primitive(p, self)) { Ok(obj) => Ok(AnySync::new(Shared::new(obj))), Err(e) => { let p = self.resolve(key); warn!("failed to decode {p:?} as {}", std::any::type_name::()); Err(Arc::new(e)) } } }); match res { Ok(any) => Ok(RcRef::new(key, any.downcast()?)), Err(e) => Err(PdfError::Shared { source: e.clone()}), } } fn options(&self) -> &ParseOptions { &self.storage.options } fn stream_data(&self, id: PlainRef, range: Range) -> Result> { self.storage.decode(id, range, &[]) } fn get_data_or_decode(&self, id: PlainRef, range: Range, filters: &[StreamFilter]) -> Result> { self.storage.stream_cache.get_or_compute(id, || self.storage.decode(id, range, filters).map_err(Arc::new)) .map_err(|e| e.into()) } } impl Updater for Storage where B: Backend, OC: Cache>>, SC: Cache, Arc>>, L: Log, { fn create(&mut self, obj: T) -> Result> { let id = self.refs.len() as u64; self.refs.push(XRef::Promised); let primitive = obj.to_primitive(self)?; self.changes.insert(id, primitive); let rc = Shared::new(obj); let r = PlainRef { id, gen: 0 }; Ok(RcRef::new(r, rc)) } fn update(&mut self, old: PlainRef, obj: T) -> Result> { let r = match self.refs.get(old.id)? { XRef::Free { .. } => panic!(), XRef::Raw { gen_nr, .. } => PlainRef { id: old.id, gen: gen_nr + 1 }, XRef::Stream { .. } => return self.create(obj), XRef::Promised => PlainRef { id: old.id, gen: 0 }, XRef::Invalid => panic!() }; let primitive = obj.to_primitive(self)?; self.changes.insert(old.id, primitive); let rc = Shared::new(obj); Ok(RcRef::new(r, rc)) } fn promise(&mut self) -> PromisedRef { let id = self.refs.len() as u64; self.refs.push(XRef::Promised); PromisedRef { inner: PlainRef { id, gen: 0 }, _marker: PhantomData } } fn fulfill(&mut self, promise: PromisedRef, obj: T) -> Result> { self.update(promise.inner, obj) } } impl Storage, OC, SC, L> where OC: Cache>>, SC: Cache, Arc>>, L: Log { pub fn save(&mut self, trailer: &mut Trailer) -> Result<&[u8]> { // writing the trailer generates another id for the info dictionary trailer.size = (self.refs.len() + 2) as _; let trailer = trailer.to_dict(self)?; let xref_promise = self.promise::>(); let mut changes: Vec<_> = self.changes.iter().collect(); changes.sort_unstable_by_key(|&(id, _)| id); for (&id, primitive) in changes.iter() { let pos = self.backend.len(); self.refs.set(id, XRef::Raw { pos: pos as _, gen_nr: 0 }); writeln!(self.backend, "{} {} obj", id, 0)?; primitive.serialize(&mut self.backend)?; writeln!(self.backend, "endobj")?; } let xref_pos = self.backend.len(); self.refs.set(xref_promise.get_inner().id, XRef::Raw { pos: xref_pos, gen_nr: 0 }); // only write up to the xref stream obj id let stream = self.refs.write_stream(xref_promise.get_inner().id as usize + 1)?; writeln!(self.backend, "{} {} obj", xref_promise.get_inner().id, 0)?; let mut xref_and_trailer = stream.to_pdf_stream(&mut NoUpdate)?; for (k, v) in trailer.into_iter() { xref_and_trailer.info.insert(k, v); } xref_and_trailer.serialize(&mut self.backend)?; writeln!(self.backend, "endobj")?; let _ = self.fulfill(xref_promise, stream)?; write!(self.backend, 
"\nstartxref\n{}\n%%EOF", xref_pos).unwrap(); Ok(&self.backend) } } #[cfg(feature="cache")] pub type ObjectCache = Arc>>>; #[cfg(feature="cache")] pub type StreamCache = Arc, Arc>>>; #[cfg(feature="cache")] pub type CachedFile = File; pub struct File { storage: Storage, pub trailer: Trailer, } impl Updater for File where B: Backend, OC: Cache>>, SC: Cache, Arc>>, L: Log, { fn create(&mut self, obj: T) -> Result> { self.storage.create(obj) } fn update(&mut self, old: PlainRef, obj: T) -> Result> { self.storage.update(old, obj) } fn promise(&mut self) -> PromisedRef { self.storage.promise() } fn fulfill(&mut self, promise: PromisedRef, obj: T) -> Result> { self.storage.fulfill(promise, obj) } } impl File, OC, SC, L> where OC: Cache>>, SC: Cache, Arc>>, L: Log, { pub fn save_to(&mut self, path: impl AsRef) -> Result<()> { std::fs::write(path, self.storage.save(&mut self.trailer)?)?; Ok(()) } } pub struct FileOptions<'a, OC, SC, L> { oc: OC, sc: SC, log: L, password: &'a [u8], parse_options: ParseOptions, } impl FileOptions<'static, NoCache, NoCache, NoLog> { pub fn uncached() -> Self { FileOptions { oc: NoCache, sc: NoCache, password: b"", parse_options: ParseOptions::strict(), log: NoLog, } } } #[cfg(feature="cache")] impl FileOptions<'static, ObjectCache, StreamCache, NoLog> { pub fn cached() -> Self { FileOptions { oc: SyncCache::new(), sc: SyncCache::new(), password: b"", parse_options: ParseOptions::strict(), log: NoLog } } } impl<'a, OC, SC, L> FileOptions<'a, OC, SC, L> where OC: Cache>>, SC: Cache, Arc>>, L: Log, { pub fn password(self, password: &'a [u8]) -> FileOptions<'a, OC, SC, L> { FileOptions { password, .. self } } pub fn cache(self, oc: O, sc: S) -> FileOptions<'a, O, S, L> { let FileOptions { oc: _, sc: _, password, parse_options, log } = self; FileOptions { oc, sc, password, parse_options, log, } } pub fn log(self, log: Log) -> FileOptions<'a, OC, SC, Log> { let FileOptions { oc, sc, password, parse_options, .. } = self; FileOptions { oc, sc, password, parse_options, log, } } pub fn parse_options(self, parse_options: ParseOptions) -> Self { FileOptions { parse_options, .. self } } /// open a file pub fn open(self, path: impl AsRef) -> Result, OC, SC, L>> { let data = std::fs::read(path)?; self.load(data) } pub fn storage(self) -> Storage, OC, SC, L> { let FileOptions { oc, sc, log, .. } = self; Storage::empty(oc, sc, log) } /// load data from the given backend pub fn load(self, backend: B) -> Result> { let FileOptions { oc, sc, password, parse_options, log } = self; File::load_data(backend, password, parse_options, oc, sc, log) } } impl File where B: Backend, OC: Cache>>, SC: Cache, Arc>>, L: Log, { fn load_data(backend: B, password: &[u8], options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result { let mut storage = Storage::with_cache(backend, options, object_cache, stream_cache, log)?; let trailer = storage.load_storage_and_trailer_password(password)?; let resolver = StorageResolver::new(&storage); let trailer = t!(Trailer::from_primitive( Primitive::Dictionary(trailer), &resolver, )); Ok(File { storage, trailer }) } pub fn new(storage: Storage, trailer: Trailer) -> Self { File { storage, trailer } } pub fn resolver(&self) -> impl Resolve + '_ { StorageResolver::new(&self.storage) } pub fn get_root(&self) -> &Catalog { &self.trailer.root } pub fn pages(&self) -> impl Iterator> + '_ { (0 .. 
self.num_pages()).map(move |n| self.get_page(n)) } pub fn num_pages(&self) -> u32 { self.trailer.root.pages.count } pub fn get_page(&self, n: u32) -> Result { let resolver = StorageResolver::new(&self.storage); self.trailer.root.pages.page(&resolver, n) } pub fn update_catalog(&mut self, catalog: Catalog) -> Result<()> { self.trailer.root = self.create(catalog)?; Ok(()) } pub fn set_options(&mut self, options: ParseOptions) { self.storage.options = options; } pub fn scan(&self) -> impl Iterator> + '_ { self.storage.scan() } pub fn log(&self) -> &L { &self.storage.log } } #[derive(Object, ObjectWrite, DataSize)] pub struct Trailer { #[pdf(key = "Size")] pub size: i32, #[pdf(key = "Prev")] pub prev_trailer_pos: Option, #[pdf(key = "Root")] pub root: RcRef, #[pdf(key = "Encrypt")] pub encrypt_dict: Option>, #[pdf(key = "Info", indirect)] pub info_dict: Option, #[pdf(key = "ID")] pub id: Vec, } /* pub struct XRefStream { pub data: Vec, pub info: XRefInfo, } impl Object for XRefStream { fn serialize(&self, _out: &mut W) -> io::Result<()> { unimplemented!(); } fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let stream = p.to_stream(resolve)?; let info = XRefInfo::from_primitive(Primitive::Dictionary (stream.info), resolve)?; let data = stream.data.clone(); Ok(XRefStream { data: data, info: info, }) } } */ pdf-0.9.0/src/font.rs000064400000000000000000000610271046102023000125140ustar 00000000000000use crate as pdf; use crate::object::*; use crate::primitive::*; use crate::error::*; use crate::encoding::Encoding; use std::collections::HashMap; use std::fmt::Write; use crate::parser::{Lexer, parse_with_lexer, ParseFlags}; use std::convert::TryInto; use std::sync::Arc; use istring::SmallString; use datasize::DataSize; use itertools::Itertools; #[allow(non_upper_case_globals, dead_code)] mod flags { pub const FixedPitch: u32 = 1 << 0; pub const Serif: u32 = 1 << 1; pub const Symbolic: u32 = 1 << 2; pub const Script: u32 = 1 << 3; pub const Nonsymbolic: u32 = 1 << 5; pub const Italic: u32 = 1 << 6; pub const AllCap: u32 = 1 << 16; pub const SmallCap: u32 = 1 << 17; pub const ForceBold: u32 = 1 << 18; } #[derive(Object, ObjectWrite, Debug, Copy, Clone, DataSize, DeepClone)] pub enum FontType { Type0, Type1, MMType1, Type3, TrueType, CIDFontType0, //Type1 CIDFontType2, // TrueType } #[derive(Debug, DataSize, DeepClone)] pub struct Font { pub subtype: FontType, pub name: Option, pub data: FontData, pub encoding: Option, // FIXME: Should use RcRef pub to_unicode: Option>>, /// other keys not mapped in other places. May change over time without notice, and adding things probably will break things. 
So don't expect this to be part of the stable API pub _other: Dictionary } #[derive(Debug, DataSize, DeepClone)] pub enum FontData { Type1(TFont), Type0(Type0Font), TrueType(TFont), CIDFontType0(CIDFont), CIDFontType2(CIDFont), Other(Dictionary), } #[derive(Debug, DataSize, DeepClone)] pub enum CidToGidMap { Identity, Table(Vec) } impl Object for CidToGidMap { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { match p { Primitive::Name(name) if name == "Identity" => { Ok(CidToGidMap::Identity) } p @ Primitive::Stream(_) | p @ Primitive::Reference(_) => { let stream: Stream<()> = Stream::from_primitive(p, resolve)?; let data = stream.data(resolve)?; Ok(CidToGidMap::Table(data.chunks(2).map(|c| (c[0] as u16) << 8 | c[1] as u16).collect())) }, p => Err(PdfError::UnexpectedPrimitive { expected: "/Identity or Stream", found: p.get_debug_name() }) } } } impl ObjectWrite for CidToGidMap { fn to_primitive(&self, update: &mut impl Updater) -> Result { match self { CidToGidMap::Identity => Ok(Name::from("Identity").into()), CidToGidMap::Table(ref table) => { let mut data = Vec::with_capacity(table.len() * 2); data.extend(table.iter().flat_map(|&v| <[u8; 2]>::into_iter(v.to_be_bytes()))); Stream::new((), data).to_primitive(update) } } } } impl Object for Font { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let mut dict = p.resolve(resolve)?.into_dictionary()?; let subtype = t!(FontType::from_primitive(dict.require("Font", "Subtype")?, resolve)); // BaseFont is required for all FontTypes except Type3 dict.expect("Font", "Type", "Font", true)?; let base_font_primitive = dict.get("BaseFont"); let base_font = match (base_font_primitive, subtype) { (Some(name), _) => Some(t!(t!(name.clone().resolve(resolve)).into_name(), name)), (None, FontType::Type3) => None, (_, _) => return Err(PdfError::MissingEntry { typ: "Font", field: "BaseFont".to_string() }) }; let encoding = dict.remove("Encoding").map(|p| Object::from_primitive(p, resolve)).transpose()?; let to_unicode = match dict.remove("ToUnicode") { Some(p) => Some(Object::from_primitive(p, resolve)?), None => None }; let _other = dict.clone(); let data = match subtype { FontType::Type0 => FontData::Type0(Type0Font::from_dict(dict, resolve)?), FontType::Type1 => FontData::Type1(TFont::from_dict(dict, resolve)?), FontType::TrueType => FontData::TrueType(TFont::from_dict(dict, resolve)?), FontType::CIDFontType0 => FontData::CIDFontType0(CIDFont::from_dict(dict, resolve)?), FontType::CIDFontType2 => FontData::CIDFontType2(CIDFont::from_dict(dict, resolve)?), _ => FontData::Other(dict) }; Ok(Font { subtype, name: base_font, data, encoding, to_unicode, _other }) } } impl ObjectWrite for Font { fn to_primitive(&self, update: &mut impl Updater) -> Result { let mut dict = match self.data { FontData::CIDFontType0(ref d) | FontData::CIDFontType2(ref d) => d.to_dict(update)?, FontData::TrueType(ref d) | FontData::Type1(ref d) => d.to_dict(update)?, FontData::Type0(ref d) => d.to_dict(update)?, FontData::Other(ref dict) => dict.clone(), }; if let Some(ref to_unicode) = self.to_unicode { dict.insert("ToUnicode", to_unicode.to_primitive(update)?); } if let Some(ref encoding) = self.encoding { dict.insert("Encoding", encoding.to_primitive(update)?); } if let Some(ref name) = self.name { dict.insert("BaseFont", name.to_primitive(update)?); } let subtype = match self.data { FontData::Type0(_) => FontType::Type0, FontData::Type1(_) => FontType::Type1, FontData::TrueType(_) => FontType::TrueType, FontData::CIDFontType0(_) => 
FontType::CIDFontType0, FontData::CIDFontType2(_) => FontType::CIDFontType2, FontData::Other(_) => bail!("unimplemented") }; dict.insert("Subtype", subtype.to_primitive(update)?); dict.insert("Type", Name::from("Font")); Ok(Primitive::Dictionary(dict)) } } #[derive(Debug)] pub struct Widths { values: Vec, default: f32, first_char: usize } impl Widths { pub fn get(&self, cid: usize) -> f32 { if cid < self.first_char { self.default } else { self.values.get(cid - self.first_char).cloned().unwrap_or(self.default) } } fn new(default: f32) -> Widths { Widths { default, values: Vec::new(), first_char: 0 } } fn ensure_cid(&mut self, cid: usize) { if let Some(offset) = cid.checked_sub(self.first_char) { // cid may be < first_char // reserve difference of offset to capacity // if enough capacity to cover offset, saturates to zero, and reserve will do nothing self.values.reserve(offset.saturating_sub(self.values.capacity())); } } #[allow(clippy::float_cmp)] // TODO fn set(&mut self, cid: usize, width: f32) { self._set(cid, width); debug_assert_eq!(self.get(cid), width); } fn _set(&mut self, cid: usize, width: f32) { use std::iter::repeat; if self.values.is_empty() { self.first_char = cid; self.values.push(width); return; } if cid == self.first_char + self.values.len() { self.values.push(width); return; } if cid < self.first_char { self.values.splice(0 .. 0, repeat(self.default).take(self.first_char - cid)); self.first_char = cid; self.values[0] = width; return; } if cid > self.values.len() + self.first_char { self.ensure_cid(cid); self.values.extend(repeat(self.default).take(cid - self.first_char - self.values.len())); self.values.push(width); return; } self.values[cid - self.first_char] = width; } } impl Font { pub fn embedded_data(&self, resolve: &impl Resolve) -> Option>> { match self.data { FontData::Type0(ref t) => t.descendant_fonts.get(0).and_then(|f| f.embedded_data(resolve)), FontData::CIDFontType0(ref c) | FontData::CIDFontType2(ref c) => c.font_descriptor.data(resolve), FontData::Type1(ref t) | FontData::TrueType(ref t) => t.font_descriptor.as_ref().and_then(|d| d.data(resolve)), _ => None } } pub fn is_cid(&self) -> bool { matches!(self.data, FontData::Type0(_) | FontData::CIDFontType0(_) | FontData::CIDFontType2(_)) } pub fn cid_to_gid_map(&self) -> Option<&CidToGidMap> { match self.data { FontData::Type0(ref inner) => inner.descendant_fonts.get(0).and_then(|f| f.cid_to_gid_map()), FontData::CIDFontType0(ref f) | FontData::CIDFontType2(ref f) => f.cid_to_gid_map.as_ref(), _ => None } } pub fn encoding(&self) -> Option<&Encoding> { self.encoding.as_ref() } pub fn info(&self) -> Option<&TFont> { match self.data { FontData::Type1(ref info) => Some(info), FontData::TrueType(ref info) => Some(info), _ => None } } pub fn widths(&self, resolve: &impl Resolve) -> Result> { match self.data { FontData::Type0(ref t0) => t0.descendant_fonts[0].widths(resolve), FontData::Type1(ref info) | FontData::TrueType(ref info) => { match *info { TFont { first_char: Some(first), ref widths, .. 
} => Ok(Some(Widths { default: 0.0, first_char: first as usize, values: widths.as_ref().cloned().unwrap_or_default() })), _ => Ok(None) } }, FontData::CIDFontType0(ref cid) | FontData::CIDFontType2(ref cid) => { let mut widths = Widths::new(cid.default_width); let mut iter = cid.widths.iter(); while let Some(p) = iter.next() { let c1 = p.as_usize()?; match iter.next() { Some(Primitive::Array(array)) => { widths.ensure_cid(c1 + array.len() - 1); for (i, w) in array.iter().enumerate() { widths.set(c1 + i, w.as_number()?); } }, Some(&Primitive::Reference(r)) => { match resolve.resolve(r)? { Primitive::Array(array) => { widths.ensure_cid(c1 + array.len() - 1); for (i, w) in array.iter().enumerate() { widths.set(c1 + i, w.as_number()?); } } p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) }) } } Some(&Primitive::Integer(c2)) => { let w = try_opt!(iter.next()).as_number()?; for c in c1 ..= (c2 as usize) { widths.set(c, w); } }, p => return Err(PdfError::Other { msg: format!("unexpected primitive in W array: {:?}", p) }) } } Ok(Some(widths)) }, _ => Ok(None) } } pub fn to_unicode(&self, resolve: &impl Resolve) -> Option> { self.to_unicode.as_ref().map(|s| (**s).data(resolve).and_then(|d| parse_cmap(&d))) } } #[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)] pub struct TFont { #[pdf(key="BaseFont")] pub base_font: Option, /// per spec required, but some files lack it. #[pdf(key="FirstChar")] pub first_char: Option, /// same #[pdf(key="LastChar")] pub last_char: Option, #[pdf(key="Widths")] pub widths: Option>, #[pdf(key="FontDescriptor")] pub font_descriptor: Option } #[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)] pub struct Type0Font { #[pdf(key="DescendantFonts")] pub descendant_fonts: Vec>, #[pdf(key="ToUnicode")] pub to_unicode: Option>>, } #[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)] pub struct CIDFont { #[pdf(key="CIDSystemInfo")] pub system_info: Dictionary, #[pdf(key="FontDescriptor")] pub font_descriptor: FontDescriptor, #[pdf(key="DW", default="1000.")] pub default_width: f32, #[pdf(key="W")] pub widths: Vec, #[pdf(key="CIDToGIDMap")] pub cid_to_gid_map: Option, #[pdf(other)] pub _other: Dictionary } #[derive(Object, ObjectWrite, Debug, DataSize, DeepClone)] pub struct FontDescriptor { #[pdf(key="FontName")] pub font_name: Name, #[pdf(key="FontFamily")] pub font_family: Option, #[pdf(key="FontStretch")] pub font_stretch: Option, #[pdf(key="FontWeight")] pub font_weight: Option, #[pdf(key="Flags")] pub flags: u32, #[pdf(key="FontBBox")] pub font_bbox: Rect, #[pdf(key="ItalicAngle")] pub italic_angle: f32, // required as per spec, but still missing in some cases #[pdf(key="Ascent")] pub ascent: Option, #[pdf(key="Descent")] pub descent: Option, #[pdf(key="Leading", default="0.")] pub leading: f32, #[pdf(key="CapHeight")] pub cap_height: Option, #[pdf(key="XHeight", default="0.")] pub xheight: f32, #[pdf(key="StemV", default="0.")] pub stem_v: f32, #[pdf(key="StemH", default="0.")] pub stem_h: f32, #[pdf(key="AvgWidth", default="0.")] pub avg_width: f32, #[pdf(key="MaxWidth", default="0.")] pub max_width: f32, #[pdf(key="MissingWidth", default="0.")] pub missing_width: f32, #[pdf(key="FontFile")] pub font_file: Option>>, #[pdf(key="FontFile2")] pub font_file2: Option>>, #[pdf(key="FontFile3")] pub font_file3: Option>>, #[pdf(key="CharSet")] pub char_set: Option } impl FontDescriptor { pub fn data(&self, resolve: &impl Resolve) -> Option>> { if let Some(ref s) = self.font_file { Some((**s).data(resolve)) } else if 
let Some(ref s) = self.font_file2 { Some((**s).data(resolve)) } else if let Some(ref s) = self.font_file3 { Some((**s).data(resolve)) } else { None } } } #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] #[pdf(key="Subtype")] pub enum FontTypeExt { Type1C, CIDFontType0C, OpenType } #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] pub struct FontStream3 { #[pdf(key="Subtype")] pub subtype: FontTypeExt } #[derive(Object, ObjectWrite, Debug, PartialEq, Eq, PartialOrd, Ord, Clone, DataSize, DeepClone)] pub enum FontStretch { UltraCondensed, ExtraCondensed, Condensed, SemiCondensed, Normal, SemiExpanded, Expanded, ExtraExpanded, UltraExpanded } #[derive(Clone, Debug, Default)] pub struct ToUnicodeMap { // todo: reduce allocations inner: HashMap<u16, SmallString> } impl ToUnicodeMap { pub fn new() -> Self { Self::default() } /// Create a new ToUnicodeMap from key/value pairs. /// /// subject to change pub fn create(iter: impl Iterator<Item = (u16, SmallString)>) -> Self { ToUnicodeMap { inner: iter.collect() } } pub fn get(&self, gid: u16) -> Option<&str> { self.inner.get(&gid).map(|s| s.as_str()) } pub fn insert(&mut self, gid: u16, unicode: SmallString) { self.inner.insert(gid, unicode); } pub fn iter(&self) -> impl Iterator<Item = (u16, &str)> { self.inner.iter().map(|(&gid, unicode)| (gid, unicode.as_str())) } pub fn len(&self) -> usize { self.inner.len() } pub fn is_empty(&self) -> bool { self.inner.is_empty() } } /// helper function to decode UTF-16-BE data /// takes a slice of u8 and returns an iterator over chars, or a decoding error pub fn utf16be_to_char( data: &[u8], ) -> impl Iterator<Item = std::result::Result<char, std::char::DecodeUtf16Error>> + '_ { char::decode_utf16(data.chunks(2).map(|w| u16::from_be_bytes([w[0], w[1]]))) } /// converts UTF16-BE to a string replacing illegal/unknown characters pub fn utf16be_to_string_lossy(data: &[u8]) -> String { utf16be_to_char(data) .map(|r| r.unwrap_or(std::char::REPLACEMENT_CHARACTER)) .collect() } /// converts UTF16-BE to a string, erroring out on illegal/unknown characters pub fn utf16be_to_string(data: &[u8]) -> pdf::error::Result<SmallString> { utf16be_to_char(data) .map(|r| r.map_err(|_| PdfError::Utf16Decode)) .collect() } fn parse_cid(s: &PdfString) -> Result<u16> { let b = s.as_bytes(); match b.len() { 2 => Ok(u16::from_be_bytes(b.try_into().unwrap())), 1 => Ok(b[0] as u16), _ => Err(PdfError::CidDecode), } } fn parse_cmap(data: &[u8]) -> Result<ToUnicodeMap> { let mut lexer = Lexer::new(data); let mut map = ToUnicodeMap::new(); while let Ok(substr) = lexer.next() { match substr.as_slice() { b"beginbfchar" => loop { let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING); if a.is_err() { break; } let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING); match (a, b) { (Ok(Primitive::String(cid_data)), Ok(Primitive::String(unicode_data))) => { let cid = parse_cid(&cid_data)?; let bytes = unicode_data.as_bytes(); match utf16be_to_string(bytes) { Ok(unicode) => map.insert(cid, unicode), Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"), } } _ => break, } }, b"beginbfrange" => loop { let a = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING); if a.is_err() { break; } let b = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING); let c = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::STRING | ParseFlags::ARRAY); match (a, b, c) { ( Ok(Primitive::String(cid_start_data)), Ok(Primitive::String(cid_end_data)), Ok(Primitive::String(unicode_data)), ) if unicode_data.data.len() > 0 => { let cid_start = parse_cid(&cid_start_data)?; let cid_end = parse_cid(&cid_end_data)?; let mut unicode_data =
unicode_data.into_bytes(); for cid in cid_start..=cid_end { match utf16be_to_string(&unicode_data) { Ok(unicode) => map.insert(cid, unicode), Err(_) => warn!("invalid unicode for cid {cid} {unicode_data:?}"), } let last = unicode_data.last_mut().unwrap(); if *last < 255 { *last += 1; } else { break; } } } ( Ok(Primitive::String(cid_start_data)), Ok(Primitive::String(cid_end_data)), Ok(Primitive::Array(unicode_data_arr)), ) => { let cid_start = parse_cid(&cid_start_data)?; let cid_end = parse_cid(&cid_end_data)?; for (cid, unicode_data) in (cid_start..=cid_end).zip(unicode_data_arr) { let bytes = unicode_data.as_string()?.as_bytes(); match utf16be_to_string(bytes) { Ok(unicode) => map.insert(cid, unicode), Err(_) => warn!("invalid unicode for cid {cid} {bytes:?}"), } } } _ => break, } }, b"endcmap" => break, _ => {} } } Ok(map) } fn write_cid(w: &mut String, cid: u16) { write!(w, "<{:04X}>", cid).unwrap(); } fn write_unicode(out: &mut String, unicode: &str) { let mut buf = [0; 2]; write!(out, "<").unwrap(); for c in unicode.chars() { let slice = c.encode_utf16(&mut buf); for &word in slice.iter() { write!(out, "{:04X}", word).unwrap(); } } write!(out, ">").unwrap(); } pub fn write_cmap(map: &ToUnicodeMap) -> String { let mut buf = String::new(); let mut list: Vec<(u16, &str)> = map.inner.iter().map(|(&cid, s)| (cid, s.as_str())).collect(); list.sort(); let mut remaining = &list[..]; let blocks = std::iter::from_fn(move || { if remaining.len() == 0 { return None; } let first_cid = remaining[0].0; let seq_len = remaining.iter().enumerate().take_while(|&(i, &(cid, _))| cid == first_cid + i as u16).count(); let (block, tail) = remaining.split_at(seq_len); remaining = tail; Some(block) }); for (single, group) in &blocks.group_by(|b| b.len() == 1) { if single { writeln!(buf, "beginbfchar").unwrap(); for block in group { for &(cid, uni) in block { write_cid(&mut buf, cid); write!(buf, " ").unwrap(); write_unicode(&mut buf, uni); writeln!(buf).unwrap(); } } writeln!(buf, "endbfchar").unwrap(); } else { writeln!(buf, "beginbfrange").unwrap(); for block in group { write_cid(&mut buf, block[0].0); write!(buf, " ").unwrap(); write_cid(&mut buf, block.last().unwrap().0); write!(buf, " [").unwrap(); for (i, &(_cid, u)) in block.iter().enumerate() { if i > 0 { write!(buf, ", ").unwrap(); } write_unicode(&mut buf, u); } writeln!(buf, "]").unwrap(); } writeln!(buf, "endbfrange").unwrap(); } } buf } #[cfg(test)] mod tests { use crate::font::{utf16be_to_string, utf16be_to_char, utf16be_to_string_lossy}; #[test] fn utf16be_to_string_quick() { let v = vec![0x20, 0x09]; let s = utf16be_to_string(&v); assert_eq!(s.unwrap(), "\u{2009}"); assert!(!v.is_empty()); } #[test] fn test_to_char() { // 𝄞music let v = [ 0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75, 0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00, 0x63, 0xD8, 0x34, ]; assert_eq!( utf16be_to_char(&v) .map(|r| r.map_err(|e| e.unpaired_surrogate())) .collect::>(), vec![ Ok('𝄞'), Ok('m'), Ok('u'), Ok('s'), Err(0xDD1E), Ok('i'), Ok('c'), Err(0xD834) ] ); let mut lossy = String::from("𝄞mus"); lossy.push(std::char::REPLACEMENT_CHARACTER); lossy.push('i'); lossy.push('c'); lossy.push(std::char::REPLACEMENT_CHARACTER); let r = utf16be_to_string(&v); if let Err(r) = r { // FIXME: compare against PdfError::Utf16Decode variant assert_eq!(r.to_string(), "UTF16 decode error"); } assert_eq!(utf16be_to_string(&v[..8]).unwrap(), String::from("𝄞mu")); assert_eq!(utf16be_to_string_lossy(&v), lossy); } } pdf-0.9.0/src/lib.rs000064400000000000000000000012231046102023000123040ustar 
00000000000000#![allow(non_camel_case_types)] /* TODO temporary becaues of pdf_derive */ #![allow(unused_doc_comments)] // /* TODO temporary because of err.rs */ #![allow(clippy::len_zero, clippy::should_implement_trait, clippy::manual_map, clippy::from_over_into)] #[macro_use] extern crate pdf_derive; #[macro_use] extern crate snafu; #[macro_use] extern crate log; #[macro_use] pub mod error; pub mod object; pub mod xref; pub mod primitive; pub mod file; pub mod backend; pub mod content; pub mod parser; pub mod font; pub mod any; pub mod encoding; pub mod build; // mod content; pub mod enc; pub mod crypt; // pub use content::*; pub use crate::error::PdfError; pdf-0.9.0/src/macros.rs000064400000000000000000000022511046102023000130240ustar 00000000000000macro_rules! write_entry { ($out:expr, $key:tt, $val:expr) => { { $out.write(b" ")?; $key.serialize($out)?; $out.write(b" ")?; $val.serialize($out)?; $out.write(b"\n")?; } } } macro_rules! write_entrys { ($out:expr, $key:tt << $val:expr $(,)*) => { write_entry!($out, $key, $val); }; ($out:expr, $key:tt << $val:expr, $($rest:tt)*) => { { write_entry!($out, $key, $val); write_entrys!($out, $($rest)*); } }; ($out:expr, $key:tt ? << $val:expr $(,)*) => { match &$val { &Some(ref v) => write_entry!($out, $key, v), &None => {} } }; ($out:expr, $key:tt ? << $val:expr, $($rest:tt)*) => { { match &$val { &Some(ref v) => write_entry!($out, $key, v), &None => {} } write_entrys!($out, $($rest)*); } } } macro_rules! write_dict { ($out:expr, $($rest:tt)*) => { { write!($out, "<<\n")?; write_entrys!($out, $($rest)*); write!($out, ">>")?; } }; } pdf-0.9.0/src/object/color.rs000064400000000000000000000156461046102023000141400ustar 00000000000000use datasize::DataSize; use crate as pdf; use crate::object::*; use crate::error::*; #[derive(Object, Debug, DataSize, DeepClone, ObjectWrite)] pub struct IccInfo { #[pdf(key="N")] pub components: u32, #[pdf(key="Alternate")] pub alternate: Option>, #[pdf(key="Range")] pub range: Option>, #[pdf(key="Metadata")] pub metadata: Option>, } #[derive(Debug, Clone, DeepClone)] pub enum ColorSpace { DeviceGray, DeviceRGB, DeviceCMYK, DeviceN { names: Vec, alt: Box, tint: Function, attr: Option }, CalGray(Dictionary), CalRGB(Dictionary), CalCMYK(Dictionary), Indexed(Box, u8, Arc<[u8]>), Separation(Name, Box, Function), Icc(RcRef>), Pattern, Named(Name), Other(Vec) } impl DataSize for ColorSpace { const IS_DYNAMIC: bool = true; const STATIC_HEAP_SIZE: usize = 0; #[inline] fn estimate_heap_size(&self) -> usize { match *self { ColorSpace::DeviceGray | ColorSpace::DeviceRGB | ColorSpace::DeviceCMYK => 0, ColorSpace::DeviceN { ref names, ref alt, ref tint, ref attr } => { names.estimate_heap_size() + alt.estimate_heap_size() + tint.estimate_heap_size() + attr.estimate_heap_size() } ColorSpace::CalGray(ref d) | ColorSpace::CalRGB(ref d) | ColorSpace::CalCMYK(ref d) => { d.estimate_heap_size() } ColorSpace::Indexed(ref cs, _, ref data) => { cs.estimate_heap_size() + data.estimate_heap_size() } ColorSpace::Separation(ref name, ref cs, ref f) => { name.estimate_heap_size() + cs.estimate_heap_size() + f.estimate_heap_size() } ColorSpace::Icc(ref s) => s.estimate_heap_size(), ColorSpace::Pattern => 0, ColorSpace::Other(ref v) => v.estimate_heap_size(), ColorSpace::Named(ref n) => n.estimate_heap_size() } } } fn get_index(arr: &[Primitive], idx: usize) -> Result<&Primitive> { arr.get(idx).ok_or(PdfError::Bounds { index: idx, len: arr.len() }) } impl Object for ColorSpace { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { 
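// Color spaces can nest (e.g. an /Indexed base can itself be a /Separation),
// so parsing is depth-limited to protect against reference cycles.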
ColorSpace::from_primitive_depth(p, resolve, 5) } } impl ColorSpace { fn from_primitive_depth(p: Primitive, resolve: &impl Resolve, depth: usize) -> Result { let p = p.resolve(resolve)?; if let Ok(name) = p.as_name() { let cs = match name { "DeviceGray" => ColorSpace::DeviceGray, "DeviceRGB" => ColorSpace::DeviceRGB, "DeviceCMYK" => ColorSpace::DeviceCMYK, "Pattern" => ColorSpace::Pattern, name => ColorSpace::Named(name.into()), }; return Ok(cs); } let arr = t!(p.into_array()); let typ_p = t!(get_index(&arr, 0)).clone().resolve(resolve)?; let typ = t!(typ_p.as_name()); if depth == 0 { bail!("ColorSpace base recursion"); } match typ { "Indexed" => { let base = Box::new(t!(ColorSpace::from_primitive_depth(t!(get_index(&arr, 1)).clone(), resolve, depth-1))); let hival = t!(t!(get_index(&arr, 2)).as_u8()); let lookup = match t!(get_index(&arr, 3)) { &Primitive::Reference(r) => resolve.resolve(r)?, p => p.clone() }; let lookup = match lookup { Primitive::String(string) => { let data: Vec = string.into_bytes().into(); data.into() } Primitive::Stream(stream) => { let s: Stream::<()> = Stream::from_stream(stream, resolve)?; t!(s.data(resolve)) }, p => return Err(PdfError::UnexpectedPrimitive { expected: "String or Stream", found: p.get_debug_name() }) }; Ok(ColorSpace::Indexed(base, hival, lookup)) } "Separation" => { let name = t!(t!(get_index(&arr, 1)).clone().into_name()); let alternate = Box::new(t!(ColorSpace::from_primitive_depth(t!(get_index(&arr, 2)).clone(), resolve, depth-1))); let tint = t!(Function::from_primitive(t!(get_index(&arr, 3)).clone(), resolve)); Ok(ColorSpace::Separation(name, alternate, tint)) } "ICCBased" => { let s = t!(RcRef::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)); Ok(ColorSpace::Icc(s)) } "DeviceN" => { let names = t!(Object::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)); let alt = t!(Object::from_primitive(t!(get_index(&arr, 2)).clone(), resolve)); let tint = t!(Function::from_primitive(t!(get_index(&arr, 3)).clone(), resolve)); let attr = arr.get(4).map(|p| Dictionary::from_primitive(p.clone(), resolve)).transpose()?; Ok(ColorSpace::DeviceN { names, alt, tint, attr}) } "CalGray" => { let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?; Ok(ColorSpace::CalGray(dict)) } "CalRGB" => { let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?; Ok(ColorSpace::CalRGB(dict)) } "CalCMYK" => { let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?; Ok(ColorSpace::CalCMYK(dict)) } "Pattern" => { Ok(ColorSpace::Pattern) } _ => Ok(ColorSpace::Other(arr)) } } } impl ObjectWrite for ColorSpace { fn to_primitive(&self, update: &mut impl Updater) -> Result { match *self { ColorSpace::DeviceCMYK => Ok(Primitive::name("DeviceCMYK")), ColorSpace::DeviceRGB => Ok(Primitive::name("DeviceRGB")), ColorSpace::Indexed(ref base, hival, ref lookup) => { let base = base.to_primitive(update)?; let hival = Primitive::Integer(hival.into()); let lookup = if lookup.len() < 100 { PdfString::new((**lookup).into()).into() } else { Stream::new((), lookup.clone()).to_primitive(update)? 
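// Palettes shorter than 100 bytes are inlined as a PDF string; larger
// lookup tables are written out as a separate stream object.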
}; Ok(Primitive::Array(vec![Primitive::name("Indexed"), base, hival, lookup])) } ref p => { dbg!(p); unimplemented!() } } } } pdf-0.9.0/src/object/function.rs000064400000000000000000000377721046102023000146530ustar 00000000000000use crate as pdf; use crate::object::*; use crate::error::*; use itertools::izip; use datasize::DataSize; #[derive(Object, Debug, Clone, ObjectWrite)] struct RawFunction { #[pdf(key="FunctionType")] function_type: u32, #[pdf(key="Domain")] domain: Vec, #[pdf(key="Range")] range: Option>, #[pdf(key="Size")] size: Option>, #[pdf(key="BitsPerSample")] _bits_per_sample: Option, #[pdf(key="Order", default="1")] order: u32, #[pdf(key="Encode")] encode: Option>, #[pdf(key="Decode")] decode: Option>, #[pdf(other)] other: Dictionary } #[derive(Object, Debug, Clone)] struct Function2 { #[pdf(key="C0")] c0: Option>, #[pdf(key="C1")] c1: Option>, #[pdf(key="N")] exponent: f32, } #[derive(Debug, Clone, DataSize)] pub enum Function { Sampled(SampledFunction), Interpolated(Vec), Stiching, Calculator, PostScript { func: PsFunc, domain: Vec, range: Vec }, } impl Function { pub fn apply(&self, x: &[f32], out: &mut [f32]) -> Result<()> { match *self { Function::Sampled(ref func) => { func.apply(x, out) } Function::Interpolated(ref parts) => { if parts.len() != out.len() { bail!("incorrect output length: expected {}, found {}.", parts.len(), out.len()) } for (f, y) in parts.iter().zip(out) { *y = f.apply(x[0]); } Ok(()) } Function::PostScript { ref func, .. } => func.exec(x, out), _ => bail!("unimplemted function {:?}", self) } } pub fn input_dim(&self) -> usize { match *self { Function::PostScript { ref domain, .. } => domain.len() / 2, Function::Sampled(ref f) => f.input.len(), _ => panic!() } } pub fn output_dim(&self) -> usize { match *self { Function::PostScript { ref range, .. } => range.len() / 2, Function::Sampled(ref f) => f.output.len(), _ => panic!() } } } impl FromDict for Function { fn from_dict(dict: Dictionary, resolve: &impl Resolve) -> Result { use std::f32::INFINITY; let raw = RawFunction::from_dict(dict, resolve)?; match raw.function_type { 2 => { let f2 = Function2::from_dict(raw.other, resolve)?; let n_dim = match (raw.range.as_ref(), f2.c0.as_ref(), f2.c1.as_ref()) { (Some(range), _, _) => range.len() / 2, (_, Some(c0), _) => c0.len(), (_, _, Some(c1)) => c1.len(), _ => bail!("unknown dimensions") }; let mut parts = Vec::with_capacity(n_dim); let input_range = (raw.domain[0], raw.domain[1]); for dim in 0 .. 
n_dim { let output_range = ( raw.range.as_ref().and_then(|r| r.get(2*dim).cloned()).unwrap_or(-INFINITY), raw.range.as_ref().and_then(|r| r.get(2*dim+1).cloned()).unwrap_or(INFINITY) ); let c0 = f2.c0.as_ref().and_then(|c0| c0.get(dim).cloned()).unwrap_or(0.0); let c1 = f2.c1.as_ref().and_then(|c1| c1.get(dim).cloned()).unwrap_or(1.0); let exponent = f2.exponent; parts.push(InterpolatedFunctionDim { input_range, output_range, c0, c1, exponent }); } Ok(Function::Interpolated(parts)) }, i => { dbg!(raw); bail!("unsupported function type {}", i) } } } } impl Object for Function { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { match p { Primitive::Dictionary(dict) => Self::from_dict(dict, resolve), Primitive::Stream(s) => { let stream = Stream::::from_stream(s, resolve)?; let data = stream.data(resolve)?; match stream.info.function_type { 4 => { let s = std::str::from_utf8(&data)?; let func = PsFunc::parse(s)?; let info = stream.info.info; Ok(Function::PostScript { func, domain: info.domain, range: info.range.unwrap() }) }, 0 => { let info = stream.info.info; let order = match info.order { 1 => Interpolation::Linear, 3 => Interpolation::Cubic, n => bail!("Invalid interpolation order {}", n), }; let size = try_opt!(info.size); let range = try_opt!(info.range); let encode = info.encode.unwrap_or_else(|| size.iter().flat_map(|&n| [0.0, (n-1) as f32]).collect()); let decode = info.decode.unwrap_or_else(|| range.clone()); Ok(Function::Sampled(SampledFunction { input: izip!(info.domain.chunks_exact(2), encode.chunks_exact(2), size.iter()).map(|(c, e, &s)| { SampledFunctionInput { domain: (c[0], c[1]), encode_offset: e[0], encode_scale: e[1], size: s as usize, } }).collect(), output: decode.chunks_exact(2).map(|c| SampledFunctionOutput { offset: c[0], scale: (c[1] - c[0]) / 255., }).collect(), data, order, range, })) } ref p => bail!("found a function stream with type {:?}", p) } }, Primitive::Reference(r) => Self::from_primitive(resolve.resolve(r)?, resolve), _ => bail!("double indirection") } } } impl ObjectWrite for Function { fn to_primitive(&self, update: &mut impl Updater) -> Result { unimplemented!() /* let dict = match self { Function::Interpolated(parts) => { let first: &InterpolatedFunctionDim = try_opt!(parts.get(0)); let f2 = Function2 { c0: parts.iter().map(|p| p.c0).collect(), c1: parts.iter().map(|p| p.c0).collect(), exponent: first.exponent }; let f = RawFunction { function_type: 2, domain: vec![first.input_range.0, first.input_range.1], range: parts.iter().flat_map(|p| [p.output_range.0, p.output_range.1]).collect(), decode: None, encode: None, order }; } } */ } } impl DeepClone for Function { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { Ok(self.clone()) } } #[derive(Debug, Clone, DataSize)] struct SampledFunctionInput { domain: (f32, f32), encode_offset: f32, encode_scale: f32, size: usize, } impl SampledFunctionInput { fn map(&self, x: f32) -> (usize, usize, f32) { let x = x.clamp(self.domain.0, self.domain.1); let y = x.mul_add(self.encode_scale, self.encode_offset); (y.floor() as usize, self.size, y.fract()) } } #[derive(Debug, Clone, DataSize)] struct SampledFunctionOutput { offset: f32, scale: f32 } impl SampledFunctionOutput { fn map(&self, x: f32) -> f32 { x.mul_add(self.scale, self.offset) } } #[derive(Debug, Clone, DataSize)] enum Interpolation { Linear, #[allow(dead_code)] // TODO Cubic, } #[derive(Debug, Clone, DataSize)] pub struct SampledFunction { input: Vec, output: Vec, data: Arc<[u8]>, order: Interpolation, range: Vec, } impl 
SampledFunction { fn apply(&self, x: &[f32], out: &mut [f32]) -> Result<()> { if x.len() != self.input.len() { bail!("input dimension mismatch {} != {}", x.len(), self.input.len()); } let n_out = out.len(); if out.len() * 2 != self.range.len() { bail!("output dimension mismatch 2 * {} != {}", out.len(), self.range.len()) } match x.len() { 1 => { match self.order { Interpolation::Linear => { let (i, _, s) = self.input[0].map(x[0]); let idx = i * n_out; for (o, &a) in out.iter_mut().zip(&self.data[idx..]) { *o = a as f32 * (1. - s); } for (o, &b) in out.iter_mut().zip(&self.data[idx + n_out..]) { *o += b as f32 * s; } } _ => unimplemented!() } } 2 => match self.order { Interpolation::Linear => { let (i0, s0, f0) = self.input[0].map(x[0]); let (i1, _, f1) = self.input[1].map(x[1]); let (j0, j1) = (i0+1, i1+1); let (g0, g1) = (1. - f0, 1. - f1); out.fill(0.0); let mut add = |i0, i1, f| { let idx = (i0 + s0 * i1) * n_out; if let Some(part) = self.data.get(idx .. idx+n_out) { for (o, &b) in out.iter_mut().zip(part) { *o += f * b as f32; } } }; add(i0, i1, g0 * g1); add(j0, i1, f0 * g1); add(i0, j1, g0 * f1); add(j0, j1, f0 * f1); } _ => unimplemented!() } 3 => match self.order { Interpolation::Linear => { let (i0, s0, f0) = self.input[0].map(x[0]); let (i1, s1, f1) = self.input[1].map(x[1]); let (i2, _, f2) = self.input[2].map(x[2]); let (j0, j1, j2) = (i0+1, i1+1, i2+1); let (g0, g1, g2) = (1. - f0, 1. - f1, 1. - f2); out.fill(0.0); let mut add = |i0, i1, i2, f| { let idx = (i0 + s0 * (i1 + s1 * i2)) * n_out; if let Some(part) = self.data.get(idx .. idx+n_out) { for (o, &b) in out.iter_mut().zip(part) { *o += f * b as f32; } } }; add(i0, i1, i2, g0 * g1 * g2); add(j0, i1, i2, f0 * g1 * g2); add(i0, j1, i2, g0 * f1 * g2); add(j0, j1, i2, f0 * f1 * g2); add(i0, i1, j2, g0 * g1 * f2); add(j0, i1, j2, f0 * g1 * f2); add(i0, j1, j2, g0 * f1 * f2); add(j0, j1, j2, f0 * f1 * f2); } _ => unimplemented!() } n => bail!("Order {}", n) } for (o, y) in self.output.iter().zip(out.iter_mut()) { *y = o.map(*y); } Ok(()) } } #[derive(Debug, Clone, DataSize)] pub struct InterpolatedFunctionDim { pub input_range: (f32, f32), pub output_range: (f32, f32), pub c0: f32, pub c1: f32, pub exponent: f32, } impl InterpolatedFunctionDim { pub fn apply(&self, x: f32) -> f32 { let y = self.c0 + x.powf(self.exponent) * (self.c1 - self.c0); let (y0, y1) = self.output_range; y.min(y1).max(y0) } } #[derive(Debug)] pub enum PostScriptError { StackUnderflow, IncorrectStackSize } #[derive(Debug, Clone, DataSize)] pub struct PsFunc { pub ops: Vec } macro_rules! op { ($stack:ident; $($v:ident),* => $($e:expr),*) => ( { $(let $v = $stack.pop().ok_or(PostScriptError::StackUnderflow)?;)* $($stack.push($e);)* } ) } impl PsFunc { fn exec_inner(&self, stack: &mut Vec) -> Result<(), PostScriptError> { for &op in &self.ops { match op { PsOp::Int(i) => stack.push(i as f32), PsOp::Value(v) => stack.push(v), PsOp::Dup => op!(stack; v => v, v), PsOp::Exch => op!(stack; b, a => b, a), PsOp::Add => op!(stack; b, a => a + b), PsOp::Sub => op!(stack; b, a => a - b), PsOp::Mul => op!(stack; b, a => a * b), PsOp::Abs => op!(stack; a => a.abs()), PsOp::Roll => { let j = stack.pop().ok_or(PostScriptError::StackUnderflow)? as isize; let n = stack.pop().ok_or(PostScriptError::StackUnderflow)? as usize; let start = stack.len() - n; let slice = &mut stack[start..]; if j > 0 { slice.rotate_right(j as usize); } else { slice.rotate_left(-j as usize); } } PsOp::Index => { let n = stack.pop().ok_or(PostScriptError::StackUnderflow)? 
as usize; if n >= stack.len() { return Err(PostScriptError::StackUnderflow); } let val = stack[stack.len() - n - 1]; stack.push(val); } PsOp::Cvr => {} PsOp::Pop => { stack.pop().ok_or(PostScriptError::StackUnderflow)?; } } } Ok(()) } pub fn exec(&self, input: &[f32], output: &mut [f32]) -> Result<()> { let mut stack = Vec::with_capacity(10); stack.extend_from_slice(input); match self.exec_inner(&mut stack) { Ok(()) => {}, Err(_) => return Err(PdfError::PostScriptExec) } if output.len() != stack.len() { bail!("incorrect output length: expected {}, found {}.", stack.len(), output.len()) } output.copy_from_slice(&stack); Ok(()) } pub fn parse(s: &str) -> Result { let start = s.find('{').ok_or(PdfError::PostScriptParse)?; let end = s.rfind('}').ok_or(PdfError::PostScriptParse)?; let ops: Result, _> = s[start + 1 .. end].split_ascii_whitespace().map(PsOp::parse).collect(); Ok(PsFunc { ops: ops? }) } } #[derive(Copy, Clone, Debug, DataSize)] pub enum PsOp { Int(i32), Value(f32), Add, Sub, Abs, Mul, Dup, Exch, Roll, Index, Cvr, Pop, } impl PsOp { pub fn parse(s: &str) -> Result { if let Ok(i) = s.parse::() { Ok(PsOp::Int(i)) } else if let Ok(f) = s.parse::() { Ok(PsOp::Value(f)) } else { Ok(match s { "add" => PsOp::Add, "sub" => PsOp::Sub, "abs" => PsOp::Abs, "mul" => PsOp::Mul, "dup" => PsOp::Dup, "exch" => PsOp::Exch, "roll" => PsOp::Roll, "index" => PsOp::Index, "cvr" => PsOp::Cvr, "pop" => PsOp::Pop, _ => { bail!("unimplemented op {}", s); } }) } } } pdf-0.9.0/src/object/mod.rs000064400000000000000000000552701046102023000135760ustar 00000000000000//! `Object` trait, along with some implementations. References. //! //! Some of the structs are incomplete (missing fields that are in the PDF references). mod types; mod stream; mod color; mod function; pub use self::types::*; pub use self::stream::*; pub use self::color::*; pub use self::function::*; pub use crate::file::PromisedRef; use crate::parser::ParseFlags; use crate::primitive::*; use crate::error::*; use crate::enc::*; use std::fmt; use std::marker::PhantomData; use std::collections::HashMap; use std::sync::Arc; use std::ops::{Deref, Range}; use std::hash::{Hash, Hasher}; use std::convert::TryInto; use datasize::DataSize; use itertools::Itertools; pub type ObjNr = u64; pub type GenNr = u64; pub struct ParseOptions { pub allow_error_in_option: bool, pub allow_xref_error: bool, pub allow_invalid_ops: bool, pub allow_missing_endobj: bool, } impl ParseOptions { pub const fn tolerant() -> Self { ParseOptions { allow_error_in_option: true, allow_xref_error: true, allow_invalid_ops: true, allow_missing_endobj: true, } } pub const fn strict() -> Self { ParseOptions { allow_error_in_option: false, allow_xref_error: false, allow_invalid_ops: true, allow_missing_endobj: false, } } } pub trait Resolve: { fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result; fn resolve(&self, r: PlainRef) -> Result { self.resolve_flags(r, ParseFlags::ANY, 16) } fn get(&self, r: Ref) -> Result>; fn options(&self) -> &ParseOptions; fn stream_data(&self, id: PlainRef, range: Range) -> Result>; fn get_data_or_decode(&self, id: PlainRef, range: Range, filters: &[StreamFilter]) -> Result>; } pub struct NoResolve; impl Resolve for NoResolve { fn resolve_flags(&self, _: PlainRef, _: ParseFlags, _: usize) -> Result { Err(PdfError::Reference) } fn get(&self, _r: Ref) -> Result> { Err(PdfError::Reference) } fn options(&self) -> &ParseOptions { static STRICT: ParseOptions = ParseOptions::strict(); &STRICT } fn get_data_or_decode(&self, _: PlainRef, _: 
Range, _: &[StreamFilter]) -> Result> { Err(PdfError::Reference) } fn stream_data(&self, id: PlainRef, range: Range) -> Result> { Err(PdfError::Reference) } } /// A PDF Object pub trait Object: Sized + Sync + Send + 'static { /// Convert primitive to Self fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result; } pub trait Cloner: Updater + Resolve { fn clone_plainref(&mut self, old: PlainRef) -> Result; fn clone_ref(&mut self, old: Ref) -> Result>; fn clone_rcref(&mut self, old: &RcRef) -> Result>; fn clone_shared(&mut self, old: &Shared) -> Result>; } pub trait DeepClone: Sized + Sync + Send + 'static { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result; } pub trait Updater { fn create(&mut self, obj: T) -> Result>; fn update(&mut self, old: PlainRef, obj: T) -> Result>; fn promise(&mut self) -> PromisedRef; fn fulfill(&mut self, promise: PromisedRef, obj: T) -> Result>; } pub struct NoUpdate; impl Updater for NoUpdate { fn create(&mut self, _obj: T) -> Result> { panic!() } fn update(&mut self, _old: PlainRef, _obj: T) -> Result> { panic!() } fn promise(&mut self) -> PromisedRef { panic!() } fn fulfill(&mut self, _promise: PromisedRef, _obj: T) -> Result> { panic!() } } pub trait ObjectWrite { fn to_primitive(&self, update: &mut impl Updater) -> Result; } pub trait FromDict: Sized { fn from_dict(dict: Dictionary, resolve: &impl Resolve) -> Result; } pub trait ToDict: ObjectWrite { fn to_dict(&self, update: &mut impl Updater) -> Result; } pub trait SubType {} pub trait Trace { fn trace(&self, _cb: &mut impl FnMut(PlainRef)) {} } /////// // Refs /////// // TODO move to primitive.rs #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, DataSize)] pub struct PlainRef { pub id: ObjNr, pub gen: GenNr, } impl Object for PlainRef { fn from_primitive(p: Primitive, _: &impl Resolve) -> Result { p.into_reference() } } impl ObjectWrite for PlainRef { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Reference(*self)) } } impl DeepClone for PlainRef { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { cloner.clone_plainref(*self) } } // NOTE: Copy & Clone implemented manually ( https://github.com/rust-lang/rust/issues/26925 ) #[derive(DataSize)] pub struct Ref { inner: PlainRef, _marker: PhantomData } impl Clone for Ref { fn clone(&self) -> Ref { *self } } impl Copy for Ref {} impl Ref { pub fn new(inner: PlainRef) -> Ref { Ref { inner, _marker: PhantomData, } } pub fn from_id(id: ObjNr) -> Ref { Ref { inner: PlainRef {id, gen: 0}, _marker: PhantomData, } } pub fn get_inner(&self) -> PlainRef { self.inner } pub fn upcast(self) -> Ref where T: SubType { Ref::new(self.inner) } } impl Object for Ref { fn from_primitive(p: Primitive, _: &impl Resolve) -> Result { Ok(Ref::new(p.into_reference()?)) } } impl ObjectWrite for Ref { fn to_primitive(&self, update: &mut impl Updater) -> Result { self.inner.to_primitive(update) } } impl DeepClone for Ref { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { cloner.clone_ref(*self) } } impl Trace for Ref { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { cb(self.inner); } } impl fmt::Debug for Ref { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Ref({})", self.inner.id) } } impl Hash for Ref { fn hash(&self, state: &mut H) { self.inner.hash(state) } } impl PartialEq for Ref { fn eq(&self, rhs: &Self) -> bool { self.inner.eq(&rhs.inner) } } impl Eq for Ref {} pub type Shared = Arc; #[derive(Debug, DataSize)] pub struct RcRef { inner: PlainRef, data: Shared } impl From> for Primitive { fn from(value: 
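// Illustrative sketch (editorial addition): resolving a typed reference through
// the `Resolve` trait above. The resolver parameter and the `Dictionary` target
// type are assumptions for the example; any `Object` type works the same way.
fn follow_ref(resolver: &impl Resolve, r: Ref<Dictionary>) -> Result<()> {
    let cached: RcRef<Dictionary> = resolver.get(r)?; // resolved, shareable handle
    let plain: PlainRef = cached.get_ref().get_inner(); // back to the raw id/gen pair
    assert_eq!(plain.id, r.get_inner().id);
    Ok(())
}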
RcRef) -> Self { Primitive::Reference(value.inner) } } impl RcRef { pub fn new(inner: PlainRef, data: Shared) -> RcRef { RcRef { inner, data } } pub fn get_ref(&self) -> Ref { Ref::new(self.inner) } pub fn data(&self) -> &Shared { &self.data } } impl Object for RcRef { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { match p { Primitive::Reference(r) => resolve.get(Ref::new(r)), p => Err(PdfError::UnexpectedPrimitive {expected: "Reference", found: p.get_debug_name()}) } } } impl ObjectWrite for RcRef { fn to_primitive(&self, update: &mut impl Updater) -> Result { self.inner.to_primitive(update) } } impl DeepClone for RcRef { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { cloner.clone_rcref(self) } } impl Deref for RcRef { type Target = T; fn deref(&self) -> &T { &self.data } } impl Clone for RcRef { fn clone(&self) -> RcRef { RcRef { inner: self.inner, data: self.data.clone(), } } } impl Trace for RcRef { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { cb(self.inner); } } impl<'a, T> From<&'a RcRef> for Ref { fn from(r: &'a RcRef) -> Ref { Ref::new(r.inner) } } impl Hash for RcRef { fn hash(&self, state: &mut H) { std::ptr::hash(&**self, state) } } impl PartialEq for RcRef { fn eq(&self, rhs: &Self) -> bool { std::ptr::eq(&**self, &**rhs) } } impl Eq for RcRef {} #[derive(Debug, DataSize)] pub enum MaybeRef { Direct(Shared), Indirect(RcRef), } impl MaybeRef { pub fn as_ref(&self) -> Option> { match *self { MaybeRef::Indirect(ref r) => Some(r.get_ref()), _ => None } } pub fn data(&self) -> &Shared { match *self { MaybeRef::Direct(ref t) => t, MaybeRef::Indirect(ref r) => &r.data } } } impl Object for MaybeRef { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { Ok(match p { Primitive::Reference(r) => MaybeRef::Indirect(resolve.get(Ref::new(r))?), p => MaybeRef::Direct(Shared::new(T::from_primitive(p, resolve)?)) }) } } impl ObjectWrite for MaybeRef { fn to_primitive(&self, update: &mut impl Updater) -> Result { match self { MaybeRef::Direct(ref inner) => inner.to_primitive(update), MaybeRef::Indirect(r) => r.to_primitive(update) } } } impl DeepClone for MaybeRef { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { match *self { MaybeRef::Direct(ref old) => cloner.clone_shared(old).map(MaybeRef::Direct), MaybeRef::Indirect(ref old) => cloner.clone_rcref(old).map(MaybeRef::Indirect) } } } impl Deref for MaybeRef { type Target = T; fn deref(&self) -> &T { match *self { MaybeRef::Direct(ref t) => t, MaybeRef::Indirect(ref r) => r } } } impl Clone for MaybeRef { fn clone(&self) -> Self { match *self { MaybeRef::Direct(ref rc) => MaybeRef::Direct(rc.clone()), MaybeRef::Indirect(ref r) => MaybeRef::Indirect(r.clone()) } } } impl Trace for MaybeRef { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { match *self { MaybeRef::Indirect(ref rc) => rc.trace(cb), MaybeRef::Direct(_) => () } } } impl From> for MaybeRef { fn from(r: Shared) -> MaybeRef { MaybeRef::Direct(r) } } impl From> for Shared { fn from(r: MaybeRef) -> Shared { match r { MaybeRef::Direct(rc) => rc, MaybeRef::Indirect(r) => r.data } } } impl<'a, T> From<&'a MaybeRef> for Shared { fn from(r: &'a MaybeRef) -> Shared { match r { MaybeRef::Direct(ref rc) => rc.clone(), MaybeRef::Indirect(ref r) => r.data.clone() } } } impl From> for MaybeRef { fn from(r: RcRef) -> MaybeRef { MaybeRef::Indirect(r) } } impl Hash for MaybeRef { fn hash(&self, state: &mut H) { std::ptr::hash(&**self, state) } } impl PartialEq for MaybeRef { fn eq(&self, rhs: &Self) -> bool { std::ptr::eq(&**self, &**rhs) 
} } impl Eq for MaybeRef {} ////////////////////////////////////// // Object for Primitives & other types ////////////////////////////////////// impl Object for i32 { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::Reference(id) => r.resolve(id)?.as_integer(), p => p.as_integer() } } } impl ObjectWrite for i32 { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Integer(*self)) } } impl Object for u32 { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::Reference(id) => r.resolve(id)?.as_u32(), p => p.as_u32() } } } impl ObjectWrite for u32 { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Integer(*self as _)) } } impl Object for usize { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::Reference(id) => Ok(r.resolve(id)?.as_u32()? as usize), p => Ok(p.as_u32()? as usize) } } } impl ObjectWrite for usize { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Integer(*self as _)) } } impl Object for f32 { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::Reference(id) => r.resolve(id)?.as_number(), p => p.as_number() } } } impl ObjectWrite for f32 { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Number(*self)) } } impl Object for bool { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::Reference(id) => r.resolve(id)?.as_bool(), p => p.as_bool() } } } impl ObjectWrite for bool { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Boolean(*self)) } } impl Object for Dictionary { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::Dictionary(dict) => Ok(dict), Primitive::Reference(id) => Dictionary::from_primitive(r.resolve(id)?, r), _ => Err(PdfError::UnexpectedPrimitive {expected: "Dictionary", found: p.get_debug_name()}), } } } impl Object for Name { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { p.resolve(resolve)?.into_name() } } impl ObjectWrite for Name { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Name(self.0.clone())) } } impl Object for Vec { /// Will try to convert `p` to `T` first, then try to convert `p` to Vec fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { Ok( match p { Primitive::Array(_) => { p.resolve(r)?.into_array()? .into_iter() .map(|p| T::from_primitive(p, r)) .collect::>>()? }, Primitive::Null => { Vec::new() } Primitive::Reference(id) => Self::from_primitive(r.resolve(id)?, r)?, _ => vec![T::from_primitive(p, r)?] } ) } } impl ObjectWrite for Vec { fn to_primitive(&self, update: &mut impl Updater) -> Result { Primitive::array::(self.iter(), update) } } impl DeepClone for Vec { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { self.iter().map(|t| t.deep_clone(cloner)).collect() } } impl Trace for Vec { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { for i in self.iter() { i.trace(cb); } } } /* pub struct Data(pub Vec); impl Object for Data { fn serialize(&self, out: &mut W) -> Result<()> { unimplemented!() } /// Will try to convert `p` to `T` first, then try to convert `p` to Vec fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::Array(_) => { p.into_array(r)? .into_iter() .map(|p| u8::from_primitive(p, r)) .collect::>>()? 
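// Illustrative sketch (editorial addition): the `Vec` impl above is
// deliberately lenient, promoting a lone primitive to a one-element vector and
// treating Null as an empty one; many real-world files depend on this.
fn lenient_arrays(resolver: &impl Resolve) -> Result<()> {
    let one = Vec::<i32>::from_primitive(Primitive::Integer(7), resolver)?;
    assert_eq!(one, vec![7]);
    let empty = Vec::<i32>::from_primitive(Primitive::Null, resolver)?;
    assert!(empty.is_empty());
    Ok(())
}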
}, Primitive::Null => { Vec::new() } Primitive::Reference(id) => Self::from_primitive(r.resolve(id)?, r)?, _ => } } }*/ impl Object for Primitive { fn from_primitive(p: Primitive, _: &impl Resolve) -> Result { Ok(p) } } impl ObjectWrite for Primitive { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(self.clone()) } } impl DeepClone for Primitive { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { match *self { Primitive::Array(ref parts) => Ok(Primitive::Array(parts.into_iter().map(|p| p.deep_clone(cloner)).try_collect()?)), Primitive::Boolean(b) => Ok(Primitive::Boolean(b)), Primitive::Dictionary(ref dict) => Ok(Primitive::Dictionary(dict.deep_clone(cloner)?)), Primitive::Integer(i) => Ok(Primitive::Integer(i)), Primitive::Name(ref name) => Ok(Primitive::Name(name.clone())), Primitive::Null => Ok(Primitive::Null), Primitive::Number(n) => Ok(Primitive::Number(n)), Primitive::Reference(r) => Ok(Primitive::Reference(r.deep_clone(cloner)?)), Primitive::Stream(ref s) => Ok(Primitive::Stream(s.deep_clone(cloner)?)), Primitive::String(ref s) => Ok(Primitive::String(s.clone())) } } } impl Trace for Primitive { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { match *self { Primitive::Reference(r) => cb(r), Primitive::Array(ref parts) => parts.iter().for_each(|p| p.trace(cb)), Primitive::Dictionary(ref dict) => dict.values().for_each(|p| p.trace(cb)), _ => () } } } impl Object for HashMap { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { match p { Primitive::Null => Ok(HashMap::new()), Primitive::Dictionary (dict) => { let mut new = Self::new(); for (key, val) in dict.iter() { new.insert(key.clone(), V::from_primitive(val.clone(), resolve)?); } Ok(new) } Primitive::Reference (id) => HashMap::from_primitive(resolve.resolve(id)?, resolve), p => Err(PdfError::UnexpectedPrimitive {expected: "Dictionary", found: p.get_debug_name()}) } } } impl ObjectWrite for HashMap { fn to_primitive(&self, update: &mut impl Updater) -> Result { if self.is_empty() { Ok(Primitive::Null) } else { let mut dict = Dictionary::new(); for (k, v) in self.iter() { dict.insert(k.clone(), v.to_primitive(update)?); } Ok(Primitive::Dictionary(dict)) } } } impl DeepClone for HashMap { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { self.iter().map(|(k, v)| Ok((k.clone(), v.deep_clone(cloner)?))).collect() } } impl Object for Option { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { match p { Primitive::Null => Ok(None), p => match T::from_primitive(p, resolve) { Ok(p) => Ok(Some(p)), // References to non-existing objects ought not to be an error Err(PdfError::NullRef {..}) => Ok(None), Err(PdfError::FreeObject {..}) => Ok(None), Err(e) if resolve.options().allow_error_in_option => { warn!("ignoring {:?}", e); Ok(None) } Err(e) => Err(e) } } } } impl ObjectWrite for Option { fn to_primitive(&self, update: &mut impl Updater) -> Result { match self { None => Ok(Primitive::Null), Some(t) => t.to_primitive(update) } } } impl DeepClone for Option { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { match self { None => Ok(None), Some(t) => t.deep_clone(cloner).map(Some) } } } impl Trace for Option { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { if let Some(ref t) = *self { t.trace(cb) } } } impl Object for Box { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { T::from_primitive(p, resolve).map(Box::new) } } impl ObjectWrite for Box { fn to_primitive(&self, update: &mut impl Updater) -> Result { (**self).to_primitive(update) } } impl Trace 
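// Illustrative sketch (editorial addition): the `Option` impl above maps Null
// and dangling references (NullRef / FreeObject) to None, and under
// ParseOptions::tolerant() it also downgrades other decode errors to None
// instead of failing the whole containing object.
fn optional_entry(resolver: &impl Resolve) -> Result<()> {
    let missing = Option::<i32>::from_primitive(Primitive::Null, resolver)?;
    assert_eq!(missing, None);
    Ok(())
}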
for Box { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { (**self).trace(cb) } } impl Object for () { fn from_primitive(_p: Primitive, _resolve: &impl Resolve) -> Result { Ok(()) } } impl ObjectWrite for () { fn to_primitive(&self, _: &mut impl Updater) -> Result { Ok(Primitive::Null) } } impl Trace for () {} impl Object for (T, U) where T: Object, U: Object { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let arr = p.resolve(resolve)?.into_array()?; if arr.len() != 2 { bail!("expected array of length 2 (found {})", arr.len()); } let [a, b]: [Primitive; 2] = arr.try_into().unwrap(); Ok((T::from_primitive(a, resolve)?, U::from_primitive(b, resolve)?)) } } impl ObjectWrite for (T, U) where T: ObjectWrite, U: ObjectWrite { fn to_primitive(&self, update: &mut impl Updater) -> Result { Ok(Primitive::Array(vec![self.0.to_primitive(update)?, self.1.to_primitive(update)?])) } } impl Trace for (T, U) { fn trace(&self, cb: &mut impl FnMut(PlainRef)) { self.0.trace(cb); self.1.trace(cb); } } impl DeepClone for Box { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { Ok(Box::new((&**self).deep_clone(cloner)?)) } } macro_rules! deep_clone_simple { ($($t:ty),*) => ( $( impl DeepClone for $t { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { Ok(self.clone()) } } )* ) } deep_clone_simple!(f32, i32, u32, bool, Name, (), Date, PdfString, Rect, u8, Arc<[u8]>, Vec); impl DeepClone for (A, B) { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { Ok((self.0.deep_clone(cloner)?, self.1.deep_clone(cloner)?)) } } pdf-0.9.0/src/object/stream.rs000064400000000000000000000322461046102023000143100ustar 00000000000000use datasize::DataSize; use crate as pdf; use crate::object::*; use crate::primitive::*; use crate::error::*; use crate::parser::Lexer; use crate::enc::{StreamFilter, decode}; use std::ops::{Deref, Range}; use std::fmt; #[derive(Clone)] pub (crate) enum StreamData { Generated(Arc<[u8]>), Original(Range, PlainRef), } datasize::non_dynamic_const_heap_size!(StreamData, std::mem::size_of::()); /// Simple Stream object with only some additional entries from the stream dict (I). #[derive(Clone, DataSize)] pub struct Stream { pub info: StreamInfo, pub (crate) inner_data: StreamData, } impl Stream { pub fn from_stream(s: PdfStream, resolve: &impl Resolve) -> Result { let PdfStream {info, inner} = s; let info = StreamInfo::::from_primitive(Primitive::Dictionary (info), resolve)?; let inner_data = match inner { StreamInner::InFile { id, file_range } => StreamData::Original(file_range, id), StreamInner::Pending { data } => StreamData::Generated(data) }; Ok(Stream { info, inner_data }) } /// the data is not compressed. 
the specified filters are to be applied when compressing the data pub fn new_with_filters(i: I, data: impl Into>, filters: Vec) -> Stream { Stream { info: StreamInfo { filters, file: None, file_filters: Vec::new(), info: i }, inner_data: StreamData::Generated(data.into()), } } pub fn new(i: I, data: impl Into>) -> Stream { Stream { info: StreamInfo { filters: Vec::new(), file: None, file_filters: Vec::new(), info: i }, inner_data: StreamData::Generated(data.into()), } } /// the data is already compressed with the specified filters pub fn from_compressed(i: I, data: impl Into>, filters: Vec) -> Stream { Stream { info: StreamInfo { filters: filters.clone(), file: None, file_filters: Vec::new(), info: i }, inner_data: StreamData::Generated(data.into()), } } pub fn data(&self, resolve: &impl Resolve) -> Result> { match self.inner_data { StreamData::Generated(ref data) => { let filters = &self.info.filters; if filters.len() == 0 { Ok(data.clone()) } else { use std::borrow::Cow; let mut data: Cow<[u8]> = (&**data).into(); for filter in filters { data = t!(decode(&data, filter), filter).into(); } Ok(data.into()) } } StreamData::Original(ref file_range, id) => { resolve.get_data_or_decode(id, file_range.clone(), &self.info.filters) } } } } impl fmt::Debug for Stream { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { self.info.info.fmt(f) } } impl Object for Stream { /// Convert primitive to Self fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let s = PdfStream::from_primitive(p, resolve)?; Stream::from_stream(s, resolve) } } impl Stream { pub fn to_pdf_stream(&self, update: &mut impl Updater) -> Result { let mut info = match self.info.info.to_primitive(update)? { Primitive::Dictionary(dict) => dict, Primitive::Null => Dictionary::new(), p => bail!("stream info has to be a dictionary (found {:?})", p) }; let mut params = None; if self.info.filters.len() > 0 { for f in self.info.filters.iter() { if let Some(para) = match f { StreamFilter::LZWDecode(ref p) => Some(p.to_primitive(update)?), StreamFilter::FlateDecode(ref p) => Some(p.to_primitive(update)?), StreamFilter::DCTDecode(ref p) => Some(p.to_primitive(update)?), StreamFilter::CCITTFaxDecode(ref p) => Some(p.to_primitive(update)?), StreamFilter::JBIG2Decode(ref p) => Some(p.to_primitive(update)?), _ => None } { assert!(params.is_none()); params = Some(para); } } let mut filters = self.info.filters.iter().map(|filter| match filter { StreamFilter::ASCIIHexDecode => "ASCIIHexDecode", StreamFilter::ASCII85Decode => "ASCII85Decode", StreamFilter::LZWDecode(ref _p) => "LZWDecode", StreamFilter::FlateDecode(ref _p) => "FlateDecode", StreamFilter::JPXDecode => "JPXDecode", StreamFilter::DCTDecode(ref _p) => "DCTDecode", StreamFilter::CCITTFaxDecode(ref _p) => "CCITTFaxDecode", StreamFilter::JBIG2Decode(ref _p) => "JBIG2Decode", StreamFilter::Crypt => "Crypt", StreamFilter::RunLengthDecode => "RunLengthDecode", }) .map(|s| Primitive::Name(s.into())); match self.info.filters.len() { 0 => {}, 1 => { info.insert("Filter", filters.next().unwrap().to_primitive(update)?); } _ => { info.insert("Filter", Primitive::array::(filters, update)?); } } } if let Some(para) = params { info.insert("DecodeParms", para); } let inner = match self.inner_data { StreamData::Generated(ref data) => { info.insert("Length", Primitive::Integer(data.len() as _)); StreamInner::Pending { data: data.clone() } }, StreamData::Original(ref file_range, id) => { info.insert("Length", Primitive::Integer(file_range.len() as _)); StreamInner::InFile { id, 
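// Illustrative sketch (editorial addition; the bounds on `I` are elided):
// reading decoded stream bytes. For generated data, `data` above applies the
// recorded filters in order; for data still sitting in the file it delegates to
// the resolver's `get_data_or_decode`, which allows the result to be cached.
fn stream_len<I>(stream: &Stream<I>, resolver: &impl Resolve) -> Result<usize> {
    let bytes: Arc<[u8]> = stream.data(resolver)?;
    Ok(bytes.len())
}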
file_range: file_range.clone() } } }; Ok(PdfStream { info, inner }) } } impl ObjectWrite for Stream { fn to_primitive(&self, update: &mut impl Updater) -> Result { self.to_pdf_stream(update).map(Primitive::Stream) } } impl DeepClone for Stream { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { let data = match self.inner_data { StreamData::Generated(ref data) => data.clone(), StreamData::Original(ref range, id) => cloner.stream_data(id, range.clone())? }; Ok(Stream { info: self.info.deep_clone(cloner)?, inner_data: StreamData::Generated(data), }) } } impl Deref for Stream { type Target = StreamInfo; fn deref(&self) -> &StreamInfo { &self.info } } /// General stream type. `I` is the additional information to be read from the stream dict. #[derive(Debug, Clone, DataSize, DeepClone)] pub struct StreamInfo { // General dictionary entries /// Filters that the `data` is currently encoded with (corresponds to both `/Filter` and /// `/DecodeParms` in the PDF specs), constructed in `from_primitive()`. pub filters: Vec, /// Optional file containing the stream contents pub file: Option, /// Filters to apply to external file specified in `file`. pub file_filters: Vec, // TODO: /* /// Filters to apply to external file specified in `file`. #[pdf(key="FFilter")] file_filters: Vec, #[pdf(key="FDecodeParms")] file_decode_parms: Vec, /// Number of bytes in the decoded stream #[pdf(key="DL")] dl: Option, */ // Specialized dictionary entries pub info: I, } impl Deref for StreamInfo { type Target = I; fn deref(&self) -> &I { &self.info } } impl Default for StreamInfo { fn default() -> StreamInfo { StreamInfo { filters: Vec::new(), file: None, file_filters: Vec::new(), info: I::default(), } } } impl StreamInfo { /* /// If the stream is not encoded, this is a no-op. `decode()` should be called whenever it's uncertain /// whether the stream is encoded.
pub fn encode(&mut self, _filter: StreamFilter) { // TODO this should add the filter to `self.filters` and encode the data with the given // filter unimplemented!(); }*/ pub fn get_filters(&self) -> &[StreamFilter] { &self.filters } } impl Object for StreamInfo { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let mut dict = Dictionary::from_primitive(p, resolve)?; let _length = usize::from_primitive( dict.remove("Length").ok_or(PdfError::MissingEntry{ typ: "StreamInfo", field: "Length".into() })?, resolve)?; let filters = Vec::::from_primitive( dict.remove("Filter").unwrap_or(Primitive::Null), resolve)?; let decode_params = Vec::>::from_primitive( dict.remove("DecodeParms").unwrap_or(Primitive::Null), resolve)?; let file = Option::::from_primitive( dict.remove("F").unwrap_or(Primitive::Null), resolve)?; let file_filters = Vec::::from_primitive( dict.remove("FFilter").unwrap_or(Primitive::Null), resolve)?; let file_decode_params = Vec::::from_primitive( dict.remove("FDecodeParms").unwrap_or(Primitive::Null), resolve)?; let mut new_filters = Vec::new(); let mut new_file_filters = Vec::new(); for (i, filter) in filters.iter().enumerate() { let params = match decode_params.get(i) { Some(Some(params)) => params.clone(), _ => Dictionary::default(), }; new_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?); } for (i, filter) in file_filters.iter().enumerate() { let params = match file_decode_params.get(i) { Some(params) => params.clone(), None => Dictionary::default(), }; new_file_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?); } Ok(StreamInfo { // General filters: new_filters, file, file_filters: new_file_filters, // Special info: T::from_primitive(Primitive::Dictionary (dict), resolve)?, }) } } #[derive(Object, Default, Debug, DataSize)] #[pdf(Type = "ObjStm")] pub struct ObjStmInfo { #[pdf(key = "N")] /// Number of compressed objects in the stream. pub num_objects: usize, #[pdf(key = "First")] /// The byte offset in the decoded stream, of the first compressed object. pub first: usize, #[pdf(key = "Extends")] /// A reference to an eventual ObjectStream which this ObjectStream extends. pub extends: Option>>, } #[derive(DataSize)] pub struct ObjectStream { /// Byte offset of each object. Index is the object number. offsets: Vec, /// The object number of this object. 
_id: ObjNr, inner: Stream } impl Object for ObjectStream { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let stream: Stream = Stream::from_primitive(p, resolve)?; let mut offsets = Vec::new(); { debug!("parsing stream"); let data = stream.data(resolve)?; let mut lexer = Lexer::new(&data); for _ in 0..(stream.info.num_objects as ObjNr) { let _obj_nr = lexer.next()?.to::()?; let offset = lexer.next()?.to::()?; offsets.push(offset); } } Ok(ObjectStream { offsets, _id: 0, // TODO inner: stream }) } } impl ObjectStream { pub fn get_object_slice(&self, index: usize, resolve: &impl Resolve) -> Result<(Arc<[u8]>, Range)> { if index >= self.offsets.len() { err!(PdfError::ObjStmOutOfBounds {index, max: self.offsets.len()}); } let start = self.inner.info.first + self.offsets[index]; let data = self.inner.data(resolve)?; let end = if index == self.offsets.len() - 1 { data.len() } else { self.inner.info.first + self.offsets[index + 1] }; Ok((data, start..end)) } /// Returns the number of contained objects pub fn n_objects(&self) -> usize { self.offsets.len() } pub fn _data(&self, resolve: &impl Resolve) -> Result> { self.inner.data(resolve) } } pdf-0.9.0/src/object/types.rs000064400000000000000000001320061046102023000141540ustar 00000000000000//! Models of PDF types use std::collections::HashMap; use datasize::DataSize; use crate as pdf; use crate::content::deep_clone_op; use crate::object::*; use crate::error::*; use crate::content::{Content, FormXObject, Matrix, parse_ops, serialize_ops, Op}; use crate::font::Font; use crate::enc::StreamFilter; /// Node in a page tree - type is either `Page` or `PageTree` #[derive(Debug, Clone, DataSize)] pub enum PagesNode { Tree(PageTree), Leaf(Page), } impl Object for PagesNode { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let mut dict = p.resolve(resolve)?.into_dictionary()?; match dict.require("PagesNode", "Type")?.as_name()? 
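// Illustrative sketch (editorial addition): extracting one compressed object
// from an object stream. The offsets table is built in `from_primitive` above,
// and `first` marks where the first object starts inside the decoded data.
fn first_compressed_object(objstm: &ObjectStream, resolver: &impl Resolve) -> Result<usize> {
    let (data, range) = objstm.get_object_slice(0, resolver)?;
    Ok(data[range].len()) // raw bytes of object #0, ready for the object parser
}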
{ "Page" => Ok(PagesNode::Leaf(t!(Page::from_dict(dict, resolve)))), "Pages" => Ok(PagesNode::Tree(t!(PageTree::from_dict(dict, resolve)))), other => Err(PdfError::WrongDictionaryType {expected: "Page or Pages".into(), found: other.into()}), } } } impl ObjectWrite for PagesNode { fn to_primitive(&self, update: &mut impl Updater) -> Result { match *self { PagesNode::Tree(ref t) => t.to_primitive(update), PagesNode::Leaf(ref l) => l.to_primitive(update), } } } /* use std::iter::once; use itertools::Either; // needs recursive types impl PagesNode { pub fn pages<'a>(&'a self, resolve: &'a impl Resolve) -> impl Iterator> + 'a { match self { PagesNode::Tree(ref tree) => Either::Left(Box::new(tree.pages(resolve))), PagesNode::Leaf(ref page) => Either::Right(once(Ok(PageRc(page.clone())))) } } } */ /// A `PagesNode::Leaf` wrapped in a `RcRef` /// #[derive(Debug, Clone, DataSize)] pub struct PageRc(RcRef); impl Deref for PageRc { type Target = Page; fn deref(&self) -> &Page { match *self.0 { PagesNode::Leaf(ref page) => page, _ => unreachable!() } } } impl PageRc { pub fn create(page: Page, update: &mut impl Updater) -> Result { Ok(PageRc(update.create(PagesNode::Leaf(page))?)) } pub fn get_ref(&self) -> Ref { self.0.get_ref() } } /// A `PagesNode::Tree` wrapped in a `RcRef` /// #[derive(Debug, Clone, DataSize)] pub struct PagesRc(RcRef); impl Deref for PagesRc { type Target = PageTree; fn deref(&self) -> &PageTree { match *self.0 { PagesNode::Tree(ref tree) => tree, _ => unreachable!() } } } impl PagesRc { pub fn create(tree: PageTree, update: &mut impl Updater) -> Result { Ok(PagesRc(update.create(PagesNode::Tree(tree))?)) } } impl Object for PagesRc { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let node = t!(RcRef::from_primitive(p, resolve)); match *node { PagesNode::Leaf(_) => Err(PdfError::WrongDictionaryType {expected: "Pages".into(), found: "Page".into()}), PagesNode::Tree(_) => Ok(PagesRc(node)) } } } impl ObjectWrite for PagesRc { fn to_primitive(&self, update: &mut impl Updater) -> Result { self.0.to_primitive(update) } } #[derive(Object, ObjectWrite, Debug, DataSize)] #[pdf(Type = "Catalog?")] pub struct Catalog { #[pdf(key="Version")] pub version: Option, #[pdf(key="Pages")] pub pages: PagesRc, // PageLabels: number_tree, #[pdf(key="Names")] pub names: Option>, #[pdf(key="Dests")] pub dests: Option>, // ViewerPreferences: dict // PageLayout: name // PageMode: name #[pdf(key="Outlines")] pub outlines: Option, // Threads: array // OpenAction: array or dict // AA: dict // URI: dict // AcroForm: dict #[pdf(key="AcroForm")] pub forms: Option, // Metadata: stream #[pdf(key="Metadata")] pub metadata: Option>>, #[pdf(key="StructTreeRoot")] pub struct_tree_root: Option, // MarkInfo: dict // Lang: text string // SpiderInfo: dict // OutputIntents: array // PieceInfo: dict // OCProperties: dict // Perms: dict // Legal: dict // Requirements: array // Collection: dict // NeedsRendering: bool } #[derive(Object, ObjectWrite, Debug, Default, Clone, DataSize)] #[pdf(Type = "Pages?")] pub struct PageTree { #[pdf(key="Parent")] pub parent: Option, #[pdf(key="Kids")] pub kids: Vec>, #[pdf(key="Count")] pub count: u32, #[pdf(key="Resources")] pub resources: Option>, #[pdf(key="MediaBox")] pub media_box: Option, #[pdf(key="CropBox")] pub crop_box: Option, } impl PageTree { pub fn page(&self, resolve: &impl Resolve, page_nr: u32) -> Result { self.page_limited(resolve, page_nr, 16) } fn page_limited(&self, resolve: &impl Resolve, page_nr: u32, depth: usize) -> Result { if depth == 0 { 
bail!("page tree depth exeeded"); } let mut pos = 0; for &kid in &self.kids { let node = resolve.get(kid)?; match *node { PagesNode::Tree(ref tree) => { if (pos .. pos + tree.count).contains(&page_nr) { return tree.page_limited(resolve, page_nr - pos, depth - 1); } pos += tree.count; } PagesNode::Leaf(ref _page) => { if pos == page_nr { return Ok(PageRc(node)); } pos += 1; } } } Err(PdfError::PageOutOfBounds {page_nr, max: pos}) } /* pub fn update_pages(&mut self, mut offset: u32, page_nr: u32, page: Page) -> Result<()> { for kid in &self.kids { // println!("{}/{} {:?}", offset, page_nr, kid); match *(self.get(*kid)?) { PagesNode::Tree(ref mut t) => { if offset + t.count < page_nr { offset += t.count; } else { return self.update_pages(t, offset, page_nr, page); } }, PagesNode::Leaf(ref mut p) => { if offset < page_nr { offset += 1; } else { assert_eq!(offset, page_nr); let p = self.storage.create(page)?; self.storage.update(kid.get_inner(), PagesNode::Leaf(p)); return Ok(()); } } } } Err(PdfError::PageNotFound {page_nr: page_nr}) } pub fn pages<'a>(&'a self, resolve: &'a impl Resolve) -> impl Iterator> + 'a { self.kids.iter().flat_map(move |&r| { match resolve.get(r) { Ok(node) => Either::Left(node.pages(resolve)), Err(e) => Either::Right(once(Err(e))) } }) } */ } impl SubType for PageTree {} #[derive(Object, ObjectWrite, Debug, Clone, DataSize)] #[pdf(Type="Page?")] pub struct Page { #[pdf(key="Parent")] pub parent: PagesRc, #[pdf(key="Resources", indirect)] pub resources: Option>, #[pdf(key="MediaBox")] pub media_box: Option, #[pdf(key="CropBox")] pub crop_box: Option, #[pdf(key="TrimBox")] pub trim_box: Option, #[pdf(key="Contents")] pub contents: Option, #[pdf(key="Rotate", default="0")] pub rotate: i32, #[pdf(key="Metadata")] pub metadata: Option, #[pdf(key="LGIDict")] pub lgi: Option, #[pdf(key="VP")] pub vp: Option, #[pdf(other)] pub other: Dictionary, } fn inherit<'a, T: 'a, F>(mut parent: &'a PageTree, f: F) -> Result> where F: Fn(&'a PageTree) -> Option { loop { match (&parent.parent, f(parent)) { (_, Some(t)) => return Ok(Some(t)), (Some(ref p), None) => parent = p, (None, None) => return Ok(None) } } } impl Page { pub fn new(parent: PagesRc) -> Page { Page { parent, media_box: None, crop_box: None, trim_box: None, resources: None, contents: None, rotate: 0, metadata: None, lgi: None, vp: None, other: Dictionary::new(), } } pub fn media_box(&self) -> Result { match self.media_box { Some(b) => Ok(b), None => inherit(&self.parent, |pt| pt.media_box)? .ok_or_else(|| PdfError::MissingEntry { typ: "Page", field: "MediaBox".into() }) } } pub fn crop_box(&self) -> Result { match self.crop_box { Some(b) => Ok(b), None => match inherit(&self.parent, |pt| pt.crop_box)? { Some(b) => Ok(b), None => self.media_box() } } } pub fn resources(&self) -> Result<&MaybeRef> { match self.resources { Some(ref r) => Ok(r), None => inherit(&self.parent, |pt| pt.resources.as_ref())? 
.ok_or_else(|| PdfError::MissingEntry { typ: "Page", field: "Resources".into() }) } } } impl SubType for Page {} #[derive(Object, DataSize)] pub struct PageLabel { #[pdf(key="S")] pub style: Option, #[pdf(key="P")] pub prefix: Option, #[pdf(key="St")] pub start: Option } #[derive(Object, ObjectWrite, Debug, DataSize, Default, DeepClone, Clone)] pub struct Resources { #[pdf(key="ExtGState")] pub graphics_states: HashMap, #[pdf(key="ColorSpace")] pub color_spaces: HashMap, #[pdf(key="Pattern")] pub pattern: HashMap>, // shading: Option, #[pdf(key="XObject")] pub xobjects: HashMap>, // /XObject is a dictionary that map arbitrary names to XObjects #[pdf(key="Font")] pub fonts: HashMap>, #[pdf(key="Properties")] pub properties: HashMap>, } impl Resources { pub fn fonts(&self) -> impl Iterator)> { self.fonts.iter().map(|(k, v)| (k.as_str(), v)) } } #[derive(Debug, Object, ObjectWrite, DataSize, Clone, DeepClone)] pub struct PatternDict { #[pdf(key="PaintType")] pub paint_type: Option, #[pdf(key="TilingType")] pub tiling_type: Option, #[pdf(key="BBox")] pub bbox: Rect, #[pdf(key="XStep")] pub x_step: f32, #[pdf(key="YStep")] pub y_step: f32, #[pdf(key="Resources")] pub resources: Ref, #[pdf(key="Matrix")] pub matrix: Option, } #[derive(Debug, DataSize)] pub enum Pattern { Dict(PatternDict), Stream(PatternDict, Vec), } impl Pattern { pub fn dict(&self) -> &PatternDict { match *self { Pattern::Dict(ref d) => d, Pattern::Stream(ref d, _) => d, } } } impl Object for Pattern { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let p = p.resolve(resolve)?; match p { Primitive::Dictionary(dict) => Ok(Pattern::Dict(PatternDict::from_dict(dict, resolve)?)), Primitive::Stream(s) => { let stream: Stream = Stream::from_stream(s, resolve)?; let data = stream.data(resolve)?; let ops = t!(parse_ops(&data, resolve)); let dict = stream.info.info; Ok(Pattern::Stream(dict, ops)) } p => Err(PdfError::UnexpectedPrimitive { expected: "Dictionary or Stream", found: p.get_debug_name() }) } } } impl ObjectWrite for Pattern { fn to_primitive(&self, update: &mut impl Updater) -> Result { match self { Pattern::Dict(ref d) => d.to_primitive(update), Pattern::Stream(ref d, ref ops) => { let data = serialize_ops(ops)?; let stream = Stream::new_with_filters(d.clone(), data, vec![]); stream.to_primitive(update) } } } } impl DeepClone for Pattern { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { match *self { Pattern::Dict(ref d) => Ok(Pattern::Dict(d.deep_clone(cloner)?)), Pattern::Stream(ref dict, ref ops) => { let old_resources = cloner.get(dict.resources)?; let mut resources = Resources::default(); let ops: Vec = ops.iter().map(|op| deep_clone_op(op, cloner, &old_resources, &mut resources)).collect::>>()?; let dict = PatternDict { resources: cloner.create(resources)?.get_ref(), .. 
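// Illustrative sketch (editorial addition): MediaBox, CropBox and Resources are
// inheritable page attributes; `Page::media_box` above falls back to the
// nearest ancestor page tree node via `inherit`. Computing a page size:
fn page_size(page: &Page) -> Result<(f32, f32)> {
    let media = page.media_box()?; // own entry, or inherited from an ancestor
    Ok((media.right - media.left, media.top - media.bottom))
}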
*dict }; Ok(Pattern::Stream(dict, ops)) } } } } #[derive(Object, ObjectWrite, DeepClone, Debug, DataSize, Copy, Clone)] pub enum LineCap { Butt = 0, Round = 1, Square = 2 } #[derive(Object, ObjectWrite, DeepClone, Debug, DataSize, Copy, Clone)] pub enum LineJoin { Miter = 0, Round = 1, Bevel = 2 } #[derive(Object, ObjectWrite, DeepClone, Debug, DataSize, Clone)] #[pdf(Type = "ExtGState?")] /// `ExtGState` pub struct GraphicsStateParameters { #[pdf(key="LW")] pub line_width: Option, #[pdf(key="LC")] pub line_cap: Option, #[pdf(key="LJ")] pub line_join: Option, #[pdf(key="ML")] pub miter_limit: Option, #[pdf(key="D")] pub dash_pattern: Option>, #[pdf(key="RI")] pub rendering_intent: Option, #[pdf(key="OP")] pub overprint: Option, #[pdf(key="op")] pub overprint_fill: Option, #[pdf(key="OPM")] pub overprint_mode: Option, #[pdf(key="Font")] pub font: Option<(Ref, f32)>, // BG // BG2 // UCR // UCR2 // TR // TR2 // HT // FL // SM // SA #[pdf(key="BM")] pub blend_mode: Option, #[pdf(key="SMask")] pub smask: Option, #[pdf(key="CA")] pub stroke_alpha: Option, #[pdf(key="ca")] pub fill_alpha: Option, #[pdf(key="AIS")] pub alpha_is_shape: Option, #[pdf(key="TK")] pub text_knockout: Option, #[pdf(other)] _other: Dictionary } #[derive(Object, Debug, DataSize, DeepClone)] #[pdf(is_stream)] pub enum XObject { #[pdf(name="PS")] Postscript (PostScriptXObject), Image (ImageXObject), Form (FormXObject), } impl ObjectWrite for XObject { fn to_primitive(&self, update: &mut impl Updater) -> Result { let (subtype, mut stream) = match self { XObject::Postscript(s) => ("PS", s.to_pdf_stream(update)?), XObject::Form(s) => ("Form", s.stream.to_pdf_stream(update)?), XObject::Image(s) => ("Image", s.inner.to_pdf_stream(update)?), }; stream.info.insert("Subtype", Name::from(subtype)); stream.info.insert("Type", Name::from("XObject")); Ok(stream.into()) } } /// A variant of XObject pub type PostScriptXObject = Stream; #[derive(Debug, DataSize, Clone, DeepClone)] pub struct ImageXObject { pub inner: Stream } impl Object for ImageXObject { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let s = PdfStream::from_primitive(p, resolve)?; Self::from_stream(s, resolve) } } impl ObjectWrite for ImageXObject { fn to_primitive(&self, update: &mut impl Updater) -> Result { self.inner.to_primitive(update) } } impl Deref for ImageXObject { type Target = ImageDict; fn deref(&self) -> &ImageDict { &self.inner.info } } pub enum ImageFormat { Raw, Jpeg, Jp2k, Jbig2, CittFax, Png } impl ImageXObject { pub fn from_stream(s: PdfStream, resolve: &impl Resolve) -> Result { let inner = Stream::from_stream(s, resolve)?; Ok(ImageXObject { inner }) } /// Decode everything except for the final image encoding (jpeg, jbig2, jp2k, ...) 
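// Illustrative sketch (editorial addition): the usual consumption pattern for
// the two accessors defined below. `raw_image_data` undoes the byte-oriented
// filters but leaves the final image codec alone; `image_data` decodes all the
// way to raw samples.
fn load_image(img: &ImageXObject, resolver: &impl Resolve) -> Result<()> {
    match img.raw_image_data(resolver)? {
        (samples, None) => { let _ = samples; /* already raw; interpret via the ImageDict fields */ }
        (encoded, Some(filter)) => { let _ = (encoded, filter); /* e.g. DCTDecode: hand the bytes to a JPEG decoder */ }
    }
    Ok(())
}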
pub fn raw_image_data(&self, resolve: &impl Resolve) -> Result<(Arc<[u8]>, Option<&StreamFilter>)> { match self.inner.inner_data { StreamData::Generated(_) => Ok((self.inner.data(resolve)?, None)), StreamData::Original(ref file_range, id) => { let filters = self.inner.filters.as_slice(); // decode all non image filters let end = filters.iter().rposition(|f| match f { StreamFilter::ASCIIHexDecode => false, StreamFilter::ASCII85Decode => false, StreamFilter::LZWDecode(_) => false, StreamFilter::RunLengthDecode => false, StreamFilter::Crypt => true, _ => true }).unwrap_or(filters.len()); let (normal_filters, image_filters) = filters.split_at(end); let data = resolve.get_data_or_decode(id, file_range.clone(), normal_filters)?; match image_filters { [] => Ok((data, None)), [StreamFilter::DCTDecode(_)] | [StreamFilter::CCITTFaxDecode(_)] | [StreamFilter::JPXDecode] | [StreamFilter::FlateDecode(_)] | [StreamFilter::JBIG2Decode(_)] => Ok((data, Some(&image_filters[0]))), _ => bail!("??? filters={:?}", image_filters) } } } } pub fn image_data(&self, resolve: &impl Resolve) -> Result> { let (data, filter) = self.raw_image_data(resolve)?; let filter = match filter { Some(f) => f, None => return Ok(data) }; let mut data = match filter { StreamFilter::CCITTFaxDecode(ref params) => { if self.inner.info.width != params.columns { bail!("image width mismatch {} != {}", self.inner.info.width, params.columns); } let mut data = fax_decode(&data, params)?; if params.rows == 0 { // adjust size data.truncate(self.inner.info.height as usize * self.inner.info.width as usize); } data } StreamFilter::DCTDecode(ref p) => dct_decode(&data, p)?, StreamFilter::JPXDecode => jpx_decode(&data)?, StreamFilter::JBIG2Decode(ref p) => { let global_data = p.globals.as_ref().map(|s| s.data(resolve)).transpose()?; jbig2_decode(&data, global_data.as_deref().unwrap_or_default())? }, StreamFilter::FlateDecode(ref p) => flate_decode(&data, p)?, _ => unreachable!() }; if let Some(ref decode) = self.decode { if decode == &[1.0, 0.0] && self.bits_per_component == Some(1) { data.iter_mut().for_each(|b| *b = !*b); } } Ok(data.into()) } } #[derive(Object, Debug, DataSize, DeepClone, ObjectWrite)] #[pdf(Type="XObject", Subtype="PS")] pub struct PostScriptDict { // TODO #[pdf(other)] pub other: Dictionary } #[derive(Object, Debug, Clone, DataSize, DeepClone, ObjectWrite, Default)] #[pdf(Type="XObject?", Subtype="Image")] /// A variant of XObject pub struct ImageDict { #[pdf(key="Width")] pub width: u32, #[pdf(key="Height")] pub height: u32, #[pdf(key="ColorSpace")] pub color_space: Option, #[pdf(key="BitsPerComponent")] pub bits_per_component: Option, // Note: only allowed values are 1, 2, 4, 8, 16. Enum? #[pdf(key="Intent")] pub intent: Option, // Note: default: "the current rendering intent in the graphics state" - I don't think this // ought to have a default then #[pdf(key="ImageMask", default="false")] pub image_mask: bool, // Mask: stream or array #[pdf(key="Mask")] pub mask: Option, // /// Describes how to map image samples into the range of values appropriate for the image’s color space. /// If `image_mask`: either [0 1] or [1 0]. 
Else, the length must be twice the number of color /// components required by `color_space` (key ColorSpace) // (see Decode arrays page 344) #[pdf(key="Decode")] pub decode: Option>, #[pdf(key="Interpolate", default="false")] pub interpolate: bool, // Alternates: Vec // SMask (soft mask): stream // SMaskInData: i32 ///The integer key of the image’s entry in the structural parent tree #[pdf(key="StructParent")] pub struct_parent: Option, #[pdf(key="ID")] pub id: Option, #[pdf(key="SMask")] pub smask: Option>>, // OPI: dict // Metadata: stream // OC: dict #[pdf(other)] pub other: Dictionary } #[derive(Object, Debug, Copy, Clone, DataSize, DeepClone, ObjectWrite)] pub enum RenderingIntent { AbsoluteColorimetric, RelativeColorimetric, Saturation, Perceptual, } impl RenderingIntent { pub fn from_str(s: &str) -> Option { match s { "AbsoluteColorimetric" => Some(RenderingIntent::AbsoluteColorimetric), "RelativeColorimetric" => Some(RenderingIntent::RelativeColorimetric), "Perceptual" => Some(RenderingIntent::Perceptual), "Saturation" => Some(RenderingIntent::Saturation), _ => None } } pub fn to_str(self) -> &'static str { match self { RenderingIntent::AbsoluteColorimetric => "AbsoluteColorimetric", RenderingIntent::RelativeColorimetric => "RelativeColorimetric", RenderingIntent::Perceptual => "Perceptual", RenderingIntent::Saturation => "Saturation", } } } #[derive(Object, Debug, DataSize, DeepClone, ObjectWrite)] #[pdf(Type="XObject?", Subtype="Form")] pub struct FormDict { #[pdf(key="FormType", default="1")] pub form_type: i32, #[pdf(key="Name")] pub name: Option, #[pdf(key="LastModified")] pub last_modified: Option, #[pdf(key="BBox")] pub bbox: Rect, #[pdf(key="Matrix")] pub matrix: Option, #[pdf(key="Resources")] pub resources: Option>, #[pdf(key="Group")] pub group: Option, #[pdf(key="Ref")] pub reference: Option, #[pdf(key="Metadata")] pub metadata: Option>>, #[pdf(key="PieceInfo")] pub piece_info: Option, #[pdf(key="StructParent")] pub struct_parent: Option, #[pdf(key="StructParents")] pub struct_parents: Option, #[pdf(key="OPI")] pub opi: Option, #[pdf(other)] pub other: Dictionary, } #[derive(Object, ObjectWrite, Debug, Clone, DataSize)] pub struct InteractiveFormDictionary { #[pdf(key="Fields")] pub fields: Vec>, #[pdf(key="NeedAppearances", default="false")] pub need_appearences: bool, #[pdf(key="SigFlags", default="0")] pub sig_flags: u32, #[pdf(key="CO")] pub co: Option>>, #[pdf(key="DR")] pub dr: Option>, #[pdf(key="DA")] pub da: Option, #[pdf(key="Q")] pub q: Option, #[pdf(key="XFA")] pub xfa: Option, } #[derive(Object, ObjectWrite, Debug, Copy, Clone, PartialEq, DataSize)] pub enum FieldType { #[pdf(name="Btn")] Button, #[pdf(name="Tx")] Text, #[pdf(name="Ch")] Choice, #[pdf(name="Sig")] Signature, #[pdf(name="SigRef")] SignatureReference, } #[derive(Object, ObjectWrite, Debug)] #[pdf(Type="SV")] pub struct SeedValueDictionary { #[pdf(key="Ff", default="0")] pub flags: u32, #[pdf(key="Filter")] pub filter: Option, #[pdf(key="SubFilter")] pub sub_filter: Option>, #[pdf(key="V")] pub value: Option, #[pdf(key="DigestMethod")] pub digest_method: Vec, #[pdf(other)] pub other: Dictionary } #[derive(Object, ObjectWrite, Debug)] #[pdf(Type="Sig?")] pub struct SignatureDictionary { #[pdf(key="Filter")] pub filter: Name, #[pdf(key="SubFilter")] pub sub_filter: Name, #[pdf(key="ByteRange")] pub byte_range: Vec, #[pdf(key="Contents")] pub contents: PdfString, #[pdf(key="Cert")] pub cert: Vec, #[pdf(key="Reference")] pub reference: Option, #[pdf(key="Name")] pub name: Option, #[pdf(key="M")] 
pub m: Option, #[pdf(key="Location")] pub location: Option, #[pdf(key="Reason")] pub reason: Option, #[pdf(key="ContactInfo")] pub contact_info: Option, #[pdf(key="V")] pub v: i32, #[pdf(key="R")] pub r: i32, #[pdf(key="Prop_Build")] pub prop_build: Dictionary, #[pdf(key="Prop_AuthTime")] pub prop_auth_time: i32, #[pdf(key="Prop_AuthType")] pub prop_auth_type: Name, #[pdf(other)] pub other: Dictionary } #[derive(Object, ObjectWrite, Debug)] #[pdf(Type="SigRef?")] pub struct SignatureReferenceDictionary { #[pdf(key="TransformMethod")] pub transform_method: Name, #[pdf(key="TransformParams")] pub transform_params: Option, #[pdf(key="Data")] pub data: Option, #[pdf(key="DigestMethod")] pub digest_method: Option, #[pdf(other)] pub other: Dictionary } #[derive(Object, ObjectWrite, Debug, DataSize)] pub struct FieldDictionary { #[pdf(key="FT")] pub typ: Option, #[pdf(key="Parent")] pub parent: Option>, #[pdf(key="Kids")] pub kids: Vec>, #[pdf(key="T")] pub name: Option, #[pdf(key="TU")] pub alt_name: Option, #[pdf(key="TM")] pub mapping_name: Option, #[pdf(key="Ff", default="0")] pub flags: u32, #[pdf(key="SigFlags", default="0")] pub sig_flags: u32, #[pdf(key="V")] pub value: Primitive, #[pdf(key="DV")] pub default_value: Primitive, #[pdf(key="AA")] pub actions: Option, } #[derive(Debug, DataSize)] pub enum Counter { Arabic, RomanUpper, RomanLower, AlphaUpper, AlphaLower } impl Object for Counter { // fn serialize(&self, out: &mut W) -> Result<()> { // let style_code = match *self { // Counter::Arabic => "D", // Counter::RomanLower => "r", // Counter::RomanUpper => "R", // Counter::AlphaLower => "a", // Counter::AlphaUpper => "A" // }; // out.write_all(style_code.as_bytes())?; // Ok(()) // } fn from_primitive(_: Primitive, _: &impl Resolve) -> Result { unimplemented!(); } } #[derive(Debug, DataSize)] pub enum NameTreeNode { /// Intermediate (Vec>>), /// Leaf (Vec<(PdfString, T)>) } /// Note: The PDF concept of 'root' node is an intermediate or leaf node which has no 'Limits' /// entry. Hence, `limits`, #[derive(Debug, DataSize)] pub struct NameTree { pub limits: Option<(PdfString, PdfString)>, pub node: NameTreeNode, } impl NameTree { pub fn walk(&self, r: &impl Resolve, callback: &mut dyn FnMut(&PdfString, &T)) -> Result<(), PdfError> { match self.node { NameTreeNode::Leaf(ref items) => { for (name, val) in items { callback(name, val); } } NameTreeNode::Intermediate(ref items) => { for &tree_ref in items { let tree = r.get(tree_ref)?; tree.walk(r, callback)?; } } } Ok(()) } } impl Object for NameTree { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let mut dict = t!(p.resolve(resolve)?.into_dictionary()); // Quite long function..= let limits = match dict.remove("Limits") { Some(limits) => { let limits = limits.resolve(resolve)?.into_array()?; if limits.len() != 2 { bail!("Error reading NameTree: 'Limits' is not of length 2"); } let min = limits[0].clone().into_string()?; let max = limits[1].clone().into_string()?; Some((min, max)) } None => None }; let kids = dict.remove("Kids"); let names = dict.remove("Names"); // If no `kids`, try `names`. Else there is an error. 
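// Illustrative sketch (editorial addition; the bounds on `T` are an assumption):
// flattening a name tree with the `walk` method above, which recurses through
// intermediate nodes and visits every (name, value) pair of the leaves.
fn collect_names<T: Object + DataSize>(tree: &NameTree<T>, resolver: &impl Resolve) -> Result<Vec<PdfString>> {
    let mut names = Vec::new();
    tree.walk(resolver, &mut |name, _value| names.push(name.clone()))?;
    Ok(names)
}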
Ok(match (kids, names) { (Some(kids), _) => { let kids = t!(kids.resolve(resolve)?.into_array()?.iter().map(|kid| Ref::>::from_primitive(kid.clone(), resolve) ).collect::>>()); NameTree { limits, node: NameTreeNode::Intermediate (kids) } } (None, Some(names)) => { let names = names.resolve(resolve)?.into_array()?; let mut new_names = Vec::new(); for pair in names.chunks_exact(2) { let name = pair[0].clone().resolve(resolve)?.into_string()?; let value = t!(T::from_primitive(pair[1].clone(), resolve)); new_names.push((name, value)); } NameTree { limits, node: NameTreeNode::Leaf (new_names), } } (None, None) => { warn!("Neither Kids nor Names present in NameTree node."); NameTree { limits, node: NameTreeNode::Intermediate(vec![]) } } }) } } impl ObjectWrite for NameTree { fn to_primitive(&self, _update: &mut impl Updater) -> Result { todo!("impl ObjectWrite for NameTree") } } #[derive(Debug, Clone, DataSize)] pub enum DestView { // left, top, zoom XYZ { left: Option, top: Option, zoom: f32 }, Fit, FitH { top: f32 }, FitV { left: f32 }, FitR(Rect), FitB, FitBH { top: f32 } } #[derive(Debug, Clone, DataSize)] pub enum MaybeNamedDest { Named(PdfString), Direct(Dest), } #[derive(Debug, Clone, DataSize)] pub struct Dest { pub page: Option>, pub view: DestView } impl Object for Dest { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let p = match p { Primitive::Reference(r) => resolve.resolve(r)?, p => p }; let p = match p { Primitive::Dictionary(mut dict) => dict.require("Dest", "D")?, p => p }; let array = t!(p.as_array(), p); Dest::from_array(array, resolve) } } impl Dest { fn from_array(array: &[Primitive], resolve: &impl Resolve) -> Result { let page = Object::from_primitive(try_opt!(array.get(0)).clone(), resolve)?; let kind = try_opt!(array.get(1)); let view = match kind.as_name()? { "XYZ" => DestView::XYZ { left: match *try_opt!(array.get(2)) { Primitive::Null => None, Primitive::Integer(n) => Some(n as f32), Primitive::Number(f) => Some(f), ref p => return Err(PdfError::UnexpectedPrimitive { expected: "Number | Integer | Null", found: p.get_debug_name() }), }, top: match *try_opt!(array.get(3)) { Primitive::Null => None, Primitive::Integer(n) => Some(n as f32), Primitive::Number(f) => Some(f), ref p => return Err(PdfError::UnexpectedPrimitive { expected: "Number | Integer | Null", found: p.get_debug_name() }), }, zoom: match array.get(4) { Some(Primitive::Null) => 0.0, Some(&Primitive::Integer(n)) => n as f32, Some(&Primitive::Number(f)) => f, Some(p) => return Err(PdfError::UnexpectedPrimitive { expected: "Number | Integer | Null", found: p.get_debug_name() }), None => 0.0, }, }, "Fit" => DestView::Fit, "FitH" => DestView::FitH { top: try_opt!(array.get(2)).as_number()? }, "FitV" => DestView::FitV { left: try_opt!(array.get(2)).as_number()? }, "FitR" => DestView::FitR(Rect { left: try_opt!(array.get(2)).as_number()?, bottom: try_opt!(array.get(3)).as_number()?, right: try_opt!(array.get(4)).as_number()?, top: try_opt!(array.get(5)).as_number()?, }), "FitB" => DestView::FitB, "FitBH" => DestView::FitBH { top: try_opt!(array.get(2)).as_number()? 
}, name => return Err(PdfError::UnknownVariant { id: "Dest", name: name.into() }) }; Ok(Dest { page, view }) } } impl Object for MaybeNamedDest { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let p = match p { Primitive::Reference(r) => resolve.resolve(r)?, p => p }; let p = match p { Primitive::Dictionary(mut dict) => dict.require("Dest", "D")?, Primitive::String(s) => return Ok(MaybeNamedDest::Named(s)), p => p }; let array = t!(p.as_array(), p); Dest::from_array(array, resolve).map(MaybeNamedDest::Direct) } } impl ObjectWrite for MaybeNamedDest { fn to_primitive(&self, update: &mut impl Updater) -> Result { match self { MaybeNamedDest::Named(s) => Ok(Primitive::String(s.clone())), MaybeNamedDest::Direct(d) => d.to_primitive(update) } } } impl ObjectWrite for Dest { fn to_primitive(&self, update: &mut impl Updater) -> Result { let mut arr = vec![self.page.to_primitive(update)?]; match self.view { DestView::XYZ { left, top, zoom } => { arr.push(Primitive::Name("XYZ".into())); arr.push(left.to_primitive(update)?); arr.push(top.to_primitive(update)?); arr.push(Primitive::Number(zoom)); } DestView::Fit => { arr.push(Primitive::Name("Fit".into())); } DestView::FitH { top } => { arr.push(Primitive::Name("FitH".into())); arr.push(Primitive::Number(top)); } DestView::FitV { left } => { arr.push(Primitive::Name("FitV".into())); arr.push(Primitive::Number(left)); } DestView::FitR(rect) => { arr.push(Primitive::Name("FitR".into())); arr.push(Primitive::Number(rect.left)); arr.push(Primitive::Number(rect.bottom)); arr.push(Primitive::Number(rect.right)); arr.push(Primitive::Number(rect.top)); } DestView::FitB => { arr.push(Primitive::Name("FitB".into())); } DestView::FitBH { top } => { arr.push(Primitive::Name("FitBH".into())); arr.push(Primitive::Number(top)); } } Ok(Primitive::Array(arr)) } } /// There is one `NameDictionary` associated with each PDF file. #[derive(Object, ObjectWrite, Debug, DataSize)] pub struct NameDictionary { #[pdf(key="Pages")] pub pages: Option>, #[pdf(key="Dests")] pub dests: Option>>, #[pdf(key="AP")] pub ap: Option>, #[pdf(key="JavaScript")] pub javascript: Option>, #[pdf(key="Templates")] pub templates: Option>, #[pdf(key="IDS")] pub ids: Option>, #[pdf(key="URLS")] pub urls: Option>, #[pdf(key="EmbeddedFiles")] pub embedded_files: Option>, /* #[pdf(key="AlternativePresentations")] alternate_presentations: NameTree, #[pdf(key="Renditions")] renditions: NameTree, */ } /* Embedded file streams can be associated with the document as a whole through * the EmbeddedFiles entry (PDF 1.4) in the PDF document’s name dictionary * (see Section 3.6.3, “Name Dictionary”). * The associated name tree maps name strings to file specifications that refer * to embedded file streams through their EF entries. 
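// Illustrative sketch (editorial addition): destinations round-trip through the
// `Dest`/`DestView` types above. This value serializes to the PDF array
// [ <page> /FitH 792 ].
fn fit_width_dest(page: Ref<PagesNode>) -> Dest {
    Dest { page: Some(page), view: DestView::FitH { top: 792.0 } }
}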
*/ #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] pub struct FileSpec { #[pdf(key="EF")] pub ef: Option>>>, /* #[pdf(key="RF")] rf: Option>, */ } /// Used only as elements in `FileSpec` #[derive(Object, ObjectWrite, Debug, Clone, DeepClone)] pub struct Files { #[pdf(key="F")] pub f: Option, #[pdf(key="UF")] pub uf: Option, #[pdf(key="DOS")] pub dos: Option, #[pdf(key="Mac")] pub mac: Option, #[pdf(key="Unix")] pub unix: Option, } impl DataSize for Files { const IS_DYNAMIC: bool = T::IS_DYNAMIC; const STATIC_HEAP_SIZE: usize = 5 * Option::::STATIC_HEAP_SIZE; fn estimate_heap_size(&self) -> usize { self.f.as_ref().map(|t| t.estimate_heap_size()).unwrap_or(0) + self.uf.as_ref().map(|t| t.estimate_heap_size()).unwrap_or(0) + self.dos.as_ref().map(|t| t.estimate_heap_size()).unwrap_or(0) + self.mac.as_ref().map(|t| t.estimate_heap_size()).unwrap_or(0) + self.unix.as_ref().map(|t| t.estimate_heap_size()).unwrap_or(0) } } /// PDF Embedded File Stream. #[derive(Object, Debug, Clone, DataSize, DeepClone, ObjectWrite)] pub struct EmbeddedFile { #[pdf(key="Subtype")] subtype: Option, #[pdf(key="Params")] pub params: Option, } #[derive(Object, Debug, Clone, DataSize, DeepClone, ObjectWrite)] pub struct EmbeddedFileParamDict { #[pdf(key="Size")] pub size: Option, #[pdf(key="CreationDate")] creationdate: Option, #[pdf(key="ModDate")] moddate: Option, #[pdf(key="Mac")] mac: Option, #[pdf(key="CheckSum")] checksum: Option, } #[derive(Object, Debug, Clone, DataSize)] pub struct OutlineItem { #[pdf(key="Title")] pub title: Option, #[pdf(key="Prev")] pub prev: Option>, #[pdf(key="Next")] pub next: Option>, #[pdf(key="First")] pub first: Option>, #[pdf(key="Last")] pub last: Option>, #[pdf(key="Count", default="0")] pub count: i32, #[pdf(key="Dest")] pub dest: Option, #[pdf(key="A")] pub action: Option, #[pdf(key="SE")] pub se: Option, #[pdf(key="C")] pub color: Option>, #[pdf(key="F")] pub flags: Option, } #[derive(Clone, Debug, DataSize)] pub enum Action { Goto(MaybeNamedDest), Other(Dictionary) } impl Object for Action { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { let mut d = t!(p.resolve(resolve)?.into_dictionary()); let s = try_opt!(d.get("S")).as_name()?; match s { "GoTo" => { let dest = t!(MaybeNamedDest::from_primitive(try_opt!(d.remove("D")), resolve)); Ok(Action::Goto(dest)) } _ => Ok(Action::Other(d)) } } } impl ObjectWrite for Action { fn to_primitive(&self, update: &mut impl Updater) -> Result { match self { Action::Goto(dest) => { let mut dict = Dictionary::new(); dict.insert("D", dest.to_primitive(update)?); Ok(Primitive::Dictionary(dict)) } Action::Other(dict) => Ok(Primitive::Dictionary(dict.clone())) } } } #[derive(Object, ObjectWrite, Debug, DataSize)] #[pdf(Type="Outlines?")] pub struct Outlines { #[pdf(key="Count", default="0")] pub count: i32, #[pdf(key="First")] pub first: Option>, #[pdf(key="Last")] pub last: Option>, } #[derive(Debug, Copy, Clone, DataSize)] pub struct Rect { pub left: f32, pub bottom: f32, pub right: f32, pub top: f32, } impl Object for Rect { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { let arr = p.resolve(r)?.into_array()?; if arr.len() != 4 { bail!("len != 4 {:?}", arr); } Ok(Rect { left: arr[0].as_number()?, bottom: arr[1].as_number()?, right: arr[2].as_number()?, top: arr[3].as_number()? 
}) } } impl ObjectWrite for Rect { fn to_primitive(&self, update: &mut impl Updater) -> Result { Primitive::array::([self.left, self.bottom, self.right, self.top].iter(), update) } } // Stuff from chapter 10 of the PDF 1.7 ref #[derive(Object, ObjectWrite, Debug, DataSize)] pub struct MarkInformation { // TODO no /Type /// indicating whether the document conforms to Tagged PDF conventions #[pdf(key="Marked", default="false")] pub marked: bool, /// Indicating the presence of structure elements that contain user properties attributes #[pdf(key="UserProperties", default="false")] pub user_properties: bool, /// Indicating the presence of tag suspects #[pdf(key="Suspects", default="false")] pub suspects: bool, } #[derive(Object, ObjectWrite, Debug, DataSize)] #[pdf(Type = "StructTreeRoot")] pub struct StructTreeRoot { #[pdf(key="K")] pub children: Vec, } #[derive(Object, ObjectWrite, Debug, DataSize)] pub struct StructElem { #[pdf(key="S")] pub struct_type: StructType, #[pdf(key="P")] pub parent: Ref, #[pdf(key="ID")] pub id: Option, /// `Pg`: A page object representing a page on which some or all of the content items designated by the K entry are rendered. #[pdf(key="Pg")] pub page: Option>, } #[derive(Object, ObjectWrite, Debug, DataSize)] pub enum StructType { Document, Part, Art, Sect, Div, BlockQuote, Caption, TOC, TOCI, Index, NonStruct, Private, Book, P, H, H1, H2, H3, H4, H5, H6, L, Ll, Lbl, LBody, Table, TR, TH, TD, THead, TBody, TFoot, Span, Quote, Note, Reference, BibEntry, Code, Link, Annot, Ruby, RB, RT, RP, Warichu, WT, WP, Figure, Formula, Form, #[pdf(other)] Other(String), } #[derive(Object, ObjectWrite, Debug, DataSize)] pub enum Trapped { True, False, Unknown, } #[derive(Object, ObjectWrite, Debug, DataSize, Default)] pub struct InfoDict { #[pdf(key="Title")] pub title: Option, #[pdf(key="Author")] pub author: Option, #[pdf(key="Subject")] pub subject: Option, #[pdf(key="Keywords")] pub keywords: Option, #[pdf(key="Creator")] pub creator: Option, #[pdf(key="Author")] pub producer: Option, #[pdf(key="CreationDate")] pub creation_date: Option, #[pdf(key="ModDate")] pub mod_date: Option, #[pdf(key="Trapped")] pub trapped: Option, } #[cfg(test)] mod tests { use super::*; #[test] fn parse_struct_type() { assert!(matches!( StructType::from_primitive(Primitive::Name("BibEntry".into()), &NoResolve), Ok(StructType::BibEntry) )); let result = StructType::from_primitive(Primitive::Name("CustomStructType".into()), &NoResolve); if let Ok(StructType::Other(name)) = &result { assert_eq!(name, "CustomStructType"); } else { panic!("Incorrect result of {:?}", &result); } } #[test] fn test_field_type() { assert_eq!( FieldType::from_primitive(Primitive::Name("Tx".into()), &NoResolve).unwrap(), FieldType::Text ); } } pdf-0.9.0/src/parser/lexer/mod.rs000064400000000000000000000373321046102023000147420ustar 00000000000000/// Lexing an input file, in the sense of breaking it up into substrings based on delimiters and /// whitespace. use std::str::FromStr; use std::ops::{Range, Deref, RangeFrom}; use std::borrow::Cow; use crate::error::*; use crate::primitive::Name; mod str; pub use self::str::{StringLexer, HexStringLexer}; /// `Lexer` has functionality to jump around and traverse the PDF lexemes of a string in any direction. 
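///
/// A minimal sketch of forward lexing (illustrative only; error handling elided):
///
/// ```ignore
/// let mut lexer = Lexer::new(b"3 0 obj << /Length 42 >>");
/// assert_eq!(lexer.next().unwrap().as_str().unwrap(), "3");
/// assert_eq!(lexer.next().unwrap().as_str().unwrap(), "0");
/// assert_eq!(lexer.next().unwrap().as_str().unwrap(), "obj");
/// // `<<` is a two-byte delimiter and comes back as a single lexeme.
/// assert_eq!(lexer.next().unwrap().as_str().unwrap(), "<<");
/// ```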
#[derive(Copy, Clone)] #[allow(dead_code)] pub struct Lexer<'a> { pos: usize, buf: &'a [u8], file_offset: usize, } // find the position where condition(data[pos-1]) == false and condition(data[pos]) == true #[inline] fn boundary_rev(data: &[u8], pos: usize, condition: impl Fn(u8) -> bool) -> usize { match data[.. pos].iter().rposition(|&b| !condition(b)) { Some(start) => start + 1, None => 0 } } // find the position where condition(data[pos-1]) == true and condition(data[pos]) == false #[inline] fn boundary(data: &[u8], pos: usize, condition: impl Fn(u8) -> bool) -> usize { match data[pos ..].iter().position(|&b| !condition(b)) { Some(start) => pos + start, None => data.len() } } #[inline] fn is_whitespace(b: u8) -> bool { matches!(b, 0 | b' ' | b'\r' | b'\n' | b'\t') } #[inline] fn not(f: impl Fn(T) -> bool) -> impl Fn(T) -> bool { move |t| !f(t) } impl<'a> Lexer<'a> { pub fn new(buf: &'a [u8]) -> Lexer<'a> { Lexer { pos: 0, buf, file_offset: 0 } } pub fn with_offset(buf: &'a [u8], file_offset: usize) -> Lexer<'a> { Lexer { pos: 0, buf, file_offset } } /// Returns next lexeme. Lexer moves to the next byte after the lexeme. (needs to be tested) #[allow(clippy::should_implement_trait)] pub fn next(&mut self) -> Result> { let (lexeme, pos) = self.next_word()?; self.pos = pos; Ok(lexeme) } /// consume the whitespace sequence following the stream start pub fn next_stream(&mut self) -> Result<()> { let pos = self.skip_whitespace(self.pos)?; if !self.buf[pos ..].starts_with(b"stream") { // bail!("next token isn't 'stream'"); } let &b0 = self.buf.get(pos + 6).ok_or(PdfError::EOF)?; if b0 == b'\n' { self.pos = pos + 7; } else if b0 == b'\r' { let &b1 = self.buf.get(pos + 7).ok_or(PdfError::EOF)?; if b1 != b'\n' { bail!("invalid whitespace following 'stream'"); // bail!("invalid whitespace following 'stream'"); } self.pos = pos + 8; } else { bail!("invalid whitespace"); } Ok(()) } /// Gives previous lexeme. Lexer moves to the first byte of this lexeme. (needs to be tested) pub fn back(&mut self) -> Result> { //println!("back: {:?}", String::from_utf8_lossy(&self.buf[self.pos.saturating_sub(20) .. self.pos])); // first reverse until we find non-whitespace let end_pos = boundary_rev(self.buf, self.pos, is_whitespace); let start_pos = boundary_rev(self.buf, end_pos, not(is_whitespace)); self.pos = start_pos; Ok(self.new_substr(start_pos .. end_pos)) } /// Look at the next lexeme. Will return empty substr if the next character is EOF. pub fn peek(&self) -> Result> { match self.next_word() { Ok((substr, _)) => Ok(substr), Err(PdfError::EOF) => Ok(self.new_substr(self.pos..self.pos)), Err(e) => Err(e), } } /// Returns `Ok` if the next lexeme matches `expected` - else `Err`. pub fn next_expect(&mut self, expected: &'static str) -> Result<()> { let word = self.next()?; if word.equals(expected.as_bytes()) { Ok(()) } else { Err(PdfError::UnexpectedLexeme { pos: self.pos, lexeme: word.to_string(), expected }) } } /// skip whitespaces and return the position of the first non-whitespace character #[inline] fn skip_whitespace(&self, pos: usize) -> Result { // Move away from eventual whitespace let pos = boundary(self.buf, pos, is_whitespace); if pos >= self.buf.len() { Err(PdfError::EOF) } else { Ok(pos) } } /// Used by next, peek and back - returns substring and new position /// If forward, places pointer at the next non-whitespace character. /// If backward, places pointer at the start of the current word. // TODO ^ backward case is actually not tested or.. thought about that well. 
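    // Illustration of the contract (assuming a buffer b"  foo bar" and pos == 0):
    // next_word() returns (Substr("foo"), 5) -- the returned position points just
    // past the lexeme; leading whitespace is skipped on the *next* call instead.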
fn next_word(&self) -> Result<(Substr<'a>, usize)> { if self.pos == self.buf.len() { return Err(PdfError::EOF); } let mut pos = self.skip_whitespace(self.pos)?; while self.buf.get(pos) == Some(&b'%') { pos += 1; if let Some(off) = self.buf[pos..].iter().position(|&b| b == b'\n') { pos += off+1; } // Move away from eventual whitespace pos = self.skip_whitespace(pos)?; } let start_pos = pos; // If first character is delimiter, this lexeme only contains that character. // - except << and >> which go together, and / which marks the start of a // name token. if self.is_delimiter(pos) { if self.buf[pos] == b'/' { pos = self.advance_pos(pos)?; while !self.is_whitespace(pos) && !self.is_delimiter(pos) { match self.advance_pos(pos) { Ok(p) => pos = p, Err(_) => break, } } return Ok((self.new_substr(start_pos..pos), pos)); } if let Some(slice) = self.buf.get(pos..=pos+1) { if slice == b"<<" || slice == b">>" { pos = self.advance_pos(pos)?; } } pos = self.advance_pos(pos)?; return Ok((self.new_substr(start_pos..pos), pos)); } // Read to past the end of lexeme while !self.is_whitespace(pos) && !self.is_delimiter(pos) { match self.advance_pos(pos) { Ok(p) => pos = p, Err(_) => break, } } let result = self.new_substr(start_pos..pos); // Move away from whitespace again //pos = self.skip_whitespace(pos)?; Ok((result, pos)) } /// Just a helper for next_word. #[inline] fn advance_pos(&self, pos: usize) -> Result { if pos < self.buf.len() { Ok(pos + 1) } else { Err(PdfError::EOF) } } #[inline] pub fn next_as(&mut self) -> Result where T: FromStr, T::Err: std::error::Error + Send + Sync + 'static { self.next().and_then(|word| word.to::()) } #[inline] pub fn get_pos(&self) -> usize { self.pos } #[inline] pub fn new_substr(&self, mut range: Range) -> Substr<'a> { // if the range is backward, fix it // start is inclusive, end is exclusive. keep that in mind if range.start > range.end { let new_end = range.start + 1; range.start = range.end + 1; range.end = new_end; } Substr { file_offset: self.file_offset + range.start, slice: &self.buf[range], } } /// Just a helper function for set_pos, set_pos_from_end and offset_pos. #[inline] pub fn set_pos(&mut self, wanted_pos: usize) -> Substr<'a> { let new_pos = wanted_pos.min(self.buf.len()); let range = if self.pos < new_pos { self.pos..new_pos } else { new_pos..self.pos }; self.pos = new_pos; self.new_substr(range) } /// Returns the substr between the old and new positions #[inline] pub fn set_pos_from_end(&mut self, new_pos: usize) -> Substr<'a> { self.set_pos(self.buf.len().saturating_sub(new_pos).saturating_sub(1)) } /// Returns the substr between the old and new positions #[inline] pub fn offset_pos(&mut self, offset: usize) -> Substr<'a> { self.set_pos(self.pos.wrapping_add(offset)) } /// Moves pos to start of next line. Returns the skipped-over substring. #[allow(dead_code)] pub fn seek_newline(&mut self) -> Substr{ let start = self.pos; while self.buf[self.pos] != b'\n' && self.incr_pos() { } self.incr_pos(); self.new_substr(start..self.pos) } // TODO: seek_substr and seek_substr_back should use next() or back()? /// Moves pos to after the found `substr`. Returns Substr with traversed text if `substr` is found. 
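    ///
    /// A usage sketch (illustrative, not a doctest of the crate):
    ///
    /// ```ignore
    /// let mut lexer = Lexer::new(b"leading bytes endstream tail");
    /// let skipped = lexer.seek_substr("endstream").unwrap();
    /// assert_eq!(skipped, &b"leading bytes "[..]);
    /// // The lexer now sits right after "endstream".
    /// ```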
#[allow(dead_code)] pub fn seek_substr(&mut self, substr: impl AsRef<[u8]>) -> Option> { // let substr = substr.as_ref(); let start = self.pos; let mut matched = 0; loop { if self.pos >= self.buf.len() { return None } if self.buf[self.pos] == substr[matched] { matched += 1; } else { matched = 0; } if matched == substr.len() { break; } self.pos += 1; } self.pos += 1; Some(self.new_substr(start..(self.pos - substr.len()))) } //TODO perhaps seek_substr_back should, like back(), move to the first letter of the substr. /// Searches for string backward. Moves to after the found `substr`, returns the traversed /// Substr if found. pub fn seek_substr_back(&mut self, substr: &[u8]) -> Result> { let end = self.pos; match self.buf[.. end].windows(substr.len()).rposition(|w| w == substr) { Some(start) => { self.pos = start + substr.len(); Ok(self.new_substr(self.pos .. end)) } None => Err(PdfError::NotFound {word: String::from_utf8_lossy(substr).into() }) } } /// Read and return slice of at most n bytes. #[allow(dead_code)] pub fn read_n(&mut self, n: usize) -> Substr<'a> { let start_pos = self.pos; self.pos += n; if self.pos >= self.buf.len() { self.pos = self.buf.len() - 1; } if start_pos < self.buf.len() { self.new_substr(start_pos..self.pos) } else { self.new_substr(0..0) } } /// Returns slice from current position to end. #[inline] pub fn get_remaining_slice(&self) -> &'a [u8] { &self.buf[self.pos..] } /// for debugging pub fn ctx(&self) -> Cow { String::from_utf8_lossy(&self.buf[self.pos.saturating_sub(40)..self.buf.len().min(self.pos+40)]) } #[inline] fn incr_pos(&mut self) -> bool { if self.pos >= self.buf.len() - 1 { false } else { self.pos += 1; true } } #[inline] fn is_whitespace(&self, pos: usize) -> bool { self.buf.get(pos).map(|&b| is_whitespace(b)).unwrap_or(false) } #[inline] fn is_delimiter(&self, pos: usize) -> bool { self.buf.get(pos).map(|b| b"()<>[]{}/%".contains(b)).unwrap_or(false) } } /// A slice from some original string - a lexeme. #[derive(Copy, Clone, Debug)] pub struct Substr<'a> { slice: &'a [u8], file_offset: usize, } impl<'a> Substr<'a> { pub fn new + ?Sized>(data: &'a T, file_offset: usize) -> Self { Substr { slice: data.as_ref(), file_offset } } // to: &S -> U. Possibly expensive conversion. // as: &S -> &U. Cheap borrow conversion // into: S -> U. Cheap ownership transfer conversion. 
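    // For Substr that works out to, for example:
    //   to_string() -> String    (lossy UTF-8 copy, possibly expensive)
    //   as_slice()  -> &'a [u8]  (cheap borrow of the original buffer)
    //   to_vec()    -> Vec<u8>   (copies the underlying bytes)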
#[allow(clippy::inherent_to_string)] pub fn to_string(&self) -> String { String::from_utf8_lossy(self.as_slice()).into() } pub fn to_name(&self) -> Result { Ok(Name(std::str::from_utf8(self.as_slice())?.into())) } pub fn to_vec(&self) -> Vec { self.slice.to_vec() } pub fn to(&self) -> Result where T: FromStr, T::Err: std::error::Error + Send + Sync + 'static { std::str::from_utf8(self.slice)?.parse::().map_err(|e| PdfError::Parse { source: e.into() }) } pub fn is_integer(&self) -> bool { if self.slice.len() == 0 { return false; } let mut slice = self.slice; if slice[0] == b'-' { if slice.len() < 2 { return false; } slice = &slice[1..]; } is_int(slice) } pub fn is_real_number(&self) -> bool { self.real_number().is_some() } pub fn real_number(&self) -> Option { if self.slice.len() == 0 { return None; } let mut slice = self.slice; if slice[0] == b'-' { if slice.len() < 2 { return None; } slice = &slice[1..]; } if let Some(i) = slice.iter().position(|&b| b == b'.') { if !is_int(&slice[..i]) { return None; } slice = &slice[i+1..]; } if let Some(len) = slice.iter().position(|&b| !b.is_ascii_digit()) { if len == 0 { return None; } let end = self.slice.len() - slice.len() + len; Some(Substr { file_offset: self.file_offset, slice: &self.slice[..end] }) } else { Some(*self) } } pub fn as_slice(&self) -> &'a [u8] { self.slice } pub fn as_str(&self) -> Result<&str> { std::str::from_utf8(self.slice).map_err(|e| PdfError::Parse { source: e.into() }) } pub fn equals(&self, other: impl AsRef<[u8]>) -> bool { self.slice == other.as_ref() } pub fn reslice(&self, range: RangeFrom) -> Substr<'a> { Substr { file_offset: self.file_offset + range.start, slice: &self.slice[range], } } pub fn file_range(&self) -> Range { self.file_offset .. self.file_offset + self.slice.len() } } #[inline] fn is_int(b: &[u8]) -> bool { b.iter().all(|&b| b.is_ascii_digit()) } impl<'a> Deref for Substr<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { self.as_slice() } } impl<'a> PartialEq<&[u8]> for Substr<'a> { fn eq(&self, rhs: &&[u8]) -> bool { self.equals(rhs) } } impl<'a> PartialEq<&str> for Substr<'a> { fn eq(&self, rhs: &&str) -> bool { self.equals(rhs.as_bytes()) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_boundary_rev() { assert_eq!(boundary_rev(b" hello", 3, not(is_whitespace)), 1); assert_eq!(boundary_rev(b" hello", 3, is_whitespace), 3); } #[test] fn test_boundary() { assert_eq!(boundary(b" hello ", 3, not(is_whitespace)), 6); assert_eq!(boundary(b" hello ", 3, is_whitespace), 3); assert_eq!(boundary(b"01234 7orld", 5, is_whitespace), 7); assert_eq!(boundary(b"01234 7orld", 7, is_whitespace), 7); assert_eq!(boundary(b"q\n", 1, is_whitespace), 2); } #[test] fn test_substr() { assert!(Substr::new("123", 0).is_real_number()); assert!(Substr::new("123.", 0).is_real_number()); assert!(Substr::new("123.45", 0).is_real_number()); assert!(Substr::new(".45", 0).is_real_number()); assert!(Substr::new("-.45", 0).is_real_number()); assert!(!Substr::new("123.45", 0).is_integer()); assert!(Substr::new("123", 0).is_integer()); } } pdf-0.9.0/src/parser/lexer/str.rs000064400000000000000000000255671046102023000150020ustar 00000000000000use std::iter::Iterator; use crate::error::*; /// A lexer for PDF strings. Breaks the string up into single characters (`u8`) /// It's also possible to get the number of indices of the original array that was traversed by the /// Iterator. 
/// /// ``` /// let mut string: Vec = Vec::new(); /// let bytes_traversed = { /// let mut string_lexer = StringLexer::new(lexer.get_remaining_slice()); /// for character in string_lexer.iter() { /// let character = character?; /// string.push(character); /// } /// string_lexer.get_offset() as i64 /// }; /// // bytes_traversed now holds the number of bytes in the original array traversed. /// ``` /// #[derive(Clone)] pub struct StringLexer<'a> { pos: usize, // points to next byte nested: i32, // How far in () we are nested buf: &'a [u8], } impl<'a> StringLexer<'a> { /// `buf` should start right after the `(` delimiter, and may span all the way to EOF. StringLexer /// will determine the end of the string. pub fn new(buf: &'a [u8]) -> StringLexer<'a> { StringLexer { pos: 0, nested: 0, buf, } } pub fn iter<'b>(&'b mut self) -> StringLexerIter<'a, 'b> { StringLexerIter {lexer: self} } /// Get offset/pos from start of string pub fn get_offset(&self) -> usize { self.pos } /// (mostly just used by Iterator, but might be useful) pub fn next_lexeme(&mut self) -> Result> { let c = self.next_byte()?; match c { b'\\' => { let c = self.next_byte()?; Ok( match c { b'n' => Some(b'\n'), b'r' => Some(b'\r'), b't' => Some(b'\t'), b'b' => Some(b'\x08'), b'f' => Some(b'\x0c'), b'(' => Some(b'('), b')' => Some(b')'), b'\n' => { // ignore end-of-line marker if let Ok(b'\r') = self.peek_byte() { let _ = self.next_byte(); } self.next_lexeme()? } b'\r' => { // ignore end-of-line marker if let Ok(b'\n') = self.peek_byte() { let _ = self.next_byte(); } self.next_lexeme()? } b'\\' => Some(b'\\'), _ => { self.back()?; let _start = self.get_offset(); let mut char_code: u16 = 0; // A character code must follow. 1-3 numbers. for _ in 0..3 { let c = self.peek_byte()?; if (b'0'..=b'7').contains(&c) { self.next_byte()?; char_code = char_code * 8 + (c - b'0') as u16; } else { break; } } Some(char_code as u8) } } ) }, b'(' => { self.nested += 1; Ok(Some(b'(')) }, b')' => { self.nested -= 1; if self.nested < 0 { Ok(None) } else { Ok(Some(b')')) } }, c => Ok(Some(c)) } } fn next_byte(&mut self) -> Result { if self.pos < self.buf.len() { self.pos += 1; Ok(self.buf[self.pos-1]) } else { Err(PdfError::EOF) } } fn back(&mut self) -> Result<()> { if self.pos > 0 { self.pos -= 1; Ok(()) } else { Err(PdfError::EOF) } } fn peek_byte(&mut self) -> Result { if self.pos < self.buf.len() { Ok(self.buf[self.pos]) } else { Err(PdfError::EOF) } } } // "'a is valid for at least 'b" pub struct StringLexerIter<'a: 'b, 'b> { lexer: &'b mut StringLexer<'a>, } impl<'a, 'b> Iterator for StringLexerIter<'a, 'b> { type Item = Result; fn next(&mut self) -> Option> { match self.lexer.next_lexeme() { Err(e) => Some(Err(e)), Ok(Some(s)) => Some(Ok(s)), Ok(None) => None, } } } pub struct HexStringLexer<'a> { pos: usize, // points to next byte buf: &'a [u8], } impl<'a> HexStringLexer<'a> { /// `buf` should start right after the `<` delimiter, and may span all the way to EOF. /// HexStringLexer will determine the end of the string. 
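    ///
    /// Usage sketch (illustrative; assumes the leading `<` was already consumed,
    /// and that whitespace between hex digits is skipped as per the PDF spec):
    ///
    /// ```ignore
    /// let mut lexer = HexStringLexer::new(b"48 65 6C 6C 6F>");
    /// let bytes: Vec<u8> = lexer.iter().map(Result::unwrap).collect();
    /// assert_eq!(bytes, b"Hello");
    /// ```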
pub fn new(buf: &'a [u8]) -> HexStringLexer<'a> { HexStringLexer { pos: 0, buf } } pub fn iter<'b>(&'b mut self) -> HexStringLexerIter<'a, 'b> { HexStringLexerIter { lexer: self } } /// Get offset/position from start of string pub fn get_offset(&self) -> usize { self.pos } fn next_non_whitespace_char(&mut self) -> Result { let mut byte = self.read_byte()?; while byte == b' ' || byte == b'\t' || byte == b'\n' || byte == b'\r' || byte == b'\x0c' { byte = self.read_byte()?; } Ok(byte) } pub fn next_hex_byte(&mut self) -> Result> { let c1 = self.next_non_whitespace_char()?; let high_nibble: u8 = match c1 { b'0' ..= b'9' => c1 - b'0', b'A' ..= b'F' => c1 - b'A' + 0xA, b'a' ..= b'f' => c1 - b'a' + 0xA, b'>' => return Ok(None), _ => return Err(PdfError::HexDecode { pos: self.pos, bytes: [c1, self.peek_byte().unwrap_or(0)] }), }; let c2 = self.next_non_whitespace_char()?; let low_nibble: u8 = match c2 { b'0' ..= b'9' => c2 - b'0', b'A' ..= b'F' => c2 - b'A' + 0xA, b'a' ..= b'f' => c2 - b'a' + 0xA, b'>' => { self.back()?; 0 } _ => return Err(PdfError::HexDecode { pos: self.pos, bytes: [c1, c2] }), }; Ok(Some((high_nibble << 4) | low_nibble)) } fn read_byte(&mut self) -> Result { if self.pos < self.buf.len() { self.pos += 1; Ok(self.buf[self.pos - 1]) } else { Err(PdfError::EOF) } } fn back(&mut self) -> Result<()> { if self.pos > 0 { self.pos -= 1; Ok(()) } else { Err(PdfError::EOF) } } fn peek_byte(&mut self) -> Result { if self.pos < self.buf.len() { Ok(self.buf[self.pos]) } else { Err(PdfError::EOF) } } } pub struct HexStringLexerIter<'a: 'b, 'b> { lexer: &'b mut HexStringLexer<'a>, } impl<'a, 'b> Iterator for HexStringLexerIter<'a, 'b> { type Item = Result; fn next(&mut self) -> Option> { match self.lexer.next_hex_byte() { Err(e) => Some(Err(e)), Ok(Some(s)) => Some(Ok(s)), Ok(None) => None, } } } #[cfg(test)] mod tests { use crate::error::Result; use crate::parser::lexer::{HexStringLexer, StringLexer}; #[test] fn tests() { let vec = b"a\\nb\\rc\\td\\(f/)\\\\hei)"; let mut lexer = StringLexer::new(vec); let lexemes: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(lexemes, b"a\nb\rc\td(f/"); } #[test] fn string_split_lines() { { let data = b"These \\\ntwo strings \\\nare the same.)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(result, b"These two strings are the same."); } { let data = b"These \\\rtwo strings \\\rare the same.)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(result, b"These two strings are the same."); } { let data = b"These \\\r\ntwo strings \\\r\nare the same.)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(result, b"These two strings are the same."); } } #[test] fn octal_escape() { { let data = b"This string contains\\245two octal characters\\307.)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(result, &b"This string contains\xa5two octal characters\xc7."[..]); } { let data = b"\\0053)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(result, b"\x053"); } { let data = b"\\053)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(result, b"+"); } { let data = b"\\53)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); 
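            // Two-digit octal escape: \53 == 0o53 == 0x2B == b'+'.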
assert_eq!(result, b"+"); } { // overflow is ignored let data = b"\\541)"; let mut lexer = StringLexer::new(data); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!(result, b"a"); } } #[test] fn hex_test() { let input = b"901FA3>"; let mut lexer = HexStringLexer::new(input); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!( result, vec![ b'\x90', b'\x1f', b'\xa3', ] ); let input = b"901FA>"; let mut lexer = HexStringLexer::new(input); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!( result, vec![ b'\x90', b'\x1f', b'\xa0', ] ); let input = b"1 9F\t5\r\n4\x0c62a>"; let mut lexer = HexStringLexer::new(input); let result: Vec = lexer.iter().map(Result::unwrap).collect(); assert_eq!( result, vec![ b'\x19', b'\xf5', b'\x46', b'\x2a', ] ); } } pdf-0.9.0/src/parser/mod.rs000064400000000000000000000336771046102023000136330ustar 00000000000000//! Basic functionality for parsing a PDF file. mod lexer; mod parse_object; mod parse_xref; pub use self::lexer::*; pub use self::parse_object::*; pub use self::parse_xref::*; use crate::error::*; use crate::primitive::StreamInner; use crate::primitive::{Primitive, Dictionary, PdfStream, PdfString}; use crate::object::{ObjNr, GenNr, PlainRef, Resolve}; use crate::crypt::Decoder; use bitflags::bitflags; use istring::{SmallBytes, SmallString, IBytes}; const MAX_DEPTH: usize = 20; bitflags! { pub struct ParseFlags: u16 { const INTEGER = 1 << 0; const STREAM = 1 << 1; const DICT = 1 << 2; const NUMBER = 1 << 3; const NAME = 1 << 4; const ARRAY = 1 << 5; const STRING = 1 << 6; const BOOL = 1 << 7; const NULL = 1 << 8; const REF = 1 << 9; const ANY = (1 << 10) - 1; } } pub struct Context<'a> { pub decoder: Option<&'a Decoder>, pub id: PlainRef, } impl<'a> Context<'a> { pub fn decrypt<'buf>(&self, data: &'buf mut [u8]) -> Result<&'buf [u8]> { if let Some(decoder) = self.decoder { decoder.decrypt(self.id, data) } else { Ok(data) } } #[cfg(test)] fn fake() -> Self { Context { decoder: None, id: PlainRef { id: 0, gen: 0 } } } } /// Can parse stream but only if its dictionary does not contain indirect references. /// Use `parse_stream` if this is insufficient. pub fn parse(data: &[u8], r: &impl Resolve, flags: ParseFlags) -> Result { parse_with_lexer(&mut Lexer::new(data), r, flags) } /// Recursive. Can parse stream but only if its dictionary does not contain indirect references. /// Use `parse_stream` if this is not sufficient. 
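///
/// A minimal sketch (illustrative; `NoResolve` suffices when the input contains
/// no indirect references):
///
/// ```ignore
/// let mut lexer = Lexer::new(b"<< /Type /Page >>");
/// let dict = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::DICT)
///     .unwrap()
///     .into_dictionary()
///     .unwrap();
/// assert_eq!(dict.get("Type").unwrap().as_name().unwrap(), "Page");
/// ```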
pub fn parse_with_lexer(lexer: &mut Lexer, r: &impl Resolve, flags: ParseFlags) -> Result { parse_with_lexer_ctx(lexer, r, None, flags, MAX_DEPTH) } fn parse_dictionary_object(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, max_depth: usize) -> Result { let mut dict = Dictionary::default(); loop { // Expect a Name (and Object) or the '>>' delimiter let token = t!(lexer.next()); if token.starts_with(b"/") { let key = token.reslice(1..).to_name()?; let obj = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth)); dict.insert(key, obj); } else if token.equals(b">>") { break; } else { err!(PdfError::UnexpectedLexeme{ pos: lexer.get_pos(), lexeme: token.to_string(), expected: "/ or >>"}); } } Ok(dict) } fn parse_stream_object(dict: Dictionary, lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result { t!(lexer.next_stream()); let length = match dict.get("Length") { Some(&Primitive::Integer(n)) if n >= 0 => n as usize, Some(&Primitive::Reference(reference)) => t!(t!(r.resolve_flags(reference, ParseFlags::INTEGER, 1)).as_usize()), Some(other) => err!(PdfError::UnexpectedPrimitive { expected: "unsigned Integer or Reference", found: other.get_debug_name() }), None => err!(PdfError::MissingEntry { typ: "", field: "Length".into() }), }; let stream_substr = lexer.read_n(length); if stream_substr.len() != length { err!(PdfError::EOF) } // Finish t!(lexer.next_expect("endstream")); Ok(PdfStream { inner: StreamInner::InFile { id: ctx.id, file_range: stream_substr.file_range(), }, info: dict, }) } #[inline] fn check(flags: ParseFlags, allowed: ParseFlags) -> Result<(), PdfError> { if !flags.intersects(allowed) { return Err(PdfError::PrimitiveNotAllowed { allowed, found: flags }); } Ok(()) } /// Recursive. Can parse stream but only if its dictionary does not contain indirect references. /// Use `parse_stream` if this is not sufficient. pub fn parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result { let pos = lexer.get_pos(); match _parse_with_lexer_ctx(lexer, r, ctx, flags, max_depth) { Ok(r) => Ok(r), Err(e) => { lexer.set_pos(pos); Err(e) } } } fn _parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result { let input = lexer.get_remaining_slice(); let first_lexeme = t!(lexer.next(), std::str::from_utf8(input)); let obj = if first_lexeme.equals(b"<<") { check(flags, ParseFlags::DICT)?; if max_depth == 0 { return Err(PdfError::MaxDepth); } let dict = t!(parse_dictionary_object(lexer, r, ctx, max_depth-1)); // It might just be the dictionary in front of a stream. 
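        // E.g. `<< /Length 3 >> stream\n...` -- the dictionary alone is not the
        // object, so peek for the `stream` keyword before committing to a plain dictionary.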
if t!(lexer.peek()).equals(b"stream") { let ctx = ctx.ok_or(PdfError::PrimitiveNotAllowed { allowed: ParseFlags::STREAM, found: flags })?; Primitive::Stream(t!(parse_stream_object(dict, lexer, r, ctx))) } else { Primitive::Dictionary(dict) } } else if first_lexeme.is_integer() { // May be Integer or Reference check(flags, ParseFlags::INTEGER | ParseFlags::REF)?; // First backup position let pos_bk = lexer.get_pos(); let second_lexeme = t!(lexer.next()); if second_lexeme.is_integer() { let third_lexeme = t!(lexer.next()); if third_lexeme.equals(b"R") { // It is indeed a reference to an indirect object check(flags, ParseFlags::REF)?; Primitive::Reference (PlainRef { id: t!(first_lexeme.to::()), gen: t!(second_lexeme.to::()), }) } else { check(flags, ParseFlags::INTEGER)?; // We are probably in an array of numbers - it's not a reference anyway lexer.set_pos(pos_bk); // (roll back the lexer first) Primitive::Integer(t!(first_lexeme.to::())) } } else { check(flags, ParseFlags::INTEGER)?; // It is but a number lexer.set_pos(pos_bk); // (roll back the lexer first) Primitive::Integer(t!(first_lexeme.to::())) } } else if let Some(s) = first_lexeme.real_number() { check(flags, ParseFlags::NUMBER)?; // Real Number Primitive::Number (t!(s.to::(), s.to_string())) } else if first_lexeme.starts_with(b"/") { check(flags, ParseFlags::NAME)?; // Name let mut rest: &[u8] = &first_lexeme.reslice(1..); let s = if rest.contains(&b'#') { let mut s = IBytes::new(); while let Some(idx) = rest.iter().position(|&b| b == b'#') { use crate::enc::decode_nibble; use std::convert::TryInto; let [hi, lo]: [u8; 2] = rest.get(idx+1 .. idx+3).ok_or(PdfError::EOF)?.try_into().unwrap(); let byte = match (decode_nibble(lo), decode_nibble(hi)) { (Some(low), Some(high)) => low | high << 4, _ => return Err(PdfError::HexDecode { pos: idx, bytes: [hi, lo] }), }; s.extend_from_slice(&rest[..idx]); s.push(byte); rest = &rest[idx+3..]; } s.extend_from_slice(rest); SmallBytes::from(s.as_slice()) } else { SmallBytes::from(rest) }; Primitive::Name(SmallString::from_utf8(s)?) 
} else if first_lexeme.equals(b"[") { check(flags, ParseFlags::ARRAY)?; if max_depth == 0 { return Err(PdfError::MaxDepth); } let mut array = Vec::new(); // Array loop { // Exit if closing delimiter if lexer.peek()?.equals(b"]") { break; } let element = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth-1)); array.push(element); } t!(lexer.next()); // Move beyond closing delimiter Primitive::Array (array) } else if first_lexeme.equals(b"(") { check(flags, ParseFlags::STRING)?; let mut string = IBytes::new(); let bytes_traversed = { let mut string_lexer = StringLexer::new(lexer.get_remaining_slice()); for character in string_lexer.iter() { string.push(t!(character)); } string_lexer.get_offset() }; // Advance to end of string lexer.offset_pos(bytes_traversed); // decrypt it if let Some(ctx) = ctx { string = t!(ctx.decrypt(&mut string)).into(); } Primitive::String (PdfString::new(string)) } else if first_lexeme.equals(b"<") { check(flags, ParseFlags::STRING)?; let mut string = IBytes::new(); let bytes_traversed = { let mut hex_string_lexer = HexStringLexer::new(lexer.get_remaining_slice()); for byte in hex_string_lexer.iter() { string.push(t!(byte)); } hex_string_lexer.get_offset() }; // Advance to end of string lexer.offset_pos(bytes_traversed); // decrypt it if let Some(ctx) = ctx { string = t!(ctx.decrypt(&mut string)).into(); } Primitive::String (PdfString::new(string)) } else if first_lexeme.equals(b"true") { check(flags, ParseFlags::BOOL)?; Primitive::Boolean (true) } else if first_lexeme.equals(b"false") { check(flags, ParseFlags::BOOL)?; Primitive::Boolean (false) } else if first_lexeme.equals(b"null") { check(flags, ParseFlags::NULL)?; Primitive::Null } else { err!(PdfError::UnknownType {pos: lexer.get_pos(), first_lexeme: first_lexeme.to_string(), rest: lexer.read_n(50).to_string()}); }; // trace!("Read object"; "Obj" => format!("{}", obj)); Ok(obj) } pub fn parse_stream(data: &[u8], resolve: &impl Resolve, ctx: &Context) -> Result { parse_stream_with_lexer(&mut Lexer::new(data), resolve, ctx) } fn parse_stream_with_lexer(lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result { let first_lexeme = t!(lexer.next()); let obj = if first_lexeme.equals(b"<<") { let dict = parse_dictionary_object(lexer, r, None, MAX_DEPTH)?; // It might just be the dictionary in front of a stream. 
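    // Unlike the general parser above, a bare dictionary (with no `stream`
    // keyword following) is an error here, since the caller demanded a stream.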
if t!(lexer.peek()).equals(b"stream") { let ctx = Context { decoder: None, id: ctx.id }; t!(parse_stream_object(dict, lexer, r, &ctx)) } else { err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "Dictionary" }); } } else { err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "something else" }); }; Ok(obj) } #[cfg(test)] mod tests { #[test] fn dict_with_empty_name_as_value() { use crate::object::NoResolve; use super::{ParseFlags, Context}; { let data = b"<>>>"; let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap(); let dict = primitive.into_dictionary().unwrap(); assert_eq!(dict.len(), 1); let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap(); assert_eq!(app_dict.len(), 1); let name = app_dict.get("Name").unwrap().as_name().unwrap(); assert_eq!(name, ""); } { let data = b"<>>>stream\nendstream\n"; let stream = super::parse_stream(data, &NoResolve, &Context::fake()).unwrap(); let dict = stream.info; assert_eq!(dict.len(), 2); let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap(); assert_eq!(app_dict.len(), 1); let name = app_dict.get("Name").unwrap().as_name().unwrap(); assert_eq!(name, ""); } } #[test] fn dict_with_empty_name_as_key() { use crate::object::NoResolve; use super::{ParseFlags, Context}; { let data = b"<>"; let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap(); let dict = primitive.into_dictionary().unwrap(); assert_eq!(dict.len(), 1); assert!(dict.get("").unwrap().as_bool().unwrap()); } { let data = b"<>stream\nendstream\n"; let stream = super::parse_stream(data, &NoResolve, &Context::fake()).unwrap(); let dict = stream.info; assert_eq!(dict.len(), 2); assert!(dict.get("").unwrap().as_bool().unwrap()); } } #[test] fn empty_array() { use crate::object::NoResolve; use super::ParseFlags; let data = b"[]"; let primitive = super::parse(data, &NoResolve, ParseFlags::ARRAY).unwrap(); let array = primitive.into_array().unwrap(); assert!(array.is_empty()); } #[test] fn compact_array() { use crate::object::NoResolve; use crate::primitive::{Primitive, PdfString}; use super::lexer::Lexer; use super::*; let mut lx = Lexer::new(b"[(Complete L)20(egend for Physical and P)20(olitical Maps)]TJ"); assert_eq!(parse_with_lexer(&mut lx, &NoResolve, ParseFlags::ANY).unwrap(), Primitive::Array(vec![ Primitive::String(PdfString::new("Complete L".into())), Primitive::Integer(20), Primitive::String(PdfString::new("egend for Physical and P".into())), Primitive::Integer(20), Primitive::String(PdfString::new("olitical Maps".into())) ]) ); assert_eq!(lx.next().unwrap().as_str().unwrap(), "TJ"); assert!(lx.next().unwrap_err().is_eof()); } } pdf-0.9.0/src/parser/parse_object.rs000064400000000000000000000033071046102023000154770ustar 00000000000000// Considering whether to impl Object and IndirectObject here. // use crate::parser::{lexer::*, MAX_DEPTH}; use crate::error::*; use crate::primitive::{Primitive, PdfStream}; use crate::parser::{parse_with_lexer_ctx, parse_stream_with_lexer, Context, ParseFlags}; use crate::object::*; use crate::crypt::Decoder; /// Parses an Object starting at the current position of `lexer`. 
Almost as /// `Reader::parse_object`, but this function does not take `Reader`, at the expense that it /// cannot dereference pub fn parse_indirect_object(lexer: &mut Lexer, r: &impl Resolve, decoder: Option<&Decoder>, flags: ParseFlags) -> Result<(PlainRef, Primitive)> { let id = PlainRef { id: t!(lexer.next()).to::()?, gen: t!(lexer.next()).to::()?, }; lexer.next_expect("obj")?; let ctx = Context { decoder, id, }; let obj = t!(parse_with_lexer_ctx(lexer, r, Some(&ctx), flags, MAX_DEPTH)); if r.options().allow_missing_endobj { let pos = lexer.get_pos(); if let Err(e) = lexer.next_expect("endobj") { warn!("error parsing obj {} {}: {:?}", id.id, id.gen, e); lexer.set_pos(pos); } } else { t!(lexer.next_expect("endobj")); } Ok((id, obj)) } pub fn parse_indirect_stream(lexer: &mut Lexer, r: &impl Resolve, decoder: Option<&Decoder>) -> Result<(PlainRef, PdfStream)> { let id = PlainRef { id: t!(lexer.next()).to::()?, gen: t!(lexer.next()).to::()?, }; lexer.next_expect("obj")?; let ctx = Context { decoder, id, }; let stm = t!(parse_stream_with_lexer(lexer, r, &ctx)); t!(lexer.next_expect("endobj")); Ok((id, stm)) } pdf-0.9.0/src/parser/parse_xref.rs000064400000000000000000000124741046102023000152020ustar 00000000000000use crate::error::*; use crate::parser::lexer::Lexer; use crate::xref::{XRef, XRefSection, XRefInfo}; use crate::primitive::{Primitive, Dictionary}; use crate::object::*; use crate::parser::{parse_with_lexer, ParseFlags}; use crate::parser::parse_object::{parse_indirect_stream}; use std::convert::TryInto; // Just the part of Parser which reads xref sections from xref stream. /// Takes `&mut &[u8]` so that it can "consume" data as it reads fn parse_xref_section_from_stream(first_id: u32, mut num_entries: usize, width: &[usize], data: &mut &[u8], resolve: &impl Resolve) -> Result { let mut entries = Vec::new(); let [w0, w1, w2]: [usize; 3] = width.try_into().map_err(|_| other!("invalid xref length array"))?; if num_entries * (w0 + w1 + w2) > data.len() { if resolve.options().allow_xref_error { warn!("not enough xref data. truncating."); num_entries = data.len() / (w0 + w1 + w2); } else { bail!("not enough xref data"); } } for _ in 0..num_entries { // println!("{:?}", &data[.. width.iter().map(|&i| i as usize).sum()]); // TODO Check if width[i] are 0. Use default values from the PDF references. let _type = if w0 == 0 { 1 } else { read_u64_from_stream(w0, data) }; let field1 = read_u64_from_stream(w1, data); let field2 = read_u64_from_stream(w2, data); let entry = match _type { 0 => XRef::Free {next_obj_nr: field1 as ObjNr, gen_nr: field2 as GenNr}, 1 => XRef::Raw {pos: field1 as usize, gen_nr: field2 as GenNr}, 2 => XRef::Stream {stream_id: field1 as ObjNr, index: field2 as usize}, _ => return Err(PdfError::XRefStreamType {found: _type}), // TODO: Should actually just be seen as a reference to the null object }; entries.push(entry); } Ok(XRefSection { first_id, entries, }) } /// Helper to read an integer with a certain amount of bits `width` from stream. fn read_u64_from_stream(width: usize, data: &mut &[u8]) -> u64 { let mut result = 0; for i in (0..width).rev() { let base = 8 * i; // (width, 0] let c: u8 = data[0]; *data = &data[1..]; // Consume byte result += u64::from(c) << base; } result } /// Reads xref sections (from stream) and trailer starting at the position of the Lexer. 
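///
/// Entry fields are fixed-width big-endian integers sized by the stream's `/W`
/// array. A self-contained sketch of the decoding performed by the private
/// `read_u64_from_stream` helper above:
///
/// ```
/// // /W [1 2 1]: 1-byte type, 2-byte field 1, 1-byte field 2.
/// let mut data: &[u8] = &[0x01, 0x02, 0x0a, 0x00];
/// let mut fields = [0u64; 3];
/// for (f, &width) in fields.iter_mut().zip(&[1usize, 2, 1]) {
///     for _ in 0..width {
///         *f = (*f << 8) | u64::from(data[0]);
///         data = &data[1..]; // consume byte
///     }
/// }
/// // An in-use (type 1) entry at byte offset 0x020a, generation 0.
/// assert_eq!(fields, [1, 0x020a, 0]);
/// ```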
pub fn parse_xref_stream_and_trailer(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec, Dictionary)> { let xref_stream = t!(parse_indirect_stream(lexer, resolve, None)).1; let trailer = if t!(lexer.next()) == "trailer" { let trailer = t!(parse_with_lexer(lexer, resolve, ParseFlags::DICT)); t!(trailer.into_dictionary()) } else { xref_stream.info.clone() }; let xref_stream = t!(Stream::::from_primitive(Primitive::Stream(xref_stream), resolve)); let mut data_left = &*t!(xref_stream.data(resolve)); let width = &xref_stream.w; let index = &xref_stream.index; if index.len() % 2 != 0 { return Err(PdfError::Other { msg: format!("xref stream has {} elements which is not an even number", index.len()) }); } let mut sections = Vec::new(); for (first_id, num_objects) in index.chunks_exact(2).map(|c| (c[0], c[1])) { let section = t!(parse_xref_section_from_stream(first_id, num_objects as usize, width, &mut data_left, resolve)); sections.push(section); } Ok((sections, trailer)) } /// Reads xref sections (from table) and trailer starting at the position of the Lexer. pub fn parse_xref_table_and_trailer(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec, Dictionary)> { let mut sections = Vec::new(); // Keep reading subsections until we hit `trailer` while lexer.peek()? != "trailer" { let start_id = t!(lexer.next_as::()); let num_ids = t!(lexer.next_as::()); let mut section = XRefSection::new(start_id); for i in 0..num_ids { let w1 = t!(lexer.next()); if w1 == "trailer" { return Err(PdfError::Other { msg: format!("xref table declares {} entries, but only {} follow.", num_ids, i) }); } let w2 = t!(lexer.next()); let w3 = t!(lexer.next()); if w3 == "f" { section.add_free_entry(t!(w1.to::()), t!(w2.to::())); } else if w3 == "n" { section.add_inuse_entry(t!(w1.to::()), t!(w2.to::())); } else { return Err(PdfError::UnexpectedLexeme {pos: lexer.get_pos(), lexeme: w3.to_string(), expected: "f or n"}); } } sections.push(section); } t!(lexer.next_expect("trailer")); let trailer = t!(parse_with_lexer(lexer, resolve, ParseFlags::DICT)); let trailer = t!(trailer.into_dictionary()); Ok((sections, trailer)) } pub fn read_xref_and_trailer_at(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec, Dictionary)> { let next_word = t!(lexer.next()); if next_word == "xref" { // Read classic xref table parse_xref_table_and_trailer(lexer, resolve) } else { // Read xref stream lexer.back()?; parse_xref_stream_and_trailer(lexer, resolve) } } pdf-0.9.0/src/path.rs000064400000000000000000000053071046102023000125010ustar 00000000000000use mint::Point2; type Point = Point2; pub enum FillMode { NonZero, EvenOdd } struct PathBuilder { out: W, current: Point } impl PathBuilder { pub fn new

<P>(writer: W, start: P) -> PathBuilder<W> where P: Into<Point> {
        PathBuilder { out: writer, current: start.into() }
    }
    /// Begin a new subpath by moving the current point to `p`,
    /// omitting any connecting line segment. If
    /// the previous path construction operator in the current path
    /// was also m, the new m overrides it; no vestige of the
    /// previous m operation remains in the path.
    pub fn move_to<P: Into<Point>>(&mut self, p: P) {
        let p = p.into();
        writeln!(self.out, "{} {} m", p.x, p.y);
        self.current = p;
    }
    /// Append a straight line segment from the current point to the
    /// point `p`. The new current point shall be `p`.
    pub fn line<P: Into<Point>>(&mut self, p: P) {
        let p = p.into();
        writeln!(self.out, "{} {} l", p.x, p.y);
        self.current = p;
    }
    /// Append a quadratic Bézier curve to the current path.
    /// The curve shall extend from the current point to the point `p`,
    /// using `c` as the Bézier control point.
    /// The new current point shall be `p`.
    ///
    /// NOTE: The quadratic Bézier curve is translated into a cubic Bézier curve,
    /// since PDF does not allow the former.
    pub fn quadratic<P: Into<Point>>(&mut self, c: P, p: P) {
        let (c, p) = (c.into(), p.into());
        // quadratic-to-cubic control points: c1 = (2/3)*c + (1/3)*current, c2 = (2/3)*c + (1/3)*p
        let c1 = Point { x: (2./3.) * c.x + (1./3.) * self.current.x, y: (2./3.) * c.y + (1./3.) * self.current.y };
        let c2 = Point { x: (2./3.) * c.x + (1./3.) * p.x, y: (2./3.) * c.y + (1./3.) * p.y };
        writeln!(self.out, "{} {} {} {} {} {} c", c1.x, c1.y, c2.x, c2.y, p.x, p.y);
        self.current = p;
    }
    /// Append a cubic Bézier curve to the current path.
    /// The curve shall extend from the current point to the point `p`,
    /// using `c1` and `c2` as the Bézier control points.
    /// The new current point shall be `p`.
    pub fn cubic<P: Into<Point>>

(&mut self, c1: P, c2: P, p: P) { let (c1, c2, p) = (c1.into(), c2.into(), p.into()); if Some(c1) == self.current { writeln!(self.out, "{} {} {} {} v", c2.x, c2.y, p.x, p.y); } else if Some(c2) == self.current { writeln!(self.out, "{} {} {} {} y", c1.x, c1.y, p.x, p.y); } else { writen!(self.out, "{} {} {} {} {} {} c", c1.x, c1.y, c2.x, c2.y, p.x, p.y); } self.current = p; } pub fn close(&mut self) { writeln!(self.out, "h"); } pub fn fill(&mut self, mode: FillMode) { match mode { FillMode::NonZero => writeln!(out, "f"), FillMode::EvenOdd => writeln!(out, "f*") } } } pdf-0.9.0/src/primitive.rs000064400000000000000000000677651046102023000135750ustar 00000000000000use crate::error::*; use crate::object::{PlainRef, Resolve, Object, NoResolve, ObjectWrite, Updater, DeepClone, Cloner}; use std::sync::Arc; use std::{str, fmt, io}; use std::ops::{Index, Range}; use std::ops::Deref; use std::convert::TryInto; use std::borrow::{Borrow, Cow}; use indexmap::IndexMap; use itertools::Itertools; use istring::{SmallString, IBytes}; use datasize::DataSize; #[derive(Clone, Debug, PartialEq)] pub enum Primitive { Null, Integer (i32), Number (f32), Boolean (bool), String (PdfString), Stream (PdfStream), Dictionary (Dictionary), Array (Vec), Reference (PlainRef), Name (SmallString), } impl DataSize for Primitive { const IS_DYNAMIC: bool = true; const STATIC_HEAP_SIZE: usize = std::mem::size_of::(); fn estimate_heap_size(&self) -> usize { match self { Primitive::String(ref s) => s.estimate_heap_size(), Primitive::Stream(ref s) => s.estimate_heap_size(), Primitive::Dictionary(ref d) => d.estimate_heap_size(), Primitive::Array(ref arr) => arr.estimate_heap_size(), Primitive::Name(ref s) => s.estimate_heap_size(), _ => 0 } } } impl fmt::Display for Primitive { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Primitive::Null => write!(f, "null"), Primitive::Integer(i) => i.fmt(f), Primitive::Number(n) => n.fmt(f), Primitive::Boolean(b) => b.fmt(f), Primitive::String(ref s) => write!(f, "{:?}", s), Primitive::Stream(_) => write!(f, "stream"), Primitive::Dictionary(ref d) => d.fmt(f), Primitive::Array(ref arr) => write!(f, "[{}]", arr.iter().format(", ")), Primitive::Reference(r) => write!(f, "@{}", r.id), Primitive::Name(ref s) => write!(f, "/{}", s) } } } impl Primitive { pub fn serialize(&self, out: &mut impl io::Write) -> Result<()> { match self { Primitive::Null => write!(out, "null")?, Primitive::Integer(i) => write!(out, "{}", i)?, Primitive::Number(n) => write!(out, "{}", n)?, Primitive::Boolean(b) => write!(out, "{}", b)?, Primitive::String(ref s) => s.serialize(out)?, Primitive::Stream(ref s) => s.serialize(out)?, Primitive::Dictionary(ref d) => d.serialize(out)?, Primitive::Array(ref arr) => serialize_list(arr, out)?, Primitive::Reference(r) => write!(out, "{} {} R", r.id, r.gen)?, Primitive::Name(ref s) => serialize_name(s, out)?, } Ok(()) } pub fn array(i: I, update: &mut U) -> Result where O: ObjectWrite, I: Iterator, T: Borrow, U: Updater { i.map(|t| t.borrow().to_primitive(update)).collect::>().map(Primitive::Array) } pub fn name(name: impl Into) -> Primitive { Primitive::Name(name.into()) } } fn serialize_list(arr: &[Primitive], out: &mut impl io::Write) -> Result<()> { let mut parts = arr.iter(); write!(out, "[")?; if let Some(first) = parts.next() { first.serialize(out)?; } for p in parts { write!(out, " ")?; p.serialize(out)?; } write!(out, "]")?; Ok(()) } pub fn serialize_name(s: &str, out: &mut impl io::Write) -> Result<()> { write!(out, "/")?; for b in s.chars() { match b { 
'\\' | '(' | ')' => write!(out, r"\")?, c if c > '~' => panic!("only ASCII"), _ => () } write!(out, "{}", b)?; } Ok(()) } /// Primitive Dictionary type. #[derive(Default, Clone, PartialEq)] pub struct Dictionary { dict: IndexMap } impl Dictionary { pub fn new() -> Dictionary { Dictionary { dict: IndexMap::new()} } pub fn len(&self) -> usize { self.dict.len() } pub fn is_empty(&self) -> bool { self.len() == 0 } pub fn get(&self, key: &str) -> Option<&Primitive> { self.dict.get(key) } pub fn insert(&mut self, key: impl Into, val: impl Into) -> Option { self.dict.insert(key.into(), val.into()) } pub fn iter(&self) -> impl Iterator { self.dict.iter() } pub fn remove(&mut self, key: &str) -> Option { self.dict.remove(key) } /// like remove, but takes the name of the calling type and returns `PdfError::MissingEntry` if the entry is not found pub fn require(&mut self, typ: &'static str, key: &str) -> Result { self.remove(key).ok_or( PdfError::MissingEntry { typ, field: key.into() } ) } /// assert that the given key/value pair is in the dictionary (`required=true`), /// or the key is not present at all (`required=false`) pub fn expect(&self, typ: &'static str, key: &str, value: &str, required: bool) -> Result<()> { match self.dict.get(key) { Some(ty) => { let ty = ty.as_name()?; if ty != value { Err(PdfError::KeyValueMismatch { key: key.into(), value: value.into(), found: ty.into() }) } else { Ok(()) } }, None if required => Err(PdfError::MissingEntry { typ, field: key.into() }), None => Ok(()) } } } impl DataSize for Dictionary { const IS_DYNAMIC: bool = true; const STATIC_HEAP_SIZE: usize = std::mem::size_of::(); fn estimate_heap_size(&self) -> usize { self.iter().map(|(k, v)| 16 + k.estimate_heap_size() + v.estimate_heap_size()).sum() } } impl ObjectWrite for Dictionary { fn to_primitive(&self, _update: &mut impl Updater) -> Result { Ok(Primitive::Dictionary(self.clone())) } } impl DeepClone for Dictionary { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { Ok(Dictionary { dict: self.dict.iter() .map(|(key, value)| Ok((key.clone(), value.deep_clone(cloner)?))) .try_collect::<_, _, PdfError>()? 
}) } } impl Deref for Dictionary { type Target = IndexMap; fn deref(&self) -> &IndexMap { &self.dict } } impl Dictionary { fn serialize(&self, out: &mut impl io::Write) -> Result<()> { writeln!(out, "<<")?; for (key, val) in self.iter() { write!(out, "{} ", key)?; val.serialize(out)?; writeln!(out)?; } writeln!(out, ">>")?; Ok(()) } } impl fmt::Debug for Dictionary { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { writeln!(f, "{{")?; for (k, v) in self { writeln!(f, "{:>15}: {}", k, v)?; } write!(f, "}}") } } impl fmt::Display for Dictionary { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "<{}>", self.iter().format_with(", ", |(k, v), f| f(&format_args!("{}={}", k, v)))) } } impl<'a> Index<&'a str> for Dictionary { type Output = Primitive; fn index(&self, idx: &'a str) -> &Primitive { self.dict.index(idx) } } impl IntoIterator for Dictionary { type Item = (Name, Primitive); type IntoIter = indexmap::map::IntoIter; fn into_iter(self) -> Self::IntoIter { self.dict.into_iter() } } impl<'a> IntoIterator for &'a Dictionary { type Item = (&'a Name, &'a Primitive); type IntoIter = indexmap::map::Iter<'a, Name, Primitive>; fn into_iter(self) -> Self::IntoIter { self.dict.iter() } } /// Primitive Stream (as opposed to the higher-level `Stream`) #[derive(Clone, Debug, PartialEq, DataSize)] pub struct PdfStream { pub info: Dictionary, pub (crate) inner: StreamInner, } #[derive(Clone, Debug, PartialEq, DataSize)] pub enum StreamInner { InFile { id: PlainRef, file_range: Range }, Pending { data: Arc<[u8]> }, } impl Object for PdfStream { fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { match p { Primitive::Stream (stream) => Ok(stream), Primitive::Reference (r) => PdfStream::from_primitive(resolve.resolve(r)?, resolve), p => Err(PdfError::UnexpectedPrimitive {expected: "Stream", found: p.get_debug_name()}) } } } impl ObjectWrite for PdfStream { fn to_primitive(&self, update: &mut impl Updater) -> Result { Ok(self.clone().into()) } } impl PdfStream { pub fn serialize(&self, out: &mut impl io::Write) -> Result<()> { self.info.serialize(out)?; writeln!(out, "stream")?; match self.inner { StreamInner::InFile { .. } => { unimplemented!() } StreamInner::Pending { ref data } => { out.write_all(data)?; } } writeln!(out, "\nendstream")?; Ok(()) } pub fn raw_data(&self, resolve: &impl Resolve) -> Result> { match self.inner { StreamInner::InFile { id, ref file_range } => resolve.stream_data(id, file_range.clone()), StreamInner::Pending { ref data } => Ok(data.clone()) } } } impl DeepClone for PdfStream { fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { let data = match self.inner { StreamInner::InFile { id, ref file_range } => cloner.stream_data(id, file_range.clone())?, StreamInner::Pending { ref data } => data.clone() }; Ok(PdfStream { info: self.info.deep_clone(cloner)?, inner: StreamInner::Pending { data } }) } } macro_rules! 
unexpected_primitive { ($expected:ident, $found:expr) => ( Err(PdfError::UnexpectedPrimitive { expected: stringify!($expected), found: $found }) ) } #[derive(Clone, PartialEq, Eq, Hash, Debug, Ord, PartialOrd, DataSize)] pub struct Name(pub SmallString); impl Name { #[inline] pub fn as_str(&self) -> &str { &self.0 } } impl Deref for Name { type Target = str; #[inline] fn deref(&self) -> &str { &self.0 } } impl From for Name { #[inline] fn from(s: String) -> Name { Name(s.into()) } } impl From for Name { #[inline] fn from(s: SmallString) -> Name { Name(s) } } impl<'a> From<&'a str> for Name { #[inline] fn from(s: &'a str) -> Name { Name(s.into()) } } impl PartialEq for Name { #[inline] fn eq(&self, rhs: &str) -> bool { self.as_str() == rhs } } impl fmt::Display for Name { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "/{}", self.0) } } impl std::borrow::Borrow for Name { #[inline] fn borrow(&self) -> &str { self.0.as_str() } } #[test] fn test_name() { use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; let s = "Hello World!"; let hasher = DefaultHasher::new(); fn hash(hasher: &DefaultHasher, value: impl Hash) -> u64 { let mut hasher = hasher.clone(); value.hash(&mut hasher); hasher.finish() } assert_eq!(hash(&hasher, Name(s.into())), hash(&hasher, s)); } /// Primitive String type. #[derive(Clone, PartialEq, Eq, Hash, DataSize)] pub struct PdfString { pub data: IBytes, } impl fmt::Debug for PdfString { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "\"")?; for &b in self.data.as_slice() { match b { b'"' => write!(f, "\\\"")?, b' ' ..= b'~' => write!(f, "{}", b as char)?, o @ 0 ..= 7 => write!(f, "\\{}", o)?, x => write!(f, "\\x{:02x}", x)? } } write!(f, "\"") } } impl Object for PdfString { fn from_primitive(p: Primitive, r: &impl Resolve) -> Result { match p { Primitive::String (string) => Ok(string), Primitive::Reference(id) => PdfString::from_primitive(r.resolve(id)?, &NoResolve), _ => unexpected_primitive!(String, p.get_debug_name()), } } } impl ObjectWrite for PdfString { fn to_primitive(&self, _update: &mut impl Updater) -> Result { Ok(Primitive::String(self.clone())) } } impl PdfString { pub fn serialize(&self, out: &mut impl io::Write) -> Result<()> { if self.data.iter().any(|&b| b >= 0x80) { write!(out, "<")?; for &b in self.data.as_slice() { write!(out, "{:02x}", b)?; } write!(out, ">")?; } else { write!(out, r"(")?; for &b in self.data.as_slice() { match b { b'\\' | b'(' | b')' => write!(out, r"\")?, _ => () } out.write_all(&[b])?; } write!(out, r")")?; } Ok(()) } } impl AsRef<[u8]> for PdfString { fn as_ref(&self) -> &[u8] { self.as_bytes() } } impl PdfString { pub fn new(data: IBytes) -> PdfString { PdfString { data } } pub fn as_bytes(&self) -> &[u8] { &self.data } pub fn into_bytes(self) -> IBytes { self.data } /// without encoding information the PdfString cannot be decoded into a String /// therefore only lossy decoding is possible replacing unknown characters. 
/// For decoding correctly see /// pdf_tools/src/lib.rs pub fn to_string_lossy(&self) -> String { if self.data.starts_with(&[0xfe, 0xff]) { crate::font::utf16be_to_string_lossy(&self.data[2..]) } else { String::from_utf8_lossy(&self.data).into() } } /// without encoding information the PdfString cannot be sensibly decoded into a String /// converts to a Rust String but only works for valid UTF-8, UTF-16BE and ASCII characters /// if invalid bytes found an Error is returned pub fn to_string(&self) -> Result { if self.data.starts_with(&[0xfe, 0xff]) { Ok(String::from(std::str::from_utf8(crate::font::utf16be_to_string(&self.data[2..])?.as_bytes()) .map_err(|_| PdfError::Utf8Decode)?)) } else { Ok(String::from(std::str::from_utf8(&self.data) .map_err(|_| PdfError::Utf8Decode)?)) } } } impl<'a> From<&'a str> for PdfString { fn from(value: &'a str) -> Self { PdfString { data: value.into() } } } // TODO: // Noticed some inconsistency here.. I think to_* and as_* should not take Resolve, and not accept // Reference. Only from_primitive() for the respective type resolves References. impl Primitive { /// For debugging / error messages: get the name of the variant pub fn get_debug_name(&self) -> &'static str { match *self { Primitive::Null => "Null", Primitive::Integer (..) => "Integer", Primitive::Number (..) => "Number", Primitive::Boolean (..) => "Boolean", Primitive::String (..) => "String", Primitive::Stream (..) => "Stream", Primitive::Dictionary (..) => "Dictionary", Primitive::Array (..) => "Array", Primitive::Reference (..) => "Reference", Primitive::Name (..) => "Name", } } /// resolve the primitive if it is a refernce, otherwise do nothing pub fn resolve(self, r: &impl Resolve) -> Result { match self { Primitive::Reference(id) => r.resolve(id), _ => Ok(self) } } pub fn as_integer(&self) -> Result { match *self { Primitive::Integer(n) => Ok(n), ref p => unexpected_primitive!(Integer, p.get_debug_name()) } } pub fn as_u8(&self) -> Result { match *self { Primitive::Integer(n) if (0..256).contains(&n) => Ok(n as u8), Primitive::Integer(_) => bail!("invalid integer"), ref p => unexpected_primitive!(Integer, p.get_debug_name()) } } pub fn as_u32(&self) -> Result { match *self { Primitive::Integer(n) if n >= 0 => Ok(n as u32), Primitive::Integer(_) => bail!("negative integer"), ref p => unexpected_primitive!(Integer, p.get_debug_name()) } } pub fn as_usize(&self) -> Result { match *self { Primitive::Integer(n) if n >= 0 => Ok(n as usize), Primitive::Integer(_) => bail!("negative integer"), ref p => unexpected_primitive!(Integer, p.get_debug_name()) } } pub fn as_number(&self) -> Result { match *self { Primitive::Integer(n) => Ok(n as f32), Primitive::Number(f) => Ok(f), ref p => unexpected_primitive!(Number, p.get_debug_name()) } } pub fn as_bool(&self) -> Result { match *self { Primitive::Boolean (b) => Ok(b), ref p => unexpected_primitive!(Number, p.get_debug_name()) } } pub fn as_name(&self) -> Result<&str> { match self { Primitive::Name(ref name) => Ok(name.as_str()), p => unexpected_primitive!(Name, p.get_debug_name()) } } pub fn as_string(&self) -> Result<&PdfString> { match self { Primitive::String(ref data) => Ok(data), p => unexpected_primitive!(String, p.get_debug_name()) } } pub fn as_array(&self) -> Result<&[Primitive]> { match self { Primitive::Array(ref v) => Ok(v), p => unexpected_primitive!(Array, p.get_debug_name()) } } pub fn into_reference(self) -> Result { match self { Primitive::Reference(id) => Ok(id), p => unexpected_primitive!(Reference, p.get_debug_name()) } } pub fn 
impl Primitive {
    /// For debugging / error messages: get the name of the variant.
    pub fn get_debug_name(&self) -> &'static str {
        match *self {
            Primitive::Null => "Null",
            Primitive::Integer(..) => "Integer",
            Primitive::Number(..) => "Number",
            Primitive::Boolean(..) => "Boolean",
            Primitive::String(..) => "String",
            Primitive::Stream(..) => "Stream",
            Primitive::Dictionary(..) => "Dictionary",
            Primitive::Array(..) => "Array",
            Primitive::Reference(..) => "Reference",
            Primitive::Name(..) => "Name",
        }
    }
    /// Resolve the primitive if it is a reference, otherwise do nothing.
    pub fn resolve(self, r: &impl Resolve) -> Result<Primitive> {
        match self {
            Primitive::Reference(id) => r.resolve(id),
            _ => Ok(self)
        }
    }
    pub fn as_integer(&self) -> Result<i32> {
        match *self {
            Primitive::Integer(n) => Ok(n),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_u8(&self) -> Result<u8> {
        match *self {
            Primitive::Integer(n) if (0..256).contains(&n) => Ok(n as u8),
            Primitive::Integer(_) => bail!("integer out of range for u8"),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_u32(&self) -> Result<u32> {
        match *self {
            Primitive::Integer(n) if n >= 0 => Ok(n as u32),
            Primitive::Integer(_) => bail!("negative integer"),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_usize(&self) -> Result<usize> {
        match *self {
            Primitive::Integer(n) if n >= 0 => Ok(n as usize),
            Primitive::Integer(_) => bail!("negative integer"),
            ref p => unexpected_primitive!(Integer, p.get_debug_name())
        }
    }
    pub fn as_number(&self) -> Result<f32> {
        match *self {
            Primitive::Integer(n) => Ok(n as f32),
            Primitive::Number(f) => Ok(f),
            ref p => unexpected_primitive!(Number, p.get_debug_name())
        }
    }
    pub fn as_bool(&self) -> Result<bool> {
        match *self {
            Primitive::Boolean(b) => Ok(b),
            ref p => unexpected_primitive!(Boolean, p.get_debug_name())
        }
    }
    pub fn as_name(&self) -> Result<&str> {
        match self {
            Primitive::Name(ref name) => Ok(name.as_str()),
            p => unexpected_primitive!(Name, p.get_debug_name())
        }
    }
    pub fn as_string(&self) -> Result<&PdfString> {
        match self {
            Primitive::String(ref data) => Ok(data),
            p => unexpected_primitive!(String, p.get_debug_name())
        }
    }
    pub fn as_array(&self) -> Result<&[Primitive]> {
        match self {
            Primitive::Array(ref v) => Ok(v),
            p => unexpected_primitive!(Array, p.get_debug_name())
        }
    }
    pub fn into_reference(self) -> Result<PlainRef> {
        match self {
            Primitive::Reference(id) => Ok(id),
            p => unexpected_primitive!(Reference, p.get_debug_name())
        }
    }
    pub fn into_array(self) -> Result<Vec<Primitive>> {
        match self {
            Primitive::Array(v) => Ok(v),
            p => unexpected_primitive!(Array, p.get_debug_name())
        }
    }
    pub fn into_dictionary(self) -> Result<Dictionary> {
        match self {
            Primitive::Dictionary(dict) => Ok(dict),
            p => unexpected_primitive!(Dictionary, p.get_debug_name())
        }
    }
    pub fn into_name(self) -> Result<Name> {
        match self {
            Primitive::Name(name) => Ok(Name(name)),
            p => unexpected_primitive!(Name, p.get_debug_name())
        }
    }
    pub fn into_string(self) -> Result<PdfString> {
        match self {
            Primitive::String(data) => Ok(data),
            p => unexpected_primitive!(String, p.get_debug_name())
        }
    }
    pub fn to_string_lossy(&self) -> Result<String> {
        let s = self.as_string()?;
        Ok(s.to_string_lossy())
    }
    pub fn to_string(&self) -> Result<String> {
        let s = self.as_string()?;
        s.to_string()
    }
    pub fn into_stream(self, _r: &impl Resolve) -> Result<PdfStream> {
        match self {
            Primitive::Stream(s) => Ok(s),
            p => unexpected_primitive!(Stream, p.get_debug_name())
        }
    }
}

impl From<i32> for Primitive {
    fn from(x: i32) -> Primitive { Primitive::Integer(x) }
}
impl From<f32> for Primitive {
    fn from(x: f32) -> Primitive { Primitive::Number(x) }
}
impl From<bool> for Primitive {
    fn from(x: bool) -> Primitive { Primitive::Boolean(x) }
}
impl From<Name> for Primitive {
    fn from(Name(s): Name) -> Primitive { Primitive::Name(s) }
}
impl From<PdfString> for Primitive {
    fn from(x: PdfString) -> Primitive { Primitive::String(x) }
}
impl From<PdfStream> for Primitive {
    fn from(x: PdfStream) -> Primitive { Primitive::Stream(x) }
}
impl From<Dictionary> for Primitive {
    fn from(x: Dictionary) -> Primitive { Primitive::Dictionary(x) }
}
impl From<Vec<Primitive>> for Primitive {
    fn from(x: Vec<Primitive>) -> Primitive { Primitive::Array(x) }
}
impl From<PlainRef> for Primitive {
    fn from(x: PlainRef) -> Primitive { Primitive::Reference(x) }
}

impl<'a> TryInto<f32> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<f32> {
        self.as_number()
    }
}
impl<'a> TryInto<i32> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<i32> {
        self.as_integer()
    }
}
impl<'a> TryInto<Name> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<Name> {
        match self {
            Primitive::Name(s) => Ok(Name(s.clone())),
            p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name",
                found: p.get_debug_name()
            })
        }
    }
}
impl<'a> TryInto<&'a [Primitive]> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<&'a [Primitive]> {
        self.as_array()
    }
}
impl<'a> TryInto<&'a [u8]> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<&'a [u8]> {
        match *self {
            Primitive::Name(ref s) => Ok(s.as_bytes()),
            Primitive::String(ref s) => Ok(s.as_bytes()),
            ref p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name or String",
                found: p.get_debug_name()
            })
        }
    }
}
impl<'a> TryInto<Cow<'a, str>> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<Cow<'a, str>> {
        match *self {
            Primitive::Name(ref s) => Ok(Cow::Borrowed(s)),
            Primitive::String(ref s) => Ok(Cow::Owned(s.to_string_lossy())),
            ref p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name or String",
                found: p.get_debug_name()
            })
        }
    }
}
impl<'a> TryInto<String> for &'a Primitive {
    type Error = PdfError;
    fn try_into(self) -> Result<String> {
        match *self {
            Primitive::Name(ref s) => Ok(s.as_str().into()),
            Primitive::String(ref s) => Ok(s.to_string_lossy()),
            ref p => Err(PdfError::UnexpectedPrimitive {
                expected: "Name or String",
                found: p.get_debug_name()
            })
        }
    }
}

/// Parse `buffer[range]` as a `T`, falling back to `default` if the range is
/// out of bounds or the slice fails to parse.
fn parse_or<T: FromStr + Clone>(buffer: &str, range: Range<usize>, default: T) -> T {
    buffer.get(range)
        .map(|s| str::parse::<T>(s).unwrap_or_else(|_| default.clone()))
        .unwrap_or(default)
}
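// Editor's note: a small sketch (not part of the original file) of the
// fallback behaviour of `parse_or` above, which the date parser below relies
// on for its optional fields.
#[cfg(test)]
mod parse_or_sketch {
    #[test]
    fn in_range_and_out_of_range() {
        // In-bounds slice that parses: the parsed value wins.
        assert_eq!(super::parse_or("19981223", 4..6, 1u8), 12);
        // Out-of-bounds range: the default is returned.
        assert_eq!(super::parse_or("1998", 4..6, 1u8), 1);
        // In-bounds but unparsable: also the default.
        assert_eq!(super::parse_or("19xx", 2..4, 7u8), 7);
    }
}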
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Date {
    pub year: u16,
    pub month: u8,
    pub day: u8,
    pub hour: u8,
    pub minute: u8,
    pub second: u8,
    pub rel: TimeRel,
    pub tz_hour: u8,
    pub tz_minute: u8,
}

/// Direction of the timezone offset relative to universal time.
#[derive(Clone, Debug, Copy, PartialEq, Eq)]
pub enum TimeRel {
    Earlier,
    Later,
    Universal
}

datasize::non_dynamic_const_heap_size!(Date, std::mem::size_of::<Date>());

impl Object for Date {
    fn from_primitive(p: Primitive, r: &impl Resolve) -> Result<Self> {
        match p.resolve(r)? {
            Primitive::String(PdfString { data }) => {
                let s = str::from_utf8(&data)?;
                let len = s.len();
                if len > 2 && &s[0..2] == "D:" {
                    // The year is the only obligatory field.
                    let year = match s.get(2..6) {
                        Some(year) => str::parse::<u16>(year)?,
                        None => bail!("Missing obligatory year in date")
                    };
                    // Split off the timezone part at the first '+', '-' or 'Z'.
                    let (time, rel, zone) = match s.find(['+', '-', 'Z']) {
                        Some(p) => {
                            let rel = match &s[p..p+1] {
                                "-" => TimeRel::Earlier,
                                "+" => TimeRel::Later,
                                "Z" => TimeRel::Universal,
                                _ => unreachable!()
                            };
                            (&s[..p], rel, &s[p+1..])
                        }
                        None => (s, TimeRel::Universal, "")
                    };
                    // All remaining fields are optional and default to the
                    // first day of the month, midnight, UTC.
                    let month = parse_or(time, 6..8, 1);
                    let day = parse_or(time, 8..10, 1);
                    let hour = parse_or(time, 10..12, 0);
                    let minute = parse_or(time, 12..14, 0);
                    let second = parse_or(time, 14..16, 0);
                    let tz_hour = parse_or(zone, 0..2, 0);
                    let tz_minute = parse_or(zone, 3..5, 0);
                    Ok(Date { year, month, day, hour, minute, second, tz_hour, tz_minute, rel })
                } else {
                    bail!("Failed parsing date");
                }
            }
            p => unexpected_primitive!(String, p.get_debug_name()),
        }
    }
}
impl ObjectWrite for Date {
    fn to_primitive(&self, _update: &mut impl Updater) -> Result<Primitive> {
        let Date { year, month, day, hour, minute, second, tz_hour, tz_minute, rel } = *self;
        // Reject fields that would break the fixed-width "D:" format.
        if year > 9999 || month > 12 || day > 31 || hour > 23 || minute > 59 || second > 59 || tz_hour > 23 || tz_minute > 59 {
            bail!("not a valid date");
        }
        let o = match rel {
            TimeRel::Earlier => "-",
            TimeRel::Later => "+",
            TimeRel::Universal => "Z"
        };
        let s = format!("D:{year:04}{month:02}{day:02}{hour:02}{minute:02}{second:02}{o}{tz_hour:02}'{tz_minute:02}");
        Ok(Primitive::String(PdfString { data: s.into() }))
    }
}
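// Editor's note: a sketch (not part of the original file) of the defaulting
// behaviour of the date parser above — everything after the year is optional.
#[cfg(test)]
mod date_defaults_sketch {
    use crate::object::{NoResolve, Object};
    use super::{Date, PdfString, TimeRel};

    #[test]
    fn year_only() {
        let p = PdfString::from("D:2023").into();
        let d = Date::from_primitive(p, &NoResolve).unwrap();
        assert_eq!((d.year, d.month, d.day), (2023, 1, 1));
        assert_eq!((d.hour, d.minute, d.second), (0, 0, 0));
        assert_eq!(d.rel, TimeRel::Universal);
    }
}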
#[cfg(test)]
mod tests {
    use crate::{primitive::{PdfString, TimeRel}, object::{NoResolve, Object}};
    use super::Date;

    #[test]
    fn utf16be_string() {
        let s = PdfString::new([0xfe, 0xff, 0x20, 0x09].as_slice().into());
        assert_eq!(s.to_string_lossy(), "\u{2009}");
    }
    #[test]
    fn utf16be_invalid_string() {
        // An unpaired surrogate is replaced in lossy decoding.
        let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34].as_slice().into());
        let repl_ch = String::from(std::char::REPLACEMENT_CHARACTER);
        assert_eq!(s.to_string_lossy(), repl_ch);
    }
    #[test]
    #[should_panic]
    fn utf16be_invalid_bytelen() {
        // An odd number of payload bytes cannot be valid UTF-16BE.
        let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34, 0x20].as_slice().into());
        let repl_ch = String::from(std::char::REPLACEMENT_CHARACTER);
        assert_eq!(s.to_string_lossy(), repl_ch);
    }
    #[test]
    fn pdfstring_lossy_vs_ascii() {
        // verify UTF-16BE fails on invalid input
        let s = PdfString::new([0xfe, 0xff, 0xd8, 0x34].as_slice().into());
        assert!(s.to_string().is_err()); // FIXME verify it is a PdfError::Utf16Decode

        // verify UTF-16BE supports umlauts
        let s = PdfString::new([0xfe, 0xff, 0x00, 0xe4 /* ä */].as_slice().into());
        assert_eq!(s.to_string_lossy(), "ä");
        assert_eq!(s.to_string().unwrap(), "ä");

        // verify a valid UTF-8 byte stream with an umlaut works
        let s = PdfString::new([b'm', b'i', b't', 0xc3, 0xa4 /* ä */].as_slice().into());
        assert_eq!(s.to_string_lossy(), "mitä");
        assert_eq!(s.to_string().unwrap(), "mitä");

        // verify a valid ISO-8859-1 byte stream with an umlaut fails
        let s = PdfString::new([b'm', b'i', b't', 0xe4 /* ä in latin1 */].as_slice().into());
        let repl_ch = ['m', 'i', 't', std::char::REPLACEMENT_CHARACTER].iter().collect::<String>();
        assert_eq!(s.to_string_lossy(), repl_ch);
        assert!(s.to_string().is_err()); // FIXME verify it is a PdfError::Utf8Decode
    }
    #[test]
    fn date() {
        let p = PdfString::from("D:199812231952-08'00");
        let d = Date::from_primitive(p.into(), &NoResolve);
        let d2 = Date {
            year: 1998, month: 12, day: 23, hour: 19, minute: 52, second: 0,
            rel: TimeRel::Earlier, tz_hour: 8, tz_minute: 0
        };
        assert_eq!(d.unwrap(), d2);
    }
}

pdf-0.9.0/src/repair.rs

// NOTE (editor): this file appears to be an unfinished sketch — `e` and
// `backend` are free variables and the functions are not wired up, so the
// file cannot compile as-is. The `to::<...>()` turbofish types below were
// reconstructed from the types `XRefTable::new` and `XRef::Raw` expect.
fn build_xref_table() {
    warn!("can't read xref table: {:?}", e);
    let start_offset = t!(backend.locate_start_offset());
    let mut lexer = Lexer::new(t!(backend.read(..)));
    let mut objects = Vec::new();
    // Scan the whole file for "<obj_nr> <gen_nr> obj ... endobj" pairs.
    (|| -> Result<()> {
        loop {
            let offset = lexer.get_pos();
            let w1 = t!(lexer.next());
            let w2 = t!(lexer.next());
            let _w3 = t!(lexer.next_expect("obj"));
            try_opt!(lexer.seek_substr("endobj"));
            objects.push((t!(w1.to::<ObjNr>()), t!(w2.to::<GenNr>()), offset));
        }
    })();
    objects.sort_unstable();
    let first_id = objects.first().map(|&(n, _, _)| n).unwrap_or(0);
    let mut last_id = objects.last().map(|&(n, _, _)| n).unwrap_or(0);
    let mut xref = XRefTable::new(1 + last_id - first_id);
    for &(obj_nr, gen_nr, offset) in objects.iter() {
        // Pad the gap up to this object with free entries.
        for _ in first_id + 1 .. obj_nr {
            xref.push(XRef::Free { next_obj_nr: obj_nr, gen_nr: 0 });
        }
        if obj_nr == last_id {
            warn!("duplicate obj_nr {}", obj_nr);
            continue;
        }
        xref.push(XRef::Raw { pos: offset - start_offset, gen_nr });
        last_id = obj_nr;
    }
    return t!(Err(e));
}

fn build_catalog() {
}

pdf-0.9.0/src/xref.rs

use std::fmt::{Debug, Formatter};
use crate::error::*;
use crate::object::*;
use crate as pdf;
use datasize::DataSize;

///////////////////////////
// Cross-reference table //
///////////////////////////

#[derive(Copy, Clone, Debug)]
pub enum XRef {
    /// The entry is free (not currently in use).
    Free {
        next_obj_nr: ObjNr,
        gen_nr: GenNr
    },
    /// In use.
    Raw {
        pos: usize,
        gen_nr: GenNr
    },
    /// In use and compressed inside an object stream.
    Stream {
        stream_id: ObjNr,
        index: usize,
    },
    Promised,
    Invalid
}

impl XRef {
    pub fn get_gen_nr(&self) -> GenNr {
        match *self {
            XRef::Free { gen_nr, .. } | XRef::Raw { gen_nr, .. } => gen_nr,
            XRef::Stream { .. } => 0, // TODO I think these always have gen nr 0?
            _ => panic!()
        }
    }
}
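// Editor's note: a small sketch (not part of the original file) of how the
// lookup table below behaves — out-of-range ids are reported as errors rather
// than panicking, and `iter()` only yields entries pointing at actual objects.
#[cfg(test)]
mod xref_table_sketch {
    use super::{XRef, XRefTable};

    #[test]
    fn set_get_and_missing() {
        let mut table = XRefTable::new(2);
        table.set(0, XRef::Raw { pos: 17, gen_nr: 0 });
        assert!(matches!(table.get(0), Ok(XRef::Raw { pos: 17, gen_nr: 0 })));
        // Unknown object numbers yield an error, not a panic.
        assert!(table.get(100).is_err());
        // iter() skips Free/Invalid entries.
        assert_eq!(table.iter().collect::<Vec<_>>(), vec![0]);
    }
}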
/// Runtime lookup table of all objects.
#[derive(Clone)]
pub struct XRefTable {
    // `XRef::Invalid` means the entry is not specified, and using it results
    // in an error. Thought: Invalid could also mean Free?
    entries: Vec<XRef>
}

impl XRefTable {
    pub fn new(num_objects: ObjNr) -> XRefTable {
        let mut entries = Vec::new();
        entries.resize(num_objects as usize, XRef::Invalid);
        entries.push(XRef::Free { next_obj_nr: 0, gen_nr: 0xffff });
        XRefTable { entries }
    }
    pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
        self.entries.iter().enumerate()
            .filter(|(_, xref)| matches!(xref, XRef::Raw { .. } | XRef::Stream { .. }))
            .map(|(i, _)| i as u32)
    }
    pub fn get(&self, id: ObjNr) -> Result<XRef> {
        match self.entries.get(id as usize) {
            Some(&entry) => Ok(entry),
            None => Err(PdfError::UnspecifiedXRefEntry { id }),
        }
    }
    pub fn set(&mut self, id: ObjNr, r: XRef) {
        self.entries[id as usize] = r;
    }
    pub fn len(&self) -> usize {
        self.entries.len()
    }
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
    pub fn push(&mut self, new_entry: XRef) {
        self.entries.push(new_entry);
    }
    pub fn num_entries(&self) -> usize {
        self.entries.len()
    }
    /// Largest position and generation/index values over all entries, used to
    /// pick the field widths (the `W` entry) of an xref stream.
    pub fn max_field_widths(&self) -> (u64, u64) {
        let mut max_a = 0;
        let mut max_b = 0;
        for &e in &self.entries {
            let (a, b) = match e {
                XRef::Raw { pos, gen_nr } => (pos as u64, gen_nr),
                XRef::Free { next_obj_nr, gen_nr } => (next_obj_nr, gen_nr),
                XRef::Stream { stream_id, index } => (stream_id, index as u64),
                _ => continue
            };
            max_a = max_a.max(a);
            max_b = max_b.max(b);
        }
        (max_a, max_b)
    }
    pub fn add_entries_from(&mut self, section: XRefSection) -> Result<()> {
        for (i, &entry) in section.entries() {
            if let Some(dst) = self.entries.get_mut(i) {
                // Skip the new entry if the one we already have carries a
                // larger or equal generation number.
                let should_be_updated = match *dst {
                    XRef::Raw { gen_nr: gen, .. } | XRef::Free { gen_nr: gen, .. } =>
                        entry.get_gen_nr() > gen,
                    XRef::Stream { .. } | XRef::Invalid => true,
                    x => bail!("found {:?}", x)
                };
                if should_be_updated {
                    *dst = entry;
                }
            }
        }
        Ok(())
    }
    pub fn write_stream(&self, size: usize) -> Result<Stream<XRefInfo>> {
        let (max_a, max_b) = self.max_field_widths();
        let a_w = byte_len(max_a);
        let b_w = byte_len(max_b);

        // One type byte plus two big-endian fields per entry.
        let mut data = Vec::with_capacity((1 + a_w + b_w) * size);
        for &x in self.entries.iter().take(size) {
            let (t, a, b) = match x {
                XRef::Free { next_obj_nr, gen_nr } => (0, next_obj_nr, gen_nr),
                XRef::Raw { pos, gen_nr } => (1, pos as u64, gen_nr),
                XRef::Stream { stream_id, index } => (2, stream_id, index as u64),
                x => bail!("invalid xref entry: {:?}", x)
            };
            data.push(t);
            data.extend_from_slice(&a.to_be_bytes()[8 - a_w ..]);
            data.extend_from_slice(&b.to_be_bytes()[8 - b_w ..]);
        }
        let info = XRefInfo {
            size: size as u32,
            index: vec![0, size as u32],
            prev: None,
            w: vec![1, a_w, b_w],
        };
        Ok(Stream::new(info, data))
    }
}

/// Number of bytes needed to hold `n` in big-endian representation.
fn byte_len(n: u64) -> usize {
    (64 + 8 - 1 - n.leading_zeros()) as usize / 8 + (n == 0) as usize
}
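// Editor's note: a quick check (not part of the original file) of `byte_len`
// above — the minimal big-endian width, with zero still occupying one byte.
#[cfg(test)]
mod byte_len_sketch {
    #[test]
    fn widths() {
        assert_eq!(super::byte_len(0), 1);
        assert_eq!(super::byte_len(0xff), 1);
        assert_eq!(super::byte_len(0x100), 2);
        assert_eq!(super::byte_len(u64::MAX), 8);
    }
}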
impl Debug for XRefTable {
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        for (i, entry) in self.entries.iter().enumerate() {
            match *entry {
                XRef::Free { next_obj_nr, gen_nr } => {
                    writeln!(f, "{:4}: {:010} {:05} f", i, next_obj_nr, gen_nr)?
                },
                XRef::Raw { pos, gen_nr } => {
                    writeln!(f, "{:4}: {:010} {:05} n", i, pos, gen_nr)?
                },
                XRef::Stream { stream_id, index } => {
                    writeln!(f, "{:4}: in stream {}, index {}", i, stream_id, index)?
                },
                XRef::Promised => {
                    writeln!(f, "{:4}: Promised?", i)?
                },
                XRef::Invalid => {
                    writeln!(f, "{:4}: Invalid!", i)?
                }
            }
        }
        Ok(())
    }
}

/// An xref section as found in PDF files.
#[derive(Debug)]
pub struct XRefSection {
    pub first_id: u32,
    pub entries: Vec<XRef>,
}

impl XRefSection {
    pub fn new(first_id: u32) -> XRefSection {
        XRefSection {
            first_id,
            entries: Vec::new(),
        }
    }
    pub fn add_free_entry(&mut self, next_obj_nr: ObjNr, gen_nr: GenNr) {
        self.entries.push(XRef::Free { next_obj_nr, gen_nr });
    }
    pub fn add_inuse_entry(&mut self, pos: usize, gen_nr: GenNr) {
        self.entries.push(XRef::Raw { pos, gen_nr });
    }
    /// Iterate the entries together with their absolute object numbers.
    pub fn entries(&self) -> impl Iterator<Item = (usize, &XRef)> {
        self.entries.iter().enumerate().map(move |(i, e)| (i + self.first_id as usize, e))
    }
}

#[derive(Object, ObjectWrite, Debug, DataSize)]
#[pdf(Type = "XRef")]
pub struct XRefInfo {
    // XRefStream fields
    #[pdf(key = "Size")]
    pub size: u32,

    // #[pdf(key = "Index", default = "vec![0, size]")]
    /// Array of pairs of integers for each subsection: (first object number,
    /// number of entries). Default value (assumed when `None`): `(0, self.size)`.
    pub index: Vec<u32>,

    #[pdf(key = "Prev")]
    prev: Option<u64>, // NOTE (editor): element type reconstructed; `Prev` holds a byte offset

    #[pdf(key = "W")]
    pub w: Vec<usize>,
}

// read_xref_table
// read_xref_stream
// read_xref_and_trailer_at
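// Editor's note: a small sketch (not part of the original file) showing that
// `XRefSection::entries` offsets indices by `first_id`, so callers see
// absolute object numbers.
#[cfg(test)]
mod xref_section_sketch {
    use super::XRefSection;

    #[test]
    fn absolute_object_numbers() {
        let mut section = XRefSection::new(5);
        section.add_inuse_entry(100, 0);
        section.add_inuse_entry(200, 0);
        let ids: Vec<usize> = section.entries().map(|(i, _)| i).collect();
        assert_eq!(ids, vec![5, 6]);
    }
}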
#[cfg(feature="cache")] #[test] fn invalid_pdfs() { for entry in glob(file_path!("invalid/*.pdf")) .expect("Failed to read glob pattern") { match entry { Ok(path) => { let path = path.to_str().unwrap(); println!("\n\n == Now testing `{}` ==\n", path); match FileOptions::cached().open(path) { Ok(file) => { for i in 0 .. file.num_pages() { let _ = file.get_page(i); } } Err(_) => { continue; } } } Err(e) => panic!("error when reading glob patterns: {:?}", e), } } } #[cfg(feature="cache")] #[test] fn parse_objects_from_stream() { use pdf::object::NoResolve; let file = run!(FileOptions::cached().open(file_path!("xelatex.pdf"))); let resolver = file.resolver(); // .. we know that object 13 of that file is an ObjectStream let obj_stream: RcRef = run!(resolver.get(Ref::new(PlainRef {id: 13, gen: 0}))); for i in 0..obj_stream.n_objects() { let (data, range) = run!(obj_stream.get_object_slice(i, &resolver)); let slice = &data[range]; println!("Object slice #{}: {}\n", i, str::from_utf8(slice).unwrap()); run!(parse(slice, &NoResolve, ParseFlags::ANY)); } } // TODO test decoding pdf-0.9.0/tests/write.rs000064400000000000000000000015361046102023000132520ustar 00000000000000// TODO: commented out to make it compile /* extern crate pdf; use pdf::file::File; use pdf::types::*; use pdf::stream::ObjectStream; fn main() { let mut file = File::new(Vec::new()); let page_tree_promise = file.promise(); let mut page_tree = PageTree::root(); let mut page = Page::new((&page_tree_promise).into()); page.media_box = Some(Rect { left: 0., right: 100., top: 0., bottom: 200. }); // create the content stream let content = ObjectStream::new(&mut file); // add stream to file let content_ref = file.add(content); page_tree.add(file.add(PagesNode::Page(page)).unwrap()); let catalog = Catalog::new(file.fulfill(page_tree_promise, page_tree).unwrap()); let catalog_ref = file.add(catalog).unwrap(); file.finish(catalog_ref); } */ pdf-0.9.0/tests/xref.rs000064400000000000000000000005421046102023000130600ustar 00000000000000use pdf::file::FileOptions; #[test] fn infinite_loop_invalid_file() { assert!(FileOptions::uncached().load(b"startxref%PDF-".as_ref()).is_err()); } #[test] fn ending_angle_bracket() { assert!(FileOptions::uncached().load(b"%PDF-startxref>".as_ref()).is_err()); assert!(FileOptions::uncached().load(b"%PDF-startxref<".as_ref()).is_err()); }