elasticlunr-rs-3.0.2/.cargo_vcs_info.json0000644000000001360000000000100140050ustar { "git": { "sha1": "057ead48ddcefef0be1d34d19339cdd091053267" }, "path_in_vcs": "" }elasticlunr-rs-3.0.2/.gitattributes000064400000000000000000000000150072674642500155140ustar 00000000000000* text eol=lfelasticlunr-rs-3.0.2/.gitignore000064400000000000000000000005450072674642500146210ustar 00000000000000# Generated by Cargo # will have compiled files and executables /target/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock Cargo.lock # These are backup files generated by rustfmt **/*.rs.bk examples/out.json out.json **/node_modules/ .idea/elasticlunr-rs-3.0.2/.vscode/settings.json000064400000000000000000000002110072674642500167130ustar 00000000000000{ "editor.formatOnSave": true, "editor.formatOnSaveMode": "modificationsIfAvailable", "rust-analyzer.cargo.features": "all" }elasticlunr-rs-3.0.2/CHANGELOG.md000064400000000000000000000035150072674642500144420ustar 00000000000000# Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ## [3.0.1] - 2022-07-23 ### Changed - Updated dependencies and MSRVs to fix builds. ([#47](https://github.com/mattico/elasticlunr-rs/pull/47) et. al.) ## [3.0.0] - 2022-06-01 ### Added - Language support for Arabic ([#40](https://github.com/mattico/elasticlunr-rs/pull/40])). - Add the `Language` trait to make it easier to implement languages outside the crate. - Add `IndexBuilder::add_field_with_tokenizer` to specify the tokenizer for a field. ### Changed - Update to 2018 edition, and bump MSRV to 1.54.0. - Change benchmarks to use Criterion. - Remove dependency on lazy_static. - Update dependencies. 
- Use Unicode character classes for trimmer. - `IndexBuilder` functions which add fields will now panic if the same field is added multiple times. - Fix `IndexBuilder` not respecting field insertion order. ### Removed - Remove the `default` feature. You now need to opt-in to the `languages` feature. - Remove the deprecated function `Pipeline::for_language`. - Remove the `pipeline::tokenize*` functions, which are now implemented as part of the `Language` trait. - Remove `Index::add_doc_with_tokenizer(s)`, replaced by `IndexBuilder::add_field_with_tokenizer`. - Remove the `Language` enum. Use the `Language` trait implementations in the `lang` modules, and the free functions `lang::from_name`, `lang::from_code`, and `lang::languages`. [Unreleased]: https://github.com/mattico/elasticlunr-rs/compare/v3.0.0...HEAD [3.0.0]: https://github.com/mattico/elasticlunr-rs/compare/v2.3.14...v3.0.0 [3.0.1]: https://github.com/mattico/elasticlunr-rs/compare/v3.0.0...v3.0.1 elasticlunr-rs-3.0.2/Cargo.lock0000644000001027670000000000100117750ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anyhow" version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi 0.1.19", "libc", "winapi", ] [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bincode" version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ "serde", ] [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bstr" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ "lazy_static", "memchr", "regex-automata", "serde", ] [[package]] name = "build_const" version = "0.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" [[package]] name = "bumpalo" version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" [[package]] name = "byteorder" version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cedarwood" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90" dependencies = [ "smallvec", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" [[package]] name = "ciborium-ll" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "3.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" dependencies = [ 
"atty", "bitflags", "clap_derive", "clap_lex", "indexmap", "once_cell", "strsim", "termcolor", "textwrap", ] [[package]] name = "clap_derive" version = "3.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ "heck", "proc-macro-error", "proc-macro2", "quote", "syn", ] [[package]] name = "clap_lex" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" dependencies = [ "os_str_bytes", ] [[package]] name = "crc" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" dependencies = [ "build_const", ] [[package]] name = "crc32fast" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if", ] [[package]] name = "criterion" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" dependencies = [ "anes", "atty", "cast", "ciborium", "clap", "criterion-plot", "itertools", "lazy_static", "num-traits", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-channel" version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" 
version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset", "scopeguard", ] [[package]] name = "crossbeam-utils" version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" dependencies = [ "cfg-if", ] [[package]] name = "csv" version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" dependencies = [ "bstr", "csv-core", "itoa 0.4.8", "ryu", "serde", ] [[package]] name = "csv-core" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" dependencies = [ "memchr", ] [[package]] name = "either" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "elasticlunr-rs" version = "3.0.2" dependencies = [ "criterion", "jieba-rs", "lindera", "lindera-core", "maplit", "regex", "rust-stemmers", "serde", "serde_derive", "serde_json", ] [[package]] name = "encoding" version = "0.2.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" dependencies = [ "encoding-index-japanese", "encoding-index-korean", "encoding-index-simpchinese", "encoding-index-singlebyte", "encoding-index-tradchinese", ] [[package]] name = 
"encoding-index-japanese" version = "1.20141219.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" dependencies = [ "encoding_index_tests", ] [[package]] name = "encoding-index-korean" version = "1.20141219.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" dependencies = [ "encoding_index_tests", ] [[package]] name = "encoding-index-simpchinese" version = "1.20141219.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" dependencies = [ "encoding_index_tests", ] [[package]] name = "encoding-index-singlebyte" version = "1.20141219.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" dependencies = [ "encoding_index_tests", ] [[package]] name = "encoding-index-tradchinese" version = "1.20141219.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" dependencies = [ "encoding_index_tests", ] [[package]] name = "encoding_index_tests" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" [[package]] name = "env_logger" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" dependencies = [ "atty", "humantime", "log", "regex", "termcolor", ] [[package]] name = "filetime" version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a3de6e8d11b22ff9edc6d916f890800597d60f8b2da1caf2955c274638d6412" dependencies = [ "cfg-if", "libc", "redox_syscall", "windows-sys", ] 
[[package]] name = "flate2" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" dependencies = [ "crc32fast", "miniz_oxide", ] [[package]] name = "fxhash" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" dependencies = [ "byteorder", ] [[package]] name = "getrandom" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "glob" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "half" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "hermit-abi" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" dependencies = [ "libc", ] [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "indexmap" version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", "hashbrown 0.12.3", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "itoa" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jieba-rs" version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c7e12f50325401dde50c29ca32cff44bae20873135b39f4e19ecf305226dd80" dependencies = [ "cedarwood", "fxhash", "hashbrown 0.11.2", "lazy_static", "phf", "phf_codegen", "regex", ] [[package]] name = "js-sys" version = "0.3.61" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] name = "lindera" version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dddd011921cac0ec59025a6b6e26c2cd9af3adce384b56c753c31df71a07965" dependencies = [ "anyhow", "bincode", "byteorder", "encoding", "lindera-cc-cedict-builder", "lindera-core", "lindera-dictionary", "lindera-ipadic", "lindera-ipadic-builder", "lindera-ko-dic-builder", "lindera-unidic-builder", "serde", "serde_json", "thiserror", ] [[package]] name = "lindera-cc-cedict-builder" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "584491a91b758f92ef3202aaf969d837522f2c11390c4de0049a356d63bc0b0f" dependencies = [ "anyhow", "bincode", "byteorder", "clap", "csv", "encoding", "env_logger", "glob", "lindera-core", "lindera-decompress", "log", "yada", ] [[package]] name = "lindera-core" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c726ee1bf3282621a802d50f5e03d3f88aae41456815e1d0cb2271a538ff83ec" dependencies = [ "anyhow", "bincode", "byteorder", "encoding", "log", "serde", "thiserror", "yada", ] [[package]] name = "lindera-decompress" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f9df38ea9310a1256cdee64ff0ebe3f17c49314e3176e53d2213371729d6744" dependencies = [ "anyhow", "lzma-rs", "serde", ] [[package]] name = "lindera-dictionary" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a525b654642ff9f27927c5abba33f4c651e984b54a65e4f787c0b8c8e22e4a6" dependencies = [ "anyhow", "bincode", "byteorder", "lindera-core", ] [[package]] name = "lindera-ipadic" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4797e016fc7dc0709ddb8c31da3b9e923e33e14043a4ff58431dd9c447ffacd2" dependencies = [ "bincode", "byteorder", "encoding", "flate2", "lindera-core", 
"lindera-ipadic-builder", "once_cell", "tar", ] [[package]] name = "lindera-ipadic-builder" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bd3ecfb07e8810f5ba313fa836804b66120f0ea76c2d93948c2ddcf4f81fd90" dependencies = [ "anyhow", "bincode", "byteorder", "clap", "encoding", "env_logger", "glob", "lindera-core", "lindera-decompress", "log", "serde", "yada", ] [[package]] name = "lindera-ko-dic-builder" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc86f26560ea69e91413eecc078d8e13f39b3c1fdc5a242d79d7622f6fab3a83" dependencies = [ "anyhow", "bincode", "byteorder", "clap", "csv", "encoding", "env_logger", "glob", "lindera-core", "lindera-decompress", "log", "yada", ] [[package]] name = "lindera-unidic-builder" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05c1bb8b7d38ffec7d949ee2c603b6ef96dfa7cf4937e91bad295a2d2b267b82" dependencies = [ "anyhow", "bincode", "byteorder", "clap", "csv", "encoding", "env_logger", "glob", "lindera-core", "lindera-decompress", "log", "yada", ] [[package]] name = "log" version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] [[package]] name = "lzma-rs" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1" dependencies = [ "byteorder", "crc", ] [[package]] name = "maplit" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" version = 
"0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" dependencies = [ "autocfg", ] [[package]] name = "miniz_oxide" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" dependencies = [ "adler", ] [[package]] name = "num-traits" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ "hermit-abi 0.2.6", "libc", ] [[package]] name = "once_cell" version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "oorandom" version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "os_str_bytes" version = "6.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" [[package]] name = "phf" version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" dependencies = [ "phf_generator", "phf_shared", ] [[package]] name = "phf_generator" version = "0.10.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" dependencies = [ "phf_shared", "rand", ] [[package]] name = "phf_shared" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" dependencies = [ "siphasher", ] [[package]] name = "plotters" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" [[package]] name = "plotters-svg" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" dependencies = [ "plotters-backend", ] [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro-error" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", "syn", "version_check", ] [[package]] name = "proc-macro-error-attr" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", "quote", "version_check", ] [[package]] name = "proc-macro2" version = "1.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] [[package]] name = "rayon" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", "num_cpus", ] [[package]] name = "redox_syscall" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ "bitflags", ] [[package]] name = "regex" version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" dependencies = [ "aho-corasick", 
"memchr", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" [[package]] name = "rust-stemmers" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" dependencies = [ "serde", "serde_derive", ] [[package]] name = "ryu" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" dependencies = [ "itoa 1.0.6", "ryu", "serde", ] [[package]] name 
= "siphasher" version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "smallvec" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tar" version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" dependencies = [ "filetime", "libc", "xattr", ] [[package]] name = "termcolor" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" dependencies = [ "winapi-util", ] [[package]] name = "textwrap" version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "tinytemplate" version = "1.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "unicode-ident" version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" dependencies = [ "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" [[package]] name = "web-sys" version = "0.3.61" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" dependencies = [ "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ "windows_aarch64_gnullvm", 
"windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "xattr" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" dependencies = [ "libc", ] [[package]] name = "yada" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d12cb7a57bbf2ab670ed9545bae3648048547f9039279a89ce000208e585c1" 
elasticlunr-rs-3.0.2/Cargo.toml0000644000000043410000000000100120050ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" rust-version = "1.57.0" name = "elasticlunr-rs" version = "3.0.2" authors = ["Matt Ickstadt "] exclude = [ "tests/lunr-fixture-gen/", "js/", ".github/", ] description = "A partial port of elasticlunr.js to Rust for generating static document search indexes" documentation = "https://docs.rs/elasticlunr-rs" readme = "README.md" keywords = [ "search", "index", "indexing", "lunr", "elasticlunr", ] license = "MIT/Apache-2.0" repository = "https://github.com/mattico/elasticlunr-rs" [lib] name = "elasticlunr" [[bench]] name = "bench" harness = false [dependencies.jieba-rs] version = "0.6" optional = true [dependencies.lindera] version = "0.14" features = ["ipadic"] optional = true [dependencies.lindera-core] version = "0.13.5" optional = true [dependencies.regex] version = "1" [dependencies.rust-stemmers] version = "1.2.0" optional = true [dependencies.serde] version = "1" [dependencies.serde_derive] version = "1.0.34" [dependencies.serde_json] version = "1" [dev-dependencies.criterion] version = "0.4.0" [dev-dependencies.maplit] version = "1" [features] ar = [] da = ["rust-stemmers"] de = ["rust-stemmers"] du = ["rust-stemmers"] es = ["rust-stemmers"] fi = ["rust-stemmers"] fr = ["rust-stemmers"] hu = ["rust-stemmers"] it = ["rust-stemmers"] ja = [ "lindera", "lindera-core", ] ko = [] languages = [ "ar", "da", "de", "du", "es", "fi", "fr", "hu", "it", "ja", "ko", "no", "pt", "ro", "ru", "sv", "tr", "zh", ] no = 
["rust-stemmers"] pt = ["rust-stemmers"] ro = ["rust-stemmers"] ru = ["rust-stemmers"] sv = ["rust-stemmers"] tr = ["rust-stemmers"] zh = ["jieba-rs"] [badges.maintenance] status = "passively-maintained" elasticlunr-rs-3.0.2/Cargo.toml.orig000064400000000000000000000031150072674642500155140ustar 00000000000000[package] authors = ["Matt Ickstadt "] license = "MIT/Apache-2.0" name = "elasticlunr-rs" version = "3.0.2" description = "A partial port of elasticlunr.js to Rust for generating static document search indexes" documentation = "https://docs.rs/elasticlunr-rs" repository = "https://github.com/mattico/elasticlunr-rs" keywords = ["search", "index", "indexing", "lunr", "elasticlunr"] exclude = ["tests/lunr-fixture-gen/", "js/", ".github/"] readme = "README.md" edition = "2018" rust-version = "1.57.0" [badges] maintenance = { status = "passively-maintained" } [lib] name = "elasticlunr" [[bench]] name = "bench" harness = false [dev-dependencies] criterion = "0.4.0" maplit = "1" [dependencies] regex = "1" rust-stemmers = { version = "1.2.0", optional = true } # 1.2.0 minimum for Norwegian serde = "1" serde_derive = "1.0.34" # First version to support #[serde(flatten)] serde_json = "1" jieba-rs = { version = "0.6", optional = true } lindera = { version = "0.14", optional = true, features = ["ipadic"] } lindera-core = { version = "0.13.5", optional = true } [features] languages = ["ar", "da", "de", "du", "es", "fi", "fr", "hu", "it", "ja", "ko", "no", "pt", "ro", "ru", "sv", "tr", "zh"] ar = [] da = ["rust-stemmers"] de = ["rust-stemmers"] du = ["rust-stemmers"] es = ["rust-stemmers"] fi = ["rust-stemmers"] fr = ["rust-stemmers"] hu = ["rust-stemmers"] it = ["rust-stemmers"] ja = ["lindera", "lindera-core"] ko = [] no = ["rust-stemmers"] pt = ["rust-stemmers"] ro = ["rust-stemmers"] ru = ["rust-stemmers"] sv = ["rust-stemmers"] tr = ["rust-stemmers"] zh = ["jieba-rs"] elasticlunr-rs-3.0.2/LICENSE-APACHE000064400000000000000000000251360072674642500145600ustar
00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.elasticlunr-rs-3.0.2/LICENSE-JS000064400000000000000000000022050072674642500141430ustar 00000000000000Portions of this library's code is ported from elasticlunr.js Used under the terms of the MIT license. Copyright (C) 2017 by Wei Song Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.elasticlunr-rs-3.0.2/LICENSE-MIT000064400000000000000000000020430072674642500142600ustar 00000000000000Copyright (c) 2017 Matthew Ickstadt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.elasticlunr-rs-3.0.2/LICENSE-WORDS000064400000000000000000000022220072674642500145240ustar 00000000000000Word lists originally from https://github.com/brenes/stopwords-filter Used under the terms of the MIT license. Copyright (c) 2012 David J. 
Brenes Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.elasticlunr-rs-3.0.2/README.md000064400000000000000000000053240072674642500141100ustar 00000000000000# elasticlunr-rs ![Build Status](https://github.com/mattico/elasticlunr-rs/workflows/CI/badge.svg) [![Documentation](https://docs.rs/elasticlunr-rs/badge.svg)](https://docs.rs/elasticlunr-rs) [![Crates.io](https://img.shields.io/crates/v/elasticlunr-rs.svg)](https://crates.io/crates/elasticlunr-rs) ![Maintenance](https://img.shields.io/badge/Maintenance-Passive-yellow) ![MSRV](https://img.shields.io/badge/MSRV-1.57.0-orange) A partial port of [elasticlunr.js][eljs] to Rust. Intended to be used for generating compatible search indices. This library is passively maintained to support existing users. New users are encouraged to use a different library such as [stork](https://github.com/jameslittle230/stork). 
## Example ```Rust use std::fs::File; use std::io::Write; use elasticlunr::Index; let mut index = Index::new(&["title", "body"]); index.add_doc("1", &["This is a title", "This is body text!"]); // Add more documents... let mut file = File::create("out.json").unwrap(); file.write_all(index.to_json_pretty().as_bytes()); ``` ## Minimum Supported Rust Version 1.60.0 Changing the minimum supported Rust version is not considered a breaking change for semver purposes. The supported version is constrained by the version supported by our transitive dependencies. Earlier rustc versions may work if you have older versions of these in your `Cargo.lock`, but this is not tested. ## Languages This library includes optional support for non-English languages; see the features in `Cargo.toml`. Like in the JavaScript version, the language support is designed to be compatible with the [lunr-languages plugins][lunr-languages]. Some languages use a modified version, which is included in the `js` directory of the repository. ## License This repository is offered under the terms of the - Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) - MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT) at your option. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. Includes code ported from [elasticlunr.js][eljs] Copyright (C) 2017 by Wei Song, used under license. See LICENSE-JS for details. Includes stop word lists ported from [stopwords-filter][swft] Copyright (C) 2012 David J. Brenes, used under license. See LICENSE-WORDS for details. Bundled JavaScript code in the repository (not included in the cargo package) may have other licenses.
[lunr-languages]: https://github.com/MihaiValentin/lunr-languages [eljs]: https://github.com/weixsong/elasticlunr.js [swft]: https://github.com/brenes/stopwords-filterelasticlunr-rs-3.0.2/benches/bench.rs000064400000000000000000000014130072674642500156600ustar 00000000000000use criterion::{black_box, criterion_group, criterion_main, Criterion}; use elasticlunr::Index; fn bench_main(c: &mut Criterion) { // BTreeMap: 3,165,389 ns/iter (+/- 420,869) // BTreeMap: 2,920,902 ns/iter (+/- 118,729) c.bench_function("create_index", |b| { let text = include_str!("../tests/data/en.in.txt"); let sections: Vec<_> = text.split("\n\n").collect(); b.iter(|| { let mut index = Index::new(&["section"]); for (i, section) in sections.iter().enumerate() { index.add_doc(&format!("section_{}", i), &[section]); } black_box(index.to_json()); }) }); } criterion_group!(benches, bench_main); criterion_main!(benches); elasticlunr-rs-3.0.2/examples/export_json.rs000064400000000000000000000011250072674642500173620ustar 00000000000000use elasticlunr::Index; use std::fs::File; use std::io::Write; fn main() { let mut index = Index::new(&["title", "body"]); index.add_doc( "1", &[ "This Week in Rust 207", "Hello and welcome to another issue of This Week in Rust!", ], ); index.add_doc( "2", &[ "This Week in Rust 206", "Hello and welcome to another issue of This Week in Rust!", ], ); let mut file = File::create("examples/out.json").unwrap(); file.write_all(index.to_json_pretty().as_bytes()).unwrap(); } elasticlunr-rs-3.0.2/src/config.rs000064400000000000000000000070770072674642500152420ustar 00000000000000//! These types are not used for generating `Index`es. They are provided to help with //! creating compatible JSON structures for configuring the JavaScript search //! function. //! //! *Reference:* //! use std::collections::BTreeMap; /// Used to set the search configuration for a specific field. /// When `expand` or `bool` is `None`, elasticlunr.js will use the value from /// the global configuration. 
The `boost` field, if present, /// increases the importance of this field when ordering search results. #[derive(Serialize, Deserialize, Default, Debug, Copy, Clone, Eq, PartialEq)] pub struct SearchOptionsField { #[serde(skip_serializing_if = "Option::is_none")] pub boost: Option, #[serde(skip_serializing_if = "Option::is_none")] pub bool: Option, #[serde(skip_serializing_if = "Option::is_none")] pub expand: Option, } /// Sets which boolean model is used for searching with /// multiple terms. Defaults to `Or`. /// /// - *AND* requires every search term to be present in results /// - *OR* accepts results which have at least one term /// #[derive(Serialize, Deserialize, Debug, Copy, Clone, Eq, PartialEq)] #[serde(rename_all = "SCREAMING_SNAKE_CASE")] pub enum SearchBool { Or, And, } impl Default for SearchBool { fn default() -> Self { SearchBool::Or } } /// The search configuration map which is passed to the /// elasticlunr.js `Index.search()` function. /// /// |Key |Default| /// |--------|-------| /// |`bool` |`OR` | /// |`expand`|`false`| #[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] pub struct SearchOptions { pub bool: SearchBool, pub expand: bool, pub fields: BTreeMap, } #[cfg(test)] mod tests { use super::*; use serde_json; #[test] fn test_normal_config() { let options = SearchOptions { fields: btreemap![ "title".into() => SearchOptionsField { boost: Some(5), ..Default::default() }, "body".into() => SearchOptionsField { boost: Some(1), ..Default::default() }, ], ..Default::default() }; let stringed = serde_json::to_string(&options).unwrap(); assert_eq!( stringed, r#"{"bool":"OR","expand":false,"fields":{"body":{"boost":1},"title":{"boost":5}}}"# ); } #[test] fn test_complex_config() { let options = SearchOptions { fields: btreemap! 
{ "title".into() => SearchOptionsField { expand: Some(true), ..Default::default() }, "body".into() => SearchOptionsField { bool: Some(SearchBool::Or), ..Default::default() }, "breadcrumbs".into() => SearchOptionsField { bool: Some(SearchBool::default()), boost: Some(200), ..Default::default() }, }, expand: false, bool: SearchBool::And, }; let stringed = serde_json::to_string_pretty(&options).unwrap(); assert_eq!( stringed, r#"{ "bool": "AND", "expand": false, "fields": { "body": { "bool": "OR" }, "breadcrumbs": { "boost": 200, "bool": "OR" }, "title": { "expand": true } } }"# ); } } elasticlunr-rs-3.0.2/src/document_store.rs000064400000000000000000000230110072674642500170110ustar 00000000000000//! Implements an elasticlunr.js document store. Most users do not need to use this module directly. use std::collections::BTreeMap; /// The document store saves the complete text of each item saved to the index, if enabled. /// Most users do not need to use this type directly. #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub struct DocumentStore { pub save: bool, pub docs: BTreeMap>, pub doc_info: BTreeMap>, // Redundant with docs.len(), but needed for serialization pub length: usize, } impl DocumentStore { pub fn new(save: bool) -> Self { DocumentStore { save, docs: BTreeMap::new(), doc_info: BTreeMap::new(), length: 0, } } pub fn len(&self) -> usize { self.docs.len() } pub fn is_empty(&self) -> bool { self.len() == 0 } pub fn is_stored(&self) -> bool { self.save } pub fn has_doc(&self, doc_ref: &str) -> bool { self.docs.contains_key(doc_ref) } pub fn add_doc(&mut self, doc_ref: &str, doc: BTreeMap) { if !self.has_doc(doc_ref) { self.length += 1; } self.docs.insert( doc_ref.into(), if self.save { doc } else { BTreeMap::new() }, ); } pub fn get_doc(&self, doc_ref: &str) -> Option> { self.docs.get(doc_ref).cloned() } pub fn remove_doc(&mut self, doc_ref: &str) { if self.has_doc(doc_ref) { self.length -= 1; } self.docs.remove(doc_ref); } 
pub fn add_field_length(&mut self, doc_ref: &str, field: &str, length: usize) { self.doc_info .entry(doc_ref.into()) .or_insert_with(BTreeMap::new) .insert(field.into(), length); } pub fn get_field_length(&self, doc_ref: &str, field: &str) -> usize { if self.has_doc(doc_ref) { self.doc_info .get(doc_ref) .and_then(|e| e.get(field)) .cloned() .unwrap_or(0) } else { 0 } } } #[cfg(test)] mod tests { use super::*; #[test] fn add_doc_tokens() { let mut store = DocumentStore::new(true); let doc = btreemap! { "title".into() => "eggs bread".into() }; store.add_doc("1", doc.clone()); assert_eq!(store.get_doc("1").unwrap(), doc); } #[test] fn create_doc_no_store() { let mut store = DocumentStore::new(false); let doc = btreemap! { "title".into() => "eggs bread".into() }; store.add_doc("1", doc); assert_eq!(store.len(), 1); assert_eq!(store.is_stored(), false); assert_eq!(store.has_doc("1"), true); } #[test] fn add_doc_no_store() { let mut store = DocumentStore::new(false); let doc1 = btreemap! { "title".into() => "eggs bread".into() }; let doc2 = btreemap! { "title".into() => "hello world".into() }; store.add_doc("1", doc1); store.add_doc("2", doc2); assert_eq!(store.len(), 2); assert_eq!(store.is_stored(), false); assert_eq!(store.has_doc("1"), true); assert_eq!(store.has_doc("2"), true); } #[test] fn is_stored_true() { let store = DocumentStore::new(true); assert_eq!(store.is_stored(), true); } #[test] fn is_stored_false() { let store = DocumentStore::new(false); assert_eq!(store.is_stored(), false); } #[test] fn get_doc_no_store() { let mut store = DocumentStore::new(false); let doc1 = btreemap! { "title".into() => "eggs bread".into() }; let doc2 = btreemap! 
{ "title".into() => "hello world".into() }; store.add_doc("1", doc1); store.add_doc("2", doc2); assert_eq!(store.len(), 2); assert_eq!(store.is_stored(), false); assert_eq!(store.get_doc("1").unwrap(), BTreeMap::new()); assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); } #[test] fn get_nonexistant_doc_no_store() { let mut store = DocumentStore::new(false); let doc1 = btreemap! { "title".into() => "eggs bread".into() }; let doc2 = btreemap! { "title".into() => "hello world".into() }; store.add_doc("1", doc1); store.add_doc("2", doc2); assert_eq!(store.len(), 2); assert_eq!(store.is_stored(), false); assert_eq!(store.get_doc("6"), None); assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); } #[test] fn remove_doc_no_store() { let mut store = DocumentStore::new(false); let doc1 = btreemap! { "title".into() => "eggs bread".into() }; let doc2 = btreemap! { "title".into() => "hello world".into() }; store.add_doc("1", doc1); store.add_doc("2", doc2); store.remove_doc("1"); assert_eq!(store.len(), 1); assert_eq!(store.is_stored(), false); assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); assert_eq!(store.get_doc("1"), None); } #[test] fn remove_nonexistant_doc() { let mut store = DocumentStore::new(false); let doc1 = btreemap! { "title".into() => "eggs bread".into() }; let doc2 = btreemap! { "title".into() => "hello world".into() }; store.add_doc("1", doc1); store.add_doc("2", doc2); store.remove_doc("8"); assert_eq!(store.len(), 2); assert_eq!(store.is_stored(), false); assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); assert_eq!(store.get_doc("1").unwrap(), BTreeMap::new()); } #[test] fn get_num_docs() { let mut store = DocumentStore::new(true); assert_eq!(store.len(), 0); store.add_doc("1", btreemap! { "title".into() => "eggs bread".into() }); assert_eq!(store.len(), 1); } #[test] fn get_doc() { let mut store = DocumentStore::new(true); assert_eq!(store.len(), 0); store.add_doc("1", btreemap! 
{ "title".into() => "eggs bread".into() }); assert_eq!( store.get_doc("1").unwrap(), btreemap! { "title".into() => "eggs bread".into() } ); } #[test] fn get_doc_many_fields() { let mut store = DocumentStore::new(true); assert_eq!(store.len(), 0); store.add_doc( "1", btreemap! { "title".into() => "eggs bread".into() }, ); store.add_doc( "2", btreemap! { "title".into() => "boo bar".into() }, ); store.add_doc( "3", btreemap! { "title".into() => "oracle".into(), "body".into() => "Oracle is demonspawn".into() }, ); assert_eq!( store.get_doc("3").unwrap(), btreemap! { "title".into() => "oracle".into(), "body".into() => "Oracle is demonspawn".into() } ); assert_eq!(store.len(), 3); } #[test] fn get_nonexistant_doc() { let mut store = DocumentStore::new(true); assert_eq!(store.len(), 0); store.add_doc( "1", btreemap! { "title".into() => "eggs bread".into() }, ); store.add_doc( "2", btreemap! { "title".into() => "boo bar".into() }, ); store.add_doc( "3", btreemap! { "title".into() => "oracle".into(), "body".into() => "Oracle is demonspawn".into() }, ); assert_eq!(store.get_doc("4"), None); assert_eq!(store.get_doc("0"), None); assert_eq!(store.len(), 3); } #[test] fn check_store_has_key() { let mut store = DocumentStore::new(true); assert!(!store.has_doc("foo")); store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); assert!(store.has_doc("foo")); } #[test] fn remove_doc() { let mut store = DocumentStore::new(true); store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); assert!(store.has_doc("foo")); assert_eq!(store.len(), 1); store.remove_doc("foo"); assert!(!store.has_doc("foo")); assert_eq!(store.len(), 0); } #[test] fn remove_nonexistant_store() { let mut store = DocumentStore::new(true); store.add_doc("foo", btreemap! 
{ "title".into() => "eggs bread".into() }); assert!(store.has_doc("foo")); assert_eq!(store.len(), 1); store.remove_doc("bar"); assert!(store.has_doc("foo")); assert_eq!(store.len(), 1); } #[test] fn add_field_len() { let mut store = DocumentStore::new(true); store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); store.add_field_length("foo", "title", 2); assert_eq!(store.get_field_length("foo", "title"), 2); } #[test] fn add_field_length_multiple() { let mut store = DocumentStore::new(true); store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); store.add_field_length("foo", "title", 2); store.add_field_length("foo", "body", 10); assert_eq!(store.get_field_length("foo", "title"), 2); assert_eq!(store.get_field_length("foo", "body"), 10); } } elasticlunr-rs-3.0.2/src/inverted_index.rs000064400000000000000000000267740072674642500170110ustar 00000000000000//! Implements an elasticlunr.js inverted index. Most users do not need to use this module directly. 
use std::collections::BTreeMap;

/// Term frequency recorded for a single document at a trie node.
///
/// Serialized with the field renamed to `tf`.
#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq)]
struct TermFrequency {
    #[serde(rename = "tf")]
    pub term_freq: f64,
}

/// One node of the character trie that backs the inverted index.
///
/// A token is stored by walking one child per character; the node reached by
/// the last character holds the documents for that token.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default)]
struct IndexItem {
    /// Term frequency per document reference for the token that ends at this node.
    pub docs: BTreeMap<String, TermFrequency>,
    /// Document count for this node as maintained by `add_token` and
    /// `remove_token`; serialized as `df`.
    #[serde(rename = "df")]
    pub doc_freq: i64,
    /// Child nodes keyed by the next character of the token. Flattened on
    /// serialization, so every child becomes a sibling key of `docs` and `df`.
    #[serde(flatten, serialize_with = "IndexItem::serialize")]
    pub children: BTreeMap<char, IndexItem>,
}

impl IndexItem {
    /// Creates an empty node (no documents, no children, `doc_freq` of zero).
    fn new() -> Self {
        Default::default()
    }

    /// Serializer used for `children` through `serialize_with`: writes the map
    /// with every `char` key encoded as a UTF-8 string key.
    fn serialize<S>(map: &BTreeMap<char, IndexItem>, ser: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        use serde::ser::SerializeMap;

        let mut ser_map = ser.serialize_map(Some(map.len()))?;
        // Scratch buffer for `char::encode_utf8`; a `char` is at most four bytes in UTF-8.
        let mut buf = [0u8; 4];
        for (key, value) in map {
            let key = key.encode_utf8(&mut buf);
            ser_map.serialize_entry(key, value)?;
        }
        ser_map.end()
    }

    /// Records `term_freq` for `doc_ref` under `token`, creating any missing
    /// trie nodes along the way.
    ///
    /// `doc_freq` of the final node is incremented only when `doc_ref` was not
    /// already present there; otherwise the stored term frequency is replaced.
    /// An empty `token` is ignored.
    fn add_token(&mut self, doc_ref: &str, token: &str, term_freq: f64) {
        if token.is_empty() {
            return;
        }

        let mut node = self;
        for character in token.chars() {
            // Move the mutable reference into `parent` so it can be re-borrowed for the child.
            let parent = node;
            node = parent.children.entry(character).or_default();
        }

        // `insert` returns the previous value, so `None` means the document is new here.
        let previous = node.docs.insert(doc_ref.into(), TermFrequency { term_freq });
        if previous.is_none() {
            node.doc_freq += 1;
        }
    }

    /// Follows `token` character by character and returns the node reached,
    /// or `None` as soon as a character has no matching child.
    ///
    /// An empty `token` returns `self`.
    fn get_node(&self, token: &str) -> Option<&IndexItem> {
        token
            .chars()
            .try_fold(self, |node, ch| node.children.get(&ch))
    }

    /// Removes `doc_ref` from the node for `token` and decrements that node's
    /// `doc_freq` if the document was present.
    ///
    /// Does nothing when `token` is empty, when the token is not in the trie,
    /// or when the document is not recorded for it. Trie nodes themselves are
    /// never removed.
    fn remove_token(&mut self, doc_ref: &str, token: &str) {
        if token.is_empty() {
            return;
        }

        let mut node = self;
        for ch in token.chars() {
            let parent = node;
            match parent.children.get_mut(&ch) {
                Some(child) => node = child,
                None => return,
            }
        }

        if node.docs.remove(doc_ref).is_some() {
            node.doc_freq -= 1;
        }
    }
}

/// Implements an elasticlunr.js inverted index. Most users do not need to use this type directly.
#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] pub struct InvertedIndex { root: IndexItem, } impl InvertedIndex { pub fn new() -> Self { Default::default() } pub fn add_token(&mut self, doc_ref: &str, token: &str, term_freq: f64) { self.root.add_token(doc_ref, token, term_freq) } pub fn has_token(&self, token: &str) -> bool { self.root.get_node(token).map_or(false, |_| true) } pub fn remove_token(&mut self, doc_ref: &str, token: &str) { self.root.remove_token(doc_ref, token) } pub fn get_docs(&self, token: &str) -> Option> { self.root.get_node(token).map(|node| { node.docs .iter() .map(|(k, &v)| (k.clone(), v.term_freq)) .collect() }) } pub fn get_term_frequency(&self, doc_ref: &str, token: &str) -> f64 { self.root .get_node(token) .and_then(|node| node.docs.get(doc_ref)) .map_or(0., |docs| docs.term_freq) } pub fn get_doc_frequency(&self, token: &str) -> i64 { self.root.get_node(token).map_or(0, |node| node.doc_freq) } } #[cfg(test)] mod tests { use super::*; #[test] fn adding_token() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); assert_eq!(inverted_index.get_doc_frequency("foo"), 1); assert_eq!(inverted_index.get_term_frequency("123", "foo"), 1.); } #[test] fn has_token() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); assert!(inverted_index.has_token(token)); assert!(inverted_index.has_token("fo")); assert!(inverted_index.has_token("f")); assert!(!inverted_index.has_token("bar")); assert!(!inverted_index.has_token("foo ")); assert!(!inverted_index.has_token("foo ")) } #[test] fn adding_another_document_to_the_token() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); inverted_index.add_token("456", token, 1.); assert_eq!(inverted_index.get_term_frequency("123", "foo"), 1.); assert_eq!(inverted_index.get_term_frequency("456", "foo"), 1.); 
assert_eq!(inverted_index.get_doc_frequency("foo"), 2); } #[test] fn df_of_nonexistant_token() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); inverted_index.add_token("456", token, 1.); assert_eq!(inverted_index.get_doc_frequency("foo"), 2); assert_eq!(inverted_index.get_doc_frequency("fox"), 0); } #[test] fn adding_existing_doc() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); inverted_index.add_token("456", token, 1.); inverted_index.add_token("456", token, 100.); assert_eq!(inverted_index.get_term_frequency("456", "foo"), 100.); assert_eq!(inverted_index.get_doc_frequency("foo"), 2); } #[test] fn checking_token_exists_in() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); assert!(inverted_index.has_token(token)); } #[test] fn checking_if_a_token_does_not_exist() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); assert!(!inverted_index.has_token("fooo")); assert!(!inverted_index.has_token("bar")); assert!(!inverted_index.has_token("fof")); } #[test] fn retrieving_items() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 1.); assert_eq!( inverted_index.get_docs(token).unwrap(), btreemap! { "123".into() => 1. } ); assert_eq!(inverted_index.get_docs(""), Some(BTreeMap::new())); inverted_index.add_token("234", "boo", 100.); inverted_index.add_token("345", "too", 101.); assert_eq!( inverted_index.get_docs(token).unwrap(), btreemap! { "123".into() => 1. } ); inverted_index.add_token("234", token, 100.); inverted_index.add_token("345", token, 101.); assert_eq!( inverted_index.get_docs(token).unwrap(), btreemap! 
{ "123".into() => 1., "234".into() => 100., "345".into() => 101., } ); } #[test] fn retrieving_nonexistant_items() { let inverted_index = InvertedIndex::new(); assert_eq!(inverted_index.get_docs("foo"), None); assert_eq!(inverted_index.get_docs("fox"), None); } #[test] fn df_of_items() { let mut inverted_index = InvertedIndex::new(); inverted_index.add_token("123", "foo", 1.); inverted_index.add_token("456", "foo", 1.); inverted_index.add_token("789", "bar", 1.); assert_eq!(inverted_index.get_doc_frequency("foo"), 2); assert_eq!(inverted_index.get_doc_frequency("bar"), 1); assert_eq!(inverted_index.get_doc_frequency("baz"), 0); assert_eq!(inverted_index.get_doc_frequency("ba"), 0); assert_eq!(inverted_index.get_doc_frequency("b"), 0); assert_eq!(inverted_index.get_doc_frequency("fo"), 0); assert_eq!(inverted_index.get_doc_frequency("f"), 0); } #[test] fn removing_document_from_token() { let mut inverted_index = InvertedIndex::new(); assert_eq!(inverted_index.get_docs("foo"), None); inverted_index.add_token("123", "foo", 1.); assert_eq!( inverted_index.get_docs("foo").unwrap(), btreemap! { "123".into() => 1., } ); inverted_index.remove_token("123", "foo"); assert_eq!(inverted_index.get_docs("foo"), Some(BTreeMap::new())); assert_eq!(inverted_index.get_doc_frequency("foo"), 0); assert_eq!(inverted_index.has_token("foo"), true); } #[test] fn removing_nonexistant_document() { let mut inverted_index = InvertedIndex::new(); inverted_index.add_token("123", "foo", 1.); inverted_index.add_token("567", "bar", 1.); inverted_index.remove_token("foo", "456"); assert_eq!( inverted_index.get_docs("foo").unwrap(), btreemap! { "123".into() => 1. 
} ); assert_eq!(inverted_index.get_doc_frequency("foo"), 1); } #[test] fn removing_documet_nonexistant_key() { let mut inverted_index = InvertedIndex::new(); inverted_index.remove_token("123", "foo"); assert!(!inverted_index.has_token("foo")); assert_eq!(inverted_index.get_doc_frequency("foo"), 0); } #[test] fn get_term_frequency() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 2.); inverted_index.add_token("456", token, 3.); assert_eq!(inverted_index.get_term_frequency("123", token), 2.); assert_eq!(inverted_index.get_term_frequency("456", token), 3.); assert_eq!(inverted_index.get_term_frequency("789", token), 0.); } #[test] fn get_term_frequency_nonexistant_token() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 2.); inverted_index.add_token("456", token, 3.); assert_eq!(inverted_index.get_term_frequency("123", "ken"), 0.); assert_eq!(inverted_index.get_term_frequency("456", "ken"), 0.); } #[test] fn get_term_frequency_nonexistant_docref() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 2.); inverted_index.add_token("456", token, 3.); assert_eq!(inverted_index.get_term_frequency(token, "12"), 0.); assert_eq!(inverted_index.get_term_frequency(token, "23"), 0.); assert_eq!(inverted_index.get_term_frequency(token, "45"), 0.); } #[test] fn get_term_frequency_nonexistant_token_and_docref() { let mut inverted_index = InvertedIndex::new(); let token = "foo"; inverted_index.add_token("123", token, 2.); inverted_index.add_token("456", token, 3.); assert_eq!(inverted_index.get_term_frequency("token", "1"), 0.); assert_eq!(inverted_index.get_term_frequency("abc", "2"), 0.); assert_eq!(inverted_index.get_term_frequency("fo", "123"), 0.); } } elasticlunr-rs-3.0.2/src/lang/ar.rs000064400000000000000000000030300072674642500153010ustar 00000000000000use super::Language; use crate::pipeline::{Pipeline, 
PipelineFn}; use regex::Regex; /// Arabic Language /// /// Designed to be compatibile with the included Javascript implementation. See `js/lunr.ar.js`. pub struct Arabic {} impl Arabic { pub fn new() -> Self { Self {} } } impl Language for Arabic { fn name(&self) -> String { "Arabic".into() } fn code(&self) -> String { "ar".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![Box::new(Stemmer::new())], } } } struct Stemmer { diacritics: Regex, alefs: Regex, } impl Stemmer { pub fn new() -> Self { let diacritics = Regex::new("[\u{0640}\u{064b}-\u{065b}]").unwrap(); let alefs = Regex::new("[\u{0622}\u{0623}\u{0625}\u{0671}\u{0649}]").unwrap(); Self { diacritics, alefs } } } impl PipelineFn for Stemmer { fn name(&self) -> String { "stemmer-ar".into() } fn filter(&self, token: String) -> Option { // remove diacritics and elongating character let result = self.diacritics.replace(&token, ""); // replace all variations of alef (آأإٱى) to a plain alef (ا) let result = self.alefs.replace(&result, "\u{0627}"); if result.is_empty() { None } else if result == token { Some(token) } else { Some(result.into()) } } } elasticlunr-rs-3.0.2/src/lang/common.rs000064400000000000000000000041640072674642500162000ustar 00000000000000use crate::pipeline::PipelineFn; use regex::Regex; use std::collections::HashSet; #[derive(Clone)] pub struct StopWordFilter { name: String, stop_words: HashSet, } impl StopWordFilter { pub fn new(name: &str, stop_words: &[&str]) -> Self { Self { name: name.into(), stop_words: stop_words.iter().map(|s| s.to_string()).collect(), } } } impl PipelineFn for StopWordFilter { fn name(&self) -> String { self.name.clone() } fn filter(&self, token: String) -> Option { if self.stop_words.contains(&token) { None } else { Some(token) } } } #[derive(Clone)] pub struct RegexTrimmer { name: String, trimmer: Regex, } impl RegexTrimmer { pub fn new(name: &str, word_chars: &str) -> Self 
{ let name = name.into(); let trimmer = Regex::new(&format!("^[^{0}]+|[^{0}]+$", word_chars)).unwrap(); Self { name, trimmer } } } impl PipelineFn for RegexTrimmer { fn name(&self) -> String { self.name.clone() } fn filter(&self, token: String) -> Option { let result = self.trimmer.replace_all(&token, ""); if result.is_empty() { None } else if result == token { Some(token) } else { Some(result.into()) } } } #[cfg(feature = "rust-stemmers")] pub struct RustStemmer { name: String, stemmer: rust_stemmers::Stemmer, } #[cfg(feature = "rust-stemmers")] impl RustStemmer { pub fn new(name: &str, algo: rust_stemmers::Algorithm) -> Self { Self { name: name.into(), stemmer: rust_stemmers::Stemmer::create(algo), } } } #[cfg(feature = "rust-stemmers")] impl PipelineFn for RustStemmer { fn name(&self) -> String { self.name.clone() } fn filter(&self, token: String) -> Option { let result = self.stemmer.stem(&token); if result.is_empty() { None } else if result == token { Some(token) } else { Some(result.into()) } } } elasticlunr-rs-3.0.2/src/lang/da.rs000064400000000000000000000031320072674642500152660ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Danish {} impl Danish { pub fn new() -> Self { Self {} } } impl Language for Danish { fn name(&self) -> String { "Danish".into() } fn code(&self) -> String { "da".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-da", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-da", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-da", Algorithm::Danish)), ], } } } const STOP_WORDS: &[&str] = &[ "", "ad", "af", "alle", "alt", "anden", "at", "blev", "blive", "bliver", "da", "de", "dem", "den", "denne", "der", "deres", "det", "dette", "dig", "din", "disse", "dog", "du", 
"efter", "eller", "en", "end", "er", "et", "for", "fra", "ham", "han", "hans", "har", "havde", "have", "hende", "hendes", "her", "hos", "hun", "hvad", "hvis", "hvor", "i", "ikke", "ind", "jeg", "jer", "jo", "kunne", "man", "mange", "med", "meget", "men", "mig", "min", "mine", "mit", "mod", "ned", "noget", "nogle", "nu", "når", "og", "også", "om", "op", "os", "over", "på", "selv", "sig", "sin", "sine", "sit", "skal", "skulle", "som", "sådan", "thi", "til", "ud", "under", "var", "vi", "vil", "ville", "vor", "være", "været", ]; elasticlunr-rs-3.0.2/src/lang/de.rs000064400000000000000000000074220072674642500153000ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct German {} impl German { pub fn new() -> Self { Self {} } } impl Language for German { fn name(&self) -> String { "German".into() } fn code(&self) -> String { "de".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-de", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-de", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-de", Algorithm::German)), ], } } } const STOP_WORDS: &[&str] = &[ "", "aber", "alle", "allem", "allen", "aller", "alles", "als", "also", "am", "an", "ander", "andere", "anderem", "anderen", "anderer", "anderes", "anderm", "andern", "anderr", "anders", "auch", "auf", "aus", "bei", "bin", "bis", "bist", "da", "damit", "dann", "das", "dasselbe", "dazu", "daß", "dein", "deine", "deinem", "deinen", "deiner", "deines", "dem", "demselben", "den", "denn", "denselben", "der", "derer", "derselbe", "derselben", "des", "desselben", "dessen", "dich", "die", "dies", "diese", "dieselbe", "dieselben", "diesem", "diesen", "dieser", "dieses", "dir", "doch", "dort", "du", "durch", "ein", "eine", "einem", "einen", "einer", "eines", 
"einig", "einige", "einigem", "einigen", "einiger", "einiges", "einmal", "er", "es", "etwas", "euch", "euer", "eure", "eurem", "euren", "eurer", "eures", "für", "gegen", "gewesen", "hab", "habe", "haben", "hat", "hatte", "hatten", "hier", "hin", "hinter", "ich", "ihm", "ihn", "ihnen", "ihr", "ihre", "ihrem", "ihren", "ihrer", "ihres", "im", "in", "indem", "ins", "ist", "jede", "jedem", "jeden", "jeder", "jedes", "jene", "jenem", "jenen", "jener", "jenes", "jetzt", "kann", "kein", "keine", "keinem", "keinen", "keiner", "keines", "können", "könnte", "machen", "man", "manche", "manchem", "manchen", "mancher", "manches", "mein", "meine", "meinem", "meinen", "meiner", "meines", "mich", "mir", "mit", "muss", "musste", "nach", "nicht", "nichts", "noch", "nun", "nur", "ob", "oder", "ohne", "sehr", "sein", "seine", "seinem", "seinen", "seiner", "seines", "selbst", "sich", "sie", "sind", "so", "solche", "solchem", "solchen", "solcher", "solches", "soll", "sollte", "sondern", "sonst", "um", "und", "uns", "unse", "unsem", "unsen", "unser", "unses", "unter", "viel", "vom", "von", "vor", "war", "waren", "warst", "was", "weg", "weil", "weiter", "welche", "welchem", "welchen", "welcher", "welches", "wenn", "werde", "werden", "wie", "wieder", "will", "wir", "wird", "wirst", "wo", "wollen", "wollte", "während", "würde", "würden", "zu", "zum", "zur", "zwar", "zwischen", "über", ]; elasticlunr-rs-3.0.2/src/lang/du.rs000064400000000000000000000032130072674642500153120ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Dutch {} impl Dutch { pub fn new() -> Self { Self {} } } impl Language for Dutch { fn name(&self) -> String { "Dutch".into() } fn code(&self) -> String { "du".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ 
Box::new(RegexTrimmer::new("trimmer-du", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-du", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-du", Algorithm::Dutch)), ], } } } const STOP_WORDS: &[&str] = &[ "", "aan", "al", "alles", "als", "altijd", "andere", "ben", "bij", "daar", "dan", "dat", "de", "der", "deze", "die", "dit", "doch", "doen", "door", "dus", "een", "eens", "en", "er", "ge", "geen", "geweest", "haar", "had", "heb", "hebben", "heeft", "hem", "het", "hier", "hij", "hoe", "hun", "iemand", "iets", "ik", "in", "is", "ja", "je", "kan", "kon", "kunnen", "maar", "me", "meer", "men", "met", "mij", "mijn", "moet", "na", "naar", "niet", "niets", "nog", "nu", "of", "om", "omdat", "onder", "ons", "ook", "op", "over", "reeds", "te", "tegen", "toch", "toen", "tot", "u", "uit", "uw", "van", "veel", "voor", "want", "waren", "was", "wat", "werd", "wezen", "wie", "wil", "worden", "wordt", "zal", "ze", "zelf", "zich", "zij", "zijn", "zo", "zonder", "zou", ]; elasticlunr-rs-3.0.2/src/lang/en.rs000064400000000000000000000324100072674642500153050ustar 00000000000000use super::{common::StopWordFilter, Language}; use crate::pipeline::{FnWrapper, Pipeline, PipelineFn}; use regex::Regex; const WORDS: &[&str] = &[ "", "a", "able", "about", "across", "after", "all", "almost", "also", "am", "among", "an", "and", "any", "are", "as", "at", "be", "because", "been", "but", "by", "can", "cannot", "could", "dear", "did", "do", "does", "either", "else", "ever", "every", "for", "from", "get", "got", "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however", "i", "if", "in", "into", "is", "it", "its", "just", "least", "let", "like", "likely", "may", "me", "might", "most", "must", "my", "neither", "no", "nor", "not", "of", "off", "often", "on", "only", "or", "other", "our", "own", "rather", "said", "say", "says", "she", "should", "since", "so", "some", "than", "that", "the", "their", "them", "then", "there", "these", "they", "this", "tis", "to", "too", 
"twas", "us", "wants", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "would", "yet", "you", "your", ]; #[derive(Clone)] pub struct English { stemmer: Stemmer, } impl English { pub fn new() -> Self { let stemmer = Stemmer::new(); Self { stemmer } } } impl Language for English { fn name(&self) -> String { "English".into() } fn code(&self) -> String { "en".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(FnWrapper("trimmer".into(), trimmer)), Box::new(StopWordFilter::new("stopWordFilter", WORDS)), Box::new(self.stemmer.clone()), ], } } } fn trimmer(token: String) -> Option { Some( token .trim_matches(|c: char| !c.is_digit(36) && c != '_') .into(), ) } static STEP_2: &[(&str, &str)] = &[ ("ational", "ate"), ("tional", "tion"), ("enci", "ence"), ("anci", "ance"), ("izer", "ize"), ("bli", "ble"), ("alli", "al"), ("entli", "ent"), ("eli", "e"), ("ousli", "ous"), ("ization", "ize"), ("ation", "ate"), ("ator", "ate"), ("alism", "al"), ("iveness", "ive"), ("fulness", "ful"), ("ousness", "ous"), ("aliti", "al"), ("iviti", "ive"), ("biliti", "ble"), ("logi", "log"), ]; static STEP_3: &[(&str, &str)] = &[ ("icate", "ic"), ("ative", ""), ("alize", "al"), ("iciti", "ic"), ("ical", "ic"), ("ful", ""), ("ness", ""), ]; // This is a direct port of the stemmer from elasticlunr.js // It's not very efficient and very not-rusty, but it // generates identical output. #[derive(Clone)] struct Stemmer { re_mgr0: Regex, re_mgr1: Regex, re_meq1: Regex, re_s_v: Regex, re_1a: Regex, re2_1a: Regex, re_1b: Regex, re2_1b: Regex, re2_1b_2: Regex, re3_1b_2: Regex, re4_1b_2: Regex, re_1c: Regex, re_2: Regex, re_3: Regex, re_4: Regex, re2_4: Regex, re_5: Regex, re3_5: Regex, } impl PipelineFn for Stemmer { fn name(&self) -> String { "stemmer".into() } fn filter(&self, token: String) -> Option { Some(self.stem(token)) } } // vowel macro_rules! 
V { () => { "[aeiouy]" }; } // consonant sequence macro_rules! CS { () => { "[^aeiou][^aeiouy]*" }; } // vowel sequence macro_rules! VS { () => { "[aeiouy][aeiou]*" }; } #[inline] fn concat_string(strs: &[&str]) -> String { strs.iter().cloned().collect() } impl Stemmer { fn new() -> Self { let mgr0 = concat!("^(", CS!(), ")?", VS!(), CS!()); let meq1 = concat!("^(", CS!(), ")?", VS!(), CS!(), "(", VS!(), ")?$"); let mgr1 = concat!("^(", CS!(), ")?", VS!(), CS!(), VS!(), CS!()); let s_v = concat!("^(", CS!(), ")?", V!()); let re_mgr0 = Regex::new(mgr0).unwrap(); let re_mgr1 = Regex::new(mgr1).unwrap(); let re_meq1 = Regex::new(meq1).unwrap(); let re_s_v = Regex::new(s_v).unwrap(); let re_1a = Regex::new("^(.+?)(ss|i)es$").unwrap(); let re2_1a = Regex::new("^(.+?)([^s])s$").unwrap(); let re_1b = Regex::new("^(.+?)eed$").unwrap(); let re2_1b = Regex::new("^(.+?)(ed|ing)$").unwrap(); let re2_1b_2 = Regex::new("(at|bl|iz)$").unwrap(); let re3_1b_2 = Regex::new("([^aeiouylsz]{2})$").unwrap(); let re4_1b_2 = Regex::new(concat!("^", CS!(), V!(), "[^aeiouwxy]$")).unwrap(); let re_1c = Regex::new("^(.+?[^aeiou])y$").unwrap(); let re_2 = Regex::new( "^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|\ ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$", ) .unwrap(); let re_3 = Regex::new("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$").unwrap(); let re_4 = Regex::new( "^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$", ) .unwrap(); let re2_4 = Regex::new("^(.+?)(s|t)(ion)$").unwrap(); let re_5 = Regex::new("^(.+?)e$").unwrap(); let re3_5 = Regex::new(concat!("^", CS!(), V!(), "[^aeiouwxy]$")).unwrap(); Stemmer { re_mgr0, re_mgr1, re_meq1, re_s_v, re_1a, re2_1a, re_1b, re2_1b, re2_1b_2, re3_1b_2, re4_1b_2, re_1c, re_2, re_3, re_4, re2_4, re_5, re3_5, } } /// Implements the Porter stemming algorithm pub fn stem(&self, mut w: String) -> String { if w.len() < 3 { return w; } let starts_with_y = 
w.as_bytes()[0] == b'y'; if starts_with_y { w.remove(0); w.insert(0, 'Y'); } // TODO: There's probably a better way to handle the // borrowchecker than cloning w a million times // Step 1a if let Some(caps) = self.re_1a.captures(&w.clone()) { w = concat_string(&[&caps[1], &caps[2]]); } if let Some(caps) = self.re2_1a.captures(&w.clone()) { w = concat_string(&[&caps[1], &caps[2]]); } // Step 1b if let Some(caps) = self.re_1b.captures(&w.clone()) { let stem = &caps[1]; if self.re_mgr0.is_match(stem) { w.pop(); } } else if let Some(caps) = self.re2_1b.captures(&w.clone()) { let stem = &caps[1]; if self.re_s_v.is_match(stem) { w = stem.into(); let mut re3_1b_2_matched = false; if self.re2_1b_2.is_match(&w) { w.push('e'); } else if let Some(m) = self.re3_1b_2.find(&w.clone()) { let mut suffix = m.as_str().chars(); // Make sure the two characters are the same since we can't use backreferences if suffix.next() == suffix.next() { re3_1b_2_matched = true; w.pop(); } } // re4_1b_2 still runs if re3_1b_2 matches but // the matched chcaracters are not the same if !re3_1b_2_matched && self.re4_1b_2.is_match(&w) { w.push('e'); } } } // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first // letter of the word (so cry -> cri, by -> by, say -> say) if let Some(caps) = self.re_1c.captures(&w.clone()) { let stem = &caps[1]; w = concat_string(&[stem, "i"]); } // Step 2 if let Some(caps) = self.re_2.captures(&w.clone()) { let stem = &caps[1]; let suffix = &caps[2]; if self.re_mgr0.is_match(stem) { w = concat_string(&[stem, STEP_2.iter().find(|&&(k, _)| k == suffix).unwrap().1]); } } // Step 3 if let Some(caps) = self.re_3.captures(&w.clone()) { let stem = &caps[1]; let suffix = &caps[2]; if self.re_mgr0.is_match(stem) { w = concat_string(&[stem, STEP_3.iter().find(|&&(k, _)| k == suffix).unwrap().1]); } } // Step 4 if let Some(caps) = self.re_4.captures(&w.clone()) { let stem = &caps[1]; if self.re_mgr1.is_match(stem) { w = stem.into(); } } else if 
let Some(caps) = self.re2_4.captures(&w.clone()) { let stem = concat_string(&[&caps[1], &caps[2]]); if self.re_mgr1.is_match(&stem) { w = stem; } } // Step 5 if let Some(caps) = self.re_5.captures(&w.clone()) { let stem = &caps[1]; if self.re_mgr1.is_match(stem) || (self.re_meq1.is_match(stem) && !(self.re3_5.is_match(stem))) { w = stem.into(); } } if w.ends_with("ll") && self.re_mgr1.is_match(&w) { w.pop(); } // replace the original 'y' if starts_with_y { w.remove(0); w.insert(0, 'y'); } w } } #[cfg(test)] mod tests { use super::*; macro_rules! pipeline_eq { ($func:expr, $input:expr, $output:expr) => { assert_eq!(&$func($input.to_string()).unwrap(), $output); }; } #[test] fn latin_characters() { pipeline_eq!(trimmer, "hello", "hello"); } #[test] fn removing_punctuation() { pipeline_eq!(trimmer, "hello.", "hello"); pipeline_eq!(trimmer, "it's", "it's"); pipeline_eq!(trimmer, "james'", "james"); pipeline_eq!(trimmer, "stop!", "stop"); pipeline_eq!(trimmer, "first,", "first"); pipeline_eq!(trimmer, "", ""); pipeline_eq!(trimmer, "[tag]", "tag"); pipeline_eq!(trimmer, "[[[tag]]]", "tag"); pipeline_eq!(trimmer, "[[!@#@!hello]]]}}}", "hello"); pipeline_eq!(trimmer, "~!@@@hello***()()()]]", "hello"); } #[test] fn test_stemmer() { let cases = [ ("consign", "consign"), ("consigned", "consign"), ("consigning", "consign"), ("consignment", "consign"), ("consist", "consist"), ("consisted", "consist"), ("consistency", "consist"), ("consistent", "consist"), ("consistently", "consist"), ("consisting", "consist"), ("consists", "consist"), ("consolation", "consol"), ("consolations", "consol"), ("consolatory", "consolatori"), ("console", "consol"), ("consoled", "consol"), ("consoles", "consol"), ("consolidate", "consolid"), ("consolidated", "consolid"), ("consolidating", "consolid"), ("consoling", "consol"), ("consols", "consol"), ("consonant", "conson"), ("consort", "consort"), ("consorted", "consort"), ("consorting", "consort"), ("conspicuous", "conspicu"), ("conspicuously", 
"conspicu"), ("conspiracy", "conspiraci"), ("conspirator", "conspir"), ("conspirators", "conspir"), ("conspire", "conspir"), ("conspired", "conspir"), ("conspiring", "conspir"), ("constable", "constabl"), ("constables", "constabl"), ("constance", "constanc"), ("constancy", "constanc"), ("constant", "constant"), ("knack", "knack"), ("knackeries", "knackeri"), ("knacks", "knack"), ("knag", "knag"), ("knave", "knave"), ("knaves", "knave"), ("knavish", "knavish"), ("kneaded", "knead"), ("kneading", "knead"), ("knee", "knee"), ("kneel", "kneel"), ("kneeled", "kneel"), ("kneeling", "kneel"), ("kneels", "kneel"), ("knees", "knee"), ("knell", "knell"), ("knelt", "knelt"), ("knew", "knew"), ("knick", "knick"), ("knif", "knif"), ("knife", "knife"), ("knight", "knight"), ("knights", "knight"), ("knit", "knit"), ("knits", "knit"), ("knitted", "knit"), ("knitting", "knit"), ("knives", "knive"), ("knob", "knob"), ("knobs", "knob"), ("knock", "knock"), ("knocked", "knock"), ("knocker", "knocker"), ("knockers", "knocker"), ("knocking", "knock"), ("knocks", "knock"), ("knopp", "knopp"), ("knot", "knot"), ("knots", "knot"), ("lay", "lay"), ("try", "tri"), ]; let stemmer = Stemmer::new(); for &(input, output) in cases.iter() { assert_eq!(&stemmer.stem(input.into()), output); } } } elasticlunr-rs-3.0.2/src/lang/es.rs000064400000000000000000000120770072674642500153210ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Spanish {} impl Spanish { pub fn new() -> Self { Self {} } } impl Language for Spanish { fn name(&self) -> String { "Spanish".into() } fn code(&self) -> String { "es".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-es", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-es", STOP_WORDS)), 
Box::new(RustStemmer::new("stemmer-es", Algorithm::Spanish)), ], } } } const STOP_WORDS: &[&str] = &[ "", "a", "al", "algo", "algunas", "algunos", "ante", "antes", "como", "con", "contra", "cual", "cuando", "de", "del", "desde", "donde", "durante", "e", "el", "ella", "ellas", "ellos", "en", "entre", "era", "erais", "eran", "eras", "eres", "es", "esa", "esas", "ese", "eso", "esos", "esta", "estaba", "estabais", "estaban", "estabas", "estad", "estada", "estadas", "estado", "estados", "estamos", "estando", "estar", "estaremos", "estará", "estarán", "estarás", "estaré", "estaréis", "estaría", "estaríais", "estaríamos", "estarían", "estarías", "estas", "este", "estemos", "esto", "estos", "estoy", "estuve", "estuviera", "estuvierais", "estuvieran", "estuvieras", "estuvieron", "estuviese", "estuvieseis", "estuviesen", "estuvieses", "estuvimos", "estuviste", "estuvisteis", "estuviéramos", "estuviésemos", "estuvo", "está", "estábamos", "estáis", "están", "estás", "esté", "estéis", "estén", "estés", "fue", "fuera", "fuerais", "fueran", "fueras", "fueron", "fuese", "fueseis", "fuesen", "fueses", "fui", "fuimos", "fuiste", "fuisteis", "fuéramos", "fuésemos", "ha", "habida", "habidas", "habido", "habidos", "habiendo", "habremos", "habrá", "habrán", "habrás", "habré", "habréis", "habría", "habríais", "habríamos", "habrían", "habrías", "habéis", "había", "habíais", "habíamos", "habían", "habías", "han", "has", "hasta", "hay", "haya", "hayamos", "hayan", "hayas", "hayáis", "he", "hemos", "hube", "hubiera", "hubierais", "hubieran", "hubieras", "hubieron", "hubiese", "hubieseis", "hubiesen", "hubieses", "hubimos", "hubiste", "hubisteis", "hubiéramos", "hubiésemos", "hubo", "la", "las", "le", "les", "lo", "los", "me", "mi", "mis", "mucho", "muchos", "muy", "más", "mí", "mía", "mías", "mío", "míos", "nada", "ni", "no", "nos", "nosotras", "nosotros", "nuestra", "nuestras", "nuestro", "nuestros", "o", "os", "otra", "otras", "otro", "otros", "para", "pero", "poco", "por", "porque", 
"que", "quien", "quienes", "qué", "se", "sea", "seamos", "sean", "seas", "seremos", "será", "serán", "serás", "seré", "seréis", "sería", "seríais", "seríamos", "serían", "serías", "seáis", "sido", "siendo", "sin", "sobre", "sois", "somos", "son", "soy", "su", "sus", "suya", "suyas", "suyo", "suyos", "sí", "también", "tanto", "te", "tendremos", "tendrá", "tendrán", "tendrás", "tendré", "tendréis", "tendría", "tendríais", "tendríamos", "tendrían", "tendrías", "tened", "tenemos", "tenga", "tengamos", "tengan", "tengas", "tengo", "tengáis", "tenida", "tenidas", "tenido", "tenidos", "teniendo", "tenéis", "tenía", "teníais", "teníamos", "tenían", "tenías", "ti", "tiene", "tienen", "tienes", "todo", "todos", "tu", "tus", "tuve", "tuviera", "tuvierais", "tuvieran", "tuvieras", "tuvieron", "tuviese", "tuvieseis", "tuviesen", "tuvieses", "tuvimos", "tuviste", "tuvisteis", "tuviéramos", "tuviésemos", "tuvo", "tuya", "tuyas", "tuyo", "tuyos", "tú", "un", "una", "uno", "unos", "vosotras", "vosotros", "vuestra", "vuestras", "vuestro", "vuestros", "y", "ya", "yo", "él", "éramos", ]; elasticlunr-rs-3.0.2/src/lang/fi.rs000064400000000000000000000100310072674642500152740ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Finnish {} impl Finnish { pub fn new() -> Self { Self {} } } impl Language for Finnish { fn name(&self) -> String { "Finnish".into() } fn code(&self) -> String { "fi".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-fi", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-fi", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-fi", Algorithm::Finnish)), ], } } } const STOP_WORDS: &[&str] = &[ "", "ei", "eivät", "emme", "en", "et", "ette", "että", "he", "heidän", "heidät", "heihin", "heille", 
"heillä", "heiltä", "heissä", "heistä", "heitä", "hän", "häneen", "hänelle", "hänellä", "häneltä", "hänen", "hänessä", "hänestä", "hänet", "häntä", "itse", "ja", "johon", "joiden", "joihin", "joiksi", "joilla", "joille", "joilta", "joina", "joissa", "joista", "joita", "joka", "joksi", "jolla", "jolle", "jolta", "jona", "jonka", "jos", "jossa", "josta", "jota", "jotka", "kanssa", "keiden", "keihin", "keiksi", "keille", "keillä", "keiltä", "keinä", "keissä", "keistä", "keitä", "keneen", "keneksi", "kenelle", "kenellä", "keneltä", "kenen", "kenenä", "kenessä", "kenestä", "kenet", "ketkä", "ketkä", "ketä", "koska", "kuin", "kuka", "kun", "me", "meidän", "meidät", "meihin", "meille", "meillä", "meiltä", "meissä", "meistä", "meitä", "mihin", "miksi", "mikä", "mille", "millä", "miltä", "minkä", "minkä", "minua", "minulla", "minulle", "minulta", "minun", "minussa", "minusta", "minut", "minuun", "minä", "minä", "missä", "mistä", "mitkä", "mitä", "mukaan", "mutta", "ne", "niiden", "niihin", "niiksi", "niille", "niillä", "niiltä", "niin", "niin", "niinä", "niissä", "niistä", "niitä", "noiden", "noihin", "noiksi", "noilla", "noille", "noilta", "noin", "noina", "noissa", "noista", "noita", "nuo", "nyt", "näiden", "näihin", "näiksi", "näille", "näillä", "näiltä", "näinä", "näissä", "näistä", "näitä", "nämä", "ole", "olemme", "olen", "olet", "olette", "oli", "olimme", "olin", "olisi", "olisimme", "olisin", "olisit", "olisitte", "olisivat", "olit", "olitte", "olivat", "olla", "olleet", "ollut", "on", "ovat", "poikki", "se", "sekä", "sen", "siihen", "siinä", "siitä", "siksi", "sille", "sillä", "sillä", "siltä", "sinua", "sinulla", "sinulle", "sinulta", "sinun", "sinussa", "sinusta", "sinut", "sinuun", "sinä", "sinä", "sitä", "tai", "te", "teidän", "teidät", "teihin", "teille", "teillä", "teiltä", "teissä", "teistä", "teitä", "tuo", "tuohon", "tuoksi", "tuolla", "tuolle", "tuolta", "tuon", "tuona", "tuossa", "tuosta", "tuota", "tähän", "täksi", "tälle", "tällä", "tältä", "tämä", 
"tämän", "tänä", "tässä", "tästä", "tätä", "vaan", "vai", "vaikka", "yli", ]; elasticlunr-rs-3.0.2/src/lang/fr.rs000064400000000000000000000043350072674642500153170ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct French {} impl French { pub fn new() -> Self { Self {} } } impl Language for French { fn name(&self) -> String { "French".into() } fn code(&self) -> String { "fr".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-fr", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-fr", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-fr", Algorithm::French)), ], } } } const STOP_WORDS: &[&str] = &[ "", "ai", "aie", "aient", "aies", "ait", "as", "au", "aura", "aurai", "auraient", "aurais", "aurait", "auras", "aurez", "auriez", "aurions", "aurons", "auront", "aux", "avaient", "avais", "avait", "avec", "avez", "aviez", "avions", "avons", "ayant", "ayez", "ayons", "c", "ce", "ceci", "celà", "ces", "cet", "cette", "d", "dans", "de", "des", "du", "elle", "en", "es", "est", "et", "eu", "eue", "eues", "eurent", "eus", "eusse", "eussent", "eusses", "eussiez", "eussions", "eut", "eux", "eûmes", "eût", "eûtes", "furent", "fus", "fusse", "fussent", "fusses", "fussiez", "fussions", "fut", "fûmes", "fût", "fûtes", "ici", "il", "ils", "j", "je", "l", "la", "le", "les", "leur", "leurs", "lui", "m", "ma", "mais", "me", "mes", "moi", "mon", "même", "n", "ne", "nos", "notre", "nous", "on", "ont", "ou", "par", "pas", "pour", "qu", "que", "quel", "quelle", "quelles", "quels", "qui", "s", "sa", "sans", "se", "sera", "serai", "seraient", "serais", "serait", "seras", "serez", "seriez", "serions", "serons", "seront", "ses", "soi", "soient", "sois", "soit", "sommes", "son", "sont", "soyez", "soyons", "suis", "sur", 
"t", "ta", "te", "tes", "toi", "ton", "tu", "un", "une", "vos", "votre", "vous", "y", "à", "étaient", "étais", "était", "étant", "étiez", "étions", "été", "étée", "étées", "étés", "êtes", ]; elasticlunr-rs-3.0.2/src/lang/hu.rs000064400000000000000000000300310072674642500153140ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Hungarian {} impl Hungarian { pub fn new() -> Self { Self {} } } impl Language for Hungarian { fn name(&self) -> String { "Hungarian".into() } fn code(&self) -> String { "hu".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-hu", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-hu", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-hu", Algorithm::Hungarian)), ], } } } const STOP_WORDS: &[&str] = &[ "", "a", "abba", "abban", "abból", "addig", "ahhoz", "ahogy", "ahol", "aki", "akik", "akkor", "akár", "alapján", "alatt", "alatta", "alattad", "alattam", "alattatok", "alattuk", "alattunk", "alá", "alád", "alájuk", "alám", "alánk", "alátok", "alól", "alóla", "alólad", "alólam", "alólatok", "alóluk", "alólunk", "amely", "amelybol", "amelyek", "amelyekben", "amelyeket", "amelyet", "amelyik", "amelynek", "ami", "amikor", "amit", "amolyan", "amott", "amíg", "annak", "annál", "arra", "arról", "attól", "az", "aznap", "azok", "azokat", "azokba", "azokban", "azokból", "azokhoz", "azokig", "azokkal", "azokká", "azoknak", "azoknál", "azokon", "azokra", "azokról", "azoktól", "azokért", "azon", "azonban", "azonnal", "azt", "aztán", "azután", "azzal", "azzá", "azért", "bal", "balra", "ban", "be", "belé", "beléd", "beléjük", "belém", "belénk", "belétek", "belül", "belőle", "belőled", "belőlem", "belőletek", "belőlük", "belőlünk", "ben", "benne", "benned", "bennem", "bennetek", "bennük", 
"bennünk", "bár", "bárcsak", "bármilyen", "búcsú", "cikk", "cikkek", "cikkeket", "csak", "csakhogy", "csupán", "de", "dehogy", "e", "ebbe", "ebben", "ebből", "eddig", "egy", "egyebek", "egyebet", "egyedül", "egyelőre", "egyes", "egyet", "egyetlen", "egyik", "egymás", "egyre", "egyszerre", "egyéb", "együtt", "egész", "egészen", "ehhez", "ekkor", "el", "eleinte", "ellen", "ellenes", "elleni", "ellenére", "elmondta", "elsõ", "első", "elsők", "elsősorban", "elsőt", "elé", "eléd", "elég", "eléjük", "elém", "elénk", "elétek", "elõ", "elõször", "elõtt", "elő", "előbb", "elől", "előle", "előled", "előlem", "előletek", "előlük", "előlünk", "először", "előtt", "előtte", "előtted", "előttem", "előttetek", "előttük", "előttünk", "előző", "emilyen", "engem", "ennek", "ennyi", "ennél", "enyém", "erre", "erről", "esetben", "ettől", "ez", "ezek", "ezekbe", "ezekben", "ezekből", "ezeken", "ezeket", "ezekhez", "ezekig", "ezekkel", "ezekké", "ezeknek", "ezeknél", "ezekre", "ezekről", "ezektől", "ezekért", "ezen", "ezentúl", "ezer", "ezret", "ezt", "ezután", "ezzel", "ezzé", "ezért", "fel", "fele", "felek", "felet", "felett", "felé", "fent", "fenti", "fél", "fölé", "gyakran", "ha", "halló", "hamar", "hanem", "harmadik", "harmadikat", "harminc", "hat", "hatodik", "hatodikat", "hatot", "hatvan", "helyett", "hetedik", "hetediket", "hetet", "hetven", "hirtelen", "hiszen", "hiába", "hogy", "hogyan", "hol", "holnap", "holnapot", "honnan", "hova", "hozzá", "hozzád", "hozzájuk", "hozzám", "hozzánk", "hozzátok", "hurrá", "huszadik", "hány", "hányszor", "hármat", "három", "hát", "hátha", "hátulsó", "hét", "húsz", "ide", "ide-оda", "idén", "igazán", "igen", "ill", "ill.", "illetve", "ilyen", "ilyenkor", "immár", "inkább", "is", "ismét", "ison", "itt", "jelenleg", "jobban", "jobbra", "jó", "jól", "jólesik", "jóval", "jövőre", "kell", "kellene", "kellett", "kelljen", "keressünk", "keresztül", "ketten", "kettő", "kettőt", "kevés", "ki", "kiben", "kiből", "kicsit", "kicsoda", "kihez", "kik", 
"kikbe", "kikben", "kikből", "kiken", "kiket", "kikhez", "kikkel", "kikké", "kiknek", "kiknél", "kikre", "kikről", "kiktől", "kikért", "kilenc", "kilencedik", "kilencediket", "kilencet", "kilencven", "kin", "kinek", "kinél", "kire", "kiről", "kit", "kitől", "kivel", "kivé", "kié", "kiért", "korábban", "képest", "kérem", "kérlek", "kész", "késő", "később", "későn", "két", "kétszer", "kívül", "körül", "köszönhetően", "köszönöm", "közben", "közel", "közepesen", "közepén", "közé", "között", "közül", "külön", "különben", "különböző", "különbözőbb", "különbözőek", "lassan", "le", "legalább", "legyen", "lehet", "lehetetlen", "lehetett", "lehetőleg", "lehetőség", "lenne", "lenni", "lennék", "lennének", "lesz", "leszek", "lesznek", "leszünk", "lett", "lettek", "lettem", "lettünk", "lévő", "ma", "maga", "magad", "magam", "magatokat", "magukat", "magunkat", "magát", "mai", "majd", "majdnem", "manapság", "meg", "megcsinál", "megcsinálnak", "megint", "megvan", "mellett", "mellette", "melletted", "mellettem", "mellettetek", "mellettük", "mellettünk", "mellé", "melléd", "melléjük", "mellém", "mellénk", "mellétek", "mellől", "mellőle", "mellőled", "mellőlem", "mellőletek", "mellőlük", "mellőlünk", "mely", "melyek", "melyik", "mennyi", "mert", "mi", "miatt", "miatta", "miattad", "miattam", "miattatok", "miattuk", "miattunk", "mibe", "miben", "miből", "mihez", "mik", "mikbe", "mikben", "mikből", "miken", "miket", "mikhez", "mikkel", "mikké", "miknek", "miknél", "mikor", "mikre", "mikről", "miktől", "mikért", "milyen", "min", "mind", "mindegyik", "mindegyiket", "minden", "mindenesetre", "mindenki", "mindent", "mindenütt", "mindig", "mindketten", "minek", "minket", "mint", "mintha", "minél", "mire", "miről", "mit", "mitől", "mivel", "mivé", "miért", "mondta", "most", "mostanáig", "már", "más", "másik", "másikat", "másnap", "második", "másodszor", "mások", "másokat", "mást", "még", "mégis", "míg", "mögé", "mögéd", "mögéjük", "mögém", "mögénk", "mögétek", "mögött", "mögötte", 
"mögötted", "mögöttem", "mögöttetek", "mögöttük", "mögöttünk", "mögül", "mögüle", "mögüled", "mögülem", "mögületek", "mögülük", "mögülünk", "múltkor", "múlva", "na", "nagy", "nagyobb", "nagyon", "naponta", "napot", "ne", "negyedik", "negyediket", "negyven", "neked", "nekem", "neki", "nekik", "nektek", "nekünk", "nem", "nemcsak", "nemrég", "nincs", "nyolc", "nyolcadik", "nyolcadikat", "nyolcat", "nyolcvan", "nála", "nálad", "nálam", "nálatok", "náluk", "nálunk", "négy", "négyet", "néha", "néhány", "nélkül", "o", "oda", "ok", "olyan", "onnan", "ott", "pedig", "persze", "pár", "például", "rajta", "rajtad", "rajtam", "rajtatok", "rajtuk", "rajtunk", "rendben", "rosszul", "rá", "rád", "rájuk", "rám", "ránk", "rátok", "régen", "régóta", "részére", "róla", "rólad", "rólam", "rólatok", "róluk", "rólunk", "rögtön", "s", "saját", "se", "sem", "semmi", "semmilyen", "semmiség", "senki", "soha", "sok", "sokan", "sokat", "sokkal", "sokszor", "sokáig", "során", "stb.", "szemben", "szerbusz", "szerint", "szerinte", "szerinted", "szerintem", "szerintetek", "szerintük", "szerintünk", "szervusz", "szinte", "számára", "száz", "századik", "százat", "szépen", "szét", "szíves", "szívesen", "szíveskedjék", "sőt", "talán", "tavaly", "te", "tegnap", "tegnapelőtt", "tehát", "tele", "teljes", "tessék", "ti", "tied", "titeket", "tizedik", "tizediket", "tizenegy", "tizenegyedik", "tizenhat", "tizenhárom", "tizenhét", "tizenkettedik", "tizenkettő", "tizenkilenc", "tizenkét", "tizennyolc", "tizennégy", "tizenöt", "tizet", "tovább", "további", "továbbá", "távol", "téged", "tényleg", "tíz", "több", "többi", "többször", "túl", "tőle", "tőled", "tőlem", "tőletek", "tőlük", "tőlünk", "ugyanakkor", "ugyanez", "ugyanis", "ugye", "urak", "uram", "urat", "utoljára", "utolsó", "után", "utána", "vagy", "vagyis", "vagyok", "vagytok", "vagyunk", "vajon", "valahol", "valaki", "valakit", "valamelyik", "valami", "valamint", "való", "van", "vannak", "vele", "veled", "velem", "veletek", "velük", "velünk", 
"vissza", "viszlát", "viszont", "viszontlátásra", "volna", "volnának", "volnék", "volt", "voltak", "voltam", "voltunk", "végre", "végén", "végül", "által", "általában", "ám", "át", "éljen", "én", "éppen", "érte", "érted", "értem", "értetek", "értük", "értünk", "és", "év", "évben", "éve", "évek", "éves", "évi", "évvel", "így", "óta", "õ", "õk", "õket", "ön", "önbe", "önben", "önből", "önhöz", "önnek", "önnel", "önnél", "önre", "önről", "önt", "öntől", "önért", "önök", "önökbe", "önökben", "önökből", "önöket", "önökhöz", "önökkel", "önöknek", "önöknél", "önökre", "önökről", "önöktől", "önökért", "önökön", "önön", "össze", "öt", "ötven", "ötödik", "ötödiket", "ötöt", "úgy", "úgyis", "úgynevezett", "új", "újabb", "újra", "úr", "ő", "ők", "őket", "őt", ]; elasticlunr-rs-3.0.2/src/lang/it.rs000064400000000000000000000106300072674642500153170ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Italian {} impl Italian { pub fn new() -> Self { Self {} } } impl Language for Italian { fn name(&self) -> String { "Italian".into() } fn code(&self) -> String { "it".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-it", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-it", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-it", Algorithm::Italian)), ], } } } const STOP_WORDS: &[&str] = &[ "", "a", "abbia", "abbiamo", "abbiano", "abbiate", "ad", "agl", "agli", "ai", "al", "all", "alla", "alle", "allo", "anche", "avemmo", "avendo", "avesse", "avessero", "avessi", "avessimo", "aveste", "avesti", "avete", "aveva", "avevamo", "avevano", "avevate", "avevi", "avevo", "avrai", "avranno", "avrebbe", "avrebbero", "avrei", "avremmo", "avremo", "avreste", "avresti", "avrete", "avrà", "avrò", "avuta", "avute", "avuti", 
"avuto", "c", "che", "chi", "ci", "coi", "col", "come", "con", "contro", "cui", "da", "dagl", "dagli", "dai", "dal", "dall", "dalla", "dalle", "dallo", "degl", "degli", "dei", "del", "dell", "della", "delle", "dello", "di", "dov", "dove", "e", "ebbe", "ebbero", "ebbi", "ed", "era", "erano", "eravamo", "eravate", "eri", "ero", "essendo", "faccia", "facciamo", "facciano", "facciate", "faccio", "facemmo", "facendo", "facesse", "facessero", "facessi", "facessimo", "faceste", "facesti", "faceva", "facevamo", "facevano", "facevate", "facevi", "facevo", "fai", "fanno", "farai", "faranno", "farebbe", "farebbero", "farei", "faremmo", "faremo", "fareste", "faresti", "farete", "farà", "farò", "fece", "fecero", "feci", "fosse", "fossero", "fossi", "fossimo", "foste", "fosti", "fu", "fui", "fummo", "furono", "gli", "ha", "hai", "hanno", "ho", "i", "il", "in", "io", "l", "la", "le", "lei", "li", "lo", "loro", "lui", "ma", "mi", "mia", "mie", "miei", "mio", "ne", "negl", "negli", "nei", "nel", "nell", "nella", "nelle", "nello", "noi", "non", "nostra", "nostre", "nostri", "nostro", "o", "per", "perché", "più", "quale", "quanta", "quante", "quanti", "quanto", "quella", "quelle", "quelli", "quello", "questa", "queste", "questi", "questo", "sarai", "saranno", "sarebbe", "sarebbero", "sarei", "saremmo", "saremo", "sareste", "saresti", "sarete", "sarà", "sarò", "se", "sei", "si", "sia", "siamo", "siano", "siate", "siete", "sono", "sta", "stai", "stando", "stanno", "starai", "staranno", "starebbe", "starebbero", "starei", "staremmo", "staremo", "stareste", "staresti", "starete", "starà", "starò", "stava", "stavamo", "stavano", "stavate", "stavi", "stavo", "stemmo", "stesse", "stessero", "stessi", "stessimo", "steste", "stesti", "stette", "stettero", "stetti", "stia", "stiamo", "stiano", "stiate", "sto", "su", "sua", "sue", "sugl", "sugli", "sui", "sul", "sull", "sulla", "sulle", "sullo", "suo", "suoi", "ti", "tra", "tu", "tua", "tue", "tuo", "tuoi", "tutti", "tutto", "un", "una", "uno", 
"vi", "voi", "vostra", "vostre", "vostri", "vostro", "è", ]; elasticlunr-rs-3.0.2/src/lang/ja.rs000064400000000000000000000040040072674642500152730ustar 00000000000000use super::{common::RegexTrimmer, Language}; use crate::pipeline::{FnWrapper, Pipeline}; use lindera::tokenizer::{Tokenizer, TokenizerConfig}; use lindera_core::viterbi::Mode; #[derive(Clone)] pub struct Japanese { tokenizer: Tokenizer, } impl Japanese { pub fn new() -> Self { let config = TokenizerConfig { mode: Mode::Decompose(Default::default()), ..Default::default() }; Self::with_config(config) } pub fn with_config(config: TokenizerConfig) -> Self { // NB: unwrap() is okay since the errors are only related to user-supplied dictionaries. let tokenizer = Tokenizer::with_config(config).unwrap(); Self { tokenizer } } } impl Language for Japanese { fn name(&self) -> String { "Japanese".into() } fn code(&self) -> String { "ja".into() } fn tokenize(&self, text: &str) -> Vec { self.tokenizer .tokenize(text) .unwrap() .into_iter() .filter_map(|tok| match tok.detail.get(0).map(|d| d.as_str()) { Some("助詞") | Some("助動詞") | Some("記号") | Some("UNK") => None, _ => Some(tok.text.to_string()), }) .collect() } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-ja", WORD_CHARS)), Box::new(FnWrapper("stemmer-ja".into(), stemmer)), ], } } } const WORD_CHARS: &str = r"0-9A-Za-z\p{Hiragana}\p{Katakana}\p{Unified_Ideograph}"; fn stemmer(token: String) -> Option { Some(token) } #[cfg(test)] mod tests { use crate::pipeline::PipelineFn; use super::*; #[test] fn test_trimmer() { let trimmer = RegexTrimmer::new("trimmer-ja".into(), WORD_CHARS); assert_eq!( trimmer.filter(" こんにちは、世界!".to_string()), Some("こんにちは、世界".to_string()) ); } } elasticlunr-rs-3.0.2/src/lang/ko.rs000064400000000000000000000275640072674642500153320ustar 00000000000000use super::{common::{RegexTrimmer, StopWordFilter}, Language}; use crate::pipeline::{FnWrapper, Pipeline}; #[derive(Clone)] pub struct Korean { } 
impl Korean { pub fn new() -> Self { Self { } } } impl Language for Korean { fn name(&self) -> String { "Korean".into() } fn code(&self) -> String { "ko".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-ko", r"\p{Latin}\p{Hangul}")), Box::new(StopWordFilter::new("stopWordFilter-ko", STOP_WORDS)), Box::new(FnWrapper("stemmer-ko".into(), stemmer)), ], } } } fn stemmer(token: String) -> Option { Some(token) } // Korean stop words came from https://github.com/stopwords-iso/stopwords-ko/blob/master/stopwords-ko.txt const STOP_WORDS: &[&str] = &[ "", "가", "가까스로", "가령", "각", "각각", "각자", "각종", "갖고말하자면", "같다", "같이", "개의치않고", "거니와", "거바", "거의", "것", "것과 같이", "것들", "게다가", "게우다", "겨우", "견지에서", "결과에 이르다", "결국", "결론을 낼 수 있다", "겸사겸사", "고려하면", "고로", "곧", "공동으로", "과", "과연", "관계가 있다", "관계없이", "관련이 있다", "관하여", "관한", "관해서는", "구", "구체적으로", "구토하다", "그", "그들", "그때", "그래", "그래도", "그래서", "그러나", "그러니", "그러니까", "그러면", "그러므로", "그러한즉", "그런 까닭에", "그런데", "그런즉", "그럼", "그럼에도 불구하고", "그렇게 함으로써", "그렇지", "그렇지 않다면", "그렇지 않으면", "그렇지만", "그렇지않으면", "그리고", "그리하여", "그만이다", "그에 따르는", "그위에", "그저", "그중에서", "그치지 않다", "근거로", "근거하여", "기대여", "기점으로", "기준으로", "기타", "까닭으로", "까악", "까지", "까지 미치다", "까지도", "꽈당", "끙끙", "끼익", "나", "나머지는", "남들", "남짓", "너", "너희", "너희들", "네", "넷", "년", "논하지 않다", "놀라다", "누가 알겠는가", "누구", "다른", "다른 방면으로", "다만", "다섯", "다소", "다수", "다시 말하자면", "다시말하면", "다음", "다음에", "다음으로", "단지", "답다", "당신", "당장", "대로 하다", "대하면", "대하여", "대해 말하자면", "대해서", "댕그", "더구나", "더군다나", "더라도", "더불어", "더욱더", "더욱이는", "도달하다", "도착하다", "동시에", "동안", "된바에야", "된이상", "두번째로", "둘", "둥둥", "뒤따라", "뒤이어", "든간에", "들", "등", "등등", "딩동", "따라", "따라서", "따위", "따지지 않다", "딱", "때", "때가 되어", "때문에", "또", "또한", "뚝뚝", "라 해도", "령", "로", "로 인하여", "로부터", "로써", "륙", "를", "마음대로", "마저", "마저도", "마치", "막론하고", "만 못하다", "만약", "만약에", "만은 아니다", "만이 아니다", "만일", "만큼", "말하자면", "말할것도 없고", "매", "매번", "메쓰겁다", "몇", "모", "모두", "무렵", "무릎쓰고", "무슨", 
"무엇", "무엇때문에", "물론", "및", "바꾸어말하면", "바꾸어말하자면", "바꾸어서 말하면", "바꾸어서 한다면", "바꿔 말하면", "바로", "바와같이", "밖에 안된다", "반대로", "반대로 말하자면", "반드시", "버금", "보는데서", "보다더", "보드득", "본대로", "봐", "봐라", "부류의 사람들", "부터", "불구하고", "불문하고", "붕붕", "비걱거리다", "비교적", "비길수 없다", "비로소", "비록", "비슷하다", "비추어 보아", "비하면", "뿐만 아니라", "뿐만아니라", "뿐이다", "삐걱", "삐걱거리다", "사", "삼", "상대적으로 말하자면", "생각한대로", "설령", "설마", "설사", "셋", "소생", "소인", "솨", "쉿", "습니까", "습니다", "시각", "시간", "시작하여", "시초에", "시키다", "실로", "심지어", "아", "아니", "아니나다를가", "아니라면", "아니면", "아니었다면", "아래윗", "아무거나", "아무도", "아야", "아울러", "아이", "아이고", "아이구", "아이야", "아이쿠", "아하", "아홉", "안 그러면", "않기 위하여", "않기 위해서", "알 수 있다", "알았어", "앗", "앞에서", "앞의것", "야", "약간", "양자", "어", "어기여차", "어느", "어느 년도", "어느것", "어느곳", "어느때", "어느쪽", "어느해", "어디", "어때", "어떠한", "어떤", "어떤것", "어떤것들", "어떻게", "어떻해", "어이", "어째서", "어쨋든", "어쩔수 없다", "어찌", "어찌됏든", "어찌됏어", "어찌하든지", "어찌하여", "언제", "언젠가", "얼마", "얼마 안 되는 것", "얼마간", "얼마나", "얼마든지", "얼마만큼", "얼마큼", "엉엉", "에", "에 가서", "에 달려 있다", "에 대해", "에 있다", "에 한하다", "에게", "에서", "여", "여기", "여덟", "여러분", "여보시오", "여부", "여섯", "여전히", "여차", "연관되다", "연이서", "영", "영차", "옆사람", "예", "예를 들면", "예를 들자면", "예컨대", "예하면", "오", "오로지", "오르다", "오자마자", "오직", "오호", "오히려", "와", "와 같은 사람들", "와르르", "와아", "왜", "왜냐하면", "외에도", "요만큼", "요만한 것", "요만한걸", "요컨대", "우르르", "우리", "우리들", "우선", "우에 종합한것과같이", "운운", "월", "위에서 서술한바와같이", "위하여", "위해서", "윙윙", "육", "으로", "으로 인하여", "으로서", "으로써", "을", "응", "응당", "의", "의거하여", "의지하여", "의해", "의해되다", "의해서", "이", "이 되다", "이 때문에", "이 밖에", "이 외에", "이 정도의", "이것", "이곳", "이때", "이라면", "이래", "이러이러하다", "이러한", "이런", "이럴정도로", "이렇게 많은 것", "이렇게되면", "이렇게말하자면", "이렇구나", "이로 인하여", "이르기까지", "이리하여", "이만큼", "이번", "이봐", "이상", "이어서", "이었다", "이와 같다", "이와 같은", "이와 반대로", "이와같다면", "이외에도", "이용하여", "이유만으로", "이젠", "이지만", "이쪽", "이천구", "이천육", "이천칠", "이천팔", "인 듯하다", "인젠", "일", "일것이다", "일곱", "일단", "일때", "일반적으로", "일지라도", "임에 틀림없다", "입각하여", "입장에서", "잇따라", "있다", "자", "자기", "자기집", "자마자", "자신", "잠깐", "잠시", "저", "저것", "저것만큼", "저기", "저쪽", "저희", "전부", "전자", "전후", "점에서 보아", "정도에 이르다", "제", "제각기", "제외하고", "조금", 
"조차", "조차도", "졸졸", "좀", "좋아", "좍좍", "주룩주룩", "주저하지 않고", "줄은 몰랏다", "줄은모른다", "중에서", "중의하나", "즈음하여", "즉", "즉시", "지든지", "지만", "지말고", "진짜로", "쪽으로", "차라리", "참", "참나", "첫번째로", "쳇", "총적으로", "총적으로 말하면", "총적으로 보면", "칠", "콸콸", "쾅쾅", "쿵", "타다", "타인", "탕탕", "토하다", "통하여", "툭", "퉤", "틈타", "팍", "팔", "퍽", "펄렁", "하", "하게될것이다", "하게하다", "하겠는가", "하고 있다", "하고있었다", "하곤하였다", "하구나", "하기 때문에", "하기 위하여", "하기는한데", "하기만 하면", "하기보다는", "하기에", "하나", "하느니", "하는 김에", "하는 편이 낫다", "하는것도", "하는것만 못하다", "하는것이 낫다", "하는바", "하더라도", "하도다", "하도록시키다", "하도록하다", "하든지", "하려고하다", "하마터면", "하면 할수록", "하면된다", "하면서", "하물며", "하여금", "하여야", "하자마자", "하지 않는다면", "하지 않도록", "하지마", "하지마라", "하지만", "하하", "한 까닭에", "한 이유는", "한 후", "한다면", "한다면 몰라도", "한데", "한마디", "한적이있다", "한켠으로는", "한항목", "할 따름이다", "할 생각이다", "할 줄 안다", "할 지경이다", "할 힘이 있다", "할때", "할만하다", "할망정", "할뿐", "할수있다", "할수있어", "할줄알다", "할지라도", "할지언정", "함께", "해도된다", "해도좋다", "해봐요", "해서는 안된다", "해야한다", "해요", "했어요", "향하다", "향하여", "향해서", "허", "허걱", "허허", "헉", "헉헉", "헐떡헐떡", "형식으로 쓰여", "혹시", "혹은", "혼자", "훨씬", "휘익", "휴", "흐흐", "흥", "힘입어", ]; #[cfg(test)] mod tests { use super::*; #[test] fn test_tokenize() { assert_eq!( Korean::new().tokenize(" 한글 사랑 "), vec!["한글", "사랑"] ); } #[test] fn test_stopword() { assert_eq!( Korean::new().make_pipeline().run( vec!["어떤".to_string(), "사람".to_string()]), vec!["사람".to_string()] ); } #[test] fn test_pipeline() { assert_eq!( Korean::new().make_pipeline().run( vec![" 한글 ".to_string(), " 사랑!".to_string()]), vec!["한글".to_string(), "사랑".to_string()] ); } } elasticlunr-rs-3.0.2/src/lang/mod.rs000064400000000000000000000105720072674642500154670ustar 00000000000000//! Intended to be compatible with . Each supported //! language has a trimmer, a stop word filter, and a stemmer. Most users will not need to use //! these modules directly. 
pub mod common; use crate::Pipeline; pub trait Language { /// The name of the language in English fn name(&self) -> String; /// The ISO 639-1 language code of the language fn code(&self) -> String; /// Separates the input text into individual tokens. In most languages a token is a word, separated by whitespace. fn tokenize(&self, text: &str) -> Vec; /// Returns the [`Pipeline`] to process the tokens with fn make_pipeline(&self) -> Pipeline; } /// Splits a text string into a vector of individual tokens. pub fn tokenize_whitespace(text: &str) -> Vec { text.split(|c: char| c.is_whitespace() || c == '-') .filter(|s| !s.is_empty()) .map(|s| s.trim().to_lowercase()) .collect() } macro_rules! impl_language { ($( ( $name:ident, $code:ident $(, #[$cfgs:meta] )? ), )+) => { /// Returns a list of all the [`Language`] implementations in the crate pub fn languages() -> Vec> { vec![ $( $(#[$cfgs])? Box::new($code::$name::new()), )+ ] } /// Returns the [`Language`] for the given two-character [ISO 639-1][iso] language code if the /// language is supported. Returns `None` if not supported. /// /// *Note:* /// /// The ISO 639-1 code for Dutch is "nl". However "du" is used for the module name /// and pipeline suffix in order to match lunr-languages. /// /// [iso]: https://en.wikipedia.org/wiki/ISO_639-1 pub fn from_code(code: &str) -> Option> { match code.to_ascii_lowercase().as_str() { $( $(#[$cfgs])? stringify!($code) => Some(Box::new($code::$name::new())), )+ _ => None, } } /// Returns the [`Language`] for the given English language name if the /// language is supported. Returns `None` if not supported. The first letter must /// be capitalized. pub fn from_name(name: &str) -> Option> { match name { $( $(#[$cfgs])? stringify!($name) => Some(Box::new($code::$name::new())), )+ _ => None, } } $( $(#[$cfgs])? mod $code; $(#[$cfgs])? pub use $code::$name; )+ }; } impl_language! 
{ (English, en), (Arabic, ar, #[cfg(feature = "ar")]), (Chinese, zh, #[cfg(feature = "zh")]), (Danish, da, #[cfg(feature = "da")]), (Dutch, du, #[cfg(feature = "du")]), (Finnish, fi, #[cfg(feature = "fi")]), (French, fr, #[cfg(feature = "fr")]), (German, de, #[cfg(feature = "de")]), (Hungarian, hu, #[cfg(feature = "hu")]), (Italian, it, #[cfg(feature = "it")]), (Japanese, ja, #[cfg(feature = "ja")]), (Korean, ko, #[cfg(feature = "ko")]), (Norwegian, no, #[cfg(feature = "no")]), (Portuguese, pt, #[cfg(feature = "pt")]), (Romanian, ro, #[cfg(feature = "ro")]), (Russian, ru, #[cfg(feature = "ru")]), (Spanish, es, #[cfg(feature = "es")]), (Swedish, sv, #[cfg(feature = "sv")]), (Turkish, tr, #[cfg(feature = "tr")]), } #[cfg(test)] mod tests { use super::tokenize_whitespace; #[test] fn split_simple_strings() { let string = "this is a simple string"; assert_eq!( &tokenize_whitespace(string), &["this", "is", "a", "simple", "string"] ); } #[test] fn multiple_white_space() { let string = " foo bar "; assert_eq!(&tokenize_whitespace(string), &["foo", "bar"]); } #[test] fn hyphens() { let string = "take the New York-San Francisco flight"; assert_eq!( &tokenize_whitespace(string), &["take", "the", "new", "york", "san", "francisco", "flight"] ); } #[test] fn splitting_strings_with_hyphens() { let string = "Solve for A - B"; assert_eq!(&tokenize_whitespace(string), &["solve", "for", "a", "b"]); } } elasticlunr-rs-3.0.2/src/lang/no.rs000064400000000000000000000056560072674642500153330ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Norwegian {} impl Norwegian { pub fn new() -> Self { Self {} } } impl Language for Norwegian { fn name(&self) -> String { "Norwegian".into() } fn code(&self) -> String { "no".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: 
vec![ Box::new(RegexTrimmer::new("trimmer-no", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-no", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-no", Algorithm::Norwegian)), ], } } } const STOP_WORDS: &[&str] = &[ "", "alle", "at", "av", "bare", "begge", "ble", "blei", "bli", "blir", "blitt", "både", "båe", "da", "de", "deg", "dei", "deim", "deira", "deires", "dem", "den", "denne", "der", "dere", "deres", "det", "dette", "di", "din", "disse", "ditt", "du", "dykk", "dykkar", "då", "eg", "ein", "eit", "eitt", "eller", "elles", "en", "enn", "er", "et", "ett", "etter", "for", "fordi", "fra", "før", "ha", "hadde", "han", "hans", "har", "hennar", "henne", "hennes", "her", "hjå", "ho", "hoe", "honom", "hoss", "hossen", "hun", "hva", "hvem", "hver", "hvilke", "hvilken", "hvis", "hvor", "hvordan", "hvorfor", "i", "ikke", "ikkje", "ikkje", "ingen", "ingi", "inkje", "inn", "inni", "ja", "jeg", "kan", "kom", "korleis", "korso", "kun", "kunne", "kva", "kvar", "kvarhelst", "kven", "kvi", "kvifor", "man", "mange", "me", "med", "medan", "meg", "meget", "mellom", "men", "mi", "min", "mine", "mitt", "mot", "mykje", "ned", "no", "noe", "noen", "noka", "noko", "nokon", "nokor", "nokre", "nå", "når", "og", "også", "om", "opp", "oss", "over", "på", "samme", "seg", "selv", "si", "si", "sia", "sidan", "siden", "sin", "sine", "sitt", "sjøl", "skal", "skulle", "slik", "so", "som", "som", "somme", "somt", "så", "sånn", "til", "um", "upp", "ut", "uten", "var", "vart", "varte", "ved", "vere", "verte", "vi", "vil", "ville", "vore", "vors", "vort", "vår", "være", "være", "vært", "å", ]; elasticlunr-rs-3.0.2/src/lang/pt.rs000064400000000000000000000070200072674642500153250ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Portuguese {} impl Portuguese { pub fn new() -> Self { Self {} } } impl Language for Portuguese { fn name(&self) -> String { 
"Portuguese".into() } fn code(&self) -> String { "pt".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-pt", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-pt", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-pt", Algorithm::Portuguese)), ], } } } const STOP_WORDS: &[&str] = &[ "", "a", "ao", "aos", "aquela", "aquelas", "aquele", "aqueles", "aquilo", "as", "até", "com", "como", "da", "das", "de", "dela", "delas", "dele", "deles", "depois", "do", "dos", "e", "ela", "elas", "ele", "eles", "em", "entre", "era", "eram", "essa", "essas", "esse", "esses", "esta", "estamos", "estas", "estava", "estavam", "este", "esteja", "estejam", "estejamos", "estes", "esteve", "estive", "estivemos", "estiver", "estivera", "estiveram", "estiverem", "estivermos", "estivesse", "estivessem", "estivéramos", "estivéssemos", "estou", "está", "estávamos", "estão", "eu", "foi", "fomos", "for", "fora", "foram", "forem", "formos", "fosse", "fossem", "fui", "fôramos", "fôssemos", "haja", "hajam", "hajamos", "havemos", "hei", "houve", "houvemos", "houver", "houvera", "houveram", "houverei", "houverem", "houveremos", "houveria", "houveriam", "houvermos", "houverá", "houverão", "houveríamos", "houvesse", "houvessem", "houvéramos", "houvéssemos", "há", "hão", "isso", "isto", "já", "lhe", "lhes", "mais", "mas", "me", "mesmo", "meu", "meus", "minha", "minhas", "muito", "na", "nas", "nem", "no", "nos", "nossa", "nossas", "nosso", "nossos", "num", "numa", "não", "nós", "o", "os", "ou", "para", "pela", "pelas", "pelo", "pelos", "por", "qual", "quando", "que", "quem", "se", "seja", "sejam", "sejamos", "sem", "serei", "seremos", "seria", "seriam", "será", "serão", "seríamos", "seu", "seus", "somos", "sou", "sua", "suas", "são", "só", "também", "te", "tem", "temos", "tenha", "tenham", "tenhamos", "tenho", "terei", "teremos", "teria", "teriam", "terá", "terão", 
"teríamos", "teu", "teus", "teve", "tinha", "tinham", "tive", "tivemos", "tiver", "tivera", "tiveram", "tiverem", "tivermos", "tivesse", "tivessem", "tivéramos", "tivéssemos", "tu", "tua", "tuas", "tém", "tínhamos", "um", "uma", "você", "vocês", "vos", "à", "às", "éramos", ]; elasticlunr-rs-3.0.2/src/lang/ro.rs000064400000000000000000000106300072674642500153230ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Romanian {} impl Romanian { pub fn new() -> Self { Self {} } } impl Language for Romanian { fn name(&self) -> String { "Romanian".into() } fn code(&self) -> String { "ro".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-ro", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-ro", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-ro", Algorithm::Romanian)), ], } } } const STOP_WORDS: &[&str] = &[ "", "acea", "aceasta", "această", "aceea", "acei", "aceia", "acel", "acela", "acele", "acelea", "acest", "acesta", "aceste", "acestea", "aceşti", "aceştia", "acolo", "acord", "acum", "ai", "aia", "aibă", "aici", "al", "ale", "alea", "altceva", "altcineva", "am", "ar", "are", "asemenea", "asta", "astea", "astăzi", "asupra", "au", "avea", "avem", "aveţi", "azi", "aş", "aşadar", "aţi", "bine", "bucur", "bună", "ca", "care", "caut", "ce", "cel", "ceva", "chiar", "cinci", "cine", "cineva", "contra", "cu", "cum", "cumva", "curând", "curînd", "când", "cât", "câte", "câtva", "câţi", "cînd", "cît", "cîte", "cîtva", "cîţi", "că", "căci", "cărei", "căror", "cărui", "către", "da", "dacă", "dar", "datorită", "dată", "dau", "de", "deci", "deja", "deoarece", "departe", "deşi", "din", "dinaintea", "dintr-", "dintre", "doi", "doilea", "două", "drept", "după", "dă", "ea", "ei", "el", "ele", "eram", "este", "eu", 
"eşti", "face", "fata", "fi", "fie", "fiecare", "fii", "fim", "fiu", "fiţi", "frumos", "fără", "graţie", "halbă", "iar", "ieri", "la", "le", "li", "lor", "lui", "lângă", "lîngă", "mai", "mea", "mei", "mele", "mereu", "meu", "mi", "mie", "mine", "mult", "multă", "mulţi", "mulţumesc", "mâine", "mîine", "mă", "ne", "nevoie", "nici", "nicăieri", "nimeni", "nimeri", "nimic", "nişte", "noastre", "noastră", "noi", "noroc", "nostru", "nouă", "noştri", "nu", "opt", "ori", "oricare", "orice", "oricine", "oricum", "oricând", "oricât", "oricînd", "oricît", "oriunde", "patra", "patru", "patrulea", "pe", "pentru", "peste", "pic", "poate", "pot", "prea", "prima", "primul", "prin", "puţin", "puţina", "puţină", "până", "pînă", "rog", "sa", "sale", "sau", "se", "spate", "spre", "sub", "sunt", "suntem", "sunteţi", "sută", "sînt", "sîntem", "sînteţi", "să", "săi", "său", "ta", "tale", "te", "timp", "tine", "toate", "toată", "tot", "totuşi", "toţi", "trei", "treia", "treilea", "tu", "tăi", "tău", "un", "una", "unde", "undeva", "unei", "uneia", "unele", "uneori", "unii", "unor", "unora", "unu", "unui", "unuia", "unul", "vi", "voastre", "voastră", "voi", "vostru", "vouă", "voştri", "vreme", "vreo", "vreun", "vă", "zece", "zero", "zi", "zice", "îi", "îl", "îmi", "împotriva", "în", "înainte", "înaintea", "încotro", "încât", "încît", "între", "întrucât", "întrucît", "îţi", "ăla", "ălea", "ăsta", "ăstea", "ăştia", "şapte", "şase", "şi", "ştiu", "ţi", "ţie", ]; elasticlunr-rs-3.0.2/src/lang/ru.rs000064400000000000000000000203030072674642500153270ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Russian {} impl Russian { pub fn new() -> Self { Self {} } } impl Language for Russian { fn name(&self) -> String { "Russian".into() } fn code(&self) -> String { "ru".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn 
make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-ru", r"\p{Cyrillic}")), Box::new(StopWordFilter::new("stopWordFilter-ru", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-ru", Algorithm::Russian)), ], } } } const STOP_WORDS: &[&str] = &[ "", "алло", "без", "близко", "более", "больше", "будем", "будет", "будете", "будешь", "будто", "буду", "будут", "будь", "бы", "бывает", "бывь", "был", "была", "были", "было", "быть", "в", "важная", "важное", "важные", "важный", "вам", "вами", "вас", "ваш", "ваша", "ваше", "ваши", "вверх", "вдали", "вдруг", "ведь", "везде", "весь", "вниз", "внизу", "во", "вокруг", "вон", "восемнадцатый", "восемнадцать", "восемь", "восьмой", "вот", "впрочем", "времени", "время", "все", "всегда", "всего", "всем", "всеми", "всему", "всех", "всею", "всю", "всюду", "вся", "всё", "второй", "вы", "г", "где", "говорил", "говорит", "год", "года", "году", "да", "давно", "даже", "далеко", "дальше", "даром", "два", "двадцатый", "двадцать", "две", "двенадцатый", "двенадцать", "двух", "девятнадцатый", "девятнадцать", "девятый", "девять", "действительно", "дел", "день", "десятый", "десять", "для", "до", "довольно", "долго", "должно", "другая", "другие", "других", "друго", "другое", "другой", "е", "его", "ее", "ей", "ему", "если", "есть", "еще", "ещё", "ею", "её", "ж", "же", "жизнь", "за", "занят", "занята", "занято", "заняты", "затем", "зато", "зачем", "здесь", "значит", "и", "из", "или", "им", "именно", "иметь", "ими", "имя", "иногда", "их", "к", "каждая", "каждое", "каждые", "каждый", "кажется", "как", "какая", "какой", "кем", "когда", "кого", "ком", "кому", "конечно", "которая", "которого", "которой", "которые", "который", "которых", "кроме", "кругом", "кто", "куда", "лет", "ли", "лишь", "лучше", "люди", "м", "мало", "между", "меля", "менее", "меньше", "меня", "миллионов", "мимо", "мира", "мне", "много", "многочисленная", "многочисленное", "многочисленные", "многочисленный", "мной", "мною", "мог", "могут", "мож", 
"может", "можно", "можхо", "мои", "мой", "мор", "мочь", "моя", "моё", "мы", "на", "наверху", "над", "надо", "назад", "наиболее", "наконец", "нам", "нами", "нас", "начала", "наш", "наша", "наше", "наши", "не", "него", "недавно", "недалеко", "нее", "ней", "нельзя", "нем", "немного", "нему", "непрерывно", "нередко", "несколько", "нет", "нею", "неё", "ни", "нибудь", "ниже", "низко", "никогда", "никуда", "ними", "них", "ничего", "но", "ну", "нужно", "нх", "о", "об", "оба", "обычно", "один", "одиннадцатый", "одиннадцать", "однажды", "однако", "одного", "одной", "около", "он", "она", "они", "оно", "опять", "особенно", "от", "отовсюду", "отсюда", "очень", "первый", "перед", "по", "под", "пожалуйста", "позже", "пока", "пор", "пора", "после", "посреди", "потом", "потому", "почему", "почти", "прекрасно", "при", "про", "просто", "против", "процентов", "пятнадцатый", "пятнадцать", "пятый", "пять", "раз", "разве", "рано", "раньше", "рядом", "с", "сам", "сама", "сами", "самим", "самими", "самих", "само", "самого", "самой", "самом", "самому", "саму", "свое", "своего", "своей", "свои", "своих", "свою", "сеаой", "себе", "себя", "сегодня", "седьмой", "сейчас", "семнадцатый", "семнадцать", "семь", "сих", "сказал", "сказала", "сказать", "сколько", "слишком", "сначала", "снова", "со", "собой", "собою", "совсем", "спасибо", "стал", "суть", "т", "та", "так", "такая", "также", "такие", "такое", "такой", "там", "твой", "твоя", "твоё", "те", "тебе", "тебя", "тем", "теми", "теперь", "тех", "то", "тобой", "тобою", "тогда", "того", "тоже", "только", "том", "тому", "тот", "тою", "третий", "три", "тринадцатый", "тринадцать", "ту", "туда", "тут", "ты", "тысяч", "у", "уж", "уже", "уметь", "хорошо", "хотеть", "хоть", "хотя", "хочешь", "часто", "чаще", "чего", "человек", "чем", "чему", "через", "четвертый", "четыре", "четырнадцатый", "четырнадцать", "что", "чтоб", "чтобы", "чуть", "шестнадцатый", "шестнадцать", "шестой", "шесть", "эта", "эти", "этим", "этими", "этих", "это", "этого", "этой", "этом", 
"этому", "этот", "эту", "я", "а", ]; elasticlunr-rs-3.0.2/src/lang/sv.rs000064400000000000000000000034520072674642500153370ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub struct Swedish {} impl Swedish { pub fn new() -> Self { Self {} } } impl Language for Swedish { fn name(&self) -> String { "Swedish".into() } fn code(&self) -> String { "sv".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-sv", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-sv", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-sv", Algorithm::Swedish)), ], } } } const STOP_WORDS: &[&str] = &[ "", "alla", "allt", "att", "av", "blev", "bli", "blir", "blivit", "de", "dem", "den", "denna", "deras", "dess", "dessa", "det", "detta", "dig", "din", "dina", "ditt", "du", "där", "då", "efter", "ej", "eller", "en", "er", "era", "ert", "ett", "från", "för", "ha", "hade", "han", "hans", "har", "henne", "hennes", "hon", "honom", "hur", "här", "i", "icke", "ingen", "inom", "inte", "jag", "ju", "kan", "kunde", "man", "med", "mellan", "men", "mig", "min", "mina", "mitt", "mot", "mycket", "ni", "nu", "när", "någon", "något", "några", "och", "om", "oss", "på", "samma", "sedan", "sig", "sin", "sina", "sitta", "själv", "skulle", "som", "så", "sådan", "sådana", "sådant", "till", "under", "upp", "ut", "utan", "vad", "var", "vara", "varför", "varit", "varje", "vars", "vart", "vem", "vi", "vid", "vilka", "vilkas", "vilken", "vilket", "vår", "våra", "vårt", "än", "är", "åt", "över", ]; elasticlunr-rs-3.0.2/src/lang/tr.rs000064400000000000000000000072110072674642500153310ustar 00000000000000use super::{ common::{RustStemmer, StopWordFilter, RegexTrimmer}, Language, }; use crate::pipeline::Pipeline; use rust_stemmers::Algorithm; #[derive(Clone)] pub 
struct Turkish {} impl Turkish { pub fn new() -> Self { Self {} } } impl Language for Turkish { fn name(&self) -> String { "Turkish".into() } fn code(&self) -> String { "tr".into() } fn tokenize(&self, text: &str) -> Vec { super::tokenize_whitespace(text) } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-tr", r"\p{Latin}")), Box::new(StopWordFilter::new("stopWordFilter-tr", STOP_WORDS)), Box::new(RustStemmer::new("stemmer-tr", Algorithm::Turkish)), ], } } } const STOP_WORDS: &[&str] = &[ "", "acaba", "altmış", "altı", "ama", "ancak", "arada", "aslında", "ayrıca", "bana", "bazı", "belki", "ben", "benden", "beni", "benim", "beri", "beş", "bile", "bin", "bir", "biri", "birkaç", "birkez", "birçok", "birşey", "birşeyi", "biz", "bizden", "bize", "bizi", "bizim", "bu", "buna", "bunda", "bundan", "bunlar", "bunları", "bunların", "bunu", "bunun", "burada", "böyle", "böylece", "da", "daha", "dahi", "de", "defa", "değil", "diye", "diğer", "doksan", "dokuz", "dolayı", "dolayısıyla", "dört", "edecek", "eden", "ederek", "edilecek", "ediliyor", "edilmesi", "ediyor", "elli", "en", "etmesi", "etti", "ettiği", "ettiğini", "eğer", "gibi", "göre", "halen", "hangi", "hatta", "hem", "henüz", "hep", "hepsi", "her", "herhangi", "herkesin", "hiç", "hiçbir", "iki", "ile", "ilgili", "ise", "itibaren", "itibariyle", "için", "işte", "kadar", "karşın", "katrilyon", "kendi", "kendilerine", "kendini", "kendisi", "kendisine", "kendisini", "kez", "ki", "kim", "kimden", "kime", "kimi", "kimse", "kırk", "milyar", "milyon", "mu", "mü", "mı", "nasıl", "ne", "neden", "nedenle", "nerde", "nerede", "nereye", "niye", "niçin", "o", "olan", "olarak", "oldu", "olduklarını", "olduğu", "olduğunu", "olmadı", "olmadığı", "olmak", "olması", "olmayan", "olmaz", "olsa", "olsun", "olup", "olur", "olursa", "oluyor", "on", "ona", "ondan", "onlar", "onlardan", "onları", "onların", "onu", "onun", "otuz", "oysa", "pek", "rağmen", "sadece", "sanki", "sekiz", "seksen", 
"sen", "senden", "seni", "senin", "siz", "sizden", "sizi", "sizin", "tarafından", "trilyon", "tüm", "var", "vardı", "ve", "veya", "ya", "yani", "yapacak", "yapmak", "yaptı", "yaptıkları", "yaptığı", "yaptığını", "yapılan", "yapılması", "yapıyor", "yedi", "yerine", "yetmiş", "yine", "yirmi", "yoksa", "yüz", "zaten", "çok", "çünkü", "öyle", "üzere", "üç", "şey", "şeyden", "şeyi", "şeyler", "şu", "şuna", "şunda", "şundan", "şunları", "şunu", "şöyle", ]; elasticlunr-rs-3.0.2/src/lang/zh.rs000064400000000000000000000024230072674642500153250ustar 00000000000000use super::{common::RegexTrimmer, Language}; use crate::pipeline::{FnWrapper, Pipeline}; #[derive(Clone)] pub struct Chinese { jieba: jieba_rs::Jieba, } impl Chinese { pub fn new() -> Self { Self { jieba: jieba_rs::Jieba::new(), } } } impl Language for Chinese { fn name(&self) -> String { "Chinese".into() } fn code(&self) -> String { "zh".into() } fn tokenize(&self, text: &str) -> Vec { self.jieba .cut_for_search(text, false) .iter() .map(|s| s.to_string()) .collect() } fn make_pipeline(&self) -> Pipeline { Pipeline { queue: vec![ Box::new(RegexTrimmer::new("trimmer-zh", r"\p{Unified_Ideograph}\p{Latin}")), Box::new(FnWrapper("stopWordFilter-zh".into(), stop_word_filter)), Box::new(FnWrapper("stemmer-zh".into(), stemmer)), ], } } } // TODO: lunr.zh.js has a much larger set of stop words fn stop_word_filter(token: String) -> Option { match token.as_str() { "的" | "了" => None, _ => Some(token), } } // lunr.zh.js has an empty stemmer as well fn stemmer(token: String) -> Option { Some(token) } elasticlunr-rs-3.0.2/src/lib.rs000064400000000000000000000264130072674642500145360ustar 00000000000000//!# elasticlunr-rs //! //! [![Build Status](https://travis-ci.org/mattico/elasticlunr-rs.svg?branch=master)](https://travis-ci.org/mattico/elasticlunr-rs) //! [![Documentation](https://docs.rs/elasticlunr-rs/badge.svg)](https://docs.rs/elasticlunr-rs) //! 
//! [![Crates.io](https://img.shields.io/crates/v/elasticlunr-rs.svg)](https://crates.io/crates/elasticlunr-rs)
//!
//! A partial port of [elasticlunr](https://github.com/weixsong/elasticlunr.js) to Rust. Intended to
//! be used for generating compatible search indices.
//!
//! Access to all index-generating functionality is provided. Most users will only need to use the
//! [`Index`](struct.Index.html) or [`IndexBuilder`](struct.IndexBuilder.html) types.
//!
//! The [`Language`] trait can be used to implement a custom language.
//!
//! ## Example
//!
//! ```
//! use std::fs::File;
//! use std::io::Write;
//! use elasticlunr::Index;
//!
//! let mut index = Index::new(&["title", "body"]);
//! index.add_doc("1", &["This is a title", "This is body text!"]);
//! // Add more docs...
//! let mut file = File::create("out.json").unwrap();
//! file.write_all(index.to_json_pretty().as_bytes()).unwrap();
//! ```

#[macro_use]
extern crate serde_derive;

#[cfg(test)]
#[macro_use]
extern crate maplit;

/// The version of elasticlunr.js this library was designed for.
pub const ELASTICLUNR_VERSION: &str = "0.9.5";

pub mod config;
pub mod document_store;
pub mod inverted_index;
pub mod lang;
pub mod pipeline;

use std::collections::BTreeMap;

use document_store::DocumentStore;
use inverted_index::InvertedIndex;
use lang::English;
pub use lang::Language;
pub use pipeline::Pipeline;

/// Optional per-field tokenizer; `None` means "use the index language's tokenizer".
type Tokenizer = Option<Box<dyn Fn(&str) -> Vec<String>>>;

/// A builder for an `Index` with custom parameters.
///
/// # Example
/// ```
/// # use elasticlunr::{Index, IndexBuilder};
/// let mut index = IndexBuilder::new()
///     .save_docs(false)
///     .add_fields(&["title", "subtitle", "body"])
///     .set_ref("doc_id")
///     .build();
/// index.add_doc("doc_a", &["Chapter 1", "Welcome to Copenhagen", "..."]);
/// ```
pub struct IndexBuilder {
    save: bool,
    fields: Vec<String>,
    // Parallel to `fields`: entry `i` is the tokenizer for `fields[i]`.
    field_tokenizers: Vec<Tokenizer>,
    ref_field: String,
    pipeline: Option<Pipeline>,
    language: Box<dyn Language>,
}

impl Default for IndexBuilder {
    /// Documents are saved, there are no fields, the reference key is `"id"`,
    /// and the language is English.
    fn default() -> Self {
        IndexBuilder {
            save: true,
            fields: Vec::new(),
            field_tokenizers: Vec::new(),
            ref_field: "id".into(),
            pipeline: None,
            language: Box::new(English::new()),
        }
    }
}

impl IndexBuilder {
    /// Create a builder with the default settings (see the `Default` impl).
    pub fn new() -> Self {
        Default::default()
    }

    /// Create a builder with the default settings, but using `language` for
    /// tokenizing and for the token pipeline.
    pub fn with_language(language: Box<dyn Language>) -> Self {
        Self {
            language,
            ..Default::default()
        }
    }

    /// Set whether or not documents should be saved in the `Index`'s document store.
    pub fn save_docs(mut self, save: bool) -> Self {
        self.save = save;
        self
    }

    /// Add a document field to the `Index`.
    ///
    /// # Panics
    ///
    /// Panics if a field with the name already exists.
    pub fn add_field(mut self, field: &str) -> Self {
        self.push_field(field, None);
        self
    }

    /// Add a document field to the `Index`, with a custom tokenizer for that field.
    ///
    /// # Panics
    ///
    /// Panics if a field with the name already exists.
    pub fn add_field_with_tokenizer(
        mut self,
        field: &str,
        tokenizer: Box<dyn Fn(&str) -> Vec<String>>,
    ) -> Self {
        self.push_field(field, Some(tokenizer));
        self
    }

    /// Add the document fields to the `Index`.
    ///
    /// # Panics
    ///
    /// Panics if two fields have the same name.
    pub fn add_fields<I>(self, fields: I) -> Self
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        fields
            .into_iter()
            .fold(self, |builder, field| builder.add_field(field.as_ref()))
    }

    /// Set the key used to store the document reference field.
    pub fn set_ref(mut self, ref_field: &str) -> Self {
        self.ref_field = ref_field.into();
        self
    }

    /// Build an `Index` from this builder.
    ///
    /// Every field gets its own empty `InvertedIndex`. The pipeline comes from the
    /// builder's language unless the builder already holds one.
    pub fn build(self) -> Index {
        let IndexBuilder {
            save,
            fields,
            field_tokenizers,
            ref_field,
            pipeline,
            language,
        } = self;

        let index = fields
            .iter()
            .map(|f| (f.clone(), InvertedIndex::new()))
            .collect();
        let pipeline = pipeline.unwrap_or_else(|| language.make_pipeline());

        Index {
            index,
            fields,
            field_tokenizers,
            ref_field,
            document_store: DocumentStore::new(save),
            pipeline,
            version: crate::ELASTICLUNR_VERSION,
            lang: language,
        }
    }

    /// Shared implementation of `add_field` / `add_field_with_tokenizer`: records the
    /// field and its tokenizer at the same position, panicking on a duplicate name.
    fn push_field(&mut self, field: &str, tokenizer: Tokenizer) {
        if self.fields.iter().any(|existing| existing == field) {
            panic!("Duplicate fields in index: {}", field);
        }
        self.fields.push(field.into());
        self.field_tokenizers.push(tokenizer);
    }
}

/// An elasticlunr search index.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Index {
    fields: Vec<String>,
    // Closures cannot be serialized; parallel to `fields`.
    #[serde(skip)]
    field_tokenizers: Vec<Tokenizer>,
    pipeline: Pipeline,
    #[serde(rename = "ref")]
    ref_field: String,
    version: &'static str,
    // One inverted index per field name.
    index: BTreeMap<String, InvertedIndex>,
    document_store: DocumentStore,
    // Stored as the language's English name, see `ser_lang`.
    #[serde(with = "ser_lang")]
    lang: Box<dyn Language>,
}

/// (De)serializes the index language as its English name, resolved back through
/// `crate::lang::from_name`.
mod ser_lang {
    use crate::Language;
    use serde::de;
    use serde::{Deserializer, Serializer};
    use std::fmt;

    pub fn serialize<S>(lang: &Box<dyn Language>, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(&lang.name())
    }

    pub fn deserialize<'de, D>(deserializer: D) -> Result<Box<dyn Language>, D::Error>
    where
        D: Deserializer<'de>,
    {
        deserializer.deserialize_str(LanguageVisitor)
    }

    struct LanguageVisitor;

    impl<'de> de::Visitor<'de> for LanguageVisitor {
        type Value = Box<dyn Language>;

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("a capitalized language name")
        }

        // `visit_str` rather than `visit_borrowed_str`: the default borrowed and owned
        // string visitors forward here, so input that is not borrowed from the
        // deserializer (e.g. when reading from an `io::Read`) is accepted too.
        fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            crate::lang::from_name(v)
                .ok_or_else(|| E::custom(format!("Unknown language name: {}", v)))
        }
    }
}

impl Index {
    /// Create a new index with the provided fields.
///
    /// # Example
    ///
    /// ```
    /// # use elasticlunr::{Index};
    /// let mut index = Index::new(&["title", "body"]);
    /// index.add_doc("1", &["this is a title", "this is body text"]);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if a field with the name already exists.
    pub fn new<I>(fields: I) -> Self
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        IndexBuilder::new().add_fields(fields).build()
    }

    /// Create a new index with the provided fields for the given
    /// [`Language`].
    ///
    /// # Example
    ///
    /// ```
    /// use elasticlunr::{Index, lang::English};
    /// let mut index = Index::with_language(Box::new(English::new()), &["title", "body"]);
    /// index.add_doc("1", &["this is a title", "this is body text"]);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if a field with the name already exists.
    pub fn with_language<I>(lang: Box<dyn Language>, fields: I) -> Self
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        IndexBuilder::with_language(lang).add_fields(fields).build()
    }

    /// Add the data from a document to the index.
    ///
    /// *NOTE: The elements of `data` should be provided in the same order as
    /// the fields used to create the index.*
    ///
    /// Each value is stored in the document under its field name. A field whose name
    /// equals the reference key is stored but not tokenized or indexed. Every other
    /// field is tokenized (with its custom tokenizer if it has one, otherwise with the
    /// index language), run through the pipeline, and each distinct token is added to
    /// that field's inverted index with the square root of its count in that field.
    ///
    /// # Example
    /// ```
    /// # use elasticlunr::Index;
    /// let mut index = Index::new(&["title", "body"]);
    /// index.add_doc("1", &["this is a title", "this is body text"]);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if `data` yields more elements than the index has fields.
    pub fn add_doc<I>(&mut self, doc_ref: &str, data: I)
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        let mut doc = BTreeMap::new();
        doc.insert(self.ref_field.clone(), doc_ref.into());

        for (i, value) in data.into_iter().enumerate() {
            let text = value.as_ref();
            let field = &self.fields[i];
            doc.insert(field.clone(), text.to_string());

            if field == &self.ref_field {
                continue;
            }

            let raw_tokens = match &self.field_tokenizers[i] {
                Some(tokenizer) => tokenizer(text),
                None => self.lang.tokenize(text),
            };
            let tokens = self.pipeline.run(raw_tokens);

            self.document_store
                .add_field_length(doc_ref, field, tokens.len());

            // Counts are per field: a fresh map for each field keeps tokens seen in an
            // earlier field out of this field's inverted index (elasticlunr.js also
            // counts tokens per field).
            let mut token_freq = BTreeMap::new();
            for token in tokens {
                *token_freq.entry(token).or_insert(0u64) += 1;
            }

            let inverted_index = self
                .index
                .get_mut(field)
                .unwrap_or_else(|| panic!("InvertedIndex does not exist for field {}", field));
            for (token, count) in &token_freq {
                let freq = (*count as f64).sqrt();
                inverted_index.add_token(doc_ref, token, freq);
            }
        }

        self.document_store.add_doc(doc_ref, doc);
    }

    /// Returns the names of the index's fields, in insertion order.
    pub fn get_fields(&self) -> &[String] {
        &self.fields
    }

    /// Returns the index, serialized to pretty-printed JSON.
    pub fn to_json_pretty(&self) -> String {
        serde_json::to_string_pretty(self).unwrap()
    }

    /// Returns the index, serialized to JSON.
    pub fn to_json(&self) -> String {
        serde_json::to_string(self).unwrap()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn add_field_to_builder() {
        let idx = IndexBuilder::new()
            .add_fields(&["foo", "bar", "baz"])
            .build();

        let idx_fields = idx.get_fields();
        for f in &["foo", "bar", "baz"] {
            assert_eq!(idx_fields.iter().filter(|x| x == f).count(), 1);
        }
    }

    #[test]
    fn adding_document_to_index() {
        let mut idx = Index::new(&["body"]);
        idx.add_doc("1", &["this is a test"]);

        assert_eq!(idx.document_store.len(), 1);
        assert_eq!(
            idx.document_store.get_doc("1").unwrap(),
            btreemap! {
                "id".into() => "1".into(),
                "body".into() => "this is a test".into(),
            }
        );
    }

    #[test]
    fn adding_document_with_empty_field() {
        let mut idx = Index::new(&["title", "body"]);

        idx.add_doc("1", &["", "test"]);
        assert_eq!(idx.index["body"].get_doc_frequency("test"), 1);
        assert_eq!(idx.index["body"].get_docs("test").unwrap()["1"], 1.);
    }

    #[test]
    fn token_counts_do_not_leak_between_fields() {
        let mut idx = Index::new(&["title", "body"]);

        idx.add_doc("1", &["test", "plant"]);
        idx.add_doc("2", &["plant", "test"]);
        // "test" occurs in exactly one document per field.
        assert_eq!(idx.index["title"].get_doc_frequency("test"), 1);
        assert_eq!(idx.index["body"].get_doc_frequency("test"), 1);
    }

    #[test]
    #[should_panic]
    fn creating_index_with_identical_fields_panics() {
        let _idx = Index::new(&["title", "body", "title"]);
    }
}
elasticlunr-rs-3.0.2/src/pipeline.rs000064400000000000000000000033540072674642500155740ustar 00000000000000//! Defines the pipeline which processes text for inclusion in the index. Most users do not need
//! to use this module directly.
use serde::ser::{Serialize, SerializeSeq, Serializer}; pub trait PipelineFn { fn name(&self) -> String; fn filter(&self, token: String) -> Option; } #[derive(Clone)] pub struct FnWrapper(pub String, pub fn(String) -> Option); impl PipelineFn for FnWrapper { fn name(&self) -> String { self.0.clone() } fn filter(&self, token: String) -> Option { (self.1)(token) } } /// A sequence of `PipelineFn`s which are run on tokens to prepare them for searching. #[derive(Deserialize)] pub struct Pipeline { #[serde(skip_deserializing)] pub queue: Vec>, } impl Serialize for Pipeline { fn serialize(&self, serializer: S) -> Result where S: Serializer, { let mut seq = serializer.serialize_seq(Some(self.queue.len()))?; for elem in &self.queue { seq.serialize_element(&elem.name())?; } seq.end() } } impl Pipeline { /// Run the Pipeline against the given vector of tokens. The returned vector may be shorter /// than the input if a pipeline function returns `None` for a token. pub fn run(&self, tokens: Vec) -> Vec { let mut ret = vec![]; for token in tokens { let mut token = Some(token); for func in &self.queue { if let Some(t) = token { token = func.filter(t); } else { break; } } if let Some(t) = token { ret.push(t); } } ret } } elasticlunr-rs-3.0.2/tests/data/ar.in.txt000064400000000000000000000020470072674642500164530ustar 00000000000000استعار جحا مرة آنية من جاره وعندما أعادها له أعاد معها آنية صغيرة فسأله جاره لماذا أعدت مع أنيتي آنية صغيرة يا جحا؟ فقال له جحا: إنّ آنيتك ولدت في الأمس آنية صغيرة وإنّها الآن من حقك، فرح الرجل وأخذ الطنجرة ودخل بيته، وبعد فترة من الزمان ذهب جحا إلى جاره وطلب منه أنية أخرى، فأعطاه جاره ما طلب، مرّ وقت طويل ولم يُعد جحا الآنية، فذهب جاره إلى بيته ليطلبها منه، فاستقبله جحا باكياً منتحباً، فقال له الرجل: مالي أراك باكياً يا جحا؟!! فقال له جحا وهو يبكي إنّ آنيتك توفيت بالأمس يا صاحبي، فقال له جاره وهو غاضب: وكيف لآنيةٍ أن تموت يا رجل؟!! فقال جحا أتصدق أنّ إناء قد يلد ولا تصدق أنّه قد يموت؟! 
elasticlunr-rs-3.0.2/tests/data/ar.out.txt000064400000000000000000000020200072674642500166430ustar 00000000000000استعار جحا مرة انية من جاره وعندما اعادها له اعاد معها انية صغيرة فساله جاره لماذا اعدت مع انيتي انية صغيرة يا جحا؟ فقال له جحا: ان انيتك ولدت في الامس انية صغيرة وانها الان من حقك، فرح الرجل واخذ الطنجرة ودخل بيته، وبعد فترة من الزمان ذهب جحا الى جاره وطلب منه انية اخرى، فاعطاه جاره ما طلب، مر وقت طويل ولم يعد جحا الانية، فذهب جاره الى بيته ليطلبها منه، فاستقبله جحا باكيا منتحبا، فقال له الرجل: مالي اراك باكيا يا جحا؟!! فقال له جحا وهو يبكي ان انيتك توفيت بالامس يا صاحبي، فقال له جاره وهو غاضب: وكيف لانية ان تموت يا رجل؟!! فقال جحا اتصدق ان اناء قد يلد ولا تصدق انه قد يموت؟! elasticlunr-rs-3.0.2/tests/data/da.in.txt000064400000000000000000000063340072674642500164400ustar 00000000000000I det lille Værtshus i Genf, hvor Russerne plejede at have deres Tilhold, nød Helene hurtigt sit beskedne Aftensmaaltid uden som sædvanlig at drikke en Kop Kaffe dertil -- en Luksus, hun ikke havde nægtet sig lige siden den Dag, hun havde faaet sine Elever i Russisk. Men i Aften maatte hun skynde sig; et længe ventet Brev fra Rusland laa gemt i hendes Lomme. Hun havde for et Øjeblik siden faaet det af den gamle, hvidhaarede Urmager, til hvem hele hendes udenlandske Korrespondance blev adresseret, og hun brændte af Utaalmodighed efter at erfare de Nyheder, som det i al Almindelighed maatte indeholde, og efter at faa det overbragt til sin Ven Andrey, hvem det dog fornemmelig angik. Hun vekslede nogle Ord med en anden landflygtig, krydsede imellem de mange Rækker smaa Borde, ved hvilke der overalt sad Mænd i Arbejdsbluser, og naaede ud paa Gaden. Klokken var kun halvsyv, hun var sikker paa at træffe Andrey hjemme. Han boede i Nærheden, og efter fem Minutters Forløb befandt Helene sig uden for hans Dør. Hendes smukke, noget stillestaaende Ansigt havde faaet en let Farve af den hurtige Gang. 
Andrey var alene, i Færd med at gøre Uddrag af en Statistik, som han benyttede til Grundlag for den Artikel, han hver Uge skrev til et russisk Provinsblad. Han vendte Hovedet og rejste sig med udstrakt Haand for at byde sin Gæst velkommen. „Her er et Brev til dig!“ sagde Helene, idet hun gav ham Haanden. „Naa, endelig!“ udbrød han. Andrey var en Mand paa seks-syv og tyve Aar med et alvorligt, godmodigt Ansigt, lidt skarpt og regelmæssigt i Trækkene. Over hans Pande laa Spor af tidlige Sorger, og hans Øjne var ualmindelig dybe og tankefulde, men dette forringede ikke det Indtryk af Ro og Bestemthed, man fik af hele hans kraftige, velformede Skikkelse. Der gled en let Rødme over hans Pande, idet hans slanke, muskelstærke Fingre med nervøs Hast rev Konvolutten op og fremdrog et stort Ark Papir, bedækket med Linier i vid Afstand fra hinanden, skrevne med en uregelmæssig, sammentrængt Haandskrift. Helene, der ikke syntes at være mindre utaalmodig end han, gik hen til ham og lagde Haanden paa hans Skulder for ogsaa at kunne læse i Brevet. „Det er bedre, at vi sætter os ned, Helene!“ sagde den unge Mand. „Du skygger for Lyset med dine Krøller!“ Det mere end tarvelige Værelse var kun sparsomt oplyst af en eneste Lampe, dækket af en grøn Papirskærm, saaledes at kun en Del af Brædegulvet, Benene paa nogle simple Stole og den nederste Del af en Mahogni Kommode -- Værelsets fornemste Prydelse -- var helt oplyst. Væggene, som var betrukne med gult Tapetpapir og prydede med et billigt Litografi af den schweiziske General Dufour, et Landskab, et Fotografi af Værtindens afdøde Ægteherre og hendes Eksamensbevis fra Skoletiden, indfattet i Glas og Ramme, var hyllede i et diskret Tusmørke, meget klædeligt for disse Kunstværker, men umuligt at læse i. Andrey stillede endnu en Stol hen til det runde Spisebord, som var dækket med Bøger og Papirer, og drejede Lampeskærmen saaledes, at det Hjørne, han plejede at bruge som Skrivebord, var helt oplyst. 
Helene satte sig ved Siden af ham og saa nær, at hendes Haar undertiden berørte hans; men ingen af dem ænsede det, saa optagne var de af deres Tanker.elasticlunr-rs-3.0.2/tests/data/da.out.txt000064400000000000000000000035200072674642500166330ustar 00000000000000lil værtshus genf rus plejed tilhold nød hel hurt beskedn aftensmaaltid uden sædvan drik kop kaf dertil luksus nægt lig sid dag faaet elev russisk aft maat skynd læng vent brev rusland laa gemt lom øjeblik sid faaet gaml hvidhaared urmag hvem hel udenlandsk korrespondanc adres brænd utaalmod erfar nyhed al almind maat indehold faa overbrag ven andrey hvem fornem angik veksled ord landflyg krydsed imellem ræk smaa bord ved hvilk overalt sad mænd arbejdsblus naaed paa gad klok kun halvsyv sik paa træf andrey hjem boed nær fem minut forløb befand hel uden dør smuk stillestaa ans faaet let farv hurt gang andrey alen færd gør uddrag statistik benytted grundlag artikel hver uge skrev russisk provinsblad vend hoved rejst udstrak haand byd gæst velkom brev sagd hel idet gav haand naa end udbrød andrey mand paa sek syv tyv aar alvor godmod ans lidt skarpt regelmæs træk pand laa spor tid sorg øjn ualmind dyb tankefuld forringed indtryk ro bestemt fik hel kraft velformed skik gled let rødm pand idet slank muskelstærk fingr nervøs hast rev konvolut fremdrog stort ark papir bedæk lini vid afstand hinand skrevn uregelmæs sammentræng haandskrift hel synt mindr utaalmod gik hen lagd haand paa skuld ogsaa læs brev bedr sæt hel sagd ung mand skyg lys din krøl mer tarv vær kun sparsomt oplyst enest lamp dæk grøn papirskærm saaled kun del brædegulv ben paa simpl stol nederst del mahogni kommod vær fornemst pryd helt oplyst væg betrukn gult tapetpapir pryded bil litografi schweizisk general dufour landskab fotografi værtind afdød ægteher eksamensbevis skoletid indfat glas ram hylled diskr tusmørk klæd kunstværk umu læs andrey stilled endnu stol hen rund spisebord dæk bøg papir drejed lampeskærm saaled hjørn plejed brug 
skrivebord helt oplyst hel sat ved sid saa nær haar undertid berørt ing ænsed saa optagn tank elasticlunr-rs-3.0.2/tests/data/de.in.txt000064400000000000000000000107150072674642500164420ustar 00000000000000Briefe und die letzten Vorbereitungen füllten den gestrigen Tag. Müde und abgespannt, eigentlich krank und fiebernd stieg ich in Graz Abends 6 Uhr in den Eisenbahnwagen; erst da ich heute Morgens das Meer wieder sah und dem alten Lieblinge das freudige Θάλαττα! Θάλαττα! entgegenrufen konnte, ward mir wieder wohl in Leib und Seele. Die Nacht war kalt gewesen, wie wenn dem Kalender zum Trotze der Winter noch fortdauere. Oder wollte sich die Heimath nur eindringlich dem Scheidenden in’s Gedächtniß heften? Umsonst die Angst, daß ich sie vergesse! es liegt ja die Nothwendigkeit der Rückkehr vor mir. Lange konnte ich den Schlaf nicht finden; dafür fand ich in der Ungestörtheit des Alleinseins mich selbst wieder, der sich in den Sorgen und Mühen der letzten Monate verloren hatte. Es ist das ein Vortheil des Reisens, daß es uns mit der Unabhängigkeit auch die unabweisliche Selbständigkeit gibt; herausgerissen aus der Bequemlichkeit der gewöhnlichen Verhältnisse, zwingt es uns die Gedanken und die Hilfe, die wir sonst rechts und links neben uns schon hergerichtet fanden, nunmehr in uns selbst zu suchen. Menschen, die sich bisher noch gar nicht kannten, haben sich oft am ersten Reisetage erst erkennen lernen. Ein Gang in die weite Welt ist die beste Schule für das Leben, und gerade für uns Kinder der Civilisation eine um so unentbehrlichere, als wir in stubenhockerischen Gewohnheiten den Contact mit der Natur verloren haben. Diese und sich selbst findet der verzogene Mensch dort wieder und so auch die Freiheit, die nur dort ist, wo der Mensch allein, oder wo er fremd unter Hunderten seines Gleichen steht. Nach 6 Uhr erwache ich. 
Ich sehe den Karst, auf dessen Höhe wir fahren; die Sonne ist vom Regen versteckt, der die Steinfelder dieser Berge noch unwirthlicher als sonst erscheinen läßt. In Nabresina hält der Zug; die Bahn nach Italien trennt sich hier von der, welche den Karst hinab nach Triest führt. Der Bahnhof ist groß und zweckmäßig eingerichtet. Schon singt Alles das Italienische. Erfreut durch die bekannten Klänge beobachte ich das zu- und abströmende Gedränge. Ein Conducteur war mir darin aufgefallen, weil seine Blicke mich unablässig verfolgten. War der Mann ein Vertrauter der Polizei und hielt er mich für einen Flüchtling? Jetzt drängte er sich zu an die offene Wagenthüre, umfaßte meine Knie, er hatte mich erkannt! Es war Venerando, der Gondolier, der mich in Venedig immer geführt hatte. Wie aber auch hätte ich ihn, den zierlichen, schlanken Burschen, der mich so oft in der ärgsten Sommerhitze, nichts als ein Hemd und die leichte Hose an, nach dem Lido, nach den Inseln, nach Torcello oder nach San Francesco del Deserto gerudert hatte, in der steifen, zugeknöpften Eisenbahnuniform erkennen sollen? Früh Morgens schon klopfte er damals an meine Thüre. Ich wollte die Leute schonen und so verneinte ich die Absicht einer Fahrt. Er aber kannte die stille Neigung meiner Wünsche und aufopfernd wußte er mich bald zu überreden, mich ihm und seinem Genossen hinzugeben. Landeten wir dann nach stundenlanger Fahrt an einsam abgelegener Küste und hatte ich die Früchte, die ich mitgenommen, mit ihnen getheilt, so geleitete er mich in das Innere des Landes, dem Fremdlinge die herrlichen Reste einer abgestorbenen Kunst mit all’ dem Schönheitssinn und all’ der Liebe zu seinem Vaterlande zu erklären, die dem Südländer, und dem Italiener insbesondere, eigen sind. War ich müde geworden, so ruhten wir neben einander auf dem Strande aus, dem das Meer mit leicht aufschlagenden Wellen, die immer näher unsern Füßen kamen, vertraute Grüße aus entlegenen Fernen zubrachte. 
Sein fortwährendes Gelispel machte die Rede meines Venerando noch geschwätziger. Von Venedig erzählte er mir, das vor uns lag im Dufte gluthvoller Mittagssonne, von den Lagunen und von den Geheimnissen, die sich nächtlich darauf begeben; zuweilen auch, wenn ich ihm besonders geneigt schien, von sich und seinen Freunden und daß er schon einmal das Messer gezückt, weil man seinem Weibe zu nahe treten wollte. Ich hörte ihm immer mit regem Interesse zu; seine Worte waren gut gewählt und seine Stimme klang melodisch. Erst Abends, wenn die Sonne schon auf den schneeigen Gipfeln der Alpen ruhte, ruderte er mich zurück durch das purpurfarbene Meer nach der goldbethürmten, kuppelbedeckten Stadt. Mit mir trug ich kostbare Erinnerungen, die ich unvergeßlich festhalte und ihm treulich danke. Sein Gefährte hieß Beppo, aber er war vergleichsweise unbedeutend.elasticlunr-rs-3.0.2/tests/data/de.out.txt000064400000000000000000000044160072674642500166440ustar 00000000000000brief letzt vorbereit fullt gestrig tag mud abgespannt eigent krank fiebernd stieg graz abend uhr eisenbahnwag erst heut morg meer sah alt liebling freudig entgegenruf konnt ward wohl leib seel nacht kalt kalend trotz wint fortdau heimath eindring scheidend in’s gedachtniss heft umson angst vergess liegt ja nothwend ruckkehr lang konnt schlaf find dafur fand ungestort alleinsein sorg muh letzt monat verlor vortheil reis unabhang unabweis selbstand gibt herausgeriss bequem gewohn verhaltnis zwingt gedank hilf recht link neb schon hergerichtet fand nunmehr such mensch bish gar kannt oft erst reisetag erst erkenn lern gang weit welt best schul leb gerad kind civilisation unentbehr stubenhocker gewohn contact natur verlor findet verzog mensch freiheit mensch allein fremd hundert gleich steht uhr erwach seh karst hoh fahr sonn reg versteckt steinfeld berg unwirth erschein lasst nabresina halt zug bahn itali trennt karst hinab triest fuhrt bahnhof gross zweckmass eingerichtet schon singt italien erfreut bekannt klang 
beobacht abstrom gedrang conducteur darin aufgefall blick unablass verfolgt mann vertraut polizei hielt fluchtling drangt off wagenthur umfasst knie erkannt venerando gondoli vened imm gefuhrt hatt zierlich schlank bursch oft argst sommerhitz hemd leicht hos lido inseln torcello san francesco del deserto gerudert steif zugeknopft eisenbahnuniform erkenn soll fruh morg schon klopft damal thur leut schon verneint absicht fahrt kannt still neigung wunsch aufopfernd wusst bald uberred genoss hinzugeb landet stundenlang fahrt einsam abgeleg kust frucht mitgenomm getheilt geleitet inn land fremdling herrlich rest abgestorb kunst all schonheitssinn all lieb vaterland erklar sudland itali insbesond eig mud geword ruht neb einand strand meer leicht aufschlag well imm nah uns fuss kam vertraut gruss entleg fern zubracht fortwahr gelispel macht red venerando geschwatz vened erzahlt lag duft gluthvoll mittagssonn lagun geheimnis nachtlich darauf begeb zuweil besond geneigt schien freund schon mess gezuckt weib nah tret hort imm reg interess wort gut gewahlt stimm klang melod erst abend sonn schon schneeig gipfeln alp ruht rudert zuruck purpurfarb meer goldbethurmt kuppelbedeckt stadt trug kostbar erinner unvergess festhalt treulich dank gefahrt hiess beppo vergleichsweis unbedeut elasticlunr-rs-3.0.2/tests/data/du.in.txt000064400000000000000000000066750072674642500164740ustar 00000000000000[Doel der vertaling.] Het doel van deze vertaling is den Nederlandschen lezer in kennis te stellen met den volledigen inhoud van Dante's Gedicht. De vertaling is zooveel mogelijk woordelijk, kan dus ook als handleiding dienen bij het lezen en bestudeeren van den oorspronkelijken, Italiaanschen tekst. [Waarom in proza?] De vertaling is in Proza. Waarom? Omdat de woorden, waarin het Gedicht vervat is, den dichter werden ingegeven in het scheppingsoogenblik door de volheid zijner fantasieën, gevoelens en gedachten zelve. 
Ook in het practisch-onmogelijke, maar theoretisch stelbare geval dat de vertaler evenzeer vervuld ware als de dichter van hetgeen uitgedrukt moet worden, zoude het onmogelijk zijn, dat de tweede, de Nederlandsche dichter kwam tot een uitdrukkingsvorm, die ook maar eenigszins gelijkliep met den vorm door den eersten, den Italiaanschen dichter gevonden. Dante zelf zegt op dit stuk: "En daarom wete een ieder, dat geen enkele zaak, door den band der muziek harmonisch uitgedrukt, uit hare eigene taal in eene andere kan worden overgebracht, zonder dat men al hare zoetheid en harmonie verbreke." [Naam v.h. gedicht.] Het hier den Nederlandschen lezer aangeboden werk is het eerste van drie gedichten (Canzoni), "de Hel," "de Louteringsberg" en "het Paradijs," door Dante tezamen genoemd "Comedia", om de eenvoudige reden, dat het er in vervatte verhaal begint met 's Dichters tocht door de Hel, dus met treurigheid, vervolgens handelt van 's Dichters tocht langs den Louteringsberg en eindigt met 's Dichters tocht door den Hemel, of het Paradijs, dus een blijden afloop heeft. Comedia beteekent niet anders dan "blij-eindend Dicht." "Divina" is de Comedia eerst later door een bewonderend nageslacht genoemd. [Wat de inl. behelst.] Het Gedicht, waarin deze tocht verhaald wordt, en alles tot de kleinste bijzonderheden den lezer voor oogen wordt gesteld, kan eigenlijk geheel voor zich zelf spreken. Daar echter de Dichter op zijn tocht door die drie Rijken een ontzaggelijk groot aantal personen ontmoet, zoowel uit zijn eigen als uit vroegere tijden, hebben wij, vooral tot beter begrip van de gesprekken met personen uit 's Dichters eigen tijd, gemeend den lezer geen onwelkomen dienst te bewijzen, door eenige hoofdzaken aangaande 's Dichters leven en tijd mede te deelen. [Dante niet duister, wel diep.] 
Even wil ik nog den lezer op het hart drukken, dat het Gedicht nooit duister is, wèl op sommige plaatsen zeer diep van zin, zoodat menige plaats, behalve den eersten, bij de lezing onmiddellijk begrijpbaren zin bij nadere beschouwing blijkt nog veel meer te bevatten. Zulke plaatsen hebben dan ook aanleiding gegeven tot oneindige discussie, ten onrechte, daar er van discussie geen kwestie mocht zijn, nl. van een strijd van verschillende partijen, die ieder voor zich gelijk willen hebben, maar wel van een wedstrijd wie het diepst in den zin des dichters vermocht door te dringen. Maar vóór alles zij nog dit gezegd. Dante's Gedicht is niet maar eene schildering van zijn tijd; het is de schildering van den mensch, in al zijne vermogens en mogelijkheden, in al zijne eigenschappen, zoowel die hem tot de diepste zonde, als die hem tot den hoogsten heilstaat brengen. Daarom voert Dante, zelf alle ellende, loutering en geleidelijk-groeiend geluk doorlevend, den mensch van de gruwelijkste onvergoeilijke zonden, door die welke door boetedoeningen overwonnen kunnen worden tot het hoogste zielegeluk, d. w. z. door de Hel, langs den Louteringsberg naar den Hemel. 
elasticlunr-rs-3.0.2/tests/data/du.out.txt000064400000000000000000000041670072674642500166670ustar 00000000000000doel vertal doel vertal den nederlandsch lezer kennis stell den volled inhoud dante' gedicht vertal zoovel mogelijk woordelijk handleid dien lez bestuder den oorspronk italiaansch tekst waarom proza vertal proza waarom woord waarin gedicht vervat den dichter werd ingegev scheppingsoogenblik volheid zijner fantasieen gevoelen gedacht zelv practisch onmog theoretisch stelbar geval vertaler evenzer vervuld war dichter hetgen uitgedrukt zoud onmog twed nederlandsch dichter kwam uitdrukkingsvorm eenigszin gelijkliep den vorm den eerst den italiaansch dichter gevond dant zegt stuk daarom wet ieder enkel zak den band muziek harmonisch uitgedrukt har eig tal een overgebracht har zoetheid harmonie verbrek nam v.h gedicht den nederlandsch lezer aangebod werk eerst drie gedicht canzoni hel louteringsberg paradijs dant tezam genoemd comedia eenvoud red vervat verhal begint s dichter tocht hel treurig vervolgen handelt s dichter tocht lang den louteringsberg eindigt s dichter tocht den hemel paradijs blijd aflop comedia beteekent ander blij eindend dicht divina comedia eerst later bewonder nageslacht genoemd inl behelst gedicht waarin tocht verhaald kleinst bijzonder den lezer oog gesteld eigen gehel sprek echter dichter tocht drie rijk ontzagg grot aantal person ontmoet zoowel eig vroeger tijd wij vooral beter begrip gesprek person s dichter eig tijd gemeend den lezer onwelkom dienst bewijz eenig hoofdzak aangaand s dichter lev tijd med del dant duister wel diep even den lezer hart druk gedicht nooit duister wèl sommig plaats zer diep zin zoodat menig plat behalv den eerst lezing onmiddel begrijpbar zin nader beschouw blijkt bevat zulk plaats aanleid gegev oneind discussie ten onrecht discussie kwestie mocht nl strijd verschill partij ieder gelijk will wel wedstrijd diepst den zin des dichter vermocht dring vor gezegd dante' gedicht een schilder tijd schilder den 
mensch zijn vermogen mogelijk zijn eigenschapp zoowel diepst zond den hoogst heilstat breng daarom voert dant all ellend louter geleid groeiend geluk doorlev den mensch gruwelijkst onvergoei zond welk boetedoen overwonn hoogst zielegeluk d w z hel lang den louteringsberg den hemel elasticlunr-rs-3.0.2/tests/data/en.in.txt000064400000000000000000000110100072674642500164410ustar 00000000000000It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife. However little known the feelings or views of such a man may be on his first entering a neighbourhood, this truth is so well fixed in the minds of the surrounding families, that he is considered the rightful property of some one or other of their daughters. “My dear Mr. Bennet,” said his lady to him one day, “have you heard that Netherfield Park is let at last?” Mr. Bennet replied that he had not. “But it is,” returned she; “for Mrs. Long has just been here, and she told me all about it.” Mr. Bennet made no answer. “Do you not want to know who has taken it?” cried his wife impatiently. “_You_ want to tell me, and I have no objection to hearing it.” This was invitation enough. “Why, my dear, you must know, Mrs. Long says that Netherfield is taken by a young man of large fortune from the north of England; that he came down on Monday in a chaise and four to see the place, and was so much delighted with it, that he agreed with Mr. Morris immediately; that he is to take possession before Michaelmas, and some of his servants are to be in the house by the end of next week.” “What is his name?” “Bingley.” “Is he married or single?” “Oh! Single, my dear, to be sure! A single man of large fortune; four or five thousand a year. What a fine thing for our girls!” “How so? How can it affect them?” “My dear Mr. Bennet,” replied his wife, “how can you be so tiresome! You must know that I am thinking of his marrying one of them.” “Is that his design in settling here?” “Design! 
Nonsense, how can you talk so! But it is very likely that he _may_ fall in love with one of them, and therefore you must visit him as soon as he comes.” “I see no occasion for that. You and the girls may go, or you may send them by themselves, which perhaps will be still better, for as you are as handsome as any of them, Mr. Bingley may like you the best of the party.” “My dear, you flatter me. I certainly _have_ had my share of beauty, but I do not pretend to be anything extraordinary now. When a woman has five grown-up daughters, she ought to give over thinking of her own beauty.” “In such cases, a woman has not often much beauty to think of.” “But, my dear, you must indeed go and see Mr. Bingley when he comes into the neighbourhood.” “It is more than I engage for, I assure you.” “But consider your daughters. Only think what an establishment it would be for one of them. Sir William and Lady Lucas are determined to go, merely on that account, for in general, you know, they visit no newcomers. Indeed you must go, for it will be impossible for _us_ to visit him if you do not.” “You are over-scrupulous, surely. I dare say Mr. Bingley will be very glad to see you; and I will send a few lines by you to assure him of my hearty consent to his marrying whichever he chooses of the girls; though I must throw in a good word for my little Lizzy.” “I desire you will do no such thing. Lizzy is not a bit better than the others; and I am sure she is not half so handsome as Jane, nor half so good-humoured as Lydia. But you are always giving _her_ the preference.” “They have none of them much to recommend them,” replied he; “they are all silly and ignorant like other girls; but Lizzy has something more of quickness than her sisters.” “Mr. Bennet, how _can_ you abuse your own children in such a way? You take delight in vexing me. You have no compassion for my poor nerves.” “You mistake me, my dear. I have a high respect for your nerves. They are my old friends. 
I have heard you mention them with consideration these last twenty years at least.” “Ah, you do not know what I suffer.” “But I hope you will get over it, and live to see many young men of four thousand a year come into the neighbourhood.” “It will be no use to us, if twenty such should come, since you will not visit them.” “Depend upon it, my dear, that when there are twenty, I will visit them all.” Mr. Bennet was so odd a mixture of quick parts, sarcastic humour, reserve, and caprice, that the experience of three-and-twenty years had been insufficient to make his wife understand his character. _Her_ mind was less difficult to develop. She was a woman of mean understanding, little information, and uncertain temper. When she was discontented, she fancied herself nervous. The business of her life was to get her daughters married; its solace was visiting and news.elasticlunr-rs-3.0.2/tests/data/en.out.txt000064400000000000000000000041440072674642500166540ustar 00000000000000truth univers acknowledg singl man possess good fortun want wife littl known feel view such man first enter neighbourhood truth well fix mind surround famili consid right properti on daughter mr bennet ladi on day heard netherfield park last mr bennet repli return mr long here told mr bennet made answer want know taken cri wife impati _you_ want tell object hear invit enough know mr long netherfield taken young man larg fortun north england came down monday chais four see place much delight agre mr morri immedi take possess befor michaelma servant hous end next week name bingley marri singl oh singl sure singl man larg fortun four five thousand year fine thing girl affect mr bennet repli wife tiresom know think marri on design settl here design nonsens talk veri _may_ fall love on therefor visit soon come see occas girl go send themselv perhap still better handsom mr bingley best parti flatter certainli _have_ share beauti pretend anyth extraordinari now woman five grown up daughter ought give 
over think beauti such case woman much beauti think inde go see mr bingley come neighbourhood more engag assur consid daughter think establish on sir william ladi luca determin go mere account gener know visit newcom inde go imposs _us_ visit over scrupul sure dare mr bingley veri glad see send few line assur hearti consent marri whichev choos girl though throw good word littl lizzi desir such thing lizzi bit better other sure half handsom jane half good humour lydia alway give _her_ prefer none much recommend repli silli ignor girl lizzi someth more quick sister mr bennet _can_ abus children such way take delight vex compass poor nerv mistak high respect nerv old friend heard mention consider last twenti year ah know suffer hope over live see mani young men four thousand year come neighbourhood us twenti such come visit depend upon twenti visit mr bennet odd mixtur quick part sarcast humour reserv capric experi three twenti year insuffici make wife understand charact _her_ mind less difficult develop woman mean understand littl inform uncertain temper discont fanci herself nervou busi life daughter marri solac visit new elasticlunr-rs-3.0.2/tests/data/es.in.txt000064400000000000000000000052550072674642500164640ustar 00000000000000En el piso bajo de la izquierda de una humilde pero graciosa y limpia casa de la calle de Preciados, calle muy estrecha y retorcida en aquel entonces, y teatro de la refriega en tal momento, vivían[13] solas, esto es, sin la compañía de hombre ninguno, tres buenas y piadosas[14] mujeres, que mucho se diferenciaban entre sí en cuanto al ser físico y estado social, puesto que éranse que se eran[15] una señora mayor, viuda, guipuzcoana, de aspecto grave y distinguido; una hija suya, joven, soltera, natural de Madrid, y bastante guapa, aunque de tipo diferente al de la madre (lo cual daba a entender que había salido en todo a su padre),[16] y una doméstica,[17] imposible de filiar o describir, sin edad, figura ni casi sexo determinables, 
bautizada, hasta cierto punto,[18] en Mondoñedo, y a la cual ya hemos hecho demasiado favor (como también se lo hizo aquel señor Cura) con reconocer que pertenecía a la especie humana... La mencionada joven parecía el símbolo o representación, viva y con faldas,[19] del sentido común: tal equilibrio había entre su hermosura y su naturalidad, entre su elegancia y su sencillez, entre su gracia y su modestia. Facilísimo[20] era que pasase inadvertida por la vía pública, sin alborotar a los galanteadores de oficio, pero imposible que nadie dejara de admirarla[21] y de prendarse de sus múltiples encantos,[22] luego que fijase en ella la atención.[23] No era, no (o, por mejor decir, no quería ser), una de esas beldades llamativas, aparatosas, fulminantes, que atraen todas las miradas no bien se presentan en un salón, teatro, o paseo, y que comprometen o anulan al pobrete que las acompaña, sea novio, sea marido, sea padre, sea el mismísimo Preste Juan de las Indias...[24] Era un conjunto sabio y armónico de perfecciones físicas y morales, cuya prodigiosa regularidad no entusiasmaba al pronto, como no entusiasman la paz y el orden; o como acontece con los monumentos bien proporcionados, donde nada nos choca ni maravilla hasta que[25] formamos juicio de que,[26] si todo resulta llano, fácil y natural, consiste en que todo es igualmente bello. Dijérase[27] que aquella diosa honrada de la clase media había estudiado su modo de vestirse, de peinarse, de mirar, de moverse, de conllevar, en fin, los tesoros de su espléndida juventud, en tal forma y manera, que no se la creyese pagada[28] de sí misma, ni presuntuosa, ni incitante, sino muy diferente de las deidades por casar que hacen feria de sus hechizos y van por esas calles[29] de Dios diciendo a todo el mundo: _Esta casa se vende... o se alquila_. Pero no nos detengamos en floreos ni dibujos,[30] que es mucho lo que tenemos que referir, y poquísimo el tiempo de que disponemos. 
elasticlunr-rs-3.0.2/tests/data/es.out.txt000064400000000000000000000025110072674642500166550ustar 00000000000000pis baj izquierd humild gracios limpi cas call preci call estrech retorc aquel entonc teatr refrieg tal moment viv sol compañ hombr ningun tres buen piad mujer diferenci cuant ser fisic social puest erans señor mayor viud guipuzcoan aspect grav distingu hij jov solter natural madr bastant guap aunqu tip diferent madr dab entend sal padr domest impos fili describ edad figur casi sex determin bautiz ciert punt mondoñed hech demasi favor hiz aquel señor cur reconoc pertenec especi human mencion jov parec simbol represent viv fald sent comun tal equilibri hermosur natural eleg sencillez graci modesti facilisim pas inadvert via public alborot galant ofici impos nadi dej admir prend multipl encant lueg fij atencion mejor dec quer ser beldad llamat aparat fulmin atra tod mir bien present salon teatr pase compromet anul pobret acompañ novi mar padr mismisim prest juan indi conjunt sabi armon perfeccion fisic moral cuy prodigi regular entusiasm pront entusiasm paz orden acontec monument bien proporcion choc maravill form juici si result llan facil natural cons igual bell dijer aquell dios honr clas medi estudi mod vest pein mir mov conllev fin tesor esplend juventud tal form maner creyes pag mism presuntu incit sin diferent deidad cas hac feri hechiz van call dios dic mund cas vend alquil deteng flore dibuj refer poquisim tiemp dispon elasticlunr-rs-3.0.2/tests/data/fi.in.txt000064400000000000000000000072230072674642500164500ustar 00000000000000MÄKÄRÄ. _(Kumarrellen sakastin rappusilla.)_ Hyvä herra, armollinen kirkonpalvelija... SUNTIO. Ka, mene, mene! Ei tänne saa tulla. MÄKÄRÄ. Enhän minä .. suokaa anteeksi.. Olisin vaan kaikessa nöyryydessäni tullut herran temppeliin. SUNTIO. Sinä et kuitenkaan malta olla yhdessä kohden. Juoksentelet ympäri kirkkoa ja höpiset jonkin joutavaa. MÄKÄRÄ. He, hee.. Höpisenkö minä?.. No ei sitten. 
Tämä pääkartano ei tahdo taas pitää kutiaan. _(Hän nauraa kähäyttää.)_ SUNTIO. Senpätähden saat pysyä poikessa. MÄKÄRÄ. _(Vedet silmissä.)_ Ni-ni-niinhän tuota pitänee.. Kas, pääsky lensi.. Mutta mitä pahaa minä sitten olen tehnyt, kun sinä minua ajelet kirkosta pois? SUNTIO. Ethän sinä pysy yhdessä kohdenkaan, ja kun pappi saarnassaan muun muassa sanoi: kyllä herra hullut hoitaa, niin silloin sinä heti ääneen huusit että kyllä se hoitaa. Sopiiko tämä nyt kirkossa? MÄKÄRÄ. _(On hetken alakuloisen näköinen, mutta sitten iloisesti.)_ Mutta hoitaakinhan se. Mitäs pahaa siinä on? SUNTIO. Eihän kirkossa saa huutaa ja juoksennella. Ymmärräthän sen sinäkin. MÄKÄRÄ. Niin .. niinhän se on, eihän sitä saisi, vaan kun ne henget viettelevät, niin minkäpä sille taitaa. SUNTIO. Miten ne henget viettelevät? MÄKÄRÄ. Nekö? Nehän pitävät Mäkärää aivan narrillaan. Suhkavat kirkossakin korvaan: juokse, juokse!.. ja silloin täytyy juosta. Toinen tulee ja kuiskaa: elä vainenkaan juokse, elä vainenkaan juokse, mutta huuda .. ja minä huudan. Muutoin ne tekisivät kerrassa kummia, niitä täytyy totella. Kyllä ne kirkossa kumminkin vähän siivommalla ovat, mutta, vie sun, kun ne kotona vehkeilevät, niin jos siinä ei ole sen seitsemässäkin höyräkässä. Se pappa-piru on -- koira vieköön -- kaikista ilkein .. katsos.. _(Hän levittää kätensä ja sormensa, panee naamansa hyvin julman näköiseksi, irvistää ja hyppää suntioon päin.)_ Näin se tekee. _(SUNTIO ärjäsee ja vetäytyy säikähtäen taaksepäin.)_ MÄKÄRÄ. _(Nauraa viekkaasti ja räpyttää silmiään.)_ Ei tämä mitään Ole sen suhteen.. Vaan niin se tekee ja ottaa kirveen penkin alta ja huutaa: Mäkärä, pane pää pölkylle! Ja jos minä silloin en olisi tiukkana ja sukkelana, niin arvaathan sen, mitä se piru silloin tekisi. Mutta minä hyppään näin ikään .. taaksepäin, kun se tulee kirveineen ja manaan häntä jumalan nimessä menemään pois. 
Sitten se vähitellen vetäytyy jonkun pimeän nurkan kautta pellolle, mutta kauvanhan peijakkaan mustat silmät sieltä vielä kiiluvat. SUNTIO. Katsos peijakasta. Kyllä kai se minua peloittaisi. MÄKÄRÄ. Vaarassa niiden kanssa toki onkin tuolla kulkiessaan. Kun vaan kaivon kohdalle sattuu, niin silloin tuo, joka on kaikista suurin roisto, tuo Ansgaarius, joka kirkkoonkin tulee .. se tuntuu aivan kuin niskasta kiini ottavan ja suhkaa: Mäkärä, Mäkärä, hyppää kaivoon! Vaan silloin minä pyöräytän sitä tuolla lailla .. ja alan juosta sen minkä käpälästä lähtee.. Kas, kas; tuolla kun oriit tappelevat. SUNTIO. _(Juoksee katsomaan.)_ Missä .. missä? MÄKÄRÄ. _(Kiiruhtaa sakastin rappusia ylös päästäkseen kirkkoon.)_ Siellä, siellä.. He, he, he!.. He, he, he! SUNTIO. _(Rientää heti Mäkärän perästä ja tapaa hänet kiini vaatteen liepeestä juuri sakastin ovella.)_ Eläpäs menekään.. Kas peijakkaan, kun oli sukkela. MÄKÄRÄ. _(Vetäytyy siivosti takaisin ja viekkaasti nauraen räpyttää silmiään.)_ Ilmanhan minä vaan säikäytin. Pidä vasta ovesi tarkemmin kiini, ettei syntinen kirkkoon pääse.. Hähä! Jo tulen. _(SUNTIO menee sakastiin ja sukkelasti vetää oven kiini jälessään.)_ MÄKÄRÄ. 
elasticlunr-rs-3.0.2/tests/data/fi.out.txt000064400000000000000000000040300072674642500166420ustar 00000000000000mäkär kumarrel sakast rappus hyvä her armollin kirkonpalvelij suntio ka mene mene tän saa tul mäkär en suoka ant kaike nöyryyd tulu her temppel suntio kuite mal yhd kohd juoksentel ympär kirko höpis jon joutav mäkär hee höpis no sit pääkartano tahdo taas pitä kutia naura kähäyt suntio senpätähd saat pysy poike mäkär vede silm ni ni niin pitän kas pääsky len paha sit tehny ajel kirko pois suntio et pysy yhd kohd pap saarn muun muas sanoi kyl her hulu hoita silo heti ääne huusi kyl hoita sopi kirko mäkär hetk alakulois näköin sit ilois hoitaak mitäs paha suntio eihä kirko saa huuta juoksen ymmär sinä mäkär niin eihä saisi heng viettelev mink taita suntio mite heng viettelev mäkär nekö nehä pitäv mäkär aiva nar suhkav kirko korv juoks juoks silo täytyy juos toine tule kuisk elä vaine juoks elä vaine juoks huuda huuda muuto tekisiv ker kum täytyy tote kyl kirko kum vähä siivom vie sun koto vehkeilev seitsem höyräk pap piru koira viekö kaik ilk katsos levit käte sorm pane naama hyv julm näköis irvist hyp suntio päin näin teke suntio ärjäs vetäytyy säikähtäe taaksep mäkär naura viekkaast räpyt silmiä mitä suht teke ot kirv pen al huuta mäkär pane pää pölky silo tiuk sukkel arv piru silo teki hyp näin ikä taaksep tule kirv mana jumal nime menem pois sit vähitel vetäytyy jonku pimeä nurk kaut pelo kauva peijak must silm siel vielä kiiluv suntio katsos peijak kyl kai peloitai mäkär vaara toki on kulkie kaivo kohd satu silo kaik suur roisto ansgaarius kirko tule tuntu aiva nisk kiini ottav suhk mäkär mäkär hyp kaivo silo pyöräyt lail ala juos käpäl läht kas kas ori tappelev suntio juoks katsom mäkär kiiruht sakast rappus ylös päästäks kirko siel siel suntio rient heti mäkär perä tapa kiini vaat liepe juuri sakast ove eläpäs mene kas peijak sukkel mäkär vetäytyy siivost takais viekkaast naurae räpyt silmiä ilm säikäyt pidä vas ove tarkem kiini etei syntin kirko 
pääse hähä jo tule suntio mene sakast sukkel vetä ove kiini jäle mäkär elasticlunr-rs-3.0.2/tests/data/fr.in.txt000064400000000000000000000073450072674642500164660ustar 00000000000000Zanette, c'était son nom de Jeanne, de Jeannette, comme elle le prononçait en zézayant, lorsqu'elle était toute petite. Tel il lui était resté. Ce qui, aussi, lui était resté, c'était sa grâce d'enfance, on ne sait quoi de tout mignon, de plus jeune qu'elle-même. Elle était belle de ses beaux seize ans, de son profil de Grecque, et de ses cheveux noirs, qui, sous le hennin à l'arlésienne, pendaient lourdement sur la blancheur dorée de son cou. Elle avait seize ans avec l'air d'en avoir douze. Pourtant, on sentait la vie jeune et forte palpiter dans la chapelle, c'est-à-dire dans l'entre-bâillement des fichus aux plis innombrables, qui laissent voir un peu de la poitrine nue sur laquelle brille la croix d'or suspendue à la chaînette des grand'mères. Zanette vivait à la ferme de la Sirène, bien tranquille à soigner ses poules, ses lapins, auprès de son père, maître Augias, le bayle. À l'ordinaire elle allait en Arles tous les dimanches. Et bien souvent, assise au bord du Petit Rhône, seule, sous les saules et les aubes, elle rêvait en regardant l'eau, l'eau qui s'en allait vers la mer, vers la mer si grande, où des bateaux vont et viennent, comme des bêtes de rêve, comme de grands oiseaux aux ailes blanches.... Un songe d'inconnu accompagnait toujours Zanette. Ses beaux seize ans espéraient. ...N'est-ce pas qu'elle porte un joli nom, la ferme de la Sirène? La Sirène (la Sereno) si vous interrogez les paysans, ils vous le diront, est un oiseau de passage, qui jamais ne s'arrête chez nous, et qui traverse seulement notre ciel, très haut. Quelquefois, le laboureur, en novembre, arrête son attelage, parce qu'il a entendu une harmonie lointaine, confuse, comme un son prolongé de viole ou de mandoline.... Et il écoute, en rêvant.... Ce sont les sirènes qui passent là-haut, tout là-haut. 
Elles sont plus petites que des tourterelles et leurs plumes miroitantes ont toutes les couleurs de l'arc-en-ciel. On ne sait pas si la musique qu'elles font sort de leur gosier ou vient simplement de le vibration de leurs ailes. On croit plutôt que leur vol est harmonieux. Leur voix y ajoute une seule note qui, de temps en temps, scande et domine la mélodie des ailes.... Un jour, dit-on, comme on venait à peine de construire le château et sa ferme, une sirène un instant se posa sur le bouquet de tamaris en fleurs que les maçons plantent au bout d'une perche, sur la toiture, dès qu'elle est achevée. Et le château, et la ferme qui le touche, furent, voilà bien longtemps, baptisés du nom qu'ils portent encore. Entre la ferme et la château, une vieille chapelle décrépite, où jadis on disait la messe, se dresse, étroite et longue. On la dirait bâtie sur le modèle des huttes camarguaises. Les huttes sont en «tape», en argile desséchée, recouvertes de roseaux, et la chapelle est en moellons, et recouverte de pierres plates, mais les deux toits ont la même forme, celle d'un bateau long, la quille en l'air; et sur leurs toitures, les cabanes, aussi bien que la chapelle, portent toutes une croix penchée, comme renversée en arrière. Toutes ces croix penchantes font songer au mistral éternel qui incline ainsi un peu tous les arbres des plaines provençales, dans la même direction. Tous ils gardent un peu la marque du vent maître, «magistral», à qui les Romains avaient élevé un temple, comme à la puissance divine, protectrice de ce pays qu'il balaye et assainit sans cesse.... Elles donnent encore, les petites croix qu'on plante ainsi à dessein penchées, l'impression des choses de la religion, à la fois vaincues et résistantes. 
Elles sont là, tenaces mais inclinées, jamais arrachées mais toujours penchantes, et elles disent le triomphe obstiné d'une foi sans relâche battue des vents....elasticlunr-rs-3.0.2/tests/data/fr.out.txt000064400000000000000000000043040072674642500166570ustar 00000000000000zanet c'et nom jeann jeannet comm prononc zézai lorsqu'el tout petit tel rest auss rest c'et grâc d'enfanc sait quoi tout mignon plus jeun qu'el bel beau seiz an profil grecqu cheveux noir sous hennin l'arlésien pend lourd blancheur dor cou seiz an l'air d'en avoir douz pourt sent vi jeun fort palpit chapel c'est dir l'entr bâill fichus plis innombr laissent voir peu poitrin nu laquel brill croix d'or suspendu chaînet grand'mer zanet viv ferm siren bien tranquill soign poul lapin aupres per maîtr augi bayl l'ordinair allait arle tous dimanch bien souvent assis bord pet rhôn seul sous saul aub rêv regard l'eau l'eau s'en allait ver mer ver mer si grand où bateau vont viennent comm bêt rêv comm grand oiseau ail blanch song d'inconnu accompagn toujour zanet beau seiz an esper n'est qu'el port jol nom ferm siren siren sereno si interrog paysan diront oiseau passag jam s'arrêt chez travers seul ciel tres haut quelquefois laboureur novembr arrêt attelag parc qu'il a entendu harmon lointain confus comm prolong viol mandolin écout rêv siren passent là haut tout là haut elle plus petit tourterel plum miroit tout couleur l'arc ciel sait si musiqu qu'el font sort gosi vient simpl vibrat ail croit plutôt vol harmoni voix ajout seul not temp temp scand domin mélod ail jour dit comm ven pein construir château ferm siren instant pos bouquet tamar fleur maçon plantent bout d'un perch toitur des qu'el achev château ferm touch voilà bien longtemp baptis nom qu'il portent encor entre ferm château vieil chapel décrépit où jad dis mess dress étroit longu dir bât model hutt camarguais hutt tap argil dessech recouvert roseau chapel moellon recouvert pierr plat deux toit form cel d'un bateau long quill l'air toitur 
caban auss bien chapel portent tout croix pench comm renvers arrier tout croix pench font song mistral éternel inclin ains peu tous arbre plain provençal direct tous gardent peu marqu vent maîtr magistral romain élev templ comm puissanc divin protectric pay qu'il balay assain cess elle donnent encor petit croix qu'on plant ains dessein pench l'impress chos religion fois vaincu résist elle là tenac inclin jam arrach toujour pench elle disent triomph obstin d'un foi relâch battu vent elasticlunr-rs-3.0.2/tests/data/hu.in.txt000064400000000000000000000116160072674642500164670ustar 00000000000000Színházak és újságok, mintha tartanának tőle, hogy valaki megelőzi őket az ünneplésben, nem tudják bevárni március tizenötödikét. Korán kezdik az ünneplést és valószínűleg nem sietnek majd a befejezésével sem. Nagyon szép vonás ez. Az igazi ünnep nem is férhet bele a huszonnégy órás keretbe, s a mesék is úgy tanítják, hogy a nagy lakodalmak három hétig tartanak. Hadd kezdődjék hát a nagy nemzeti ünnep és tartson el akár április végéig, legalább elhelyezkedik ebben a nagy keretben mindenkinek a választott dátuma, meg a hivatalos is. Ma hegyen-völgyön tart a széles jókedv, szépen összeolvad az ünnepi hangulatban március tizenötödike és április tizenegyedike, s ebben az ünnepi folytonosságban lesz valami költői igazság is. A megindult ünneplésből azonban csak egy kis epizódra tereljük ezúttal a figyelmet. Csekélység az egész, nem is bevallott pont az ünnepi programban. De nekünk úgy tetszik, hogy a véletlen sokkal szebben tud ünnepet csinálni, mint a közönséges halandó. És a nemzet voltaképpen hálával tartozik annak az ismeretlen egyetemi hallgatónak (bolond diszkréció, hogy az újságok nem akarják a nevét kiírni), aki annakidején megvette hitelbe a Jókai Mór összes munkáit és a kikötött részleteket, természetesen, nem fizette meg. 
Kiskorú volt a diák, s a könyvkereskedő ennélfogva a szülőt fogta pörbe, de az apa nem akart fizetni, mondván, hogy kiskorú fiának csak olyan vásárlásaiért felelős, amelyek fenntartásához föltétlenül szükségesek. Így történt, hogy a bíróságnak ítélkeznie kellett a pörös dologban és Kiss Ferenc táblai bíró mint a kerületi járásbíróság vezetője a diák apját elmarasztalta, mert Jókai művei nem jelentenek fényűzési cikket, hanem oly szükséges beszerzése minden egyetemi polgárnak, amelyért, ha a diák kiskorú, az apa teljes felelősséggel tartozik. ...Hogy ez a kis diák, aki nem tud fizetni, ez az apa, aki nem akar fizetni, s ez a bíró, aki az ötvenedik márciusban ítélkezett: hárman együtt micsoda szép ünnepet csináltak Jókai Mórnak, az nem tartozik ebbe a strófába. Az agg költő szeme talán megnedvesedik tőle, és eszébe jut, hogy a negyvennyolcadiki márciustól a mostani tavaszig nem dolgozott hiába. Talán megtelik a szíve örömmel, hogy abban a fiúban és abban az apában két generáció mérkőzött miatta, és az elfogulatlan bíró döntése szerint a költő munkája nem fényűzési cikk, hanem olyan bevásárlás, amely az ifjúság fenntartásához szükséges. Ez a költő külön diadala és külön ünnepe, s mind a kettőt becsületesen megérdemelte. Nekünk pedig jólesik, hogy a nagy március ötvenedik évfordulójára a véletlen ilyen örömet tartogatott annak az embernek, aki mind a két március - nem, mind az ötven március munkájából és dicsőségéből kivehette részét. De programon kívül szép epizódja ez az ítélet a nemzeti ünneplésnek is. Ha az a kis diák véletlenül nem csinálja ezt az adósságot, szándékosan kellett volna egyet fogni a helyébe, csak hogy ezen az emlékezetes tavaszon ilyen ítélet hangozhassék el. Mert csonka ünnep volna, ha azt az időt, mikor a magyar nemzet költői jártak legelül, a nemzeti irodalom megbecsülésének különösebb jele nélkül ünnepelnők meg. 
Hogy sírok és emlékszobrok koszorúkkal fognak megtelni, hogy zarándok népnek élén dicsőítő beszédek fognak ott elhangozni, az nem minden, ami az ünneplő nemzettől kitelik. Ha idegen nemzetek volnának olyan szerencsés helyzetben, hogy az ünneplésre megérett korszak szereplői közül élőket is lássanak körükben, sohasem mulasztanák el, hogy azokat tegyék az ünneplés középpontjává. S ha ilyen korszakokból egy istentől megáldott poéta, egy egész külön irodalom forrása járna közöttük, az idegen nemzetek egy percig sem haboznának, hogy az egész ünnepben ezt az írót, ezt az eleven emléket tiszteljék meg. Mert abban a poétában látnák a múlt harcos apostolát, aki ott volt a kezdet kezdetén, amikor a szabadságfa levelezni kezdett, és a végzet különös kegyelméből itt van ma is, amikor e fának árnyékában az unokák találnak pihenőt. S ha nem is kell szükségképpen a spanyolok példáját követni, akik kisebb alkalomból és kisebb poétával szemben, a vérük tüzes fellobbanásában, költőkirállyá koronázták Zorillát, talán a magyar nemzet is módját találhatta volna, hogy ünnepében Jókai is legyen egy - programpont. S mi azért találjuk a tegnapi bírói döntést olyan szép epizódnak, mert teljessé teszi az ünnepet. És függetlenül az írótól, aki véletlenül a nemzeti szabadságnak is első katonái közül való, éppen a nagy március jubileumán mondja ki, hogy a nagy nemzeti írók munkája nem fényűzési cikk, hanem az önfenntartáshoz szükséges, mint a kenyér. 
elasticlunr-rs-3.0.2/tests/data/hu.out.txt000064400000000000000000000053740072674642500166740ustar 00000000000000színház újság tartan megelőz ünneplés tudja bevárn március tizenötöd kor kezd ünneplés valószínűleg siet befejezés szép vonás igaz ünnep férh bel huszonnégy órás keret mese tanítja lakodalm hét tart had kezdődje nemzet ünnep tarts április vég elhelyezked keret minden választot dátum hivatalos hegy völgy tar széles jókedv összeolv ünnep hangulat március tizenötöd április tizenegyed ünnep folytonosság költő igazság megindul ünneplés kis epizó terel ezú figyel csekélység bevallot pon ünnep progr tetsz véletl szeb tu ünnep csináln közönséges halandó nemz volt hál tartoz ismeretl egyetem hallgató bolon diszkréció újság akarja nev kiírn annakide megvett hitel jó mór összes munka kikötöt részlet természetes fizett kiskorú dia könyvkereskedő ennélfogv szülő fogt pör ap akar fizetn mondv kiskorú fi vásárlás felelős fenntartás föltétlen szükséges tört bíróság ítélkezn pörös dolog kiss ferenc tábl bíró kerület járásbíróság vezető dia ap elmarasztalt jó műv jelent fényűzés cik oly szükséges beszerzés egyetem polgár amely dia kiskorú ap felelősség tartoz kis dia tu fizetn ap akar fizetn bíró ötvened március ítélkezet hár micsod szép ünnep csinált jó mór tartoz stróf agg költő szem megnedvesed esz ju negyvennyolcadi március mostan tavasz dolgozot megtel szív örö fiú ap generáció mérkőzöt elfogulatl bíró döntés költő munká fényűzés bevásárlás ifjúság fenntartás szükséges költő diadal ünnep becsületes megérdemelt március ötvened évforduló véletl örö tartogatot ember március március munká dicsőség kivehett rész progr szép epizód ítél nemzet ünneplés kis dia véletlen csinál adósság szándékos fogn hely emlékezetes tavasz ítél hangozhasse cson ünnep idő magyar nemz költő járt legel nemzet irodal megbecsülés különösebb jel ünnepelnő sír emlékszobr koszorú fog megteln zaránd nép él dicsőítő beszéd fog elhangozn ünneplő nemzet kitel ideg nemzet szerencsés helyzet ünneplés 
megéret korsz szereplő élő láss kör sohas mulasztana tegye ünneplés középpont korszak isten megáldot poét irodal forrás járn között ideg nemzet perc habozn ünnep író elev emle tisztelje poét látna múl harcos apostol kez kezdet szabadságf levelezn kezdet végz különös kegyelm fá árnye unoka talál pihenő szükség spanyol példá követn kisebb alkal kisebb poét vér tüzes fellobbanás költőkirály koronázta zorill magyar nemz mód találhatt ünnep jó programpon talál tegnap bíró döntés szép epizó teljes tesz ünnep független író véletlen nemzet szabadság katona március jubileum mond nemzet író munká fényűzés önfenntartás szükséges kenyér elasticlunr-rs-3.0.2/tests/data/it.in.txt000064400000000000000000000122230072674642500164620ustar 00000000000000Al cadere d'una bella giornata d'aprile dell'anno 1503 la campana di San Domenico in Barletta sonava gli ultimi tocchi dell'avemaria. Sulla piazza vicina in riva al mare, luogo di ritrovo degli abitanti tranquilli che, nelle terricciuole dei climi meridionali specialmente, sogliono sulla sera essere insieme a barattar parole al sereno per riposarsi dalle faccende del giorno, stavano col fine medesimo dispersi in varj gruppi molti soldati spagnuoli ed italiani, alcuni passeggiando, altri fermi, o seduti, od appoggiati alle barche tirate a secco, delle quali era ingombra la spiaggia, e, com'è costume delle soldatesche d'ogni età e d'ogni nazione, il loro contegno era tale che pareva dire: il mondo è nostro. Di fatto, lasciato loro il campo migliore, si tenevano i terrazzani in disparte, dando così a questa loro burbanza tacita approvazione. Chi per figurarsi questo quadro si volesse rappresentare una simile radunata de' nostri soldati moderni nella loro misera _uniforme_, sarebbe lontano assai dall'averne una giusta immagine. 
L'esercito di Consalvo, le fanterie specialmente, quantunque le meglio in arnese, e le migliori di tutta cristianità, non conoscevano però, più di qualunque altra milizia del secolo XVI, la stretta disciplina moderna, che è giunta a render simili un soldato all'altro dalle scarpe al cappello. Qui invece, ogni uomo che facesse il mestier dell'arme a piede o a cavallo, poteva vestirsi, armarsi ed adornarsi come più gli piacesse; onde nasceva fra questa turba una mirabile varietà e vaghezza nelle fogge, ne' colori e nel portamento, dal quale si poteva facilmente conoscere a qual nazione appartenesse ogni individuo. Gli Spagnuoli, per lo più serii, immobili, atteggiati da bravacci, ed avvolti (o com'essi dicono _embozados_) nella _capa_ nazionale, dalla quale si vedeva uscir per di sotto la lunga e sottil lama di Toledo; gl'Italiani loquaci e pronti al gestire, in sajo od in farsetto colla daga pistolese appesa dietro le reni. Al sonare della campana era cessato il susurro, e scomparendo la maggior parte de' cappelli, le teste eran rimaste scoperte, perchè in quel tempo anche i soldati credevano in Dio, e talvolta lo pregavano. Dopo piccola pausa tornarono a luogo i cappelli, ricominciò il bisbiglio; e benchè quella turba presa insieme avesse al primo aspetto un non so che di gajo e di vivace, si poteva tuttavia facilmente avvedersi, girando fra i diversi crocchi, esservi un motivo comune di tristezza e di scoramento, al quale erano volte le menti e le parole di tutti. Infatti il motivo era vero e possente. La fame cominciava a farsi sentire fra i soldati ed anche fra gli abitanti di Barletta, ove il gran Capitano, aspettando i tardi ajuti di Spagna, teneva chiuso l'esercito di troppo inferiore a quello dei Francesi perchè s'arrischiasse commetter la somma delle cose alla fortuna d'una giornata. Tre lati della piazza erano chiusi da certe povere case di marinaj e pescatori, dalla chiesa e dall'osteria. 
Il quarto s'apriva alla marina, ingombro, com'è costume di tali luoghi, di barche, reti e di altri attrezzi pescherecci; ed all'ultima linea dell'orizzonte si vedeva sorgere dal seno delle acque la bruna forma del monte Gargano, sulla cui vetta andava morendo l'ultimo raggio del sole cadente. Nello spazio frapposto, veleggiava chetamente un legno sottile; e si volgeva tratto tratto per cercare il vento che soffiava incostante in quel golfo, increspando qua e là a lunghe strisce la superficie del mare. La distanza tuttavia della nave e la dubbia luce del crepuscolo non lasciavano distinguere qual fosse la sua bandiera. Uno Spagnuolo, che insieme con molti soldati era presso alla riva, la guardava fisso, aguzzando le ciglia, ed attorcigliandosi certi grandissimi baffi più bigi che neri. --Che cosa guardi che sembri una statua, e non dai retta a chi discorre con te?-- Quest'apostrofe d'un soldato napoletano, che non avendo ottenuta risposta ad una prima domanda, se l'aveva per male, non mosse nè punto nè poco l'imperturbabile Spagnuolo. Alla fine con un sospiro che pareva uscire più da un mantice che dal petto d'un uomo, disse: --_Voto a Dios que nuestra segnora de Gaeta_, che manda buon vento e buon cammino a tanti che la pregano in mare, potrebbe mandar ora questa fusta a noi che la preghiamo in terra, e non abbiamo da metter sotto i denti altro che il calcio dell'archibuso! Chi sa che non porti grano e provvisioni a quei _descomulgados_ di Francesi che ci tengono stretti in questa gabbia per farci morir di fame...... _Y mala Pasqua me de Dios y sea la primera que viniere, si a su gracia el segnor Gonzalo Hernandez_[1] quando ha ben pranzato e meglio cenato gl'importa di noi più che del _cuero de sus zapatos_[2]. --Che cosa può far Consalvo?--rispose con istizza il Napoletano, contento di contraddire:--dovrà diventar pane per entrar in corpo ad una bestia come te? 
Quando ne avrà, ne darà; e le navi che il malanno loro ha portate nelle secche di Manfredonia, chi l'ha divorate? Consalvo, o voi altri?-- Lo Spagnuolo un po' mutato in viso mostrava di voler rispondere, ma fu interrotto da un altro del crocchio, il quale, battendogli sulla spalla, scuotendo la testa, ed abbassando la voce, come per dar maggior peso alle parole,elasticlunr-rs-3.0.2/tests/data/it.out.txt000064400000000000000000000057710072674642500166750ustar 00000000000000cad d'un bell giorn d'april dell'ann campan san domen barlett son ultim tocc dell'avemar piazz vicin riv mar luog ritrov abit tranquill terricciuol clim meridional special sogl ser esser insiem baratt parol seren ripos facc giorn fin medesim disp varj grupp molt sold spagnuol italian alcun passegg altri ferm sed od appogg barc tir secc qual ingombr spiagg com' costum soldatesc d'ogn età d'ogn nazion contegn tal par dir mond fatt lasc camp miglior ten terrazzan dispart dand cos burbanz tac approv figur quadr voless rappresent simil radun de sold modern miser uniform lont assa dall'av giust immagin l'eserc consalv fanter special quantunqu megl arnes miglior tutt cristian conosc per qualunqu altra miliz secol xvi strett disciplin modern giunt render simil sold all'altr scarp cappell qui invec ogni uom mestier dell'arm pied cavall pot vest armars adorn piacess onde nasc fra turb mirabil variet vaghezz fogg color port pot facil conosc qual nazion apparteness ogni individu spagnuol ser immobil attegg bravacc avvolt com'ess dic embozados cap nazional ved uscir sott lung sottil lam toled gl'italian loquac pront gest saj od farsett coll dag pistoles appes dietr ren son campan cess susurr scompar maggior part de cappell test eran rimast scopert perc quel temp sold cred dio talvolt preg dop piccol paus torn luog cappell ricominc bisbigl benc turb pres insiem prim aspett so gaj vivac pot tuttav facil avved gir fra div crocc esserv mot comun tristezz scor volt ment parol infatt mot ver possent fam cominc fars 
sent fra sold fra abit barlett ove gran capit aspett tard ajut spagn ten chius l'eserc tropp inferior frances perc s'arrisc commetter somm cos fortun d'un giorn tre lat piazz chius cert pov cas marinaj pescator chies dall'oster quart s'apr marin ingombr com' costum tal luog barc ret altri attrezz pescherecc all'ultim line dell'orizzont ved sorg sen acque brun form mont garg vett andav mor l'ultim ragg sol cadent spaz frappost velegg chet legn sottil volg tratt tratt cerc vent soff incost quel golf incresp qua là lung strisc superfic mar distanz tuttav nav dubb luc crepuscol lasc distingu qual bandier spagnuol insiem molt sold press riv guard fiss aguzz cigl attorcigl cert grandissim baff big ner cos guard sembr statu rett discorr te quest'apostrof d'un sold napolet otten rispost prim domand l'av mal moss nè punt nè poc l'imperturb spagnuol fin sospir par uscir mantic pett d'un uom diss vot dios que nuestr segnor de gaet mand buon vent buon cammin tant preg mar potrebb mand ora fust preg terr metter sott dent altro calc dell'archibus sa port gran provvision que descomulgados frances teng strett gabb farc mor fam y mal pasqu me de dios y sea primer que vin grac el segnor gonzal hernandez quand ben pranz megl cen gl'import cuer de sus zapatos cos può far consalv rispos istizz napolet content contradd dovr divent pan entrar corp best te quand dar nav malann port secc manfredon l'ha divor consalv altri spagnuol po mut vis mostr voler rispond interrott altro crocc batt spall scuot test abbass voc dar maggior pes parol elasticlunr-rs-3.0.2/tests/data/ja.in.txt000064400000000000000000000040060072674642500164400ustar 00000000000000バイト仲間で、ものすごく気の合うメンバーがいた。 なぜ気が合うかというと、共通の上司がヤバいやつだったからだった。 どうヤバいかここで説明するのは割愛する。主題からずれるので。 そのヤバい上司の愚痴を言っている間は、お互い仲間意識を持っていたように思う。 月日は流れ、私はそこを辞め、しばらくしてその気が合うメンバーも辞めた。 何回かその後、そのメンバーと合っているうちに、意見の衝突などから、険悪な雰囲気になることが増えた。 そして私も面倒なので、もう会わないようになった。 それからもう何年も経つ。 「共通の敵」を作ると、結束が固まるという話はよく聞くが、それは非常に壊れやすいものと思う。 敵が去ったあとは、内紛が起こる。 
人類に文明が発生してから、地球上のあらゆる場所で繰り返してきたことかもしれない。 漢の劉邦が中国を統一したとき、つまり宿敵項羽を倒したあと、敵がいなくなった。 その時、今まで一緒に戦ってきた功臣の何人かは、劉邦によって降格されたり、またそれを恨みに思った功臣が謀反を起こし、一族ごと処罰を受けたりもした。 ただ、軍師の張良は天下統一の後、「政治には興味ありません。オカルトの研究だけはさせてください」と自分は安全であるというアピールをしたからなのか助かったようである。 軍師として成果を上げるくらいだから、やはり人間の性質を理解していたようだ。 こんな感じで、共通の敵がいるという条件下で育まれた友情のようなものは、いずれ崩壊する儚いものであると考えておくのがいい。 喫煙所で上司の悪口を言ってる暇があったら、自分のスキルアップに時間を使う方が有益と言える。 elasticlunr-rs-3.0.2/tests/data/ja.out.txt000064400000000000000000000027000072674642500166400ustar 00000000000000バイト 仲間 ものすごく 気 合う メンバ い なぜ 気 合う いう 共通 上司 い やつ どう いか ここ 説明 する の 割愛 する 主題 ずれる その い 上司 愚痴 言っ いる 間 お互い 仲間 意識 持っ い よう 思う 月日 流れ 私 そこ 辞め しばらく し その 気 合う メンバ 辞め 何 回 その後 その メンバ 合っ いる うち 意見 衝突 険悪 雰囲気 なる こと 増え そして 私 面倒 もう 会わ よう なっ それから もう 何 年 経つ 共通 敵 作る 結束 固まる 話 よく 聞く それ 非常 壊れ やすい もの 思う 敵 去っ あと 内紛 起こる 人類 文明 発生 し 地球 上 あらゆる 場所 繰り返し き こと しれ 漢 劉邦 中国 統一 し とき つまり 宿敵 項羽 倒し あと 敵 い なっ その 時 今 一緒 戦っ き 功臣 何 人 劉邦 降格 さ れ また それ 恨み 思っ 功臣 謀反 起こし 一族 ごと 処罰 受け し ただ 軍師 張 良 天下 統一 後 政治 興味 あり 研究 さ せ ください 自分 安全 アピール し の 助かっ よう 軍師 成果 上げる やはり 人間 性質 理解 し い よう こんな 感じ 共通 敵 いる 条件下 育ま れ 友情 よう もの いずれ 崩壊 する 儚い もの 考え おく の いい 喫煙 所 上司 悪口 言っ てる 暇 あっ 自分 スキル アップ 時間 使う 方 有益 言える elasticlunr-rs-3.0.2/tests/data/ko.in.txt000064400000000000000000000024340072674642500164620ustar 00000000000000국회는 국가의 예산안을 심의·확정한다. 헌법재판소의 조직과 운영 기타 필요한 사항은 법률로 정한다. 국회에서 의결된 법률안은 정부에 이송되어 15일 이내에 대통령이 공포한다. 국가는 지역간의 균형있는 발전을 위하여 지역경제를 육성할 의무를 진다. 국민경제의 발전을 위한 중요정책의 수립에 관하여 대통령의 자문에 응하기 위하여 국민경제자문회의를 둘 수 있다. 국가는 전통문화의 계승·발전과 민족문화의 창달에 노력하여야 한다. 모든 국민은 인간으로서의 존엄과 가치를 가지며, 행복을 추구할 권리를 가진다. 국가는 개인이 가지는 불가침의 기본적 인권을 확인하고 이를 보장할 의무를 진다. 중앙선거관리위원회는 대통령이 임명하는 3인, 국회에서 선출하는 3인과 대법원장이 지명하는 3인의 위원으로 구성한다. 위원장은 위원중에서 호선한다. 국가는 농수산물의 수급균형과 유통구조의 개선에 노력하여 가격안정을 도모함으로써 농·어민의 이익을 보호한다. 국가원로자문회의의 의장은 직전대통령이 된다. 
다만, 직전대통령이 없을 때에는 대통령이 지명한다.elasticlunr-rs-3.0.2/tests/data/ko.out.txt000064400000000000000000000023100072674642500166540ustar 00000000000000국회는 국가의 예산안을 심의·확정한다 헌법재판소의 조직과 운영 필요한 사항은 법률로 정한다 국회에서 의결된 법률안은 정부에 이송되어 이내에 대통령이 공포한다 국가는 지역간의 균형있는 발전을 지역경제를 육성할 의무를 진다 국민경제의 발전을 위한 중요정책의 수립에 대통령의 자문에 응하기 국민경제자문회의를 수 국가는 전통문화의 계승·발전과 민족문화의 창달에 노력하여야 한다 모든 국민은 인간으로서의 존엄과 가치를 가지며 행복을 추구할 권리를 가진다 국가는 개인이 가지는 불가침의 기본적 인권을 확인하고 이를 보장할 의무를 진다 중앙선거관리위원회는 대통령이 임명하는 인 국회에서 선출하는 인과 대법원장이 지명하는 인의 위원으로 구성한다 위원장은 위원중에서 호선한다 국가는 농수산물의 수급균형과 유통구조의 개선에 노력하여 가격안정을 도모함으로써 농·어민의 이익을 보호한다 국가원로자문회의의 의장은 직전대통령이 된다 직전대통령이 없을 때에는 대통령이 지명한다elasticlunr-rs-3.0.2/tests/data/no.in.txt000064400000000000000000000172010072674642500164630ustar 00000000000000Hver dag blir vi litt klokere på pandemien som har snudd samfunnet vårt på hodet. Hver dag er vi ett skritt nærmere en vaksine og ett skritt nærmere det som skal være vår nye hverdag, etter krisen. Jeg tror vi alle kjenner på at det tærer litt på nå – syv måneder nærmest i unntakstilstand, med mer usikkerhet og mindre frihet enn vi noen gang trodde vi skulle oppleve. Vi fikk raskt kontroll på smitten. Og vi kunne bruke våre økonomiske muskler til å dempe tilbakeslaget. Gjeninnhentingen kom raskere enn vi så for oss. Men vi blir stadig minnet på hvor skjør situasjonen er. Å hindre nye smitteutbrudd er den viktigste jobben fremover også, antakelig langt inn i neste år. Den jobben kan ingen gjøre alene. Vi må gjøre den sammen. Hver og en av oss må fortsette å holde avstand, vi må vaske hendene og være hjemme hvis vi er syke, for å beskytte de mest sårbare blant oss, for å ta vare på arbeidsplassene, for at alt skal bli bra igjen – selv om vi kjenner at det røyner på. De siste månedene har jeg møtt folk og bedrifter fra hele Norge for å høre deres historier og deres tanker om fremtiden. Det er ett ord som går igjen: usikkerhet. 
Men det er ofte med en betryggende undertone av optimisme og innsatsvilje – betryggende fordi vi må ha med privat næringsliv på laget hvis vi skal ri denne stormen av uten å miste kurs. Det beste vi kan gjøre, er å legge til rette for at det kan skje – og vise vei. Med budsjettet for 2021 forsterker regjeringen det langsiktige arbeidet for å styrke bærekraften og konkurransekraften i norsk økonomi. Den jobben startet vi i 2013. For å trygge jobbene og fremme omstilling har vi investert mer i kunnskap, i forskning, i infrastruktur og i vekstfremmende skattelettelser. Prioriteringene i budsjettet skal bringe Norge mot seks mål som alle handler om å komme gjennom denne krisen, uten å miste de langsiktige perspektivene av syne. Det første målet handler om å få folk tilbake i jobb. I mars steg antallet permitterte dramatisk. På det meste var mer enn hver tiende person i arbeidsstyrken registrert som helt ledig hos Nav. Bildet har bedret seg siden den gang. Men fortsatt er mer enn 100 000 mennesker helt arbeidsledige i Norge. Det er altfor mange. Det viktigste for å bevare et samfunn med små forskjeller og gode velferdsordninger er at flest mulig er i jobb. Derfor må vi unngå at ledigheten nå biter seg fast på et høyt nivå. Vi må være spesielt oppmerksom på ungdom som er på vei ut i arbeidslivet. Og vi må unngå at de som i utgangspunktet hadde svak tilknytning til arbeidslivet, støtes varig ut. Å legge til rette for nye arbeidsplasser i privat sektor har vært en viktig del av den økonomiske politikken regjeringen har ført i syv år. Før krisen var sysselsettingsandelen på vei opp. Vi må tilbake til det sporet. Siden mars har vi iverksatt kraftfulle tiltak for å gi økonomisk trygghet til alle de som plutselig fikk inntektsgrunnlaget revet bort, for å sikre at kommunene og helsetjenesten har ressurser nok til å fortsette å ta vare på oss, og for å hjelpe levedyktige bedrifter gjennom denne krevende perioden. Tiltakene har virket. 
Aktiviteten i norsk økonomi har tatt seg opp igjen etter den dramatiske nedgangen i vår. Men gjeninnhentingen er skjør, krisen er ikke over. Vi må ta høyde for at mange bedrifter – og særlig i næringer som reiseliv og kultur – fortsatt vil være rammet av smitteverntiltak som begrenser aktiviteten, og at ordrebøkene til eksportbedriftene antakelig vil tynnes ut på grunn av svak etterspørsel fra landene vi handler med. Hos handelspartnerne våre har den økonomiske nedgangen vært enda dypere, og gjeninnhentingen har vært svakere. I budsjettet for 2021 legger vi opp til at oljepengebruken skal være på 313 mrd. kr. Det tilsvarer 3 pst. av fondsverdien og betyr at vi allerede neste år kan være tilbake på den langsiktige rettesnoren for bærekraftig bruk av oljeinntektene. Men det er samtidig over 60 mrd. kr mer enn vi brukte i 2019. Budsjettet for neste år vil virke ekspansivt i den økonomiske politikken. Alt fra bygging av vei og bane, investeringer i forsvaret, overføringer til kommuner, fylker og helsetjenester, nye byggeprosjekter og ulike støtteordninger skaper aktivitet og arbeid over hele landet. I tillegg vil vi forsterke satsingen på arbeidsmarkedstiltak for dem som står uten jobb. Koronapandemien kommer til å endre vanene våre. Vi har blitt mer digitale. Mye tyder på at vi kommer til å reise mindre og være mer på hjemmekontor. Varige endringer er endringer som næringslivet må tilpasse seg. Vi må hjelpe næringslivet gjennom krisen – uten å svekke innovasjonskraften og uten å ødelegge omstillingsevnen. Det er en krevende balansegang. Bedrifter går konkurs – i gode tider og i vanskelige tider. Vi politikere må være kloke nok til å erkjenne at vi ikke vet hvem som er morgendagens vinnere. Markedet må avgjøre hvilke bedrifter som skal være med videre. Norge har naturressurser, teknologi og kompetanse som gir oss fantastiske muligheter. Det andre målet i regjeringens strategi er at vi må sikre flere ben å stå på. Vi må fortsette å fornye Norge. 
Vi trenger flere jobber, i flere bransjer, over hele landet. Og veksten må komme i privat næringsliv. Gjennom syv år i regjering har vi gjort mye for å bedre rammebetingelsene for bedriftene. Konkurranseevnen er kraftig bedret, bl.a. fordi bedriftene tar i bruk ny teknologi, og fordi vi har senket skattene ned mot nivået i land vi konkurrerer med. Vi har prioritert lavere selskapsskatt, lavere marginalskatt på arbeid og lavere formuesskatt fordi det gir arbeidsplasser og arbeidslyst. Nå kutter vi skattene enda mer, med brede lettelser i inntektsskatten, som lavere trinnskatt, høyere minstefradrag både på lønn og på pensjon og videre nedtrapping av skatten på arbeidende kapital. Det kan gi norske eiere muskler til å investere i norske arbeidsplasser. Og vi øker skattefordelen for dem som kjøper aksjer i bedriften de er ansatt i. Vi ønsker at flere medarbeidere skal bli medeiere i bedriftene. Vi foreslår også å endre vannkraftbeskatningen og tilfører næringen betydelig likviditet. Det vil legge til rette for investeringer og nødvendige oppgraderinger i en næring som skaper aktivitet i lokalsamfunn over hele landet. I tillegg vil vi øke frikortgrensen til 60 000 kr. Det kommer ungdom til gode, som nå kan beholde litt mer av de pengene de tjener. Fra 1. juli neste år vil vi også innføre kildeskatt på enkelte betalinger til nærstående selskap i lavskatteland. Formålet er å motvirke overskuddsflytting, unngå at inntekter som skapes i Norge, blir beskattet i et annet land med lavere skatt. Skatt og avgift er den klart største inntektskilden i statsbudsjettet. Å beskytte det norske skattegrunnlaget er derfor helt nødvendig for at vi skal kunne holde skattene lave for våre bedrifter og arbeidsplasser. Med dette budsjettet vil en vanlig familie betale 14 000 kr mindre i skatt neste år enn om det rød-grønne skattenivået fra 2013 hadde blitt videreført – 14 000 kr for en vanlig familie. Til sammen er skatte- og avgiftsnivået redusert med nesten 30 mrd. 
kr i vår regjeringsperiode. Gjennom sterk satsing på forskning og utvikling skaper regjeringen grobunn for omstilling og vekst. I 2021 øker vi FoU-bevilgningene med mer enn 2 mrd. kr, til over 45 mrd. kr. Pengene skal bl.a. gå til å trappe opp langtidsplanen for forskning og høyere utdanning og til deltagelse i Horisont Europa og EUs romprogram. Norske bedrifter og forskere hevder seg bra i konkurransen om penger fra EU-programmene. Deltagelse der bidrar til nye jobber her og til at vi løser mange store samfunnsoppgaver. elasticlunr-rs-3.0.2/tests/data/no.out.txt000064400000000000000000000110550072674642500166650ustar 00000000000000dag litt kloker pandemi snudd samfunn vårt hod dag skritt nærmer vaksin skritt nærmer nye hverdag kris tror kjenn tær litt syv måned nærmest unntakstilstand mer usikker mindr frih gang trodd opplev fikk raskt kontroll smitt bruk vår økonomisk muskl demp tilbakeslag gjeninnhenting rasker stad minn skjør situasjon hindr nye smitteutbrudd viktigst jobb fremov antak langt nest år jobb gjør alen må gjør samm må fortsett hold avstand må vask hend hjemm syk beskytt mest sårbar blant ta var arbeidsplass alt bra igj kjenn røyn sist måned møtt folk bedrift hel norg hør histori tank fremtid ord går igj usikker oft betrygg underton optimism innsatsvilj betrygg må privat næringsliv lag ri storm mist kur best gjør legg rett skje vis vei budsjett forsterk regjering langsikt arbeid styrk bærekraft konkurransekraft norsk økonomi jobb start trygg jobb fremm omstilling invester mer kunnskap forskning infrastruktur vekstfremm skattelett prioritering budsjett bring norg seks mål handl komm gjennom kris mist langsikt perspektiv syn først mål handl få folk tilbak jobb mar steg antall permitter dramatisk mest mer tiend person arbeidsstyrk registrer helt led hos nav bild bedr gang fortsatt mer mennesk helt arbeidsled norg altfor viktigst bevar samfunn små forskjell god velferdsordning flest mul jobb derfor må unngå led bit fast høyt nivå må spesielt oppmerksom 
ungdom vei arbeidsliv må unngå utgangspunkt svak tilknytning arbeidsliv støt var legg rett nye arbeidsplass privat sektor vikt del økonomisk politikk regjering ført syv år kris sysselsettingsandel vei må tilbak spor mar iverksatt kraftfull tiltak gi økonomisk trygg pluts fikk inntektsgrunnlag rev bort sikr kommun helsetjenest ressurs nok fortsett ta var hjelp levedykt bedrift gjennom krev period tiltak virk aktivitet norsk økonomi tatt igj dramatisk nedgang gjeninnhenting skjør kris må ta høyd bedrift sær næring reiseliv kultur fortsatt ramm smitteverntiltak begrens aktivitet ordrebøk eksportbedrift antak tynn grunn svak etterspørsel land handl hos handelspartnern vår økonomisk nedgang end dyper gjeninnhenting svaker budsjett legg oljepengebruk mrd kr tilsvar pst fondsverdi betyr aller nest år tilbak langsikt rettesnor bærekraft bruk oljeinntekt samtid mrd kr mer brukt budsjett nest år virk ekspansiv økonomisk politikk alt bygging vei ban investering forsvar overføring kommun fylk helsetjenest nye byggeprosjekt ulik støtteordning skap aktivit arbeid hel land tillegg forsterk satsing arbeidsmarkedstiltak står jobb koronapandemi komm endr van vår mer digital mye tyd komm reis mindr mer hjemmekontor var endring endring næringsliv må tilpass må hjelp næringsliv gjennom kris svekk innovasjonskraft ødelegg omstillingsevn krev balansegang bedrift går konkur god tid vansk tid politiker må klok nok erkjenn vet morgendag vinner marked må avgjør bedrift vider norg naturressurs teknologi kompetans gir fantastisk mul andr mål regjering strategi må sikr fler ben stå må fortsett forny norg treng fler jobb fler bransj hel land vekst må komm privat næringsliv gjennom syv år regjering gjort mye bedr rammebeting bedrift konkurranseevn kraft bedr bl.a bedrift tar bruk ny teknologi senk skatt nivå land konkurrer prioriter laver selskapsskatt laver marginalskatt arbeid laver formuesskatt gir arbeidsplass arbeidslyst kutt skatt end mer bred lett inntektsskatt laver trinnskatt høyer 
minstefradrag lønn pensjon vider nedtrapping skatt arbeid kapital gi norsk eier muskl invester norsk arbeidsplass øker skattefordel kjøp aksj bedrift ansatt ønsk fler medarbeider medeier bedrift foreslår endr vannkraftbeskatning tilfør næring betyd likvidit legg rett investering nødvend oppgradering næring skap aktivit lokalsamfunn hel land tillegg øke frikortgrens kr komm ungdom god behold litt mer peng tjen juli nest år innfør kildeskatt enkelt betaling nærstå selskap lavskatteland formål motvirk overskuddsflytting unngå inntekt skap norg beskatt ann land laver skatt skatt avgift klart størst inntektskild statsbudsjett beskytt norsk skattegrunnlag derfor helt nødvend hold skatt lav vår bedrift arbeidsplass budsjett van famili betal kr mindr skatt nest år rød grønn skattenivå videreført kr van famili samm skatt avgiftsnivå reduser nest mrd kr regjeringsperiod gjennom sterk satsing forskning utvikling skap regjering grobunn omstilling vekst øker fou bevilgning mer mrd kr mrd kr peng bl.a gå trapp langtidsplan forskning høyer utdanning deltag horisont europ eus romprogram norsk bedrift forsker hevd bra konkurrans peng eu programm deltag bidr nye jobb løs stor samfunnsoppgav elasticlunr-rs-3.0.2/tests/data/pt.in.txt000064400000000000000000000057530072674642500165030ustar 00000000000000Eu poucas vezes canto os casos melancolicos, Os lethargos gentis, os extasis bucolicos E as desditas crueis do proprio coração; Mas não celebro o vicio e odeio o desalinho Da muza sem pudor que mostra no caminho A liga á multidão. A sagrada poesia, a peregrina eterna, Ouvi dizer que soffre uma affecção moderna, Uns fastios sem nome, uns tedios ideaes; Que ensaia, presumida, o gesto romanesco E, vaidosa de si, no collo eburneo e fresco, Põe crémes triviaes! Oh, pensam mal de ti, da tua castidade! Deslumbra-os o fulgor dos astros da cidade, Os falsos ouropeis das cortezãs gentis, E julgam já tocar-te as roçagantes vestes Ó deusa virginal das coleras celestes, Das graças juvenis! 
Retine a cançoneta alegre das bachantes, Saudadas nos wagons, nos caes, nos restaurantes, Visões d'olhar travesso e provocantes pés, E julgam já escutar a voz do paraiso, Amando o que ha de falso e torpe no sorriso Das musas dos cafés! Oh, tu não és, de certo, a virgem quebradiça Estiolada e gentil, que vem depois da missa Mostrar pela cidade o seu fino desdem, Nem a fada que sente um vaporoso tedio Emquanto vae sonhando um noivo rico e nédio Que a possa pagar bem! Nem posso mesmo crêr, archanjo, que tu sejas A menina gentil que ás portas das egrejas Emquanto a multidão galante adora a cruz, A bem do pobre enfermo á turba pede esmola Nas pompas ideaes da moda, que a consola Das magoas do Jesus! E nas horas de luta emquanto os povos choram E a guerra tudo mata e os reis tudo devoram, Não posso dizer bem se acaso tu serás A senhora que espalha os languidos fastios Nos pomposos salões, sorrindo a fazer fios Á viva luz do gaz! Tu és a apparição gentil, meia selvagem, D'olhar profundo e bom, de candida roupagem, De fronte immaculada e seios virginaes, Que desenha no espaço o limpido contorno E cinge na cabeça o virginal adorno De folhas naturaes. Tens a linha ideal das candidas figuras; As curvas divinaes; as tintas sãs e puras Da austera virgindade; as bellas correcções; E segues magestosa em teu longo caminho Deixando fluctuar a tunica de linho Ás frescas virações! Quando trava batalha a tua irmã Justiça Acodes ao combate e apontas sobre a liça Uma espada de luz ao Mal dominador: E pensas na belleza harmonica das cousas Sentindo que se move um mundo sob as louzas No germen d'uma flôr! N'um sorriso cruel, pungente d'ironia, Tambem sabes vibrar, serena, altiva e fria, O latego febril das grandes punições; E vendo-te sorrir, a geração doente, Sentir cuida, talvez, a nota decadente, Das morbidas canções! 
Oh, vôa sem cessar traçando nos teus hombros O manto constellado, ó deusa dos assombros, Até chegar um dia ás regiões de luz, Aonde, na poeira aurifera dos astros, Contricto, Satanaz enxugará de rastos, As chagas de Jesus! Logar á minha fada ó languidas senhoras! E vós que amaes do circo as noites tentadoras, Os fluctuantes véos, os gestos divinaes, Podeis vel-a passar n'um turbilhão fantastico, Voando no corcel febril, nervoso, elastico, Dos novos ideaes!elasticlunr-rs-3.0.2/tests/data/pt.out.txt000064400000000000000000000035730072674642500167020ustar 00000000000000pouc vez cant cas melancol letharg gent extas bucol desdit cru propri coraçã celebr vici odei desalinh muz pudor mostr caminh lig á multidã sagr poes peregrin etern ouv diz soffr affecçã modern uns fasti nom uns tedi idea ensa presum gest romanesc vaidos si coll eburn fresc põ crém trivia oh pens mal ti castidad deslumbr fulgor astros cidad fals ourop cortezãs gent julg toc roçag vest ó deus virginal col cel grac juven retin cançonet alegr bachant saud wagons caes restaur visõ d'olh travess provoc pés julg escut voz parais amand ha fals torp sorris mus cafés oh és cert virg quebradic estiol gentil vem miss mostr cidad fin desd fad sent vapor tedi emquant vae sonh noiv ric nédi poss pag bem poss crêr archanj sej menin gentil ás port egrej emquant multidã galant ador cruz bem pobr enferm á turb ped esmol pomp idea mod consol mago jesus hor lut emquant pov chor guerr tud mat reis tud devor poss diz bem acas serás senhor espalh langu fasti pompos salõ sorr faz fios á viv luz gaz és appariçã gentil mei selvag d'olh profund bom cand roupag front immacul sei virgina desenh espac limp contorn cing cabec virginal adorn folh natura tens linh ideal cand figur curv divina tint sãs pur aust virgindad bell correcçõ segu magest long caminh deix fluctu tunic linh ás fresc viraçõ trav batalh irmã justic acod combat apont sobr lic espad luz mal domin pens bellez harmon cous sent mov mund sob louz germen d'um flôr n'um 
sorris cruel pungent d'iron tamb sab vibr seren altiv fri lateg febril grand puniçõ vend sorr geraçã doent sent cuid talvez not decadent morb cançõ oh vôa cess trac hombr mant constell ó deus assombr cheg dia ás regiõ luz aond poeir aurif astros contrict satanaz enxug rast chag jesus log á fad ó langu senhor vós ama circ noit tentador fluctuant véos gest divina pod vel pass n'um turbilhã fantast voand corcel febril nervos elast nov idea elasticlunr-rs-3.0.2/tests/data/ro.in.txt000064400000000000000000000142450072674642500164740ustar 00000000000000Aceasta carte contine teoria mea originala, numita MDT (Modeling Devices Theory), asupra functiilor hardware de baza ale unui creier (animal sau uman). Fiind o teorie stiintifica, ea este de fapt un model simbolic. Orice model simbolic trebuie sa contina un numar foarte limitat de termeni fundamentali si un numar foarte limitat de relatii fundamentale intre termenii fundamentali. Pentru termenii fundamentali si numai pentru ei, se accepta' definitii bazate pe descrieri. Toti ceilalti termeni sunt generati de model, odata cu definitiile lor, prin operatii logico-matematice. Acestea sunt caracteristicile fundamentale ale oricarei teorii stiintifice. Teoria prezentata urmeaza aceste reguli de baza. Aceasta teorie se afla' in totala opozitie cu toate stiintele actuale care studiaza functionarea creierului si care stiinte nu se bazeaza pe un singur model simbolic. In acest fel, aceasta teorie descalifica' din start tot ce s-a creat in ultimele citeva sute de ani in domenii cum ar fi psihologia, psihiatria, gnoseologia, epistemologia, stiintele comportamentelor animalelor, partial stiintele sociale si alte domenii conexe. Aceasta incercare de revolutie totala este necesara si justificata de urmatoarea situatie, care situayie exista' independent de existenta sau nu a teoriei mele. In psihologie, de exemplu, se folosesc o serie de termeni (constiinta, realitate, adevar, perceptii, emotii, etc.) 
care nu au definitii universal acceptate. In fapt, fiecare psiholog are propriile variante de definitii descriptive asupra tuturor termenilor folositi de el. Psihologia nu este o stiinta exacta, lucru universal acceptat. Atunci cind va aparea o stiinta exacta care sa acopere si domeniul psihologiei, atunci tot ce s-a scris deja in psihologie trebuie abandonat sau rescris in baza acelei teorii stintifice. Intr-o stiinta exacta cum ar fi Mecanica lui Newton, toti termenii folositi au exact aceleasi definitii pentru oricine, oriunde si oricind, fara nici o modificare de aproximativ 340 de ani de cind au fost creati. De exemplu, termenul "viteza" are o definitie generata de modelul simbolic. Acesta definitie este v=s/t (se imparte spatiul la timp). Termenul "viteza" nu este deci introdus prin descriere. Sa presupunem acum ca cineva a creat sau va crea un model simbolic fundamental (o stiinta exacta) care explica' functionarea creierului in mod acceptabil. Prima consecinta a aparitiei acestui model este ca absolut toti termenii folositi in domeniile acoperite de acel model, vor fi definiti pe baza modelului. Rezultatul este cel care a fost enuntat mai sus si anume, tot ce s-a scris in ultimii citeva sute de ani in asa zisele stiinte asociate creierului, va trebui abandonat sau rescris. Indiferent daca teoria prezentata in aceasta carte va fi sau nu acceptata, mai repede sau mai tirziu, tot va aparea un model simbolic fundamental care sa explice functionarea creierului si deci, mai repede sau mai tirziu, tot se va intimpla aceasta revolutie. Aici apare insa o problema suplimentara. Pseudo-stiintele actuale asociate creierului sunt sustinute de o puternica structura academica si cu caracter aplicativ/lucrativ. Oamenii care sustin aceasta structura nu au cum sa accepte nici o teorie bazata pe un singur model simbolic, deoarece asta inseamna' sa ia totul de la zero. 
Consecinta este faptul ca, chiar daca ar aparea un model simbolic fundamental "absolut corect ", opozitia care ar aparea ar fi enorma. Nu-mi creez nici o iluzie ca cineva care deja lucreaza in domeniile acestor pseudo-stiinte va accepta sau chiar va lua in considerare aceasta teorie sau oricare alta de acest fel. Bazat pe experienta de peste 10 ani de cind exista' aceasta teorie, ea a avut succes la persoanele care lucreaza deja in domeniul stiintelor exacte (matematicieni, fizicieni,..) dar si la tinerii intre 12 si 20 de ani. Mai precis, la tinerii care nu sunt inca remorcati de sistemul social-economic actual. Un student care a primit deja o tema de lucrare de diploma, va trebui sa urmeze linia trasata de profesorii lui. El nu are cum sa-si riste viitorul aventurindu-se intr- un domeniu neinteles de profesorii lui. Sa vedem ce ofera aceasta teorie. In primul rind, fiind un model simbolic, ea este bazata pe logica. Ea da definitii extrem de precise si neinterpretabile tuturor termenilor folositi in asociatie cu functionarea creierului. Teoria explica' principiul de functionare al creierului, animal sau uman, pina la a fi in stare sa faca un proiect logic functional, adica un proiect de dispozitiv logic, care poate sintetiza functiile de baza ale creierului animal sau uman. De fapt, creierul este tratat ca un produs tehnologic. Astfel, se definesc cerintele fundamentale dar si deficientele fundamentale de proiectare. Sunt explicate problemele si solutiile legate de implementarea tehnologica a creierului, in multiplele lui variante. Teoria sugereaza faptul ca proiectantul, in decursul zecilor de milenii, a facut mai multe variante tehnologice care se pot recunoaste in realitatea externa. Se analizeaza daca prin evolutie se poate trece sau nu, de la un creier de animal la un creier de om. Sunt tratate si problemele de proiectare sau tehnologice, cunoscute sub denumirea de deficiente/boli psihice (in forme patologice sau nu). 
Teoria trateaza intr-un mod stiintific si asa zisele fenomene paranormale si sugereaza metode pentru dezvoltarea abilitatilor in acest domeniu. Cartea are doua parti. Prima prezinta teoria generala impreuna cu citeva aplicatii considerate mai importante. In a doua parte sunt prezentate mai detaliat, un numar de exemple, teste si aplicatii, care sa sustina intelegerea teoriei generale. Din cauza ca teoria, numita de mine MDT (Modeling-Devices Theory), a fost scrisa initial in limba engleza (din 1997 elemente ale ei se afla pe WEB), un mare numar de termeni sunt prescurtati folosind terminologia engleza. Elementele de baza ale teoriei au aparut cam in 1993 si prima forma scrisa in 1995. De atunci teoria a fost perfectionata si dezvoltata si procesul continua. In anul 2003 o versiune foarte apropiata de aceasta a fost publicata la editura Cosmos din Sibiu. Aceasta versiune poate fi considerata ca o editie imbunatatita si adaugita a cartii din 2003.elasticlunr-rs-3.0.2/tests/data/ro.out.txt000064400000000000000000000074560072674642500167030ustar 00000000000000cart contin teor original numit mdt modeling devices theory funct hardw baz creier animal uman fiind o teor stiintif fapt model simbol model simbol trebui contin numar foart limit termen fundamental si numar foart limit relat fundamental intre termen fundamental termen fundamental si numa accept defin bazat descrier tot ceilalt termen gener model odat defin oper logico matemat caracterist fundamental oricare teor stiintif teor prezent urmeaz regul baz teor afla in total opozit stiint actual studiaz function creier si stiint bazeaz singur model simbol in fel teor descalif start s a creat in ultim citev sut ani in domen psiholog psihiatr gnoseolog epistemolog stiint comportament animal partial stiint social si alte domen conex incerc revolut total necesar si justific urmat situat situay exist independent existent a teor in psiholog exemplu folos o ser termen constiint realitat adevar percept emot etc defin universal 
accept in fapt psiholog propr variant defin descript tuturor termen folos psiholog o stiint exact lucru universal accept atunc cind va apar o stiint exact acop si domen psiholog atunc s a scris in psiholog trebui abandon rescris in baz acele teor stintif intr o stiint exact mecan newton tot termen folos exact aceleas defin si oric far o modific aproxim ani cind fost creat exemplu termen vitez o definit gener model simbol definit v=s/t impart spat termen vitez introdus descrier presupun a creat va cre model simbol fundamental o stiint exact explic function creier in mod accept consecint a apar acest model absol tot termen folos in domen acoper model vor defin baz model rezult a fost enunt sus si anum s a scris in ultim citev sut ani in asa zis stiint asoc creier va treb abandon rescris indiferent dac teor prezent in cart va accept reped tirziu va apar model simbol fundamental explic function creier si reped tirziu va intimpl revolut apar insa o problem suplimentar pseudo stiint actual asoc creier sustin o putern structur academ si caracter aplicativ/lucr oamen sustin structur accept o teor bazat singur model simbol inseamn ia tot consecint fapt dac apar model simbol fundamental absol corect opozit apar enorm creez o iluz lucreaz in domen acestor pseudo stiint va accept va lua in consider teor alta fel bazat experient ani cind exist teor a avut succes persoan lucreaz in domen stiint exact matematicien fizicien si tiner intre si ani precis tiner inca remorc sist social econom actual student a primit o tem lucr diplom va treb urmez lini trasat profesor si rist viitor aventur intr domeniu neinteles profesor ved ofer teor in rind fiind model simbol bazat logic defin extrem prec si neinterpret tuturor termen folos in asociat function creier teor explic princip function creier animal uman pin a in star fac proiect logic functional adic proiect dispoz logic sintetiz funct baz creier animal uman fapt creier tratat produs tehnolog astfel defin cerint fundamental si deficient 
fundamental proiect explic problem si solut legat implement tehnolog a creier in multipl variant teor sugereaz fapt proiect in decurs zec milen a facut mult variant tehnolog recunoast in realitat extern analizeaz dac evolut trec creier animal creier om tratat si problem proiect tehnolog cunosc denum deficiente/bol psihic in form patolog teor trateaz intr mod stiintif si asa zis fenomen paranormal si sugereaz metod dezvolt abil in domeniu cart dou part prezint teor general impreun citev aplic consider import in a dou part prezent detal numar exempl test si aplic sustin inteleg teor general cauz teor numit mdt modeling devices theory a fost scris initial in limb englez element afla web mar numar termen prescurt folos terminolog englez element baz teor apar cam in si form scris in atunc teor a fost perfection si dezvolt si proces continu in an o versiun foart aprop a fost public editur cosmos sibiu versiun consider o edit imbunatat si adaug a cart elasticlunr-rs-3.0.2/tests/data/ru.in.txt000064400000000000000000000141350072674642500165000ustar 00000000000000Московія! Въ понятіи иностранцевъ, отправлявшихся въ этотъ далекій, загадочный, снѣжный край — центральная ли только Россія? Или и Югъ съ златоглавымъ Кіевомъ, и Архангельскъ рыбный, и Каспій мутноводный, и Сибирь съ пушнымъ звѣремъ, и Кавказъ? Все это, вмѣстѣ взятое. Границы не были четко очерчены. Гдѣ кончалась Московія въ представленіи даже ученыхъ географовъ того времени, а тѣмъ болѣе въ воображеніи художниковъ, просто туристовъ, искателей приключеній, составителей мемуаровъ? Границы страны, по которой бродятъ бѣлые медвѣди, гдѣ снѣгъ лежитъ толстой пеленой, гдѣ люди питаются сырымъ мясомъ и даже поѣдаютъ другъ друга! Но туда ѣдутъ любознательные и пытливые путешественники: Олеарій, Корбъ, Герберштейнъ и другіе, и понемногу проливается свѣтъ на невѣдомую страну. 
Захватывая и всѣ окраины Россіи (Сибирь, Кавказъ), путешественники, однако, смѣшиваютъ свои представленія о людяхъ, обычаяхъ, костюмахъ, почти отождествляя, напримѣръ, татарина съ великороссомъ; они искажаютъ и архитектурныя формы: на ихъ рисункахъ главки Василія Блаженнаго и русскихъ монастырей пріобрѣтаютъ формы куполовъ персидскихъ дворцовъ и самаркандскихъ мечетей. Но отъ этого экзотическій интересъ ихъ живописныхъ показаній не ослабляется, а напротивъ усиливается. Явно восточнаго типа халаты, мѣховыя шапки, длинные рукава — и тутъ же великорусскія кольчуги и чисто русскіе уборы коней. Бытъ, жизнь Московіи кажется имъ суровой. Сколько наказаній тѣлесныхъ — висѣлицъ! Лѣсомъ цѣлымъ стоятъ висѣлицы на площадяхъ, людей живьемъ зарываютъ въ могилы, а тутъ же пышные кортежи, пріемы пословъ, засѣданія Думы Боярской — вотъ картины, проходящія передъ зрителемъ этихъ изображеній. Заѣзжія экспедиціи и труды отдѣльныхъ современниковъ иностранцевъ даютъ богатѣйшій матеріалъ, являющійся основой разысканій о Россіи былыхъ временъ. Интересъ къ Россіи, къ ея исторіи, быту, культурѣ, всюду нарастаетъ; онъ неизбѣжно станетъ еще большимъ. Уже и сейчасъ въ Англіи, въ Германіи, въ Чехіи издаются книги о старомъ и новомъ русскомъ искусствѣ. То же наблюдается и въ Парижѣ, гдѣ усиленно коллекціонируются гравюры и книги, относящіяся къ Россіи. Изъ числа коллекцій, содержащихъ богатый подборъ книгъ, упомянемъ собранія Апостола, Катенева, Нелидова, Тищенко, Трубецкой, Гревса, Шуваловой и др. Гравюры, изображающія русскую жизнь и русскій бытъ, собираютъ всѣ, кто можетъ. Особенно цѣннымъ для вопроса, насъ занимающаго нынѣ, является собраніе П. Н. Апостола, заключающее въ себѣ рѣдкія изданія Олеарія, Корба, Герберштейна и др. Старательно подобранныя, эти изданія представляютъ собою особую рѣдкость за границей, ибо многихъ изъ нихъ не имѣетъ даже Парижская Національная Библіотека. 
Обзоръ хотя бы трехъ-четырехъ авторовъ изъ собранія Апостола даетъ уже такой богатый матеріалъ для характеристики Россіи XVI-XVII вѣковъ, что мы и ограничимся пока репродукціями гравюръ изъ этихъ книгъ. Когда я пересматривалъ чудесныя in quarto и in folio, въ одинъ изъ уютныхъ вечеровъ, проведенныхъ мною въ Парижѣ, въ квартирѣ П.Н. Апостола, у меня явилась мысль подобрать такіе наиболѣе интересные моменты изъ русской жизни, которые будутъ характерны для пониманія иностранцами Россіи — тогда _Московіи_. Выбравъ эти гравюры, я просилъ компетентнаго П.Н. Апостола составить къ нимъ описаніе. Нынѣ это изданіе, съ необходимыми комментаріями, предлагается на судъ читателя. Думается, _Московія_ въ этихъ, хотя бы немногихъ, «штрихахъ» закрѣплена. elasticlunr-rs-3.0.2/tests/data/ru.out.txt000064400000000000000000000113420072674642500166760ustar 00000000000000москові въ поняті иностранцевъ отправля въ этотъ далекі загадочн снѣжны кра центральн россі югъ съ златоглавымъ кіевомъ архангельскъ рыбн каспі мутноводн сибир съ пушнымъ звѣремъ кавказъ вмѣстѣ взят границ четк очерч гдѣ конча москові въ представлені ученыхъ географовъ а тѣмъ болѣ въ воображені художниковъ туристовъ искател приключені составител мемуаровъ границ стран бродятъ бѣлы медвѣд гдѣ снѣгъ лежитъ толст пелен гдѣ пита сырымъ мясомъ поѣдаютъ другъ друг ѣдутъ любознательн пытлив путешественник олеарі корбъ герберштейнъ другі понемног пролива свѣтъ невѣдом стран захватыв всѣ окраин россі сибир кавказъ путешественник смѣшиваютъ представлені людяхъ обычаяхъ костюмахъ отождествл напримѣръ татарин съ великороссомъ искажаютъ архитектурны форм ихъ рисункахъ главк василі блаженнаг русскихъ монастыр пріобрѣтаютъ форм куполовъ персидскихъ дворцовъ самаркандскихъ мечет отъ экзотическі интересъ ихъ живописныхъ показані ослабля а напротивъ усилива явн восточнаг тип халат мѣховы шапк длин рукав тутъ великорусскі кольчуг чист русскі убор кон бытъ москові имъ суров наказані тѣлесныхъ висѣлицъ лѣсомъ цѣлымъ стоятъ висѣлиц площадяхъ 
люд живьемъ зарываютъ въ могил а тутъ пышн кортеж пріем пословъ засѣдані дум боярск вотъ картин проходящі передъ зрителемъ этихъ изображені заѣзжі экспедиці труд отдѣльныхъ современниковъ иностранцевъ даютъ богатѣйші матеріалъ являющі основ разыскані россі былыхъ временъ интересъ къ россі къ е исторі быт культурѣ нарастаетъ онъ неизбѣжн станетъ большимъ сейчасъ въ англі въ германі въ чехі изда книг старомъ новомъ русскомъ искусствѣ наблюда въ парижѣ гдѣ усилен коллекціонир гравюр книг относящі къ россі изъ числ коллекці содержащихъ богат подборъ книгъ упомянемъ собрані апостол катенев нелидов тищенк трубецк гревс шувалов др гравюр изображающі русск русскі бытъ собираютъ всѣ можетъ цѣннымъ вопрос насъ занимающаг нынѣ явля собрані п н апостол заключа въ себѣ рѣдкія издані олеарі корб герберштейн др старательн подобранны издані представляютъ особ рѣдкост границ иб многихъ изъ нихъ имѣетъ парижск національн библіотек обзоръ трехъ четырехъ авторовъ изъ собрані апостол даетъ богат матеріалъ характеристик россі вѣковъ огранич репродукці гравюръ изъ этихъ книгъ пересматривалъ чудесны въ одинъ изъ уютныхъ вечеровъ проведенныхъ въ парижѣ въ квартирѣ п.н апостол яв мысл подобра такі наиболѣ интересн момент изъ русск жизн будутъ характерн понимані иностранц россі москові выбравъ гравюр просилъ компетентнаг п.н апостол состав къ нимъ описані нынѣ издані съ необходим комментарі предлага судъ читател дума москові въ этихъ немногихъ штрихахъ закрѣпл elasticlunr-rs-3.0.2/tests/data/sv.in.txt000064400000000000000000000217170072674642500165060ustar 00000000000000Likasom förtjensten att undan förgängelsen hafva räddat Finlands historiska minnen nästan uteslutande tillhör Porthan, likaså hafva nationens mythiska qvarlefvor hufvudsakligast genom D:r Lönnrots verksamhet blifvit framkallade i ljuset. Alldeles obanad var väl icke heller den väg, han beträdde; men, hvad man före honom gjort, är liksom det icke vore till, sedan Kalevala och Kanteletar trädt i dagen. 
De få dessförinnan gjorda runo-samlingar hafva numera betydelse endast i den mon, som de tjenat till att framkalla dessa åtminstone för Finland evigt dyrbara skatter. Men ur denna synpunkt betraktade hafva de ett stort, historiskt värde, och vi anse af sådan anledning för en pligt att omnämna de förnämsta bland dem, som för D:r Lönnrot banat vägen. -- Den som först fästade uppmärksamheten vid vår inhemska poesi, var Porthan -- den Finska litteraturens heros. Man tror, att Porthan det oaktadt icke alltför högt uppskattat de mythiska sångerna; men han samlade dem icke desto mindre, och genom hans föresyn väcktes äfven andra, i synnerhet Ganander, att fortgå på samma bana. Gananders förtjenst består dock icke så mycket i gjorda runo-samlingar, som i bemödandet att tillvägabringa en Finsk Mythologi. Väl säger han sig "_i flere år hafva genomgått alla tillgängliga och i landet möjligen existerande Runor_", men ur dedicationen i hans arbete framlyser, att de till större delen blifvit af Porthan honom meddelade. Detsamma torde i ännu högre grad gälla om Lenqvist, hvars mythologiska arbete för öfrigt vida öfverträffar Gananders. I allmänhet synes man vid denna tid ännu icke varit betänkt på utgifvandet af en fullständig runo-samling, utan ansåg det till en början vara nödvändigare att bearbeta en mythologi, såsom "clavis poëseos Fennicae" (Gan.). -- Efter Porthans död stod den Finska litteraturen en lång tid stilla, sörjande blott den hädangångne. Men med år 1809 vaknade hon åter till nytt lif. Vid denna tid eller åtminstone kort derefter uppträdde många utmärkta litteratörer, och under den tidrymd, som sedan tilländalupit, räknar den Finska litteraturen flere bearbetare, än under hela det föregående seklet. Ibland dem hafva de flesta i större eller mindre mon egnat sin uppmärksamhet åt folkpoesien. Mest förtjent är i detta afseende D:r Topelius, hvilken i fem särskildta häften utgifvit en samling af äldre och nyare runor. 
Honom tillhör äfven förtjensten att hafva angifvit de orter, hvarest de Finska sångerna renast och i största mängd bibehållit sig. Han säger i företalet till femte häftet af sin samling: "På få ställen och nästan ingenstädes i Finland finnas fullständiga och oförderfvade forntida qväden. -- -- Ett enda ställe på jorden, som äfven är utom Finlands gräns, nemligen några Socknar i Archangelska Guvernementet samt i synnerhet, Wuokkiniemi Församling, räddar ännu fordna seder och den fordna hjelte-ättens minnen i deras rena och oförfalskade drägt. -- -- Derifrån har äfven jag med icke ringa möda förskaffat mina bästa sånger." -- År 1820 företog sig Prof. v. Becker i Turun Wiikko-Sanomat att till enhet bringa en mängd sånger om Wäinämöinen. Detta försök, så obetydligt det äfven var, förtjenar likväl uppmärksamhet, såsom det första i sitt slag. Emellertid synes man i början icke fästat någon uppmärksamhet vid denna vink. Således fortfor Topelius att utgifva sin samling i fragmentarisk form. Samma, method följdes af D:r Lönnrot i "Kantele", ehuru, han i företalet till Kalevala säger sig redan år 1826 vid författandet af sin academiska afhandling om Wäinämöinen hafva kommit på den tanken, att de förhanden varande Sångerna om Wäinämöinen, Ilmarinen och Lemminkäinen m.ö. sannolikt voro fragmenter ur längre cykler. Men de runor han samlade under sina vandringar i Finland åren 1828 och 1831, voro alltför obetydliga för att gifva anledning till ett combinations-försök. Först sedan han 1832 och följande åren besökt de af Topelius anvista orter utom Finska gränsen, började han utföra sin storartade plan att till enhet bringa alla Fornfinska sånger af episkt innehåll. 
[Ehuru den ursprungligt Finska folkpoesien är så charakterristisk, att deri icke en enda rad kan infuskas, utan att hvar och en, som gjort dess närmare bekantskap, genast urskiljer det äkta och ursprungliga från all oäkta afvel, så hafva likväl åtskilliga Pseudo-critici i mjugg sökt insinuera den oskyldiga förmodan, att D:r Lönnrot, för att få sin idé realiserad, sjelf completterat Kalevala. Skulle dessa affällingar vilja i nåder skänka sitt fosterland en tusende del af den kärlek, hvarmed de sannolikt omfatta sina egna, höga personer, så vore det för dem, om icke lätt, åtminstone möjligt att öfvertyga sig derom, att icke en enda af D:r Lönnrot sjelf författad vers förekommer i hela Kalevala. Vare det likväl långt ifrån mig att för den Finska Litteraturen vilja göra proselyter af dessa petrificater.] Huru han lyckats i detta företag, må blifva ett problem för kommande tider, då, såsom vi hoppas, ett ännu större antal af forntida qväden kommit i dagsljuset. Emellertid vore det önskligt, att D:r Lönnrot skulle redogöra för sammanhanget i Kalevala och ådagalägga den enhet, han trott sig finna deri. Af de åsigter, han uttalat i sitt företal till Kalevala, synes härflyta, att han icke anser detta qväde kunna göra anspråk på enhet i högre mening. Han säger bland annat: "Enligt min mening hafva dessa runor uppstått småningom och i samma ordning, som händelserna tilldragit sig. De skilda sångerna om Wäinämöinen, Ilmarinen och Lemminkäinen torde icke vara författade af en enda, utan ett verk af många. Den ena anförtrodde åt minnet en, den andra en annan tilldragelse och skildrade, hvad han sjelf hade sett eller hört." Utan att inlåta oss i någon kritik öfver dessa påståenden, vilja vi allenast anmärka, att, enligt D:r Lönnrots åsigt om runornas historiska uppkomst, enheten i Kalevala kommer att bero derpå, att de skilda tilldragelserna ordnas i behörig tidsföljd. 
Det var sannolikt äfven denna åsigt, som förmådde honom att i Kalevala införa runorna 28-32, hvilka till sitt innehåll på intet vis sammanhänga med de öfriga. -- Men huru D:r Lönnrot äfven må hafva uppfattat sammanhanget i Kalevala, så är i hvarje fall den mening obefogad, som finnes uttalad i företalet till Runola, att Kalevala endast innehåller en mängd stympade runo-fragmenter. Hvem medger icke, att mången runa under tidernas lopp kunnat stympas och förändras, ja till och med försvinna? Men en hvar, som ej låter förblinda sig af fördomar, bör finna i Kalevala vida mer än fragmenter. Öfversättaren skall, för att icke göra sig skyldig till maktspråk af motsatt beskaffenhet, lemna en kort öfversigt af arbetets innehåll, så att läsaren bättre kan sammanhålla det hela och lättare inse sammanhanget emellan de särskildta partierna. -- Det är i fråga om detta sammanhang icke likgiltigt, hvilken ordning runo-sångarne sjelfva iakttaga. De flesta följa naturligtvis ingen ordning, utan recitera ett stycke allt eftersom det faller dem i minnet. Men de flesta bland dem jag under mina vandringar i Olonetska och Archangelska Guvernementen varit i tillfälle att höra, sjunga runorna om Sampo i ett sammanhang. Wäinämöinens, Ilmarinens och Lemminkäinens Pohjola-färder betrakta de äter såsom skilda cykler. Men då Pohjas fagra mö var målet för dessa färder, så kunna äfven dessa cykler betraktas såsom ett inom sig slutet helt. Sålunda erbjuder Kalevala tvenne väsendtliga afdelningar, hvilka åter hafva ett ganska nära inbördes sammanhang derigenom, att Pohjolas värdinna hade utfästat sin dotter såsom belöning för den, som kunde smida Sampo. Detta underbara redskap förfärdigas af Ilmarinen. Ehuru således flickan hade bordt tillhöra honom, ledsnade han likväl vid lifvet i Pohjola, förr än det lyckats honom att tillvinna sig hennes kärlek, och återvände så till sin hembygd. 
Emellertid sökte äfven Wäinämöinen och Lemminkäinen att vinna hennes ynnest; men slutligen segrade dock Ilmarinen -- Genom de runor, som besjunga dessa trenne hjeltars Pohjola-färder, afsöndrar D:r Lönnrot Sampo-cykeln i tvenne afdelningar. Han låter frieri-runorna begynna, så snart Sampo var hopsmidd, och Ilmarinen återvände till hemmet. Denna fördelning kan ur flere skäl försvaras. Först och främst träda Kalevala och Pohjola i den sednare afdelningen af Sampo-cykeln i ett så fiendtligt förhållande till hvarandra, att några frieri-färder ifrån den ena orten till den ändra derefter ej kunna tänkas äga rum. Vidare är det ganska naturligt, att fiendtligheterna först då taga sin början, sedan Ilmarinens hustru blifvit dödad, och Pohja-dottren icke mera utgjorde ett föreningsband emellan Pohjolas och Kalevalas folk. Ett vigtigt skäl för ifrågavarande fördelning är äfven det, att Lemminkäinens öden i den sednare afdelningen af Sampo-cykeln förutsättas såsom bekanta?elasticlunr-rs-3.0.2/tests/data/sv.out.txt000064400000000000000000000134340072674642500167040ustar 00000000000000likasom förtjenst undan förgäng hafv rädd finland historisk minn nästan uteslut tillhör porthan likaså hafv nation mythisk qvarlefv hufvudsak genom d:r lönnrot verksam blifvit framkall ljuset alldel oban väl hell väg beträd hvad för gjort liksom vor kaleval kantelet trädt dag få dessförinnan gjord runo samling hafv numer betyd end mon tjen framkall åtminston finland evig dyrbar skatt ur synpunk betrak hafv stort historisk värd ans af anledning pligt omnämn förnämst bland d:r lönnrot ban väg först fäst uppmärksam inhemsk poesi porthan finsk litteratur hero tror porthan oaktad alltför högt uppskat mythisk sång saml desto mindr genom föresyn väck äfv andr synner ganand fortgå ban ganander förtjenst består dock gjord runo samling bemöd tillvägabring finsk mythologi väl säg fler år hafv genomgåt tillgäng landet möj exister run ur dedication arbet framlys störr del blifvit af porthan meddel detsamm tord 
ännu högr grad gäll lenqvist hvar mythologisk arbet öfr vid öfverträff ganander allmän syn tid ännu betänk utgifv af fullständ runo samling ansåg början nödvänd bearbet mythologi såsom clavis poëseo fennica gan porthan död stod finsk litteratur lång tid still sörj blott hädangångn år vakn åter nytt lif tid åtminston kort dereft uppträd mång utmärk litteratör tidrymd tilländalupit räkn finsk litteratur fler bearbet hel föregåend seklet ibland hafv flest störr mindr mon egn uppmärksam folkpoesi mest förtjent afseend d:r topelius hvilk fem särskild häft utgifvit samling af äldr nyar run tillhör äfv förtjenst hafv angifvit ort hvarest finsk sång ren störst mäng bibehållit säg företalet femt häftet af samling få ställ nästan ingenstäd finland finn fullständ oförderfv forntid qväd end ställ jord äfv utom finland grän nem sockn archangelsk guvernementet samt synner wuokkiniemi församling rädd ännu fordn sed fordn hjelt ätt minn ren oförfalsk drägt derifrån äfv ring möd förskaff bäst sång år företog prof v beck turun wiikko sanom enhet bring mäng sång wäinämöin försök obetyd äfv förtjen likväl uppmärksam såsom först sitt slag emellertid syn början fäst uppmärksam vink såled fortf topelius utgifv samling fragmentarisk form method följd af d:r lönnrot kantel ehuru företalet kaleval säg redan år förfat af academisk afhandling wäinämöin hafv kommit tank förh var sång wäinämöin ilmarin lemminkäin m.ö sannolik voro fragment ur längr cykl run saml vandring finland åren voro alltför obetyd gifv anledning combination försök först följ åren besök af topelius anvist ort utom finsk gräns börj utför storart plan enhet bring fornfinsk sång af episk innehåll ehuru ursprung finsk folkpoesi charakterristisk deri end rad infusk hvar gjort närm bekantskap gen urskilj äkt ursprung all oäkt afvel hafv likväl åtskil pseudo critici mjugg sökt insinuer oskyld förmodan d:r lönnrot få idé realiser sjelf completter kaleval affälling vilj nåd skänk sitt fosterland tusend del af kärlek hvarmed 
sannolik omfat egn hög person vor lätt åtminston möj öfvertyg derom end af d:r lönnrot sjelf förfat ver förekomm hel kaleval var likväl lång ifrån finsk litteratur vilj gör proselyt af petrificat huru lyckat företag må blifv problem komm tid såsom hopp ännu störr antal af forntid qväd kommit dagsljuset emellertid vor önsk d:r lönnrot redogör sammanhanget kaleval ådagalägg enhet trott finn deri af åsig uttal sitt företal kaleval syn härflyt ans qväd kunn gör anspråk enhet högr mening säg bland ann enl mening hafv run uppståt småningom ordning händ tilldragit skild sång wäinämöin ilmarin lemminkäin tord förfat af end verk af mång ena anförtrod minnet andr annan tilldrag skildr hvad sjelf sett hört inlåt kritik öfv påståend vilj allen anmärk enl d:r lönnrot åsig run historisk uppkomst enhet kaleval komm bero derpå skild tilldrag ordn behör tidsföljd sannolik äfv åsig förmåd kaleval inför run hvilk sitt innehåll intet vis sammanhäng öfr huru d:r lönnrot äfv må hafv uppfat sammanhanget kaleval hvarj fall mening obefog finn uttal företalet runol kaleval end innehåll mäng stymp runo fragment hvem medg mång run tid lopp kunn stymp förändr ja försvin hvar låt förblind af fördom bör finn kaleval vid mer fragment öfversät skall gör skyld maktspråk af motsat beskaffen lemn kort öfvers af arbetet innehåll läs bättr sammanhåll hel lätt ins sammanhanget emellan särskild parti fråg sammanhang likgilt hvilk ordning runo sång sjelfv iakttag flest följ naturligtvis ordning reciter styck eftersom fall minnet flest bland vandring olonetsk archangelsk guvernement tillfäll hör sjung run sampo sammanhang wäinämöin ilmarin lemminkäin pohjol färd betrak äter såsom skild cykl pohj fagr mö målet färd kunn äfv cykl betrak såsom slutet helt sålund erbjud kaleval tvenn väsendt afdelning hvilk åter hafv gansk när inbörd sammanhang derigenom pohjol värdin utfäst dott såsom belöning smid sampo underbar redskap förfärd af ilmarin ehuru såled flickan bord tillhör ledsn likväl lifvet pohjol förr 
lyckat tillvin kärlek återvänd hembyg emellertid sökt äfv wäinämöin lemminkäin vinn ynnest slut segr dock ilmarin genom run besjung trenn hjeltar pohjol färd afsöndr d:r lönnrot sampo cykeln tvenn afdelning låt frieri run begyn snart sampo hopsmid ilmarin återvänd hemmet fördelning ur fler skäl försvar först främst träd kaleval pohjol sedn afdelning af sampo cykeln fiendt förhåll hvarandr frieri färd ifrån ena ort ändr dereft kunn tänk äga rum vid gansk natur fiendt först tag början ilmarin hustru blifvit död pohj dottr mer utgjord föreningsband emellan pohjol kaleval folk vigt skäl ifrågavar fördelning äfv lemminkäin öden sedn afdelning af sampo cykeln förutsät såsom bekant elasticlunr-rs-3.0.2/tests/data/tr.in.txt000064400000000000000000000050270072674642500164770ustar 00000000000000Ah gençlik!.. Tıpkı ezeli bir baharın ilk çiçekli günlerine benzer. Yeşil kırlar, kelebek dolu bahçeler, güzel kokular içinde serçelerin şen efsanelerini doymadan dinleyerek dolaşırız. İdealimizin rüyası bize hayat kışının fırtınalarını, karlarını, tipilerini hatırlatmaz. Ben işte bu hiç bitmez sanılan baharı İzmir'de geçirdim. On dokuz yaşındaydım. Galiba on beş sene evvel... Evet, seneler nasıl bir ok gölgesi gibi uçuyor! Meşrutiyetin bu hür, bu serbest günlerinden çok uzaktık. Lâkin o eski, zalim idarenin ezici kahrını, gafletim sayesinde hiç duymuyordum. Mersinli'deki minimini evimde, kocaman çınar ağaçlarının hiç durmadan öten ninnileri içinde, kitapların dipsiz girdabına dalmış gitmiştim. Haricî kainat umrumda değildi. Sözde, felsefe feneriyle büyük bir hakikat bulacaktım. Heyhat! Şimdi bu masum hülyamı aklıma getirince, nasıl acı acı gülüyorum... Bir kelimeyi, bir satırı, bir sözü haftalarca, aylarca düşünür, bir cümlenin altındaki —var tevehhüm ettiğim— gizli mânâyı bulmak için birçok geceler uyuyamazdım. Filozofların pek o kadar mânâ murad etmeden yumurtladığı fikirler, bence bir "ilahi nass" gibiydi. Hatta romanlarda rasgeldiğim "ukalalık"lar bile gözümden kaçmazdı. 
Onları da fişlere yazar, notlarımın arasına kordum. Bu "ukalalık"lardan birisi, beni tam üç ay düşündürdü. Tam yüz beş gece gözüme uyku girmedi. Flaubert'in miydi, yoksa bir başkasının mı, iyice hatırlayamıyorum. "Le grade dégrade...", yani: "Rütbe, haysiyeti düşürtür." cümlesi! Bundan bir türlü mânâ çıkaramadım. Bilakis, fikrimce rütbe insanı herkesin seviyesinden yukarı kaldırır, yükseltir, hatta sahibine hususi bir haysiyet verirdi. Artık başka kitap, gazete falan okuyamaz oldum... Her satırın altında, mânâsını anlamadığım bu "Le grade dégrade.." cümlesi kararıyor, bir avuç istifham işaretinden yuğrulmuş sabit bir fikir gibi dimağımda düğümleniyordu. Sakin evimde oturamıyor, bulamadığım mânâyı arayarak tenha sahillerde, kalabalık caddelerde, dar sokaklarda serseri serseri dolaşıyordum. Bir "meçhul", bir "sır" insana ne kadar ıztırap verir; bâhusus masum bir iman da olursa... Bir gün yine deli gibi, içimden: "Le grade dégrade..." diye söylenerek Hükümet Konağı'nın önünden geçiyordum. İsmimi işittim. Döndüm. Bir de baktım ki, riyâziye muallimim, Logaritmacı Hasan! 
Askerî Kıraathanesinin ta köşesinde bir sandalyeye kurulmuş nargilesini çekiyor...elasticlunr-rs-3.0.2/tests/data/tr.out.txt000064400000000000000000000031210072674642500166710ustar 00000000000000ah gençlik tıpkı ezel bahar ilk çiçekli gün benzer yeşil kır kelebek dol bahçe güzel koku iç serçe şen efsane doyma dinleyerek dolaşır i̇deal rüyas hayat kış fırtına kar tipi hatırlatmaz bitmez sanıla bahar i̇zmir' geçir yaş galip se evvel evet sene ok gölges uçuyor meşrutiyet hür serbest gün uzak lâkin eski zal idare eziç kahr gaflet saye duymuyor mersinli' minim ev kocama çınar ağaç durma ö ninni iç kitap dipsiz girdap dal gitmiş haricî kainat umr değil söz felsef fener büyük hakikat bulacak heyhat ş mas hülya akl getir aç aç gülüyor kelime satır söz hafta ay düşünür cümle alt tevehh ettik gizli mânâyı bulmak gece uyuyamaz filozof mânâ muradı etme yumurtladık fikir be ilah nass gip roman rasgeldik ukalalık" göz kaçmaz fiş yazar not ara kor ukalalık" biris tam ay düşündür tam geç göz uyku girmedi flaubert' mi başka iyiç hatırlayamıyor le grade dégrade rütbe haysiyet düşür cümles türlü mânâ çıkaramadı bilakis fikr rütbe insa seviye yukar kaldırır yüksel sahip hususi haysiyet verir ar başka kitap gaze fala okuyamaz ol satır alt mânâs anlamadık le grade dégrade cümles kararıyor avuç istifha işaret yuğrul sabit fikir dimak düğümleniyor sak ev oturamıyor bulamadık mânâyı arayarak tenha sahil kalabalık cadde dar sokak serser serser dolaşıyor meçhul sır insa ıztırap verir bâhusus mas ima gün del iç le grade dégrade söylenerek hükümet konağı'n ön geçiyor i̇sm işit dö bak riyâzi muall logaritmaç hasa askerî kıraathane ta köşe sandalye kurul nargile çekiyor elasticlunr-rs-3.0.2/tests/data/zh.in.txt000064400000000000000000000025560072674642500164770ustar 
00000000000000这条法国邮船白拉日隆子爵号(VicomtedeBragelonne)正向中国开来。早晨八点多钟,冲洗过的三等舱甲板湿意未干,但已坐满了人,法国人、德国流亡出来的犹太人、印度人、安南人,不用说还有中国人。海风里早含着燥热,胖人身体给炎风吹干了,上一层汗结的盐霜,仿佛刚在巴勒斯坦的死海里洗过澡。毕竟是清晨,人的兴致还没给太阳晒萎,烘懒,说话做事都很起劲。那几个新派到安南或中国租界当警察的法国人,正围了那年轻善撒娇的犹太女人在调情。俾斯麦曾说过,法国公使大使的特点,就是一句外国话不会讲;这几位警察并不懂德文,居然传情达意,引得犹太女人格格地笑,比他们的外交官强多了。这女人的漂亮丈夫,在旁顾而乐之,因为他几天来,香烟、啤酒、柠檬水沾光了不少。红海已过,不怕热极引火,所以等一会甲板上零星果皮、纸片、瓶塞之外,香烟头定又遍处皆是。法国人的思想是有名的清楚,他的文章也明白干净,但是他的做事,无不混乱、肮脏、喧哗,但看这船上的乱糟糟。这船,倚仗人的机巧,载满人的扰攘,寄满人的希望,热闹地行着,每分钟把沾污了人气的一小方小面,还给那无情、无尽、无际的大海。elasticlunr-rs-3.0.2/tests/data/zh.out.txt000064400000000000000000000027600072674642500166750ustar 00000000000000这 条 法国 邮船 白 拉 日隆 子爵 号 VicomtedeBragelonne 正向 中国 开来 早晨 八点 多 钟 冲洗 过 三等 三等舱 甲板 湿 意 未 干 但 已 坐满 人 法国 国人 法国人 德国 流亡 出来 犹太 犹太人 印度 印度人 安南 人 不用 不用说 还有 中国 人 海风 里 早 含 着 燥热 胖 人 身体 给 炎风 吹干 上 一层 汗 结 盐霜 仿佛 刚 在 巴勒 勒斯 巴勒斯 巴勒斯坦 死 海里 洗过 洗过澡 毕竟 是 清晨 人 兴致 还 没 给 太阳 晒 萎 烘 懒 说话 做事 都 很 起劲 那 几个 新派 到 安南 或 中国 租界 当 警察 法国 国人 法国人 正 围 那 年轻 善 撒娇 犹太 女人 在 调情 俾斯麦 曾 说 过 法国 公使 大使 特点 就是 一句 外国 话 不会 讲 这 几位 警察 并 不 懂 德文 居然 传情 达意 引得 犹太 女人 格格 地 笑 比 他们 外交 外交官 强 多 这 女人 漂亮 丈夫 在 旁 顾 而 乐 之 因为 他 几天 来 香烟 啤酒 柠檬 柠檬水 沾光 不少 红海 已 过 不怕 热 极 引火 所以 等 一会 甲板 上 零星 果皮 纸片 瓶塞 之外 香烟 烟头 香烟头 定 又 遍 处 皆 是 法国 国人 法国人 思想 是 有名 清楚 他 文章 也 明白 干净 但是 他 做事 无不 混乱 肮脏 喧哗 但 看 这 船上 乱糟 乱糟糟 这 船 倚仗 人 机巧 载满 人 扰攘 寄满 人 希望 热闹 地 行 着 分钟 每分钟 把 沾污 人气 一小 方 小 面 还给 那 无情 无尽 无际 大海 elasticlunr-rs-3.0.2/tests/searchindex_fixture_en.json000064400000000000000000000764740072674642500214310ustar 00000000000000{ "documentStore": { "docInfo": { "1": { "body": 8, "title": 2 }, "2": { "body": 13, "title": 2 }, "3": { "body": 11, "title": 2 }, "4": { "body": 9, "title": 2 }, "5": { "body": 11, "title": 2 }, "6": { "body": 4, "title": 2 } }, "docs": { "1": { "body": "Lorem ipsum dolor sit amet, consectetur adipiscing elit", "id": "1", "title": "Chapter 1" }, "2": { "body": "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad", "id": "2", "title": "Chapter 2" }, "3": { "body": "minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex", "id": "3", "title": "Chapter 3" }, "4": { "body": "ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate", "id": "4", "title": "Chapter 4" }, "5": { "body": "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat", "id": "5", "title": "Chapter 5" }, "6": { "body": "Spatiëring shouldn’t cause a panic.", "id": "6", "title": "Chapter 6" } }, "length": 6, "save": true }, "fields": [ "title", "body" ], "index": { "body": { "root": { "1": { "df": 1, "docs": { "1": { "tf": 1.0 } } }, "2": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "3": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "4": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "5": { "df": 1, "docs": { "5": { "tf": 1.0 } } }, "6": { "df": 1, "docs": { "6": { "tf": 1.0 } } }, "a": { "d": { "df": 1, "docs": { "2": { "tf": 1.0 } }, "i": { "df": 0, "docs": {}, "p": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "s": { "c": { "df": 1, "docs": { "1": { "tf": 1.0 } } }, "df": 0, "docs": {} } } } } }, "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "q": { "df": 0, "docs": {}, "u": { "a": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "p": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } } } }, "m": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "1": { "tf": 1.0 } } } } }, "u": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } }, "c": { "a": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "s": { "df": 1, "docs": { "6": { "tf": 1.0 } } } } }, "df": 0, "docs": {}, "h": { "a": { "df": 0, "docs": {}, "p": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "r": { "df": 6, "docs": { "1": { "tf": 1.0 }, "2": { "tf": 1.0 }, "3": { "tf": 1.0 }, "4": { "tf": 1.0 }, "5": { "tf": 1.0 }, "6": { "tf": 
1.0 } } } } } } }, "df": 0, "docs": {} }, "i": { "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "m": { "df": 1, "docs": { "5": { "tf": 1.0 } } } } } } }, "o": { "df": 0, "docs": {}, "m": { "df": 0, "docs": {}, "m": { "df": 0, "docs": {}, "o": { "d": { "df": 0, "docs": {}, "o": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "df": 0, "docs": {} } } }, "n": { "df": 0, "docs": {}, "s": { "df": 0, "docs": {}, "e": { "c": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "r": { "df": 1, "docs": { "1": { "tf": 1.0 } } } } } } } }, "df": 0, "docs": {}, "q": { "df": 0, "docs": {}, "u": { "a": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "df": 0, "docs": {} } } } } } } }, "d": { "df": 0, "docs": {}, "o": { "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "o": { "df": 0, "docs": {}, "r": { "df": 4, "docs": { "1": { "tf": 1.0 }, "2": { "tf": 1.0 }, "4": { "tf": 1.0 }, "5": { "tf": 1.0 } } } } } }, "u": { "df": 0, "docs": {}, "i": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } }, "df": 0, "docs": {}, "e": { "a": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "s": { "df": 0, "docs": {}, "m": { "df": 0, "docs": {}, "o": { "d": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "df": 0, "docs": {} } } } } }, "l": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "1": { "tf": 1.0 } } } } }, "n": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "m": { "df": 1, "docs": { "2": { "tf": 1.0 } } } } }, "s": { "df": 0, "docs": {}, "s": { "df": 1, "docs": { "5": { "tf": 1.0 } } } }, "t": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "u": { "df": 1, "docs": { "5": { "tf": 1.0 } } }, "x": { "c": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "p": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, 
"u": { "df": 0, "docs": {}, "r": { "df": 1, "docs": { "5": { "tf": 1.0 } } } } } } } } }, "df": 1, "docs": { "3": { "tf": 1.0 } }, "e": { "df": 0, "docs": {}, "r": { "c": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } }, "df": 0, "docs": {} } } } }, "f": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "g": { "df": 0, "docs": {}, "i": { "a": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "5": { "tf": 1.0 } } } }, "df": 0, "docs": {} } } } }, "i": { "df": 0, "docs": {}, "n": { "c": { "df": 0, "docs": {}, "i": { "d": { "df": 0, "docs": {}, "i": { "d": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "n": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "2": { "tf": 1.0 } } } } } }, "df": 0, "docs": {} } }, "df": 0, "docs": {} } }, "df": 0, "docs": {} }, "p": { "df": 0, "docs": {}, "s": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "m": { "df": 1, "docs": { "1": { "tf": 1.0 } } } } } }, "r": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "r": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } } }, "l": { "a": { "b": { "df": 0, "docs": {}, "o": { "df": 0, "docs": {}, "r": { "df": 1, "docs": { "2": { "tf": 1.0 } }, "i": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } }, "df": 0, "docs": {} }, "df": 0, "docs": {}, "o": { "df": 0, "docs": {}, "r": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "m": { "df": 1, "docs": { "1": { "tf": 1.0 } } } } } } }, "m": { "a": { "df": 0, "docs": {}, "g": { "df": 0, "docs": {}, "n": { "a": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "df": 0, "docs": {} } } }, "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "n": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "m": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } } }, "n": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "s": { "df": 0, "docs": {}, "i": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } }, "o": { "df": 0, "docs": {}, "s": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "r": { "df": 0, "docs": {}, "u": 
{ "d": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "df": 0, "docs": {} } } } } }, "u": { "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "l": { "a": { "df": 1, "docs": { "5": { "tf": 1.0 } } }, "df": 0, "docs": {} } } } }, "o": { "c": { "c": { "a": { "df": 0, "docs": {}, "e": { "c": { "a": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "5": { "tf": 1.0 } } } }, "df": 0, "docs": {} }, "df": 0, "docs": {} } }, "df": 0, "docs": {} }, "df": 0, "docs": {} }, "df": 0, "docs": {} }, "p": { "a": { "df": 0, "docs": {}, "n": { "df": 0, "docs": {}, "i": { "c": { "df": 1, "docs": { "6": { "tf": 1.0 } } }, "df": 0, "docs": {} } }, "r": { "df": 0, "docs": {}, "i": { "a": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "r": { "df": 1, "docs": { "5": { "tf": 1.0 } } } } } }, "df": 0, "docs": {} } } }, "df": 0, "docs": {} }, "q": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "i": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } }, "r": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "p": { "df": 0, "docs": {}, "r": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "h": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "n": { "d": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "r": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } } } }, "df": 0, "docs": {} } } } } } } } }, "s": { "df": 0, "docs": {}, "e": { "d": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "df": 0, "docs": {} }, "h": { "df": 0, "docs": {}, "o": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "l": { "d": { "df": 0, "docs": {}, "n": { "df": 0, "docs": {}, "’": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "6": { "tf": 1.0 } } } } } }, "df": 0, "docs": {} } } } }, "i": { "df": 0, "docs": {}, "n": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "5": { "tf": 1.0 } } } }, "t": { "df": 1, "docs": { "1": { "tf": 1.0 } } } }, "p": { "a": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "ë": 
{ "df": 0, "docs": {}, "r": { "df": 1, "docs": { "6": { "tf": 1.0 } } } } } } }, "df": 0, "docs": {} } }, "t": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "m": { "df": 0, "docs": {}, "p": { "df": 0, "docs": {}, "o": { "df": 0, "docs": {}, "r": { "df": 1, "docs": { "2": { "tf": 1.0 } } } } } } } }, "u": { "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "l": { "a": { "df": 0, "docs": {}, "m": { "c": { "df": 0, "docs": {}, "o": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "df": 0, "docs": {} } }, "df": 0, "docs": {} } }, "t": { "df": 2, "docs": { "2": { "tf": 1.4142135623730951 }, "3": { "tf": 1.0 } } } }, "v": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "i": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "5": { "tf": 1.0 } } } } }, "n": { "df": 0, "docs": {}, "i": { "a": { "df": 0, "docs": {}, "m": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "df": 0, "docs": {} } } }, "o": { "df": 0, "docs": {}, "l": { "df": 0, "docs": {}, "u": { "df": 0, "docs": {}, "p": { "df": 0, "docs": {}, "t": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } } } } } } }, "title": { "root": { "1": { "df": 1, "docs": { "1": { "tf": 1.0 } } }, "2": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "3": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "4": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "5": { "df": 1, "docs": { "5": { "tf": 1.0 } } }, "6": { "df": 1, "docs": { "6": { "tf": 1.0 } } }, "c": { "df": 0, "docs": {}, "h": { "a": { "df": 0, "docs": {}, "p": { "df": 0, "docs": {}, "t": { "df": 0, "docs": {}, "e": { "df": 0, "docs": {}, "r": { "df": 6, "docs": { "1": { "tf": 1.0 }, "2": { "tf": 1.0 }, "3": { "tf": 1.0 }, "4": { "tf": 1.0 }, "5": { "tf": 1.0 }, "6": { "tf": 1.0 } } } } } } }, "df": 0, "docs": {} } }, "df": 0, "docs": {} } } }, "lang": "English", "pipeline": [ "trimmer", "stopWordFilter", "stemmer" ], "ref": "id", "version": "0.9.5" 
}elasticlunr-rs-3.0.2/tests/searchindex_fixture_ja.json000064400000000000000000000563510072674642500214110ustar 00000000000000{ "documentStore": { "docInfo": { "1": { "body": 5, "title": 2 }, "2": { "body": 15, "title": 2 }, "3": { "body": 72, "title": 2 }, "4": { "body": 33, "title": 2 } }, "docs": { "1": { "body": "吾輩は猫である。名前はまだ無い。", "id": "1", "title": "第1章" }, "2": { "body": "どこで生れたかとんと見当がつかぬ。何でも薄暗いじめじめした所でニャーニャー泣いていた事だけは記憶している。", "id": "2", "title": "第2章" }, "3": { "body": "吾輩はここで始めて人間というものを見た。しかもあとで聞くとそれは書生という人間中で一番獰悪な種族であったそうだ。この書生というのは時々我々を捕えて煮て食うという話である。しかしその当時は何という考もなかったから別段恐しいとも思わなかった。ただ彼の掌に載せられてスーと持ち上げられた時何だかフワフワした感じがあったばかりである。掌の上で少し落ちついて書生の顔を見たのがいわゆる人間というものの見始であろう。この時妙なものだと思った感じが今でも残っている。", "id": "3", "title": "第3章" }, "4": { "body": "第一毛をもって装飾されべきはずの顔がつるつるしてまるで薬缶だ。その後猫にもだいぶ逢ったがこんな片輪には一度も出会わした事がない。のみならず顔の真中があまりに突起している。", "id": "4", "title": "第4章" } }, "length": 4, "save": true }, "fields": [ "title", "body" ], "index": { "body": { "root": { "df": 0, "docs": {}, "あ": { "df": 0, "docs": {}, "っ": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "と": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "ま": { "df": 0, "docs": {}, "り": { "df": 0, "docs": {}, "に": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } } }, "い": { "df": 0, "docs": {}, "た": { "df": 0, "docs": {}, "事": { "df": 1, "docs": { "2": { "tf": 1.0 } } } }, "と": { "df": 0, "docs": {}, "も": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "る": { "df": 3, "docs": { "2": { "tf": 1.0 }, "3": { "tf": 1.0 }, "4": { "tf": 1.0 } } }, "わ": { "df": 0, "docs": {}, "ゆ": { "df": 0, "docs": {}, "る": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } }, "こ": { "df": 0, "docs": {}, "こ": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "の": { "df": 1, "docs": { "3": { "tf": 1.4142135623730951 } } }, "ん": { "df": 0, "docs": {}, "な": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } }, "さ": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "し": { "df": 3, "docs": { "2": { "tf": 1.4142135623730951 }, "3": { "tf": 1.0 }, "4": { "tf": 
1.7320508075688772 } }, "か": { "df": 0, "docs": {}, "し": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "も": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } }, "じ": { "df": 0, "docs": {}, "め": { "df": 0, "docs": {}, "じ": { "df": 0, "docs": {}, "め": { "df": 1, "docs": { "2": { "tf": 1.0 } } } } } }, "そ": { "df": 0, "docs": {}, "う": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "の": { "df": 1, "docs": { "3": { "tf": 1.0 } }, "後": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "れ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "た": { "df": 0, "docs": {}, "だ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "だ": { "df": 0, "docs": {}, "い": { "df": 0, "docs": {}, "ぶ": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } }, "つ": { "df": 0, "docs": {}, "か": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "る": { "df": 0, "docs": {}, "つ": { "df": 0, "docs": {}, "る": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } } }, "と": { "df": 0, "docs": {}, "ん": { "df": 0, "docs": {}, "と": { "df": 1, "docs": { "2": { "tf": 1.0 } } } } }, "ど": { "df": 0, "docs": {}, "こ": { "df": 1, "docs": { "2": { "tf": 1.0 } } } }, "な": { "df": 0, "docs": {}, "い": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "か": { "df": 0, "docs": {}, "っ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "ら": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "の": { "df": 1, "docs": { "3": { "tf": 1.4142135623730951 } }, "み": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "は": { "df": 0, "docs": {}, "ず": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "ま": { "df": 0, "docs": {}, "だ": { "df": 1, "docs": { "1": { "tf": 1.0 } } }, "る": { "df": 0, "docs": {}, "で": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } }, "も": { "df": 0, "docs": {}, "の": { "df": 1, "docs": { "3": { "tf": 1.7320508075688772 } } } }, "ら": { "df": 0, "docs": {}, "れ": { "df": 1, "docs": { "3": { "tf": 1.4142135623730951 } } } }, "れ": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "ス": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "フ": { "df": 0, "docs": {}, "ワ": { "df": 0, 
"docs": {}, "フ": { "df": 0, "docs": {}, "ワ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } }, "一": { "df": 1, "docs": { "4": { "tf": 1.4142135623730951 } }, "番": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "上": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "中": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "事": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "人": { "df": 0, "docs": {}, "間": { "df": 1, "docs": { "3": { "tf": 1.7320508075688772 } } } }, "今": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "何": { "df": 2, "docs": { "2": { "tf": 1.0 }, "3": { "tf": 1.0 } }, "だ": { "df": 0, "docs": {}, "か": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } }, "出": { "df": 0, "docs": {}, "会": { "df": 0, "docs": {}, "わ": { "df": 1, "docs": { "4": { "tf": 1.0 } } } } }, "別": { "df": 0, "docs": {}, "段": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "名": { "df": 0, "docs": {}, "前": { "df": 1, "docs": { "1": { "tf": 1.0 } } } }, "吾": { "df": 0, "docs": {}, "輩": { "df": 2, "docs": { "1": { "tf": 1.0 }, "3": { "tf": 1.0 } } } }, "妙": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "始": { "df": 1, "docs": { "3": { "tf": 1.0 } }, "め": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "少": { "df": 0, "docs": {}, "し": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "度": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "当": { "df": 0, "docs": {}, "時": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "彼": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "思": { "df": 0, "docs": {}, "っ": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "わ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "恐": { "df": 0, "docs": {}, "し": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "感": { "df": 0, "docs": {}, "じ": { "df": 1, "docs": { "3": { "tf": 1.4142135623730951 } } } }, "我": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "所": { "df": 1, "docs": { "2": { "tf": 1.0 } } }, "持": { "df": 0, "docs": {}, "ち": { "df": 0, "docs": {}, "上": { "df": 0, "docs": {}, "げ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } }, "捕": { "df": 0, 
"docs": {}, "え": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "掌": { "df": 1, "docs": { "3": { "tf": 1.4142135623730951 } } }, "時": { "df": 1, "docs": { "3": { "tf": 1.7320508075688772 } } }, "書": { "df": 0, "docs": {}, "生": { "df": 1, "docs": { "3": { "tf": 1.7320508075688772 } } } }, "残": { "df": 0, "docs": {}, "っ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "毛": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "泣": { "df": 0, "docs": {}, "い": { "df": 1, "docs": { "2": { "tf": 1.0 } } } }, "無": { "df": 0, "docs": {}, "い": { "df": 1, "docs": { "1": { "tf": 1.0 } } } }, "煮": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "片": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "猫": { "df": 2, "docs": { "1": { "tf": 1.0 }, "4": { "tf": 1.0 } } }, "獰": { "df": 0, "docs": {}, "悪": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "生": { "df": 0, "docs": {}, "れ": { "df": 1, "docs": { "2": { "tf": 1.0 } } } }, "真": { "df": 0, "docs": {}, "中": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "種": { "df": 0, "docs": {}, "族": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "突": { "df": 0, "docs": {}, "起": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "章": { "df": 4, "docs": { "1": { "tf": 1.0 }, "2": { "tf": 1.0 }, "3": { "tf": 1.0 }, "4": { "tf": 1.0 } } }, "第": { "df": 4, "docs": { "1": { "tf": 1.0 }, "2": { "tf": 1.0 }, "3": { "tf": 1.0 }, "4": { "tf": 1.4142135623730951 } } }, "考": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "聞": { "df": 0, "docs": {}, "く": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "落": { "df": 0, "docs": {}, "ち": { "df": 0, "docs": {}, "つ": { "df": 0, "docs": {}, "い": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } }, "薄": { "df": 0, "docs": {}, "暗": { "df": 0, "docs": {}, "い": { "df": 1, "docs": { "2": { "tf": 1.0 } } } } }, "薬": { "df": 0, "docs": {}, "缶": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "装": { "df": 0, "docs": {}, "飾": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "見": { "df": 1, "docs": { "3": { "tf": 1.7320508075688772 } }, "当": { "df": 
1, "docs": { "2": { "tf": 1.0 } } } }, "記": { "df": 0, "docs": {}, "憶": { "df": 1, "docs": { "2": { "tf": 1.0 } } } }, "話": { "df": 1, "docs": { "3": { "tf": 1.0 } } }, "載": { "df": 0, "docs": {}, "せ": { "df": 1, "docs": { "3": { "tf": 1.0 } } } }, "輪": { "df": 1, "docs": { "4": { "tf": 1.0 } } }, "逢": { "df": 0, "docs": {}, "っ": { "df": 1, "docs": { "4": { "tf": 1.0 } } } }, "顔": { "df": 2, "docs": { "3": { "tf": 1.0 }, "4": { "tf": 1.4142135623730951 } } }, "食": { "df": 0, "docs": {}, "う": { "df": 1, "docs": { "3": { "tf": 1.0 } } } } } }, "title": { "root": { "df": 0, "docs": {}, "章": { "df": 4, "docs": { "1": { "tf": 1.0 }, "2": { "tf": 1.0 }, "3": { "tf": 1.0 }, "4": { "tf": 1.0 } } }, "第": { "df": 4, "docs": { "1": { "tf": 1.0 }, "2": { "tf": 1.0 }, "3": { "tf": 1.0 }, "4": { "tf": 1.0 } } } } } }, "lang": "Japanese", "pipeline": [ "trimmer-ja", "stemmer-ja" ], "ref": "id", "version": "0.9.5" }elasticlunr-rs-3.0.2/tests/test-index.rs000064400000000000000000000075030072674642500164260ustar 00000000000000use elasticlunr::*; use serde_json::json; use std::fs::{self, File}; use std::path::Path; fn create_index(lang: Box, docs: &'static [[&'static str; 2]]) -> serde_json::Value { let mut index = Index::with_language(lang, &["title", "body"]); for (i, doc) in docs.iter().enumerate() { index.add_doc(&(i + 1).to_string(), doc); } json!(index) } fn generate_fixture( lang: Box, docs: &'static [[&'static str; 2]], ) -> serde_json::Value { let code = lang.code(); let src = create_index(lang, docs); let dest = Path::new(env!("CARGO_MANIFEST_DIR")) .join(format!("tests/searchindex_fixture_{}.json", code)); let dest = File::create(&dest).unwrap(); serde_json::to_writer_pretty(dest, &src).unwrap(); src } fn read_fixture(lang: &dyn Language) -> serde_json::Value { let src = Path::new(env!("CARGO_MANIFEST_DIR")) .join(format!("tests/searchindex_fixture_{}.json", lang.code())); let json = fs::read_to_string(src).unwrap(); serde_json::from_str(&json).expect("Unable to 
deserialize the fixture") } const GENERATE_FIXTURE: bool = false; fn check_index(lang: L, docs: &'static [[&'static str; 2]]) { let new_index = create_index(Box::new(lang.clone()), docs); let name = lang.name(); let fixture_index = if GENERATE_FIXTURE { generate_fixture(Box::new(lang), docs) } else { read_fixture(&lang) }; if new_index != fixture_index { panic!("The {} search index has changed from the fixture", name); } } #[test] fn en_search_index_hasnt_changed_accidentally() { check_index(lang::English::new(), DOCS_EN); } #[cfg(feature = "ja")] #[test] fn ja_search_index_hasnt_changed_accidentally() { check_index(lang::Japanese::new(), DOCS_JA); } const DOCS_EN: &[[&str; 2]] = &[ [ "Chapter 1", "Lorem ipsum dolor sit amet, consectetur adipiscing elit", ], [ "Chapter 2", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad", ], [ "Chapter 3", "minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex", ], [ "Chapter 4", "ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate", ], [ "Chapter 5", "velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat", ], ["Chapter 6", "Spatiëring shouldn’t cause a panic."], ]; #[cfg(feature = "ja")] const DOCS_JA: &'static [[&'static str; 2]] = &[ [ "第1章", "吾輩は猫である。名前はまだ無い。", ], [ "第2章", "どこで生れたかとんと見当がつかぬ。何でも薄暗いじめじめした所でニャーニャー泣いていた事だけは記憶している。", ], [ "第3章", "吾輩はここで始めて人間というものを見た。しかもあとで聞くとそれは書生という人間中で一番獰悪な種族であったそうだ。この書生というのは時々我々を捕えて煮て食うという話である。しかしその当時は何という考もなかったから別段恐しいとも思わなかった。ただ彼の掌に載せられてスーと持ち上げられた時何だかフワフワした感じがあったばかりである。掌の上で少し落ちついて書生の顔を見たのがいわゆる人間というものの見始であろう。この時妙なものだと思った感じが今でも残っている。", ], [ "第4章", "第一毛をもって装飾されべきはずの顔がつるつるしてまるで薬缶だ。その後猫にもだいぶ逢ったがこんな片輪には一度も出会わした事がない。のみならず顔の真中があまりに突起している。", ], ]; elasticlunr-rs-3.0.2/tests/test-pipeline.rs000064400000000000000000000034640072674642500171260ustar 00000000000000// Input text is excerpted from public domain books on gutenberg.org or wikisource.org use elasticlunr::*; use std::fs::File; use std::io::{BufRead, BufReader, Read, Write}; use std::path::Path; #[allow(dead_code)] fn write_output(lang: &dyn Language) { let code = lang.code(); let base = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests") .join("data"); let input = base.join(&format!("{}.in.txt", code)); let mut input_str = String::new(); File::open(&input) .unwrap() .read_to_string(&mut input_str) .unwrap(); let output = base.join(&format!("{}.out.txt", code)); let mut output = File::create(&output).unwrap(); let pipeline = lang.make_pipeline(); let tokens = pipeline.run(lang.tokenize(&input_str)); for tok in tokens { writeln!(&mut output, "{}", tok).unwrap(); } } fn compare_to_fixture(lang: &dyn Language) { let code = lang.code(); let base = Path::new(env!("CARGO_MANIFEST_DIR")) .join("tests") .join("data"); let input = base.join(&format!("{}.in.txt", code)); let mut input_str = String::new(); File::open(&input) .unwrap() .read_to_string(&mut input_str) .unwrap(); let output = base.join(&format!("{}.out.txt", code)); let mut output = BufReader::new(File::open(&output).unwrap()).lines(); let pipeline = lang.make_pipeline(); let tokens = 
pipeline.run(lang.tokenize(&input_str)); for tok in tokens { assert_eq!( tok, output.next().unwrap().unwrap(), "Comparing pipeline tokens to fixture for {}", lang.name() ); } } #[test] fn test_languages() { for lang in lang::languages() { //write_output(lang.as_ref()); compare_to_fixture(lang.as_ref()); } }