sqlformat-0.2.1/.cargo_vcs_info.json0000644000000001360000000000100130440ustar { "git": { "sha1": "66bdb0fadf9e750600df0c778cff88e45277d7c1" }, "path_in_vcs": "" }sqlformat-0.2.1/.editorconfig000064400000000000000000000002261046102023000143110ustar 00000000000000root = true [*.rs] charset = utf-8 end_of_line = lf indent_size = 4 indent_style = space insert_final_newline = true trim_trailing_whitespace = true sqlformat-0.2.1/.github/workflows/sqlformat.yml000064400000000000000000000030421046102023000177630ustar 00000000000000name: sqlformat on: push: branches: - master pull_request: branches: - master jobs: build-test-unix: runs-on: ubuntu-latest strategy: matrix: conf: - latest-stable - latest-beta - latest-nightly include: - conf: latest-stable toolchain: stable - conf: latest-beta toolchain: beta - conf: latest-nightly toolchain: nightly steps: - uses: actions/checkout@v2 - name: Install ${{ matrix.toolchain }} uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.toolchain }} override: true components: clippy, rustfmt - name: Cache cargo registry uses: actions/cache@v1 with: path: ~/.cargo/registry/cache key: ${{ runner.os }}-${{ matrix.conf }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} restore-keys: | ${{ runner.os }}-${{ matrix.conf }}-cargo-registry- - name: Run rustfmt if: matrix.toolchain == 'stable' uses: actions-rs/cargo@v1 with: command: fmt args: -- --check - name: Run clippy if: matrix.toolchain == 'stable' uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} args: -- -D warnings - name: Run tests run: cargo test - name: Build benchmarks if: matrix.toolchain == 'stable' run: cargo bench --no-run - name: Build docs run: cargo doc --no-deps sqlformat-0.2.1/.gitignore000064400000000000000000000000321046102023000136170ustar 00000000000000/target Cargo.lock /.idea sqlformat-0.2.1/CHANGELOG.md000064400000000000000000000026061046102023000134510ustar 00000000000000### Version 0.2.1 - Fix extra spaces inside of scientific notation [#16](https://github.com/shssoichiro/sqlformat-rs/pull/16) - Remove unnecessary space in BETWEEN clause [#17](https://github.com/shssoichiro/sqlformat-rs/pull/17) - Denote the minimum Rust version in Cargo.toml ### Version 0.2.0 - Fix extra spaces in string escaping [#13](https://github.com/shssoichiro/sqlformat-rs/pull/13) - Fix panic on overflowing integer [#14](https://github.com/shssoichiro/sqlformat-rs/pull/14) - Bump Rust edition to 2021 - This is technically a breaking change as it bumps the minimum Rust version to 1.56 ### Version 0.1.8 - Remove regex dependency - Remove unused maplit dependency ### Version 0.1.7 - Bump nom to 7.0, which reportedly also fixes some build issues ### Version 0.1.6 - Fix compatibility with Rust 1.44 which was broken in 0.1.5 ### Version 0.1.5 - Fix a possible panic on multibyte unicode strings ### Version 0.1.4 - Attempt again to fix the issue some users experience where this crate would fail to compile ### Version 0.1.3 - Fix an issue some users experienced where this crate would fail to compile ### Version 0.1.2 - Rewrite the parser in nom, providing significant performance improvements across the board - Other significant performance improvement on pathological queries ### Version 0.1.1 - Significant performance improvements ### Version 0.1.0 - Initial release sqlformat-0.2.1/Cargo.toml0000644000000022700000000000100110430ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.56" name = "sqlformat" version = "0.2.1" authors = ["Josh Holmer "] description = "Formats whitespace in a SQL string to make it easier to read" homepage = "https://github.com/shssoichiro/sqlformat-rs" documentation = "https://docs.rs/sqlformat" readme = "README.md" keywords = ["sql"] categories = ["development-tools"] license = "MIT OR Apache-2.0" repository = "https://github.com/shssoichiro/sqlformat-rs" [[bench]] name = "bench" harness = false [dependencies.itertools] version = "0.10" [dependencies.nom] version = "7.0.0" [dependencies.unicode_categories] version = "0.1.1" [dev-dependencies.criterion] version = "0.3" [dev-dependencies.indoc] version = "1.0" sqlformat-0.2.1/Cargo.toml.orig000064400000000000000000000011641046102023000145250ustar 00000000000000[package] name = "sqlformat" version = "0.2.1" authors = ["Josh Holmer "] edition = "2021" rust-version = "1.56" license = "MIT OR Apache-2.0" homepage = "https://github.com/shssoichiro/sqlformat-rs" repository = "https://github.com/shssoichiro/sqlformat-rs" documentation = "https://docs.rs/sqlformat" description = "Formats whitespace in a SQL string to make it easier to read" keywords = ["sql"] categories = ["development-tools"] [dependencies] itertools = "0.10" nom = "7.0.0" unicode_categories = "0.1.1" [dev-dependencies] criterion = "0.3" indoc = "1.0" [[bench]] name = "bench" harness = false sqlformat-0.2.1/LICENSE-APACHE000064400000000000000000000227731046102023000135730ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS sqlformat-0.2.1/LICENSE-MIT000064400000000000000000000017771046102023000133040ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. sqlformat-0.2.1/README.md000064400000000000000000000014621046102023000131160ustar 00000000000000# sqlformat [![Build Status](https://github.com/shssoichiro/sqlformat-rs/workflows/sqlformat/badge.svg)](https://github.com/shssoichiro/sqlformat-rs/actions?query=branch%3Amaster) [![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/) [![Version](https://img.shields.io/crates/v/sqlformat.svg)](https://crates.io/crates/sqlformat) [![Docs](https://docs.rs/sqlformat/badge.svg)](https://docs.rs/sqlformat) This crate is a port of https://github.com/kufii/sql-formatter-plus written in Rust. It is intended to be usable as a pure-Rust library for formatting SQL queries. There is currently no binary interface. This crate was written for formatting queries to logs within `sqlx`, but it may be useful to other crates in the Rust ecosystem. sqlformat-0.2.1/benches/bench.rs000064400000000000000000000134521046102023000146750ustar 00000000000000use criterion::{black_box, criterion_group, criterion_main, Criterion}; use sqlformat::*; fn simple_query(c: &mut Criterion) { let input = "SELECT * FROM my_table WHERE id = 1"; c.bench_function("simple query", |b| { b.iter(|| { format( black_box(input), black_box(&QueryParams::None), black_box(FormatOptions::default()), ) }) }); } fn complex_query(c: &mut Criterion) { let input = "SELECT t1.id, t1.name, t1.title, t1.description, t2.mothers_maiden_name, t2.first_girlfriend\nFROM my_table t1 LEFT JOIN other_table t2 ON t1.id = t2.other_id WHERE t2.order BETWEEN 17 AND 30"; c.bench_function("complex query", |b| { b.iter(|| { format( black_box(input), black_box(&QueryParams::None), black_box(FormatOptions::default()), ) }) }); } fn query_with_named_params(c: &mut Criterion) { let input = "SELECT * FROM my_table WHERE id = :first OR id = :second OR id = :third"; let params = vec![ ("first".to_string(), "1".to_string()), ("second".to_string(), "2".to_string()), ("third".to_string(), "3".to_string()), ]; c.bench_function("named params", |b| { b.iter(|| { format( black_box(input), black_box(&QueryParams::Named(params.clone())), black_box(FormatOptions::default()), ) }) }); } fn query_with_explicit_indexed_params(c: &mut Criterion) { let input = "SELECT * FROM my_table WHERE id = ?1 OR id = ?2 OR id = ?0"; let params = vec!["0".to_string(), "1".to_string(), "2".to_string()]; c.bench_function("explicit indexed params", |b| { b.iter(|| { format( black_box(input), black_box(&QueryParams::Indexed(params.clone())), black_box(FormatOptions::default()), ) }) }); } fn query_with_implicit_indexed_params(c: &mut Criterion) { let input = "SELECT * FROM my_table WHERE id = ? OR id = ? OR id = ?"; let params = vec!["0".to_string(), "1".to_string(), "2".to_string()]; c.bench_function("implicit indexed params", |b| { b.iter(|| { format( black_box(input), black_box(&QueryParams::Indexed(params.clone())), black_box(FormatOptions::default()), ) }) }); } fn issue_633(c: &mut Criterion) { const SIZE: usize = 1000; pub struct UserData { pub id: i64, pub first_name: String, pub last_name: String, pub address: String, pub email: String, pub phone: String, } fn sample() -> UserData { UserData { id: -1, first_name: "FIRST_NAME".to_string(), last_name: "LAST_NAME".to_string(), address: "SOME_ADDRESS".to_string(), email: "email@example.com".to_string(), phone: "9999999999".to_string(), } } fn to_insert_params(user_data: &UserData) -> String { format!( r#"('{}', '{}', '{}', '{}', '{}')"#, user_data.first_name, user_data.last_name, user_data.address, user_data.email, user_data.phone, ) } static INSERT_QUERY: &str = " INSERT INTO user_data (first_name, last_name, address, phone, email) VALUES "; fn generate_insert_query() -> String { let mut query_str = String::with_capacity(1_000_000); query_str.push_str(INSERT_QUERY); let mut is_first = true; let sample_data = sample(); for _ in 0..SIZE { if is_first { is_first = false; } else { query_str.push(','); } let params = to_insert_params(&sample_data); query_str.push_str(¶ms); } query_str.push(';'); query_str } let input = generate_insert_query(); c.bench_function("issue 633", |b| { b.iter(|| { format( black_box(&input), black_box(&QueryParams::None), black_box(FormatOptions::default()), ) }) }); } fn issue_633_2(c: &mut Criterion) { let input = "SELECT\n d.uuid AS uuid,\n\td.name_of_document AS name,\n\td.slug_name AS slug,\n\td.default_contract_uuid AS default_contract_uuid,\n\ta.uuid AS parent_uuid,\n\ta.name_of_agreement AS agreement_name,\n\td.icon_name AS icon\nFROM `documents` d\nLEFT JOIN agreements a ON a.uuid = d.parent_uuid\n WHERE d.uuid = ? LIMIT 1"; let params = vec!["0".to_string()]; c.bench_function("issue 633 query 2", |b| { b.iter(|| { format( black_box(input), black_box(&QueryParams::Indexed(params.clone())), black_box(FormatOptions::default()), ) }) }); } fn issue_633_3(c: &mut Criterion) { const SIZE: usize = 1000; let mut input = String::with_capacity(100_000); input.push_str("INSERT INTO test_table(a) values "); let mut is_first = true; for _ in 0..SIZE { if is_first { is_first = false; } else { input.push_str(", "); } input.push_str("(?)"); } c.bench_function("issue 633 query 3", |b| { b.iter(|| { format( black_box(&input), black_box(&QueryParams::None), black_box(FormatOptions::default()), ) }) }); } criterion_group!( benches, simple_query, complex_query, query_with_named_params, query_with_explicit_indexed_params, query_with_implicit_indexed_params, issue_633, issue_633_2, issue_633_3 ); criterion_main!(benches); sqlformat-0.2.1/src/formatter.rs000064400000000000000000000231341046102023000147770ustar 00000000000000use crate::indentation::Indentation; use crate::inline_block::InlineBlock; use crate::params::Params; use crate::tokenizer::{Token, TokenKind}; use crate::{FormatOptions, QueryParams}; use itertools::Itertools; use std::borrow::Cow; pub(crate) fn format(tokens: &[Token<'_>], params: &QueryParams, options: FormatOptions) -> String { let mut formatter = Formatter::new(tokens, params, options); let mut formatted_query = String::new(); for (index, token) in tokens.iter().enumerate() { formatter.index = index; if token.kind == TokenKind::Whitespace { // ignore (we do our own whitespace formatting) } else if token.kind == TokenKind::LineComment { formatter.format_line_comment(token, &mut formatted_query); } else if token.kind == TokenKind::BlockComment { formatter.format_block_comment(token, &mut formatted_query); } else if token.kind == TokenKind::ReservedTopLevel { formatter.format_top_level_reserved_word(token, &mut formatted_query); formatter.previous_reserved_word = Some(token); } else if token.kind == TokenKind::ReservedTopLevelNoIndent { formatter.format_top_level_reserved_word_no_indent(token, &mut formatted_query); formatter.previous_reserved_word = Some(token); } else if token.kind == TokenKind::ReservedNewline { formatter.format_newline_reserved_word(token, &mut formatted_query); formatter.previous_reserved_word = Some(token); } else if token.kind == TokenKind::Reserved { formatter.format_with_spaces(token, &mut formatted_query); formatter.previous_reserved_word = Some(token); } else if token.kind == TokenKind::OpenParen { formatter.format_opening_parentheses(token, &mut formatted_query); } else if token.kind == TokenKind::CloseParen { formatter.format_closing_parentheses(token, &mut formatted_query); } else if token.kind == TokenKind::Placeholder { formatter.format_placeholder(token, &mut formatted_query); } else if token.value == "," { formatter.format_comma(token, &mut formatted_query); } else if token.value == ":" { formatter.format_with_space_after(token, &mut formatted_query); } else if token.value == "." { formatter.format_without_spaces(token, &mut formatted_query); } else if token.value == ";" { formatter.format_query_separator(token, &mut formatted_query); } else { formatter.format_with_spaces(token, &mut formatted_query); } } formatted_query.trim().to_string() } struct Formatter<'a> { index: usize, previous_reserved_word: Option<&'a Token<'a>>, tokens: &'a [Token<'a>], params: Params<'a>, options: FormatOptions, indentation: Indentation, inline_block: InlineBlock, } impl<'a> Formatter<'a> { fn new(tokens: &'a [Token<'a>], params: &'a QueryParams, options: FormatOptions) -> Self { Formatter { index: 0, previous_reserved_word: None, tokens, params: Params::new(params), options, indentation: Indentation::new(options), inline_block: InlineBlock::new(), } } fn format_line_comment(&self, token: &Token<'_>, query: &mut String) { query.push_str(token.value); self.add_new_line(query); } fn format_block_comment(&self, token: &Token<'_>, query: &mut String) { self.add_new_line(query); query.push_str(&self.indent_comment(token.value)); self.add_new_line(query); } fn format_top_level_reserved_word(&mut self, token: &Token<'_>, query: &mut String) { self.indentation.decrease_top_level(); self.add_new_line(query); self.indentation.increase_top_level(); query.push_str(&self.equalize_whitespace(&self.format_reserved_word(token.value))); self.add_new_line(query); } fn format_top_level_reserved_word_no_indent(&mut self, token: &Token<'_>, query: &mut String) { self.indentation.decrease_top_level(); self.add_new_line(query); query.push_str(&self.equalize_whitespace(&self.format_reserved_word(token.value))); self.add_new_line(query); } fn format_newline_reserved_word(&self, token: &Token<'_>, query: &mut String) { self.add_new_line(query); query.push_str(&self.equalize_whitespace(&self.format_reserved_word(token.value))); query.push(' '); } fn format_with_spaces(&self, token: &Token<'_>, query: &mut String) { let value = if token.kind == TokenKind::Reserved { self.format_reserved_word(token.value) } else { Cow::Borrowed(token.value) }; query.push_str(&value); query.push(' '); } // Opening parentheses increase the block indent level and start a new line fn format_opening_parentheses(&mut self, token: &Token<'_>, query: &mut String) { const PRESERVE_WHITESPACE_FOR: &[TokenKind] = &[ TokenKind::Whitespace, TokenKind::OpenParen, TokenKind::LineComment, ]; // Take out the preceding space unless there was whitespace there in the original query // or another opening parens or line comment let previous_token = self.previous_token(); if previous_token.is_none() || !PRESERVE_WHITESPACE_FOR.contains(&previous_token.unwrap().kind) { self.trim_spaces_end(query); } if self.options.uppercase { query.push_str(&token.value.to_uppercase()); } else { query.push_str(token.value); }; self.inline_block.begin_if_possible(self.tokens, self.index); if !self.inline_block.is_active() { self.indentation.increase_block_level(); self.add_new_line(query); } } // Closing parentheses decrease the block indent level fn format_closing_parentheses(&mut self, token: &Token<'_>, query: &mut String) { let mut token = token.clone(); let value = if self.options.uppercase { token.value.to_uppercase() } else { token.value.to_string() }; token.value = &value; if self.inline_block.is_active() { self.inline_block.end(); self.format_with_space_after(&token, query); } else { self.indentation.decrease_block_level(); self.add_new_line(query); self.format_with_spaces(&token, query); } } fn format_placeholder(&mut self, token: &'a Token<'a>, query: &mut String) { query.push_str(self.params.get(token)); query.push(' '); } // Commas start a new line (unless within inline parentheses or SQL "LIMIT" clause) fn format_comma(&self, token: &Token<'_>, query: &mut String) { self.trim_spaces_end(query); query.push_str(token.value); query.push(' '); if self.inline_block.is_active() { return; } if self .previous_reserved_word .map(|word| word.value.to_lowercase() == "limit") .unwrap_or(false) { return; } self.add_new_line(query); } fn format_with_space_after(&self, token: &Token<'_>, query: &mut String) { self.trim_spaces_end(query); query.push_str(token.value); query.push(' '); } fn format_without_spaces(&self, token: &Token<'_>, query: &mut String) { self.trim_spaces_end(query); query.push_str(token.value); } fn format_query_separator(&mut self, token: &Token<'_>, query: &mut String) { self.indentation.reset_indentation(); self.trim_spaces_end(query); query.push_str(token.value); for _ in 0..self.options.lines_between_queries { query.push('\n'); } } fn add_new_line(&self, query: &mut String) { self.trim_spaces_end(query); if !query.ends_with('\n') { query.push('\n'); } query.push_str(&self.indentation.get_indent()); } fn trim_spaces_end(&self, query: &mut String) { query.truncate(query.trim_end_matches(|c| c == ' ' || c == '\t').len()); } fn indent_comment(&self, token: &str) -> String { token .split('\n') .enumerate() .map(|(i, line)| { if i == 0 { return line.to_string(); } if !line.starts_with(|c| c == ' ' || c == '\t') { return line.to_string(); } format!( "{} {}", self.indentation.get_indent(), line.chars() .skip_while(|&c| c == ' ' || c == '\t') .collect::() ) }) .join("\n") } fn format_reserved_word<'t>(&self, token: &'t str) -> Cow<'t, str> { if self.options.uppercase { Cow::Owned(token.to_uppercase()) } else { Cow::Borrowed(token) } } // Replace any sequence of whitespace characters with single space fn equalize_whitespace(&self, token: &str) -> String { token .split(char::is_whitespace) .filter(|s| !s.is_empty()) .join(" ") } fn previous_token(&self) -> Option<&Token<'_>> { let index = self.index.checked_sub(1); if let Some(index) = index { self.tokens.get(index) } else { None } } } sqlformat-0.2.1/src/indentation.rs000064400000000000000000000025761046102023000153170ustar 00000000000000use crate::{FormatOptions, Indent}; pub(crate) struct Indentation { options: FormatOptions, indent_types: Vec, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum IndentType { TopLevel, BlockLevel, } impl Indentation { pub fn new(options: FormatOptions) -> Self { Indentation { options, indent_types: Vec::new(), } } pub fn get_indent(&self) -> String { match self.options.indent { Indent::Spaces(num_spaces) => " " .repeat(num_spaces as usize) .repeat(self.indent_types.len()), Indent::Tabs => "\t".repeat(self.indent_types.len()), } } pub fn increase_top_level(&mut self) { self.indent_types.push(IndentType::TopLevel); } pub fn increase_block_level(&mut self) { self.indent_types.push(IndentType::BlockLevel); } pub fn decrease_top_level(&mut self) { if self.indent_types.last() == Some(&IndentType::TopLevel) { self.indent_types.pop(); } } pub fn decrease_block_level(&mut self) { while !self.indent_types.is_empty() { let kind = self.indent_types.pop(); if kind != Some(IndentType::TopLevel) { break; } } } pub fn reset_indentation(&mut self) { self.indent_types.clear(); } } sqlformat-0.2.1/src/inline_block.rs000064400000000000000000000032411046102023000154210ustar 00000000000000use crate::tokenizer::{Token, TokenKind}; pub(crate) struct InlineBlock { level: usize, } impl InlineBlock { pub fn new() -> Self { InlineBlock { level: 0 } } pub fn begin_if_possible(&mut self, tokens: &[Token<'_>], index: usize) { if self.level == 0 && self.is_inline_block(tokens, index) { self.level = 1; } else if self.level > 0 { self.level += 1; } else { self.level = 0; } } pub fn end(&mut self) { self.level -= 1; } pub fn is_active(&self) -> bool { self.level > 0 } fn is_inline_block(&self, tokens: &[Token<'_>], index: usize) -> bool { const INLINE_MAX_LENGTH: usize = 50; let mut length = 0; let mut level = 0; for token in &tokens[index..] { length += token.value.len(); // Overran max length if length > INLINE_MAX_LENGTH { return false; } if token.kind == TokenKind::OpenParen { level += 1; } else if token.kind == TokenKind::CloseParen { level -= 1; if level == 0 { return true; } } if self.is_forbidden_token(token) { return false; } } false } fn is_forbidden_token(&self, token: &Token<'_>) -> bool { token.kind == TokenKind::ReservedTopLevel || token.kind == TokenKind::ReservedNewline || token.kind == TokenKind::LineComment || token.kind == TokenKind::BlockComment || token.value == ";" } } sqlformat-0.2.1/src/lib.rs000064400000000000000000001136561046102023000135530ustar 00000000000000//! This crate is a port of https://github.com/kufii/sql-formatter-plus //! written in Rust. It is intended to be usable as a pure-Rust library //! for formatting SQL queries. #![type_length_limit = "99999999"] #![forbid(unsafe_code)] // Maintains semver compatibility for older Rust versions #![allow(clippy::manual_strip)] mod formatter; mod indentation; mod inline_block; mod params; mod tokenizer; /// Formats whitespace in a SQL string to make it easier to read. /// Optionally replaces parameter placeholders with `params`. pub fn format(query: &str, params: &QueryParams, options: FormatOptions) -> String { let tokens = tokenizer::tokenize(query); formatter::format(&tokens, params, options) } /// Options for controlling how the library formats SQL #[derive(Debug, Clone, Copy)] pub struct FormatOptions { /// Controls the type and length of indentation to use /// /// Default: 2 spaces pub indent: Indent, /// When set, changes reserved keywords to ALL CAPS /// /// Default: false pub uppercase: bool, /// Controls the number of line breaks after a query /// /// Default: 1 pub lines_between_queries: u8, } impl Default for FormatOptions { fn default() -> Self { FormatOptions { indent: Indent::Spaces(2), uppercase: false, lines_between_queries: 1, } } } #[derive(Debug, Clone, Copy)] pub enum Indent { Spaces(u8), Tabs, } #[derive(Debug, Clone)] pub enum QueryParams { Named(Vec<(String, String)>), Indexed(Vec), None, } impl Default for QueryParams { fn default() -> Self { QueryParams::None } } #[cfg(test)] mod tests { use super::*; use indoc::indoc; #[test] fn it_uses_given_indent_config_for_indentation() { let input = "SELECT count(*),Column1 FROM Table1;"; let options = FormatOptions { indent: Indent::Spaces(4), ..FormatOptions::default() }; let expected = indoc!( " SELECT count(*), Column1 FROM Table1;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_simple_set_schema_queries() { let input = "SET SCHEMA schema1; SET CURRENT SCHEMA schema2;"; let options = FormatOptions::default(); let expected = indoc!( " SET SCHEMA schema1; SET CURRENT SCHEMA schema2;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_simple_select_query() { let input = "SELECT count(*),Column1 FROM Table1;"; let options = FormatOptions::default(); let expected = indoc!( " SELECT count(*), Column1 FROM Table1;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_complex_select() { let input = "SELECT DISTINCT name, ROUND(age/7) field1, 18 + 20 AS field2, 'some string' FROM foo;"; let options = FormatOptions::default(); let expected = indoc!( " SELECT DISTINCT name, ROUND(age / 7) field1, 18 + 20 AS field2, 'some string' FROM foo;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_select_with_complex_where() { let input = indoc!( " SELECT * FROM foo WHERE Column1 = 'testing' AND ( (Column2 = Column3 OR Column4 >= NOW()) ); " ); let options = FormatOptions::default(); let expected = indoc!( " SELECT * FROM foo WHERE Column1 = 'testing' AND ( ( Column2 = Column3 OR Column4 >= NOW() ) );" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_select_with_top_level_reserved_words() { let input = indoc!( " SELECT * FROM foo WHERE name = 'John' GROUP BY some_column HAVING column > 10 ORDER BY other_column LIMIT 5; " ); let options = FormatOptions::default(); let expected = indoc!( " SELECT * FROM foo WHERE name = 'John' GROUP BY some_column HAVING column > 10 ORDER BY other_column LIMIT 5;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_limit_with_two_comma_separated_values_on_single_line() { let input = "LIMIT 5, 10;"; let options = FormatOptions::default(); let expected = indoc!( " LIMIT 5, 10;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_limit_of_single_value_followed_by_another_select_using_commas() { let input = "LIMIT 5; SELECT foo, bar;"; let options = FormatOptions::default(); let expected = indoc!( " LIMIT 5; SELECT foo, bar;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_limit_of_single_value_and_offset() { let input = "LIMIT 5 OFFSET 8;"; let options = FormatOptions::default(); let expected = indoc!( " LIMIT 5 OFFSET 8;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_limit_in_lowercase() { let input = "limit 5, 10;"; let options = FormatOptions::default(); let expected = indoc!( " limit 5, 10;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_preserves_case_of_keywords() { let input = "select distinct * frOM foo left join bar WHERe a > 1 and b = 3"; let options = FormatOptions::default(); let expected = indoc!( " select distinct * frOM foo left join bar WHERe a > 1 and b = 3" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_select_query_with_select_query_inside_it() { let input = "SELECT *, SUM(*) AS sum FROM (SELECT * FROM Posts LIMIT 30) WHERE a > b"; let options = FormatOptions::default(); let expected = indoc!( " SELECT *, SUM(*) AS sum FROM ( SELECT * FROM Posts LIMIT 30 ) WHERE a > b" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_select_query_with_inner_join() { let input = indoc!( " SELECT customer_id.from, COUNT(order_id) AS total FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id;" ); let options = FormatOptions::default(); let expected = indoc!( " SELECT customer_id.from, COUNT(order_id) AS total FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_select_query_with_different_comments() { let input = indoc!( " SELECT /* * This is a block comment */ * FROM -- This is another comment MyTable # One final comment WHERE 1 = 2;" ); let options = FormatOptions::default(); let expected = indoc!( " SELECT /* * This is a block comment */ * FROM -- This is another comment MyTable # One final comment WHERE 1 = 2;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_maintains_block_comment_indentation() { let input = indoc!( " SELECT /* * This is a block comment */ * FROM MyTable WHERE 1 = 2;" ); let options = FormatOptions::default(); assert_eq!(format(input, &QueryParams::None, options), input); } #[test] fn it_formats_simple_insert_query() { let input = "INSERT INTO Customers (ID, MoneyBalance, Address, City) VALUES (12,-123.4, 'Skagen 2111','Stv');"; let options = FormatOptions::default(); let expected = indoc!( " INSERT INTO Customers (ID, MoneyBalance, Address, City) VALUES (12, -123.4, 'Skagen 2111', 'Stv');" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_keeps_short_parenthesized_list_with_nested_parenthesis_on_single_line() { let input = "SELECT (a + b * (c - NOW()));"; let options = FormatOptions::default(); let expected = indoc!( " SELECT (a + b * (c - NOW()));" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_breaks_long_parenthesized_lists_to_multiple_lines() { let input = indoc!( " INSERT INTO some_table (id_product, id_shop, id_currency, id_country, id_registration) ( SELECT IF(dq.id_discounter_shopping = 2, dq.value, dq.value / 100), IF (dq.id_discounter_shopping = 2, 'amount', 'percentage') FROM foo);" ); let options = FormatOptions::default(); let expected = indoc!( " INSERT INTO some_table ( id_product, id_shop, id_currency, id_country, id_registration ) ( SELECT IF( dq.id_discounter_shopping = 2, dq.value, dq.value / 100 ), IF ( dq.id_discounter_shopping = 2, 'amount', 'percentage' ) FROM foo );" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_simple_update_query() { let input = "UPDATE Customers SET ContactName='Alfred Schmidt', City='Hamburg' WHERE CustomerName='Alfreds Futterkiste';"; let options = FormatOptions::default(); let expected = indoc!( " UPDATE Customers SET ContactName = 'Alfred Schmidt', City = 'Hamburg' WHERE CustomerName = 'Alfreds Futterkiste';" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_simple_delete_query() { let input = "DELETE FROM Customers WHERE CustomerName='Alfred' AND Phone=5002132;"; let options = FormatOptions::default(); let expected = indoc!( " DELETE FROM Customers WHERE CustomerName = 'Alfred' AND Phone = 5002132;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_simple_drop_query() { let input = "DROP TABLE IF EXISTS admin_role;"; let options = FormatOptions::default(); assert_eq!(format(input, &QueryParams::None, options), input); } #[test] fn it_formats_incomplete_query() { let input = "SELECT count("; let options = FormatOptions::default(); let expected = indoc!( " SELECT count(" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_query_that_ends_with_open_comment() { let input = indoc!( " SELECT count(*) /*Comment" ); let options = FormatOptions::default(); let expected = indoc!( " SELECT count(*) /*Comment" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_update_query_with_as_part() { let input = "UPDATE customers SET total_orders = order_summary.total FROM ( SELECT * FROM bank) AS order_summary"; let options = FormatOptions::default(); let expected = indoc!( " UPDATE customers SET total_orders = order_summary.total FROM ( SELECT * FROM bank ) AS order_summary" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_top_level_and_newline_multi_word_reserved_words_with_inconsistent_spacing() { let input = "SELECT * FROM foo LEFT \t OUTER \n JOIN bar ORDER \n BY blah"; let options = FormatOptions::default(); let expected = indoc!( " SELECT * FROM foo LEFT OUTER JOIN bar ORDER BY blah" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_long_double_parenthesized_queries_to_multiple_lines() { let input = "((foo = '0123456789-0123456789-0123456789-0123456789'))"; let options = FormatOptions::default(); let expected = indoc!( " ( ( foo = '0123456789-0123456789-0123456789-0123456789' ) )" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_short_double_parenthesizes_queries_to_one_line() { let input = "((foo = 'bar'))"; let options = FormatOptions::default(); assert_eq!(format(input, &QueryParams::None, options), input); } #[test] fn it_formats_single_char_operators() { let inputs = [ "foo = bar", "foo < bar", "foo > bar", "foo + bar", "foo - bar", "foo * bar", "foo / bar", "foo % bar", ]; let options = FormatOptions::default(); for input in &inputs { assert_eq!(&format(input, &QueryParams::None, options), input); } } #[test] fn it_formats_multi_char_operators() { let inputs = [ "foo != bar", "foo <> bar", "foo == bar", "foo || bar", "foo <= bar", "foo >= bar", "foo !< bar", "foo !> bar", ]; let options = FormatOptions::default(); for input in &inputs { assert_eq!(&format(input, &QueryParams::None, options), input); } } #[test] fn it_formats_logical_operators() { let inputs = [ "foo ALL bar", "foo = ANY (1, 2, 3)", "EXISTS bar", "foo IN (1, 2, 3)", "foo LIKE 'hello%'", "foo IS NULL", "UNIQUE foo", ]; let options = FormatOptions::default(); for input in &inputs { assert_eq!(&format(input, &QueryParams::None, options), input); } } #[test] fn it_formats_and_or_operators() { let strings = [ ("foo BETWEEN bar AND baz", "foo BETWEEN bar AND baz"), ("foo BETWEEN\nbar\nAND baz", "foo BETWEEN bar AND baz"), ("foo AND bar", "foo\nAND bar"), ("foo OR bar", "foo\nOR bar"), ]; let options = FormatOptions::default(); for (input, output) in &strings { assert_eq!(&format(input, &QueryParams::None, options), output); } } #[test] fn it_recognizes_strings() { let inputs = ["\"foo JOIN bar\"", "'foo JOIN bar'", "`foo JOIN bar`"]; let options = FormatOptions::default(); for input in &inputs { assert_eq!(&format(input, &QueryParams::None, options), input); } } #[test] fn it_recognizes_escaped_strings() { let inputs = [ r#""foo \" JOIN bar""#, r#"'foo \' JOIN bar'"#, r#"`foo `` JOIN bar`"#, r#"'foo '' JOIN bar'"#, r#"'two households"'"#, r#"'two households'''"#, ]; let options = FormatOptions::default(); for input in &inputs { assert_eq!(&format(input, &QueryParams::None, options), input); } } #[test] fn it_formats_postgres_specific_operators() { let strings = [ ("column::int", "column :: int"), ("v->2", "v -> 2"), ("v->>2", "v ->> 2"), ("foo ~~ 'hello'", "foo ~~ 'hello'"), ("foo !~ 'hello'", "foo !~ 'hello'"), ("foo ~* 'hello'", "foo ~* 'hello'"), ("foo ~~* 'hello'", "foo ~~* 'hello'"), ("foo !~~ 'hello'", "foo !~~ 'hello'"), ("foo !~* 'hello'", "foo !~* 'hello'"), ("foo !~~* 'hello'", "foo !~~* 'hello'"), ]; let options = FormatOptions::default(); for (input, output) in &strings { assert_eq!(&format(input, &QueryParams::None, options), output); } } #[test] fn it_keeps_separation_between_multiple_statements() { let strings = [ ("foo;bar;", "foo;\nbar;"), ("foo\n;bar;", "foo;\nbar;"), ("foo\n\n\n;bar;\n\n", "foo;\nbar;"), ]; let options = FormatOptions::default(); for (input, output) in &strings { assert_eq!(&format(input, &QueryParams::None, options), output); } let input = indoc!( " SELECT count(*),Column1 FROM Table1; SELECT count(*),Column1 FROM Table2;" ); let options = FormatOptions::default(); let expected = indoc!( " SELECT count(*), Column1 FROM Table1; SELECT count(*), Column1 FROM Table2;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_unicode_correctly() { let input = "SELECT test, тест FROM table;"; let options = FormatOptions::default(); let expected = indoc!( " SELECT test, тест FROM table;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_converts_keywords_to_uppercase_when_option_passed_in() { let input = "select distinct * frOM foo left join bar WHERe cola > 1 and colb = 3"; let options = FormatOptions { uppercase: true, ..FormatOptions::default() }; let expected = indoc!( " SELECT DISTINCT * FROM foo LEFT JOIN bar WHERE cola > 1 AND colb = 3" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_line_breaks_between_queries_with_config() { let input = "SELECT * FROM foo; SELECT * FROM bar;"; let options = FormatOptions { lines_between_queries: 2, ..FormatOptions::default() }; let expected = indoc!( " SELECT * FROM foo; SELECT * FROM bar;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_correctly_indents_create_statement_after_select() { let input = indoc!( " SELECT * FROM test; CREATE TABLE TEST(id NUMBER NOT NULL, col1 VARCHAR2(20), col2 VARCHAR2(20)); " ); let options = FormatOptions::default(); let expected = indoc!( " SELECT * FROM test; CREATE TABLE TEST( id NUMBER NOT NULL, col1 VARCHAR2(20), col2 VARCHAR2(20) );" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_short_create_table() { let input = "CREATE TABLE items (a INT PRIMARY KEY, b TEXT);"; let options = FormatOptions::default(); assert_eq!(format(input, &QueryParams::None, options), input); } #[test] fn it_formats_long_create_table() { let input = "CREATE TABLE items (a INT PRIMARY KEY, b TEXT, c INT NOT NULL, d INT NOT NULL);"; let options = FormatOptions::default(); let expected = indoc!( " CREATE TABLE items ( a INT PRIMARY KEY, b TEXT, c INT NOT NULL, d INT NOT NULL );" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_insert_without_into() { let input = "INSERT Customers (ID, MoneyBalance, Address, City) VALUES (12,-123.4, 'Skagen 2111','Stv');"; let options = FormatOptions::default(); let expected = indoc!( " INSERT Customers (ID, MoneyBalance, Address, City) VALUES (12, -123.4, 'Skagen 2111', 'Stv');" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_alter_table_modify_query() { let input = "ALTER TABLE supplier MODIFY supplier_name char(100) NOT NULL;"; let options = FormatOptions::default(); let expected = indoc!( " ALTER TABLE supplier MODIFY supplier_name char(100) NOT NULL;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_alter_table_alter_column_query() { let input = "ALTER TABLE supplier ALTER COLUMN supplier_name VARCHAR(100) NOT NULL;"; let options = FormatOptions::default(); let expected = indoc!( " ALTER TABLE supplier ALTER COLUMN supplier_name VARCHAR(100) NOT NULL;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_bracketed_strings() { let inputs = ["[foo JOIN bar]", "[foo ]] JOIN bar]"]; let options = FormatOptions::default(); for input in &inputs { assert_eq!(&format(input, &QueryParams::None, options), input); } } #[test] fn it_recognizes_at_variables() { let input = "SELECT @variable, @a1_2.3$, @'var name', @\"var name\", @`var name`, @[var name];"; let options = FormatOptions::default(); let expected = indoc!( " SELECT @variable, @a1_2.3$, @'var name', @\"var name\", @`var name`, @[var name];" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_at_variables_with_param_values() { let input = "SELECT @variable, @a1_2.3$, @'var name', @\"var name\", @`var name`, @[var name], @'var\\name';"; let params = vec![ ("variable".to_string(), "\"variable value\"".to_string()), ("a1_2.3$".to_string(), "'weird value'".to_string()), ("var name".to_string(), "'var value'".to_string()), ("var\\name".to_string(), "'var\\ value'".to_string()), ]; let options = FormatOptions::default(); let expected = indoc!( " SELECT \"variable value\", 'weird value', 'var value', 'var value', 'var value', 'var value', 'var\\ value';" ); assert_eq!( format(input, &QueryParams::Named(params), options), expected ); } #[test] fn it_recognizes_colon_variables() { let input = "SELECT :variable, :a1_2.3$, :'var name', :\"var name\", :`var name`, :[var name];"; let options = FormatOptions::default(); let expected = indoc!( " SELECT :variable, :a1_2.3$, :'var name', :\"var name\", :`var name`, :[var name];" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_colon_variables_with_param_values() { let input = indoc!( " SELECT :variable, :a1_2.3$, :'var name', :\"var name\", :`var name`, :[var name], :'escaped \\'var\\'', :\"^*& weird \\\" var \"; " ); let params = vec![ ("variable".to_string(), "\"variable value\"".to_string()), ("a1_2.3$".to_string(), "'weird value'".to_string()), ("var name".to_string(), "'var value'".to_string()), ("escaped 'var'".to_string(), "'weirder value'".to_string()), ( "^*& weird \" var ".to_string(), "'super weird value'".to_string(), ), ]; let options = FormatOptions::default(); let expected = indoc!( " SELECT \"variable value\", 'weird value', 'var value', 'var value', 'var value', 'var value', 'weirder value', 'super weird value';" ); assert_eq!( format(input, &QueryParams::Named(params), options), expected ); } #[test] fn it_recognizes_question_numbered_placeholders() { let input = "SELECT ?1, ?25, ?;"; let options = FormatOptions::default(); let expected = indoc!( " SELECT ?1, ?25, ?;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_question_numbered_placeholders_with_param_values() { let input = "SELECT ?1, ?2, ?0;"; let params = vec![ "first".to_string(), "second".to_string(), "third".to_string(), ]; let options = FormatOptions::default(); let expected = indoc!( " SELECT second, third, first;" ); assert_eq!( format(input, &QueryParams::Indexed(params), options), expected ); format("?62666666121266666612", &QueryParams::None, options); } #[test] fn it_recognizes_question_indexed_placeholders_with_param_values() { let input = "SELECT ?, ?, ?;"; let params = vec![ "first".to_string(), "second".to_string(), "third".to_string(), ]; let options = FormatOptions::default(); let expected = indoc!( " SELECT first, second, third;" ); assert_eq!( format(input, &QueryParams::Indexed(params), options), expected ); } #[test] fn it_recognizes_dollar_sign_numbered_placeholders() { let input = "SELECT $1, $2;"; let options = FormatOptions::default(); let expected = indoc!( " SELECT $1, $2;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_dollar_sign_numbered_placeholders_with_param_values() { let input = "SELECT $2, $3, $1;"; let params = vec![ "first".to_string(), "second".to_string(), "third".to_string(), ]; let options = FormatOptions::default(); let expected = indoc!( " SELECT second, third, first;" ); assert_eq!( format(input, &QueryParams::Indexed(params), options), expected ); } #[test] fn it_formats_query_with_go_batch_separator() { let input = "SELECT 1 GO SELECT 2"; let params = vec![ "first".to_string(), "second".to_string(), "third".to_string(), ]; let options = FormatOptions::default(); let expected = indoc!( " SELECT 1 GO SELECT 2" ); assert_eq!( format(input, &QueryParams::Indexed(params), options), expected ); } #[test] fn it_formats_select_query_with_cross_join() { let input = "SELECT a, b FROM t CROSS JOIN t2 on t.id = t2.id_t"; let options = FormatOptions::default(); let expected = indoc!( " SELECT a, b FROM t CROSS JOIN t2 on t.id = t2.id_t" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_select_query_with_cross_apply() { let input = "SELECT a, b FROM t CROSS APPLY fn(t.id)"; let options = FormatOptions::default(); let expected = indoc!( " SELECT a, b FROM t CROSS APPLY fn(t.id)" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_simple_select() { let input = "SELECT N, M FROM t"; let options = FormatOptions::default(); let expected = indoc!( " SELECT N, M FROM t" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_simple_select_with_national_characters_mssql() { let input = "SELECT N'value'"; let options = FormatOptions::default(); let expected = indoc!( " SELECT N'value'" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_select_query_with_outer_apply() { let input = "SELECT a, b FROM t OUTER APPLY fn(t.id)"; let options = FormatOptions::default(); let expected = indoc!( " SELECT a, b FROM t OUTER APPLY fn(t.id)" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_fetch_first_like_limit() { let input = "SELECT * FETCH FIRST 2 ROWS ONLY;"; let options = FormatOptions::default(); let expected = indoc!( " SELECT * FETCH FIRST 2 ROWS ONLY;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_case_when_with_a_blank_expression() { let input = "CASE WHEN option = 'foo' THEN 1 WHEN option = 'bar' THEN 2 WHEN option = 'baz' THEN 3 ELSE 4 END;"; let options = FormatOptions::default(); let expected = indoc!( " CASE WHEN option = 'foo' THEN 1 WHEN option = 'bar' THEN 2 WHEN option = 'baz' THEN 3 ELSE 4 END;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_case_when_inside_select() { let input = "SELECT foo, bar, CASE baz WHEN 'one' THEN 1 WHEN 'two' THEN 2 ELSE 3 END FROM table"; let options = FormatOptions::default(); let expected = indoc!( " SELECT foo, bar, CASE baz WHEN 'one' THEN 1 WHEN 'two' THEN 2 ELSE 3 END FROM table" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_case_when_with_an_expression() { let input = "CASE toString(getNumber()) WHEN 'one' THEN 1 WHEN 'two' THEN 2 WHEN 'three' THEN 3 ELSE 4 END;"; let options = FormatOptions::default(); let expected = indoc!( " CASE toString(getNumber()) WHEN 'one' THEN 1 WHEN 'two' THEN 2 WHEN 'three' THEN 3 ELSE 4 END;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_lowercase_case_end() { let input = "case when option = 'foo' then 1 else 2 end;"; let options = FormatOptions::default(); let expected = indoc!( " case when option = 'foo' then 1 else 2 end;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_ignores_words_case_and_end_inside_other_strings() { let input = "SELECT CASEDATE, ENDDATE FROM table1;"; let options = FormatOptions::default(); let expected = indoc!( " SELECT CASEDATE, ENDDATE FROM table1;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_tricky_line_comments() { let input = "SELECT a#comment, here\nFROM b--comment"; let options = FormatOptions::default(); let expected = indoc!( " SELECT a #comment, here FROM b --comment" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_line_comments_followed_by_semicolon() { let input = indoc!( " SELECT a FROM b --comment ;" ); let options = FormatOptions::default(); let expected = indoc!( " SELECT a FROM b --comment ;" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_line_comments_followed_by_comma() { let input = indoc!( " SELECT a --comment , b" ); let options = FormatOptions::default(); let expected = indoc!( " SELECT a --comment , b" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_line_comments_followed_by_close_paren() { let input = "SELECT ( a --comment\n )"; let options = FormatOptions::default(); let expected = indoc!( " SELECT ( a --comment )" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_line_comments_followed_by_open_paren() { let input = "SELECT a --comment\n()"; let options = FormatOptions::default(); let expected = indoc!( " SELECT a --comment ()" ); assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_formats_lonely_semicolon() { let input = ";"; let options = FormatOptions::default(); assert_eq!(format(input, &QueryParams::None, options), input); } #[test] fn it_formats_multibyte_chars() { let input = "\nSELECT 'главная'"; let options = FormatOptions::default(); let expected = "SELECT\n 'главная'"; assert_eq!(format(input, &QueryParams::None, options), expected); } #[test] fn it_recognizes_scientific_notation() { let input = "SELECT *, 1e-7 as small, 1e+7 as large FROM t"; let options = FormatOptions::default(); let expected = indoc!( " SELECT *, 1e-7 as small, 1e+7 as large FROM t" ); assert_eq!(format(input, &QueryParams::None, options), expected); } } sqlformat-0.2.1/src/params.rs000064400000000000000000000025241046102023000142570ustar 00000000000000use crate::tokenizer::Token; use crate::QueryParams; pub(crate) struct Params<'a> { index: usize, params: &'a QueryParams, } impl<'a> Params<'a> { pub fn new(params: &'a QueryParams) -> Self { Params { index: 0, params } } pub fn get(&mut self, token: &'a Token<'a>) -> &'a str { match self.params { QueryParams::Named(params) => token .key .as_ref() .and_then(|key| { params .iter() .find(|param| param.0 == key.named()) .map(|param| param.1.as_str()) }) .unwrap_or(token.value), QueryParams::Indexed(params) => { if let Some(key) = token.key.as_ref().and_then(|key| key.indexed()) { params .get(key) .map(|param| param.as_str()) .unwrap_or(token.value) } else { let value = params .get(self.index) .map(|param| param.as_str()) .unwrap_or(token.value); self.index += 1; value } } QueryParams::None => token.value, } } } sqlformat-0.2.1/src/tokenizer.rs000064400000000000000000001235341046102023000150130ustar 00000000000000use nom::branch::alt; use nom::bytes::complete::{tag, tag_no_case, take, take_until, take_while1}; use nom::character::complete::{anychar, char, digit0, digit1, not_line_ending}; use nom::combinator::{eof, opt, peek, recognize, verify}; use nom::error::ParseError; use nom::error::{Error, ErrorKind}; use nom::multi::many0; use nom::sequence::{terminated, tuple}; use nom::{AsChar, Err, IResult}; use std::borrow::Cow; use unicode_categories::UnicodeCategories; pub(crate) fn tokenize(mut input: &str) -> Vec> { let mut tokens: Vec = Vec::new(); // Keep processing the string until it is empty while let Ok(result) = get_next_token( input, tokens.last().cloned(), tokens .iter() .rfind(|token| token.kind == TokenKind::Reserved) .cloned(), ) { input = result.0; tokens.push(result.1); } tokens } #[derive(Debug, Clone)] pub(crate) struct Token<'a> { pub kind: TokenKind, pub value: &'a str, // Only used for placeholder--there is a reason this isn't on the enum pub key: Option>, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum TokenKind { Whitespace, String, Reserved, ReservedTopLevel, ReservedTopLevelNoIndent, ReservedNewline, Operator, OpenParen, CloseParen, LineComment, BlockComment, Number, Placeholder, Word, } #[derive(Debug, Clone)] pub(crate) enum PlaceholderKind<'a> { Named(Cow<'a, str>), ZeroIndexed(usize), OneIndexed(usize), } impl<'a> PlaceholderKind<'a> { pub fn named(&'a self) -> &'a str { match self { PlaceholderKind::Named(val) => val.as_ref(), _ => "", } } pub fn indexed(&self) -> Option { match self { PlaceholderKind::ZeroIndexed(val) => Some(*val), PlaceholderKind::OneIndexed(val) => Some(*val - 1), _ => None, } } } fn get_next_token<'a>( input: &'a str, previous_token: Option>, last_reserved_token: Option>, ) -> IResult<&'a str, Token<'a>> { get_whitespace_token(input) .or_else(|_| get_comment_token(input)) .or_else(|_| get_string_token(input)) .or_else(|_| get_open_paren_token(input)) .or_else(|_| get_close_paren_token(input)) .or_else(|_| get_placeholder_token(input)) .or_else(|_| get_number_token(input)) .or_else(|_| get_reserved_word_token(input, previous_token, last_reserved_token)) .or_else(|_| get_word_token(input)) .or_else(|_| get_operator_token(input)) } fn get_whitespace_token(input: &str) -> IResult<&str, Token<'_>> { take_while1(char::is_whitespace)(input).map(|(input, token)| { ( input, Token { kind: TokenKind::Whitespace, value: token, key: None, }, ) }) } fn get_comment_token(input: &str) -> IResult<&str, Token<'_>> { get_line_comment_token(input).or_else(|_| get_block_comment_token(input)) } fn get_line_comment_token(input: &str) -> IResult<&str, Token<'_>> { recognize(tuple((alt((tag("#"), tag("--"))), not_line_ending)))(input).map(|(input, token)| { ( input, Token { kind: TokenKind::LineComment, value: token, key: None, }, ) }) } fn get_block_comment_token(input: &str) -> IResult<&str, Token<'_>> { recognize(tuple(( tag("/*"), alt((take_until("*/"), recognize(many0(anychar)))), opt(take(2usize)), )))(input) .map(|(input, token)| { ( input, Token { kind: TokenKind::BlockComment, value: token, key: None, }, ) }) } pub fn take_till_escaping<'a, Error: ParseError<&'a str>>( desired: char, escapes: &'static [char], ) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, Error> { move |input: &str| { let mut chars = input.chars().enumerate().peekable(); loop { let item = chars.next(); let next = chars.peek().map(|item| item.1); match item { Some(item) => { // escape? if escapes.contains(&item.1) && next.map(|n| n == desired).unwrap_or(false) { // consume this and next char chars.next(); continue; } if item.1 == desired { let byte_pos = input.chars().take(item.0).map(|c| c.len()).sum::(); return Ok((&input[byte_pos..], &input[..byte_pos])); } } None => { return Ok(("", input)); } } } } } // This enables the following string patterns: // 1. backtick quoted string using `` to escape // 2. square bracket quoted string (SQL Server) using ]] to escape // 3. double quoted string using "" or \" to escape // 4. single quoted string using '' or \' to escape // 5. national character quoted string using N'' or N\' to escape fn get_string_token(input: &str) -> IResult<&str, Token<'_>> { alt(( recognize(tuple(( char('`'), take_till_escaping('`', &['`']), take(1usize), ))), recognize(tuple(( char('['), take_till_escaping(']', &[']']), take(1usize), ))), recognize(tuple(( char('"'), take_till_escaping('"', &['"', '\\']), take(1usize), ))), recognize(tuple(( char('\''), take_till_escaping('\'', &['\'', '\\']), take(1usize), ))), recognize(tuple(( tag("N'"), take_till_escaping('\'', &['\'', '\\']), take(1usize), ))), ))(input) .map(|(input, token)| { ( input, Token { kind: TokenKind::String, value: token, key: None, }, ) }) } // Like above but it doesn't replace double quotes fn get_placeholder_string_token(input: &str) -> IResult<&str, Token<'_>> { alt(( recognize(tuple(( char('`'), take_till_escaping('`', &['`']), take(1usize), ))), recognize(tuple(( char('['), take_till_escaping(']', &[']']), take(1usize), ))), recognize(tuple(( char('"'), take_till_escaping('"', &['\\']), take(1usize), ))), recognize(tuple(( char('\''), take_till_escaping('\'', &['\\']), take(1usize), ))), recognize(tuple(( tag("N'"), take_till_escaping('\'', &['\\']), take(1usize), ))), ))(input) .map(|(input, token)| { ( input, Token { kind: TokenKind::String, value: token, key: None, }, ) }) } fn get_open_paren_token(input: &str) -> IResult<&str, Token<'_>> { alt((tag("("), terminated(tag_no_case("CASE"), end_of_word)))(input).map(|(input, token)| { ( input, Token { kind: TokenKind::OpenParen, value: token, key: None, }, ) }) } fn get_close_paren_token(input: &str) -> IResult<&str, Token<'_>> { alt((tag(")"), terminated(tag_no_case("END"), end_of_word)))(input).map(|(input, token)| { ( input, Token { kind: TokenKind::CloseParen, value: token, key: None, }, ) }) } fn get_placeholder_token(input: &str) -> IResult<&str, Token<'_>> { alt(( get_ident_named_placeholder_token, get_string_named_placeholder_token, get_indexed_placeholder_token, ))(input) } fn get_indexed_placeholder_token(input: &str) -> IResult<&str, Token<'_>> { alt(( recognize(tuple((alt((char('?'), char('$'))), digit1))), recognize(char('?')), ))(input) .map(|(input, token)| { ( input, Token { kind: TokenKind::Placeholder, value: token, key: if token.len() > 1 { if let Ok(index) = token[1..].parse::() { Some(if token.starts_with('$') { PlaceholderKind::OneIndexed(index) } else { PlaceholderKind::ZeroIndexed(index) }) } else { None } } else { None }, }, ) }) } fn get_ident_named_placeholder_token(input: &str) -> IResult<&str, Token<'_>> { recognize(tuple(( alt((char('@'), char(':'))), take_while1(|item: char| { item.is_alphanumeric() || item == '.' || item == '_' || item == '$' }), )))(input) .map(|(input, token)| { let index = Cow::Borrowed(&token[1..]); ( input, Token { kind: TokenKind::Placeholder, value: token, key: Some(PlaceholderKind::Named(index)), }, ) }) } fn get_string_named_placeholder_token(input: &str) -> IResult<&str, Token<'_>> { recognize(tuple(( alt((char('@'), char(':'))), get_placeholder_string_token, )))(input) .map(|(input, token)| { let index = get_escaped_placeholder_key(&token[2..token.len() - 1], &token[token.len() - 1..]); ( input, Token { kind: TokenKind::Placeholder, value: token, key: Some(PlaceholderKind::Named(index)), }, ) }) } fn get_escaped_placeholder_key<'a>(key: &'a str, quote_char: &str) -> Cow<'a, str> { Cow::Owned(key.replace(&format!("\\{}", quote_char), quote_char)) } fn get_number_token(input: &str) -> IResult<&str, Token<'_>> { recognize(tuple(( opt(tag("-")), alt((scientific_notation, decimal_number, digit1)), )))(input) .map(|(input, token)| { ( input, Token { kind: TokenKind::Number, value: token, key: None, }, ) }) } fn decimal_number(input: &str) -> IResult<&str, &str> { recognize(tuple((digit1, tag("."), digit0)))(input) } fn scientific_notation(input: &str) -> IResult<&str, &str> { recognize(tuple(( alt((decimal_number, digit1)), tag("e"), alt((tag("-"), tag("+"))), digit1, )))(input) } fn get_reserved_word_token<'a>( input: &'a str, previous_token: Option>, last_reserved_token: Option>, ) -> IResult<&'a str, Token<'a>> { // A reserved word cannot be preceded by a "." // this makes it so in "my_table.from", "from" is not considered a reserved word if let Some(token) = previous_token { if token.value == "." { return Err(Err::Error(Error::new(input, ErrorKind::IsNot))); } } alt(( get_top_level_reserved_token, get_newline_reserved_token(last_reserved_token), get_top_level_reserved_token_no_indent, get_plain_reserved_token, ))(input) } // We have to be a bit creative here for performance reasons fn get_uc_words(input: &str, words: usize) -> String { input .split_whitespace() .take(words) .collect::>() .join(" ") .to_ascii_uppercase() } fn get_top_level_reserved_token(input: &str) -> IResult<&str, Token<'_>> { let uc_input = get_uc_words(input, 3); let result: IResult<&str, &str> = alt(( terminated(tag("ADD"), end_of_word), terminated(tag("AFTER"), end_of_word), terminated(tag("ALTER COLUMN"), end_of_word), terminated(tag("ALTER TABLE"), end_of_word), terminated(tag("DELETE FROM"), end_of_word), terminated(tag("EXCEPT"), end_of_word), terminated(tag("FETCH FIRST"), end_of_word), terminated(tag("FROM"), end_of_word), terminated(tag("GROUP BY"), end_of_word), terminated(tag("GO"), end_of_word), terminated(tag("HAVING"), end_of_word), terminated(tag("INSERT INTO"), end_of_word), terminated(tag("INSERT"), end_of_word), terminated(tag("LIMIT"), end_of_word), terminated(tag("MODIFY"), end_of_word), terminated(tag("ORDER BY"), end_of_word), terminated(tag("SELECT"), end_of_word), terminated(tag("SET CURRENT SCHEMA"), end_of_word), terminated(tag("SET SCHEMA"), end_of_word), terminated(tag("SET"), end_of_word), alt(( terminated(tag("UPDATE"), end_of_word), terminated(tag("VALUES"), end_of_word), terminated(tag("WHERE"), end_of_word), )), ))(&uc_input); if let Ok((_, token)) = result { let final_word = token.split(' ').last().unwrap(); let input_end_pos = input.to_ascii_uppercase().find(final_word).unwrap() + final_word.len(); let (token, input) = input.split_at(input_end_pos); Ok(( input, Token { kind: TokenKind::ReservedTopLevel, value: token, key: None, }, )) } else { Err(Err::Error(Error::new(input, ErrorKind::Alt))) } } fn get_newline_reserved_token<'a>( last_reserved_token: Option>, ) -> impl FnMut(&'a str) -> IResult<&'a str, Token<'a>> { move |input: &'a str| { let uc_input = get_uc_words(input, 3); let result: IResult<&str, &str> = alt(( terminated(tag("AND"), end_of_word), terminated(tag("CROSS APPLY"), end_of_word), terminated(tag("CROSS JOIN"), end_of_word), terminated(tag("ELSE"), end_of_word), terminated(tag("INNER JOIN"), end_of_word), terminated(tag("JOIN"), end_of_word), terminated(tag("LEFT JOIN"), end_of_word), terminated(tag("LEFT OUTER JOIN"), end_of_word), terminated(tag("OR"), end_of_word), terminated(tag("OUTER APPLY"), end_of_word), terminated(tag("OUTER JOIN"), end_of_word), terminated(tag("RIGHT JOIN"), end_of_word), terminated(tag("RIGHT OUTER JOIN"), end_of_word), terminated(tag("WHEN"), end_of_word), terminated(tag("XOR"), end_of_word), ))(&uc_input); if let Ok((_, token)) = result { let final_word = token.split(' ').last().unwrap(); let input_end_pos = input.to_ascii_uppercase().find(final_word).unwrap() + final_word.len(); let (token, input) = input.split_at(input_end_pos); let kind = if token == "AND" && last_reserved_token.is_some() && last_reserved_token.as_ref().unwrap().value == "BETWEEN" { // If the "AND" is part of a "BETWEEN" clause, we want to handle it as one clause by not adding a new line. TokenKind::Reserved } else { TokenKind::ReservedNewline }; Ok(( input, Token { kind, value: token, key: None, }, )) } else { Err(Err::Error(Error::new(input, ErrorKind::Alt))) } } } fn get_top_level_reserved_token_no_indent(input: &str) -> IResult<&str, Token<'_>> { let uc_input = get_uc_words(input, 2); let result: IResult<&str, &str> = alt(( terminated(tag("INTERSECT"), end_of_word), terminated(tag("INTERSECT ALL"), end_of_word), terminated(tag("MINUS"), end_of_word), terminated(tag("UNION"), end_of_word), terminated(tag("UNION ALL"), end_of_word), ))(&uc_input); if let Ok((_, token)) = result { let final_word = token.split(' ').last().unwrap(); let input_end_pos = input.to_ascii_uppercase().find(final_word).unwrap() + final_word.len(); let (token, input) = input.split_at(input_end_pos); Ok(( input, Token { kind: TokenKind::ReservedTopLevelNoIndent, value: token, key: None, }, )) } else { Err(Err::Error(Error::new(input, ErrorKind::Alt))) } } fn get_plain_reserved_token(input: &str) -> IResult<&str, Token<'_>> { let uc_input = get_uc_words(input, 1); let result: IResult<&str, &str> = alt(( terminated(tag("ACCESSIBLE"), end_of_word), terminated(tag("ACTION"), end_of_word), terminated(tag("AGAINST"), end_of_word), terminated(tag("AGGREGATE"), end_of_word), terminated(tag("ALGORITHM"), end_of_word), terminated(tag("ALL"), end_of_word), terminated(tag("ALTER"), end_of_word), terminated(tag("ANALYSE"), end_of_word), terminated(tag("ANALYZE"), end_of_word), terminated(tag("AS"), end_of_word), terminated(tag("ASC"), end_of_word), terminated(tag("AUTOCOMMIT"), end_of_word), terminated(tag("AUTO_INCREMENT"), end_of_word), terminated(tag("BACKUP"), end_of_word), terminated(tag("BEGIN"), end_of_word), terminated(tag("BETWEEN"), end_of_word), terminated(tag("BINLOG"), end_of_word), terminated(tag("BOTH"), end_of_word), terminated(tag("CASCADE"), end_of_word), terminated(tag("CASE"), end_of_word), alt(( terminated(tag("CHANGE"), end_of_word), terminated(tag("CHANGED"), end_of_word), terminated(tag("CHARACTER SET"), end_of_word), terminated(tag("CHARSET"), end_of_word), terminated(tag("CHECK"), end_of_word), terminated(tag("CHECKSUM"), end_of_word), terminated(tag("COLLATE"), end_of_word), terminated(tag("COLLATION"), end_of_word), terminated(tag("COLUMN"), end_of_word), terminated(tag("COLUMNS"), end_of_word), terminated(tag("COMMENT"), end_of_word), terminated(tag("COMMIT"), end_of_word), terminated(tag("COMMITTED"), end_of_word), terminated(tag("COMPRESSED"), end_of_word), terminated(tag("CONCURRENT"), end_of_word), terminated(tag("CONSTRAINT"), end_of_word), terminated(tag("CONTAINS"), end_of_word), terminated(tag("CONVERT"), end_of_word), terminated(tag("CREATE"), end_of_word), terminated(tag("CROSS"), end_of_word), alt(( terminated(tag("CURRENT_TIMESTAMP"), end_of_word), terminated(tag("DATABASE"), end_of_word), terminated(tag("DATABASES"), end_of_word), terminated(tag("DAY"), end_of_word), terminated(tag("DAY_HOUR"), end_of_word), terminated(tag("DAY_MINUTE"), end_of_word), terminated(tag("DAY_SECOND"), end_of_word), terminated(tag("DEFAULT"), end_of_word), terminated(tag("DEFINER"), end_of_word), terminated(tag("DELAYED"), end_of_word), terminated(tag("DELETE"), end_of_word), terminated(tag("DESC"), end_of_word), terminated(tag("DESCRIBE"), end_of_word), terminated(tag("DETERMINISTIC"), end_of_word), terminated(tag("DISTINCT"), end_of_word), terminated(tag("DISTINCTROW"), end_of_word), terminated(tag("DIV"), end_of_word), terminated(tag("DO"), end_of_word), terminated(tag("DROP"), end_of_word), terminated(tag("DUMPFILE"), end_of_word), alt(( terminated(tag("DUPLICATE"), end_of_word), terminated(tag("DYNAMIC"), end_of_word), terminated(tag("ELSE"), end_of_word), terminated(tag("ENCLOSED"), end_of_word), terminated(tag("END"), end_of_word), terminated(tag("ENGINE"), end_of_word), terminated(tag("ENGINES"), end_of_word), terminated(tag("ENGINE_TYPE"), end_of_word), terminated(tag("ESCAPE"), end_of_word), terminated(tag("ESCAPED"), end_of_word), terminated(tag("EVENTS"), end_of_word), terminated(tag("EXEC"), end_of_word), terminated(tag("EXECUTE"), end_of_word), terminated(tag("EXISTS"), end_of_word), terminated(tag("EXPLAIN"), end_of_word), terminated(tag("EXTENDED"), end_of_word), terminated(tag("FAST"), end_of_word), terminated(tag("FETCH"), end_of_word), terminated(tag("FIELDS"), end_of_word), alt(( terminated(tag("FILE"), end_of_word), terminated(tag("FIRST"), end_of_word), terminated(tag("FIXED"), end_of_word), terminated(tag("FLUSH"), end_of_word), terminated(tag("FOR"), end_of_word), terminated(tag("FORCE"), end_of_word), terminated(tag("FOREIGN"), end_of_word), terminated(tag("FULL"), end_of_word), terminated(tag("FULLTEXT"), end_of_word), terminated(tag("FUNCTION"), end_of_word), terminated(tag("GLOBAL"), end_of_word), terminated(tag("GRANT"), end_of_word), terminated(tag("GRANTS"), end_of_word), terminated(tag("GROUP_CONCAT"), end_of_word), terminated(tag("HEAP"), end_of_word), terminated(tag("HIGH_PRIORITY"), end_of_word), terminated(tag("HOSTS"), end_of_word), terminated(tag("HOUR"), end_of_word), terminated(tag("HOUR_MINUTE"), end_of_word), terminated(tag("HOUR_SECOND"), end_of_word), alt(( terminated(tag("IDENTIFIED"), end_of_word), terminated(tag("IF"), end_of_word), terminated(tag("IFNULL"), end_of_word), terminated(tag("IGNORE"), end_of_word), terminated(tag("IN"), end_of_word), terminated(tag("INDEX"), end_of_word), terminated(tag("INDEXES"), end_of_word), terminated(tag("INFILE"), end_of_word), terminated(tag("INSERT"), end_of_word), terminated(tag("INSERT_ID"), end_of_word), terminated(tag("INSERT_METHOD"), end_of_word), terminated(tag("INTERVAL"), end_of_word), terminated(tag("INTO"), end_of_word), terminated(tag("INVOKER"), end_of_word), terminated(tag("IS"), end_of_word), terminated(tag("ISOLATION"), end_of_word), terminated(tag("KEY"), end_of_word), terminated(tag("KEYS"), end_of_word), terminated(tag("KILL"), end_of_word), terminated(tag("LAST_INSERT_ID"), end_of_word), alt(( terminated(tag("LEADING"), end_of_word), terminated(tag("LEVEL"), end_of_word), terminated(tag("LIKE"), end_of_word), terminated(tag("LINEAR"), end_of_word), terminated(tag("LINES"), end_of_word), terminated(tag("LOAD"), end_of_word), terminated(tag("LOCAL"), end_of_word), terminated(tag("LOCK"), end_of_word), terminated(tag("LOCKS"), end_of_word), terminated(tag("LOGS"), end_of_word), terminated(tag("LOW_PRIORITY"), end_of_word), terminated(tag("MARIA"), end_of_word), terminated(tag("MASTER"), end_of_word), terminated(tag("MASTER_CONNECT_RETRY"), end_of_word), terminated(tag("MASTER_HOST"), end_of_word), terminated(tag("MASTER_LOG_FILE"), end_of_word), terminated(tag("MATCH"), end_of_word), terminated(tag("MAX_CONNECTIONS_PER_HOUR"), end_of_word), terminated(tag("MAX_QUERIES_PER_HOUR"), end_of_word), terminated(tag("MAX_ROWS"), end_of_word), alt(( terminated(tag("MAX_UPDATES_PER_HOUR"), end_of_word), terminated(tag("MAX_USER_CONNECTIONS"), end_of_word), terminated(tag("MEDIUM"), end_of_word), terminated(tag("MERGE"), end_of_word), terminated(tag("MINUTE"), end_of_word), terminated(tag("MINUTE_SECOND"), end_of_word), terminated(tag("MIN_ROWS"), end_of_word), terminated(tag("MODE"), end_of_word), terminated(tag("MODIFY"), end_of_word), terminated(tag("MONTH"), end_of_word), terminated(tag("MRG_MYISAM"), end_of_word), terminated(tag("MYISAM"), end_of_word), terminated(tag("NAMES"), end_of_word), terminated(tag("NATURAL"), end_of_word), terminated(tag("NOT"), end_of_word), terminated(tag("NOW()"), end_of_word), terminated(tag("NULL"), end_of_word), terminated(tag("OFFSET"), end_of_word), terminated(tag("ON DELETE"), end_of_word), terminated(tag("ON UPDATE"), end_of_word), alt(( terminated(tag("ON"), end_of_word), terminated(tag("ONLY"), end_of_word), terminated(tag("OPEN"), end_of_word), terminated(tag("OPTIMIZE"), end_of_word), terminated(tag("OPTION"), end_of_word), terminated(tag("OPTIONALLY"), end_of_word), terminated(tag("OUTFILE"), end_of_word), terminated(tag("PACK_KEYS"), end_of_word), terminated(tag("PAGE"), end_of_word), terminated(tag("PARTIAL"), end_of_word), terminated(tag("PARTITION"), end_of_word), terminated(tag("PARTITIONS"), end_of_word), terminated(tag("PASSWORD"), end_of_word), terminated(tag("PRIMARY"), end_of_word), terminated(tag("PRIVILEGES"), end_of_word), terminated(tag("PROCEDURE"), end_of_word), terminated(tag("PROCESS"), end_of_word), terminated(tag("PROCESSLIST"), end_of_word), terminated(tag("PURGE"), end_of_word), terminated(tag("QUICK"), end_of_word), alt(( terminated(tag("RAID0"), end_of_word), terminated(tag("RAID_CHUNKS"), end_of_word), terminated(tag("RAID_CHUNKSIZE"), end_of_word), terminated(tag("RAID_TYPE"), end_of_word), terminated(tag("RANGE"), end_of_word), terminated(tag("READ"), end_of_word), terminated(tag("READ_ONLY"), end_of_word), terminated(tag("READ_WRITE"), end_of_word), terminated(tag("REFERENCES"), end_of_word), terminated(tag("REGEXP"), end_of_word), terminated(tag("RELOAD"), end_of_word), terminated(tag("RENAME"), end_of_word), terminated(tag("REPAIR"), end_of_word), terminated(tag("REPEATABLE"), end_of_word), terminated(tag("REPLACE"), end_of_word), terminated(tag("REPLICATION"), end_of_word), terminated(tag("RESET"), end_of_word), terminated(tag("RESTORE"), end_of_word), terminated(tag("RESTRICT"), end_of_word), terminated(tag("RETURN"), end_of_word), alt(( terminated(tag("RETURNS"), end_of_word), terminated(tag("REVOKE"), end_of_word), terminated(tag("RLIKE"), end_of_word), terminated(tag("ROLLBACK"), end_of_word), terminated(tag("ROW"), end_of_word), terminated(tag("ROWS"), end_of_word), terminated(tag("ROW_FORMAT"), end_of_word), terminated(tag("SECOND"), end_of_word), terminated(tag("SECURITY"), end_of_word), terminated(tag("SEPARATOR"), end_of_word), terminated(tag("SERIALIZABLE"), end_of_word), terminated(tag("SESSION"), end_of_word), terminated(tag("SHARE"), end_of_word), terminated(tag("SHOW"), end_of_word), terminated(tag("SHUTDOWN"), end_of_word), terminated(tag("SLAVE"), end_of_word), terminated(tag("SONAME"), end_of_word), terminated(tag("SOUNDS"), end_of_word), terminated(tag("SQL"), end_of_word), terminated(tag("SQL_AUTO_IS_NULL"), end_of_word), alt(( terminated(tag("SQL_BIG_RESULT"), end_of_word), terminated(tag("SQL_BIG_SELECTS"), end_of_word), terminated(tag("SQL_BIG_TABLES"), end_of_word), terminated( tag("SQL_BUFFER_RESULT"), end_of_word, ), terminated(tag("SQL_CACHE"), end_of_word), terminated( tag("SQL_CALC_FOUND_ROWS"), end_of_word, ), terminated(tag("SQL_LOG_BIN"), end_of_word), terminated(tag("SQL_LOG_OFF"), end_of_word), terminated(tag("SQL_LOG_UPDATE"), end_of_word), terminated( tag("SQL_LOW_PRIORITY_UPDATES"), end_of_word, ), terminated( tag("SQL_MAX_JOIN_SIZE"), end_of_word, ), terminated(tag("SQL_NO_CACHE"), end_of_word), terminated( tag("SQL_QUOTE_SHOW_CREATE"), end_of_word, ), terminated( tag("SQL_SAFE_UPDATES"), end_of_word, ), terminated( tag("SQL_SELECT_LIMIT"), end_of_word, ), terminated( tag("SQL_SLAVE_SKIP_COUNTER"), end_of_word, ), terminated( tag("SQL_SMALL_RESULT"), end_of_word, ), terminated(tag("SQL_WARNINGS"), end_of_word), terminated(tag("START"), end_of_word), terminated(tag("STARTING"), end_of_word), alt(( terminated(tag("STATUS"), end_of_word), terminated(tag("STOP"), end_of_word), terminated(tag("STORAGE"), end_of_word), terminated( tag("STRAIGHT_JOIN"), end_of_word, ), terminated(tag("STRING"), end_of_word), terminated(tag("STRIPED"), end_of_word), terminated(tag("SUPER"), end_of_word), terminated(tag("TABLE"), end_of_word), terminated(tag("TABLES"), end_of_word), terminated(tag("TEMPORARY"), end_of_word), terminated(tag("TERMINATED"), end_of_word), terminated(tag("THEN"), end_of_word), terminated(tag("TO"), end_of_word), terminated(tag("TRAILING"), end_of_word), terminated( tag("TRANSACTIONAL"), end_of_word, ), terminated(tag("TRUE"), end_of_word), terminated(tag("TRUNCATE"), end_of_word), terminated(tag("TYPE"), end_of_word), terminated(tag("TYPES"), end_of_word), terminated(tag("UNCOMMITTED"), end_of_word), alt(( terminated(tag("UNIQUE"), end_of_word), terminated(tag("UNLOCK"), end_of_word), terminated( tag("UNSIGNED"), end_of_word, ), terminated(tag("USAGE"), end_of_word), terminated(tag("USE"), end_of_word), terminated(tag("USING"), end_of_word), terminated( tag("VARIABLES"), end_of_word, ), terminated(tag("VIEW"), end_of_word), terminated(tag("WHEN"), end_of_word), terminated(tag("WITH"), end_of_word), terminated(tag("WORK"), end_of_word), terminated(tag("WRITE"), end_of_word), terminated( tag("YEAR_MONTH"), end_of_word, ), )), )), )), )), )), )), )), )), )), )), )), )), )), ))(&uc_input); if let Ok((_, token)) = result { let input_end_pos = token.len(); let (token, input) = input.split_at(input_end_pos); Ok(( input, Token { kind: TokenKind::Reserved, value: token, key: None, }, )) } else { Err(Err::Error(Error::new(input, ErrorKind::Alt))) } } fn get_word_token(input: &str) -> IResult<&str, Token<'_>> { take_while1(is_word_character)(input).map(|(input, token)| { ( input, Token { kind: TokenKind::Word, value: token, key: None, }, ) }) } fn get_operator_token(input: &str) -> IResult<&str, Token<'_>> { alt(( tag("!="), tag("<>"), tag("=="), tag("<="), tag(">="), tag("!<"), tag("!>"), tag("||"), tag("::"), tag("->>"), tag("->"), tag("~~*"), tag("~~"), tag("!~~*"), tag("!~~"), tag("~*"), tag("!~*"), tag("!~"), tag(":="), recognize(verify(take(1usize), |token: &str| { token != "\n" && token != "\r" })), ))(input) .map(|(input, token)| { ( input, Token { kind: TokenKind::Operator, value: token, key: None, }, ) }) } fn end_of_word(input: &str) -> IResult<&str, &str> { peek(alt(( eof, verify(take(1usize), |val: &str| { !is_word_character(val.chars().next().unwrap()) }), )))(input) } fn is_word_character(item: char) -> bool { item.is_alphanumeric() || item.is_mark() || item.is_punctuation_connector() }