tree-sitter-0.20.10/.cargo_vcs_info.json0000644000000001410000000000100134370ustar { "git": { "sha1": "0c49d6745b3fc4822ab02e0018770cd6383a779c" }, "path_in_vcs": "lib" }tree-sitter-0.20.10/Cargo.toml0000644000000023330000000000100114420ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "tree-sitter" version = "0.20.10" authors = ["Max Brunsfeld "] build = "binding_rust/build.rs" include = [ "/binding_rust/*", "/Cargo.toml", "/include/*", "/src/*.h", "/src/*.c", "/src/unicode/*", ] description = "Rust bindings to the Tree-sitter parsing library" readme = "binding_rust/README.md" keywords = [ "incremental", "parsing", ] categories = [ "api-bindings", "parsing", "text-editors", ] license = "MIT" repository = "https://github.com/tree-sitter/tree-sitter" [lib] path = "binding_rust/lib.rs" [dependencies.lazy_static] version = "1.2.0" optional = true [dependencies.regex] version = "1" [build-dependencies.cc] version = "^1.0.58" tree-sitter-0.20.10/Cargo.toml.orig0000644000000013230000000000100123770ustar [package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" version = "0.20.10" authors = ["Max Brunsfeld "] edition = "2021" license = "MIT" readme = "binding_rust/README.md" keywords = ["incremental", "parsing"] categories = ["api-bindings", "parsing", "text-editors"] repository = "https://github.com/tree-sitter/tree-sitter" rust-version.workspace = true build = "binding_rust/build.rs" include = [ "/binding_rust/*", "/Cargo.toml", "/include/*", "/src/*.h", 
"/src/*.c", "/src/unicode/*", ] [dependencies] lazy_static = { version = "1.2.0", optional = true } regex = "1" [build-dependencies] cc = "^1.0.58" [lib] path = "binding_rust/lib.rs" tree-sitter-0.20.10/Cargo.toml.orig000064400000000000000000000013231046102023000151210ustar 00000000000000[package] name = "tree-sitter" description = "Rust bindings to the Tree-sitter parsing library" version = "0.20.10" authors = ["Max Brunsfeld "] edition = "2021" license = "MIT" readme = "binding_rust/README.md" keywords = ["incremental", "parsing"] categories = ["api-bindings", "parsing", "text-editors"] repository = "https://github.com/tree-sitter/tree-sitter" rust-version.workspace = true build = "binding_rust/build.rs" include = [ "/binding_rust/*", "/Cargo.toml", "/include/*", "/src/*.h", "/src/*.c", "/src/unicode/*", ] [dependencies] lazy_static = { version = "1.2.0", optional = true } regex = "1" [build-dependencies] cc = "^1.0.58" [lib] path = "binding_rust/lib.rs" tree-sitter-0.20.10/binding_rust/README.md000064400000000000000000000062021046102023000162010ustar 00000000000000# Rust Tree-sitter [![Crates.io](https://img.shields.io/crates/v/tree-sitter.svg)](https://crates.io/crates/tree-sitter) Rust bindings to the [Tree-sitter][] parsing library. ### Basic Usage First, create a parser: ```rust use tree_sitter::{Parser, Language}; let mut parser = Parser::new(); ``` Tree-sitter languages consist of generated C code. 
To make sure they're properly compiled and linked, you can create a [build script](https://doc.rust-lang.org/cargo/reference/build-scripts.html) like the following (assuming `tree-sitter-javascript` is in your root directory): ```rust use std::path::PathBuf; fn main() { let dir: PathBuf = ["tree-sitter-javascript", "src"].iter().collect(); cc::Build::new() .include(&dir) .file(dir.join("parser.c")) .file(dir.join("scanner.c")) .compile("tree-sitter-javascript"); } ``` Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`: ```toml [build-dependencies] cc="*" ``` To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser. ```rust extern "C" { fn tree_sitter_c() -> Language; } extern "C" { fn tree_sitter_rust() -> Language; } extern "C" { fn tree_sitter_javascript() -> Language; } let language = unsafe { tree_sitter_rust() }; parser.set_language(language).unwrap(); ``` Now you can parse source code: ```rust let source_code = "fn test() {}"; let tree = parser.parse(source_code, None).unwrap(); let root_node = tree.root_node(); assert_eq!(root_node.kind(), "source_file"); assert_eq!(root_node.start_position().column, 0); assert_eq!(root_node.end_position().column, 12); ``` ### Editing Once you have a syntax tree, you can update it when your source code changes. Passing in the previous edited tree makes `parse` run much more quickly: ```rust let new_source_code = "fn test(a: u32) {}" tree.edit(InputEdit { start_byte: 8, old_end_byte: 8, new_end_byte: 14, start_position: Point::new(0, 8), old_end_position: Point::new(0, 8), new_end_position: Point::new(0, 14), }); let new_tree = parser.parse(new_source_code, Some(&tree)); ``` ### Text Input The source code to parse can be provided either as a string, a slice, a vector, or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16: ```rust // Store some source code in an array of lines. 
let lines = &[ "pub fn foo() {", " 1", "}", ]; // Parse the source code using a custom callback. The callback is called // with both a byte offset and a row/column offset. let tree = parser.parse_with(&mut |_byte: u32, position: Point| -> &[u8] { let row = position.row as usize; let column = position.column as usize; if row < lines.len() { if column < lines[row].as_bytes().len() { &lines[row].as_bytes()[column..] } else { "\n".as_bytes() } } else { &[] } }, None).unwrap(); assert_eq!( tree.root_node().to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (number_literal))))" ); ``` [tree-sitter]: https://github.com/tree-sitter/tree-sitter tree-sitter-0.20.10/binding_rust/bindings.rs000064400000000000000000001137001046102023000170670ustar 00000000000000/* automatically generated by rust-bindgen 0.59.2 */ pub type TSSymbol = u16; pub type TSFieldId = u16; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSLanguage { _unused: [u8; 0], } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSParser { _unused: [u8; 0], } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSTree { _unused: [u8; 0], } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQuery { _unused: [u8; 0], } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryCursor { _unused: [u8; 0], } pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; pub type TSInputEncoding = ::std::os::raw::c_uint; pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; pub type TSSymbolType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSPoint { pub row: u32, pub column: u32, } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSRange { pub start_point: TSPoint, pub end_point: TSPoint, pub start_byte: u32, pub end_byte: u32, 
} #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSInput { pub payload: *mut ::std::os::raw::c_void, pub read: ::std::option::Option< unsafe extern "C" fn( payload: *mut ::std::os::raw::c_void, byte_index: u32, position: TSPoint, bytes_read: *mut u32, ) -> *const ::std::os::raw::c_char, >, pub encoding: TSInputEncoding, } pub const TSLogType_TSLogTypeParse: TSLogType = 0; pub const TSLogType_TSLogTypeLex: TSLogType = 1; pub type TSLogType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSLogger { pub payload: *mut ::std::os::raw::c_void, pub log: ::std::option::Option< unsafe extern "C" fn( payload: *mut ::std::os::raw::c_void, arg1: TSLogType, arg2: *const ::std::os::raw::c_char, ), >, } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSInputEdit { pub start_byte: u32, pub old_end_byte: u32, pub new_end_byte: u32, pub start_point: TSPoint, pub old_end_point: TSPoint, pub new_end_point: TSPoint, } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSNode { pub context: [u32; 4usize], pub id: *const ::std::os::raw::c_void, pub tree: *const TSTree, } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSTreeCursor { pub tree: *const ::std::os::raw::c_void, pub id: *const ::std::os::raw::c_void, pub context: [u32; 2usize], } #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryCapture { pub node: TSNode, pub index: u32, } pub const TSQuantifier_TSQuantifierZero: TSQuantifier = 0; pub const TSQuantifier_TSQuantifierZeroOrOne: TSQuantifier = 1; pub const TSQuantifier_TSQuantifierZeroOrMore: TSQuantifier = 2; pub const TSQuantifier_TSQuantifierOne: TSQuantifier = 3; pub const TSQuantifier_TSQuantifierOneOrMore: TSQuantifier = 4; pub type TSQuantifier = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryMatch { pub id: u32, pub pattern_index: u16, pub capture_count: u16, pub captures: *const TSQueryCapture, } pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeDone: 
TSQueryPredicateStepType = 0; pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; pub type TSQueryPredicateStepType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSQueryPredicateStep { pub type_: TSQueryPredicateStepType, pub value_id: u32, } pub const TSQueryError_TSQueryErrorNone: TSQueryError = 0; pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1; pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2; pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5; pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6; pub type TSQueryError = ::std::os::raw::c_uint; extern "C" { #[doc = " Create a new parser."] pub fn ts_parser_new() -> *mut TSParser; } extern "C" { #[doc = " Delete the parser, freeing all of the memory that it used."] pub fn ts_parser_delete(parser: *mut TSParser); } extern "C" { #[doc = " Set the language that the parser should use for parsing."] #[doc = ""] #[doc = " Returns a boolean indicating whether or not the language was successfully"] #[doc = " assigned. True means assignment succeeded. False means there was a version"] #[doc = " mismatch: the language was generated with an incompatible version of the"] #[doc = " Tree-sitter CLI. 
Check the language's version using `ts_language_version`"] #[doc = " and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and"] #[doc = " `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants."] pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; } extern "C" { #[doc = " Get the parser's current language."] pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; } extern "C" { #[doc = " Set the ranges of text that the parser should include when parsing."] #[doc = ""] #[doc = " By default, the parser will always include entire documents. This function"] #[doc = " allows you to parse only a *portion* of a document but still return a syntax"] #[doc = " tree whose ranges match up with the document as a whole. You can also pass"] #[doc = " multiple disjoint ranges."] #[doc = ""] #[doc = " The second and third parameters specify the location and length of an array"] #[doc = " of ranges. The parser does *not* take ownership of these ranges; it copies"] #[doc = " the data, so it doesn't matter how these ranges are allocated."] #[doc = ""] #[doc = " If `length` is zero, then the entire document will be parsed. Otherwise,"] #[doc = " the given ranges must be ordered from earliest to latest in the document,"] #[doc = " and they must not overlap. That is, the following must hold for all"] #[doc = " `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte"] #[doc = ""] #[doc = " If this requirement is not satisfied, the operation will fail, the ranges"] #[doc = " will not be assigned, and this function will return `false`. On success,"] #[doc = " this function returns `true`"] pub fn ts_parser_set_included_ranges( self_: *mut TSParser, ranges: *const TSRange, length: u32, ) -> bool; } extern "C" { #[doc = " Get the ranges of text that the parser will include when parsing."] #[doc = ""] #[doc = " The returned pointer is owned by the parser. The caller should not free it"] #[doc = " or write to it. 
The length of the array will be written to the given"] #[doc = " `length` pointer."] pub fn ts_parser_included_ranges(self_: *const TSParser, length: *mut u32) -> *const TSRange; } extern "C" { #[doc = " Use the parser to parse some source code and create a syntax tree."] #[doc = ""] #[doc = " If you are parsing this document for the first time, pass `NULL` for the"] #[doc = " `old_tree` parameter. Otherwise, if you have already parsed an earlier"] #[doc = " version of this document and the document has since been edited, pass the"] #[doc = " previous syntax tree so that the unchanged parts of it can be reused."] #[doc = " This will save time and memory. For this to work correctly, you must have"] #[doc = " already edited the old syntax tree using the `ts_tree_edit` function in a"] #[doc = " way that exactly matches the source code changes."] #[doc = ""] #[doc = " The `TSInput` parameter lets you specify how to read the text. It has the"] #[doc = " following three fields:"] #[doc = " 1. `read`: A function to retrieve a chunk of text at a given byte offset"] #[doc = " and (row, column) position. The function should return a pointer to the"] #[doc = " text and write its length to the `bytes_read` pointer. The parser does"] #[doc = " not take ownership of this buffer; it just borrows it until it has"] #[doc = " finished reading it. The function should write a zero value to the"] #[doc = " `bytes_read` pointer to indicate the end of the document."] #[doc = " 2. `payload`: An arbitrary pointer that will be passed to each invocation"] #[doc = " of the `read` function."] #[doc = " 3. `encoding`: An indication of how the text is encoded. Either"] #[doc = " `TSInputEncodingUTF8` or `TSInputEncodingUTF16`."] #[doc = ""] #[doc = " This function returns a syntax tree on success, and `NULL` on failure. There"] #[doc = " are three possible reasons for failure:"] #[doc = " 1. The parser does not have a language assigned. 
Check for this using the"] #[doc = "`ts_parser_language` function."] #[doc = " 2. Parsing was cancelled due to a timeout that was set by an earlier call to"] #[doc = " the `ts_parser_set_timeout_micros` function. You can resume parsing from"] #[doc = " where the parser left out by calling `ts_parser_parse` again with the"] #[doc = " same arguments. Or you can start parsing from scratch by first calling"] #[doc = " `ts_parser_reset`."] #[doc = " 3. Parsing was cancelled using a cancellation flag that was set by an"] #[doc = " earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing"] #[doc = " from where the parser left out by calling `ts_parser_parse` again with"] #[doc = " the same arguments."] pub fn ts_parser_parse( self_: *mut TSParser, old_tree: *const TSTree, input: TSInput, ) -> *mut TSTree; } extern "C" { #[doc = " Use the parser to parse some source code stored in one contiguous buffer."] #[doc = " The first two parameters are the same as in the `ts_parser_parse` function"] #[doc = " above. The second two parameters indicate the location of the buffer and its"] #[doc = " length in bytes."] pub fn ts_parser_parse_string( self_: *mut TSParser, old_tree: *const TSTree, string: *const ::std::os::raw::c_char, length: u32, ) -> *mut TSTree; } extern "C" { #[doc = " Use the parser to parse some source code stored in one contiguous buffer with"] #[doc = " a given encoding. The first four parameters work the same as in the"] #[doc = " `ts_parser_parse_string` method above. 
The final parameter indicates whether"] #[doc = " the text is encoded as UTF8 or UTF16."] pub fn ts_parser_parse_string_encoding( self_: *mut TSParser, old_tree: *const TSTree, string: *const ::std::os::raw::c_char, length: u32, encoding: TSInputEncoding, ) -> *mut TSTree; } extern "C" { #[doc = " Instruct the parser to start the next parse from the beginning."] #[doc = ""] #[doc = " If the parser previously failed because of a timeout or a cancellation, then"] #[doc = " by default, it will resume where it left off on the next call to"] #[doc = " `ts_parser_parse` or other parsing functions. If you don't want to resume,"] #[doc = " and instead intend to use this parser to parse some other document, you must"] #[doc = " call `ts_parser_reset` first."] pub fn ts_parser_reset(self_: *mut TSParser); } extern "C" { #[doc = " Set the maximum duration in microseconds that parsing should be allowed to"] #[doc = " take before halting."] #[doc = ""] #[doc = " If parsing takes longer than this, it will halt early, returning NULL."] #[doc = " See `ts_parser_parse` for more information."] pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout: u64); } extern "C" { #[doc = " Get the duration in microseconds that parsing is allowed to take."] pub fn ts_parser_timeout_micros(self_: *const TSParser) -> u64; } extern "C" { #[doc = " Set the parser's current cancellation flag pointer."] #[doc = ""] #[doc = " If a non-null pointer is assigned, then the parser will periodically read"] #[doc = " from this pointer during parsing. If it reads a non-zero value, it will"] #[doc = " halt early, returning NULL. 
See `ts_parser_parse` for more information."] pub fn ts_parser_set_cancellation_flag(self_: *mut TSParser, flag: *const usize); } extern "C" { #[doc = " Get the parser's current cancellation flag pointer."] pub fn ts_parser_cancellation_flag(self_: *const TSParser) -> *const usize; } extern "C" { #[doc = " Set the logger that a parser should use during parsing."] #[doc = ""] #[doc = " The parser does not take ownership over the logger payload. If a logger was"] #[doc = " previously assigned, the caller is responsible for releasing any memory"] #[doc = " owned by the previous logger."] pub fn ts_parser_set_logger(self_: *mut TSParser, logger: TSLogger); } extern "C" { #[doc = " Get the parser's current logger."] pub fn ts_parser_logger(self_: *const TSParser) -> TSLogger; } extern "C" { #[doc = " Set the file descriptor to which the parser should write debugging graphs"] #[doc = " during parsing. The graphs are formatted in the DOT language. You may want"] #[doc = " to pipe these graphs directly to a `dot(1)` process in order to generate"] #[doc = " SVG output. You can turn off this logging by passing a negative number."] pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, file: ::std::os::raw::c_int); } extern "C" { #[doc = " Create a shallow copy of the syntax tree. 
This is very fast."] #[doc = ""] #[doc = " You need to copy a syntax tree in order to use it on more than one thread at"] #[doc = " a time, as syntax trees are not thread safe."] pub fn ts_tree_copy(self_: *const TSTree) -> *mut TSTree; } extern "C" { #[doc = " Delete the syntax tree, freeing all of the memory that it used."] pub fn ts_tree_delete(self_: *mut TSTree); } extern "C" { #[doc = " Get the root node of the syntax tree."] pub fn ts_tree_root_node(self_: *const TSTree) -> TSNode; } extern "C" { #[doc = " Get the root node of the syntax tree, but with its position"] #[doc = " shifted forward by the given offset."] pub fn ts_tree_root_node_with_offset( self_: *const TSTree, offset_bytes: u32, offset_point: TSPoint, ) -> TSNode; } extern "C" { #[doc = " Get the language that was used to parse the syntax tree."] pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; } extern "C" { #[doc = " Get the array of included ranges that was used to parse the syntax tree."] #[doc = ""] #[doc = " The returned pointer must be freed by the caller."] pub fn ts_tree_included_ranges(arg1: *const TSTree, length: *mut u32) -> *mut TSRange; } extern "C" { #[doc = " Edit the syntax tree to keep it in sync with source code that has been"] #[doc = " edited."] #[doc = ""] #[doc = " You must describe the edit both in terms of byte offsets and in terms of"] #[doc = " (row, column) coordinates."] pub fn ts_tree_edit(self_: *mut TSTree, edit: *const TSInputEdit); } extern "C" { #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same"] #[doc = " document, returning an array of ranges whose syntactic structure has changed."] #[doc = ""] #[doc = " For this to work correctly, the old syntax tree must have been edited such"] #[doc = " that its ranges match up to the new tree. 
Generally, you'll want to call"] #[doc = " this function right after calling one of the `ts_parser_parse` functions."] #[doc = " You need to pass the old tree that was passed to parse, as well as the new"] #[doc = " tree that was returned from that function."] #[doc = ""] #[doc = " The returned array is allocated using `malloc` and the caller is responsible"] #[doc = " for freeing it using `free`. The length of the array will be written to the"] #[doc = " given `length` pointer."] pub fn ts_tree_get_changed_ranges( old_tree: *const TSTree, new_tree: *const TSTree, length: *mut u32, ) -> *mut TSRange; } extern "C" { #[doc = " Write a DOT graph describing the syntax tree to the given file."] pub fn ts_tree_print_dot_graph(arg1: *const TSTree, file_descriptor: ::std::os::raw::c_int); } extern "C" { #[doc = " Get the node's type as a null-terminated string."] pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the node's type as a numerical id."] pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; } extern "C" { #[doc = " Get the node's start byte."] pub fn ts_node_start_byte(arg1: TSNode) -> u32; } extern "C" { #[doc = " Get the node's start position in terms of rows and columns."] pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; } extern "C" { #[doc = " Get the node's end byte."] pub fn ts_node_end_byte(arg1: TSNode) -> u32; } extern "C" { #[doc = " Get the node's end position in terms of rows and columns."] pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; } extern "C" { #[doc = " Get an S-expression representing the node as a string."] #[doc = ""] #[doc = " This string is allocated with `malloc` and the caller is responsible for"] #[doc = " freeing it using `free`."] pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; } extern "C" { #[doc = " Check if the node is null. 
Functions like `ts_node_child` and"] #[doc = " `ts_node_next_sibling` will return a null node to indicate that no such node"] #[doc = " was found."] pub fn ts_node_is_null(arg1: TSNode) -> bool; } extern "C" { #[doc = " Check if the node is *named*. Named nodes correspond to named rules in the"] #[doc = " grammar, whereas *anonymous* nodes correspond to string literals in the"] #[doc = " grammar."] pub fn ts_node_is_named(arg1: TSNode) -> bool; } extern "C" { #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in"] #[doc = " order to recover from certain kinds of syntax errors."] pub fn ts_node_is_missing(arg1: TSNode) -> bool; } extern "C" { #[doc = " Check if the node is *extra*. Extra nodes represent things like comments,"] #[doc = " which are not required the grammar, but can appear anywhere."] pub fn ts_node_is_extra(arg1: TSNode) -> bool; } extern "C" { #[doc = " Check if a syntax node has been edited."] pub fn ts_node_has_changes(arg1: TSNode) -> bool; } extern "C" { #[doc = " Check if the node is a syntax error or contains any syntax errors."] pub fn ts_node_has_error(arg1: TSNode) -> bool; } extern "C" { #[doc = " Get the node's immediate parent."] pub fn ts_node_parent(arg1: TSNode) -> TSNode; } extern "C" { #[doc = " Get the node's child at the given index, where zero represents the first"] #[doc = " child."] pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { #[doc = " Get the field name for node's child at the given index, where zero represents"] #[doc = " the first child. 
Returns NULL, if no field is found."] pub fn ts_node_field_name_for_child(arg1: TSNode, arg2: u32) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the node's number of children."] pub fn ts_node_child_count(arg1: TSNode) -> u32; } extern "C" { #[doc = " Get the node's *named* child at the given index."] #[doc = ""] #[doc = " See also `ts_node_is_named`."] pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { #[doc = " Get the node's number of *named* children."] #[doc = ""] #[doc = " See also `ts_node_is_named`."] pub fn ts_node_named_child_count(arg1: TSNode) -> u32; } extern "C" { #[doc = " Get the node's child with the given field name."] pub fn ts_node_child_by_field_name( self_: TSNode, field_name: *const ::std::os::raw::c_char, field_name_length: u32, ) -> TSNode; } extern "C" { #[doc = " Get the node's child with the given numerical field id."] #[doc = ""] #[doc = " You can convert a field name to an id using the"] #[doc = " `ts_language_field_id_for_name` function."] pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode; } extern "C" { #[doc = " Get the node's next / previous sibling."] pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; } extern "C" { pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; } extern "C" { #[doc = " Get the node's next / previous *named* sibling."] pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; } extern "C" { #[doc = " Get the node's first child that extends beyond the given byte offset."] pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { #[doc = " Get the node's first named child that extends beyond the given byte offset."] pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { #[doc = " Get the smallest node within this node that spans the given range of bytes"] #[doc = " or (row, column) positions."] pub fn 
ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) -> TSNode; } extern "C" { #[doc = " Get the smallest named node within this node that spans the given range of"] #[doc = " bytes or (row, column) positions."] pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; } extern "C" { pub fn ts_node_named_descendant_for_point_range( arg1: TSNode, arg2: TSPoint, arg3: TSPoint, ) -> TSNode; } extern "C" { #[doc = " Edit the node to keep it in-sync with source code that has been edited."] #[doc = ""] #[doc = " This function is only rarely needed. When you edit a syntax tree with the"] #[doc = " `ts_tree_edit` function, all of the nodes that you retrieve from the tree"] #[doc = " afterward will already reflect the edit. You only need to use `ts_node_edit`"] #[doc = " when you have a `TSNode` instance that you want to keep and continue to use"] #[doc = " after an edit."] pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit); } extern "C" { #[doc = " Check if two nodes are identical."] pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; } extern "C" { #[doc = " Create a new tree cursor starting from the given node."] #[doc = ""] #[doc = " A tree cursor allows you to walk a syntax tree more efficiently than is"] #[doc = " possible using the `TSNode` functions. 
It is a mutable object that is always"] #[doc = " on a certain syntax node, and can be moved imperatively to different nodes."] pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor; } extern "C" { #[doc = " Delete a tree cursor, freeing all of the memory that it used."] pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); } extern "C" { #[doc = " Re-initialize a tree cursor to start at a different node."] pub fn ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode); } extern "C" { #[doc = " Get the tree cursor's current node."] pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } extern "C" { #[doc = " Get the field name of the tree cursor's current node."] #[doc = ""] #[doc = " This returns `NULL` if the current node doesn't have a field."] #[doc = " See also `ts_node_child_by_field_name`."] pub fn ts_tree_cursor_current_field_name( arg1: *const TSTreeCursor, ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the field id of the tree cursor's current node."] #[doc = ""] #[doc = " This returns zero if the current node doesn't have a field."] #[doc = " See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`."] pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId; } extern "C" { #[doc = " Move the cursor to the parent of its current node."] #[doc = ""] #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] #[doc = " if there was no parent node (the cursor was already on the root node)."] pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; } extern "C" { #[doc = " Move the cursor to the next sibling of its current node."] #[doc = ""] #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] #[doc = " if there was no next sibling node."] pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; } extern "C" { #[doc = " Move the cursor to the first child of its current node."] #[doc = ""] #[doc 
= " This returns `true` if the cursor successfully moved, and returns `false`"] #[doc = " if there were no children."] pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; } extern "C" { #[doc = " Move the cursor to the first child of its current node that extends beyond"] #[doc = " the given byte offset or point."] #[doc = ""] #[doc = " This returns the index of the child node if one was found, and returns -1"] #[doc = " if no such child was found."] pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; } extern "C" { pub fn ts_tree_cursor_goto_first_child_for_point(arg1: *mut TSTreeCursor, arg2: TSPoint) -> i64; } extern "C" { pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor; } extern "C" { #[doc = " Create a new query from a string containing one or more S-expression"] #[doc = " patterns. The query is associated with a particular language, and can"] #[doc = " only be run on syntax nodes parsed with that language."] #[doc = ""] #[doc = " If all of the given patterns are valid, this returns a `TSQuery`."] #[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"] #[doc = " of information about the problem:"] #[doc = " 1. The byte offset of the error is written to the `error_offset` parameter."] #[doc = " 2. 
The type of error is written to the `error_type` parameter."] pub fn ts_query_new( language: *const TSLanguage, source: *const ::std::os::raw::c_char, source_len: u32, error_offset: *mut u32, error_type: *mut TSQueryError, ) -> *mut TSQuery; } extern "C" { #[doc = " Delete a query, freeing all of the memory that it used."] pub fn ts_query_delete(arg1: *mut TSQuery); } extern "C" { #[doc = " Get the number of patterns, captures, or string literals in the query."] pub fn ts_query_pattern_count(arg1: *const TSQuery) -> u32; } extern "C" { pub fn ts_query_capture_count(arg1: *const TSQuery) -> u32; } extern "C" { pub fn ts_query_string_count(arg1: *const TSQuery) -> u32; } extern "C" { #[doc = " Get the byte offset where the given pattern starts in the query's source."] #[doc = ""] #[doc = " This can be useful when combining queries by concatenating their source"] #[doc = " code strings."] pub fn ts_query_start_byte_for_pattern(arg1: *const TSQuery, arg2: u32) -> u32; } extern "C" { #[doc = " Get all of the predicates for the given pattern in the query."] #[doc = ""] #[doc = " The predicates are represented as a single array of steps. There are three"] #[doc = " types of steps in this array, which correspond to the three legal values for"] #[doc = " the `type` field:"] #[doc = " - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names"] #[doc = " of captures. Their `value_id` can be used with the"] #[doc = " `ts_query_capture_name_for_id` function to obtain the name of the capture."] #[doc = " - `TSQueryPredicateStepTypeString` - Steps with this type represent literal"] #[doc = " strings. Their `value_id` can be used with the"] #[doc = " `ts_query_string_value_for_id` function to obtain their string value."] #[doc = " - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*"] #[doc = " that represent the end of an individual predicate. 
If a pattern has two"] #[doc = " predicates, then there will be two steps with this `type` in the array."] pub fn ts_query_predicates_for_pattern( self_: *const TSQuery, pattern_index: u32, length: *mut u32, ) -> *const TSQueryPredicateStep; } extern "C" { pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { pub fn ts_query_is_pattern_guaranteed_at_step(self_: *const TSQuery, byte_offset: u32) -> bool; } extern "C" { #[doc = " Get the name and length of one of the query's captures, or one of the"] #[doc = " query's string literals. Each capture and string is associated with a"] #[doc = " numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_name_for_id( arg1: *const TSQuery, id: u32, length: *mut u32, ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the quantifier of the query's captures. Each capture is * associated"] #[doc = " with a numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_quantifier_for_id( arg1: *const TSQuery, pattern_id: u32, capture_id: u32, ) -> TSQuantifier; } extern "C" { pub fn ts_query_string_value_for_id( arg1: *const TSQuery, id: u32, length: *mut u32, ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Disable a certain capture within a query."] #[doc = ""] #[doc = " This prevents the capture from being returned in matches, and also avoids"] #[doc = " any resource usage associated with recording the capture. Currently, there"] #[doc = " is no way to undo this."] pub fn ts_query_disable_capture( arg1: *mut TSQuery, arg2: *const ::std::os::raw::c_char, arg3: u32, ); } extern "C" { #[doc = " Disable a certain pattern within a query."] #[doc = ""] #[doc = " This prevents the pattern from matching and removes most of the overhead"] #[doc = " associated with the pattern. 
Currently, there is no way to undo this."] pub fn ts_query_disable_pattern(arg1: *mut TSQuery, arg2: u32); } extern "C" { #[doc = " Create a new cursor for executing a given query."] #[doc = ""] #[doc = " The cursor stores the state that is needed to iteratively search"] #[doc = " for matches. To use the query cursor, first call `ts_query_cursor_exec`"] #[doc = " to start running a given query on a given syntax node. Then, there are"] #[doc = " two options for consuming the results of the query:"] #[doc = " 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"] #[doc = " *matches* in the order that they were found. Each match contains the"] #[doc = " index of the pattern that matched, and an array of captures. Because"] #[doc = " multiple patterns can match the same set of nodes, one match may contain"] #[doc = " captures that appear *before* some of the captures from a previous match."] #[doc = " 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the"] #[doc = " individual *captures* in the order that they appear. 
This is useful if"] #[doc = " don't care about which pattern matched, and just want a single ordered"] #[doc = " sequence of captures."] #[doc = ""] #[doc = " If you don't care about consuming all of the results, you can stop calling"] #[doc = " `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point."] #[doc = " You can then start executing another query on another node by calling"] #[doc = " `ts_query_cursor_exec` again."] pub fn ts_query_cursor_new() -> *mut TSQueryCursor; } extern "C" { #[doc = " Delete a query cursor, freeing all of the memory that it used."] pub fn ts_query_cursor_delete(arg1: *mut TSQueryCursor); } extern "C" { #[doc = " Start running a given query on a given node."] pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); } extern "C" { #[doc = " Manage the maximum number of in-progress matches allowed by this query"] #[doc = " cursor."] #[doc = ""] #[doc = " Query cursors have an optional maximum capacity for storing lists of"] #[doc = " in-progress captures. If this capacity is exceeded, then the"] #[doc = " earliest-starting match will silently be dropped to make room for further"] #[doc = " matches. 
This maximum capacity is optional — by default, query cursors allow"] #[doc = " any number of pending matches, dynamically allocating new space for them as"] #[doc = " needed as the query is executed."] pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool; } extern "C" { pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32; } extern "C" { pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32); } extern "C" { #[doc = " Set the range of bytes or (row, column) positions in which the query"] #[doc = " will be executed."] pub fn ts_query_cursor_set_byte_range(arg1: *mut TSQueryCursor, arg2: u32, arg3: u32); } extern "C" { pub fn ts_query_cursor_set_point_range(arg1: *mut TSQueryCursor, arg2: TSPoint, arg3: TSPoint); } extern "C" { #[doc = " Advance to the next match of the currently running query."] #[doc = ""] #[doc = " If there is a match, write it to `*match` and return `true`."] #[doc = " Otherwise, return `false`."] pub fn ts_query_cursor_next_match(arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch) -> bool; } extern "C" { pub fn ts_query_cursor_remove_match(arg1: *mut TSQueryCursor, id: u32); } extern "C" { #[doc = " Advance to the next capture of the currently running query."] #[doc = ""] #[doc = " If there is a capture, write its match to `*match` and its index within"] #[doc = " the matche's capture list to `*capture_index`. 
Otherwise, return `false`."] pub fn ts_query_cursor_next_capture( arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch, capture_index: *mut u32, ) -> bool; } extern "C" { #[doc = " Get the number of distinct node types in the language."] pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; } extern "C" { #[doc = " Get a node type string for the given numerical id."] pub fn ts_language_symbol_name( arg1: *const TSLanguage, arg2: TSSymbol, ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the numerical id for the given node type string."] pub fn ts_language_symbol_for_name( self_: *const TSLanguage, string: *const ::std::os::raw::c_char, length: u32, is_named: bool, ) -> TSSymbol; } extern "C" { #[doc = " Get the number of distinct field names in the language."] pub fn ts_language_field_count(arg1: *const TSLanguage) -> u32; } extern "C" { #[doc = " Get the field name string for the given numerical id."] pub fn ts_language_field_name_for_id( arg1: *const TSLanguage, arg2: TSFieldId, ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the numerical id for the given field name string."] pub fn ts_language_field_id_for_name( arg1: *const TSLanguage, arg2: *const ::std::os::raw::c_char, arg3: u32, ) -> TSFieldId; } extern "C" { #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,"] #[doc = " or a hidden nodes."] #[doc = ""] #[doc = " See also `ts_node_is_named`. Hidden nodes are never returned from the API."] pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } extern "C" { #[doc = " Get the ABI version number for this language. 
This version number is used"] #[doc = " to ensure that languages were generated by a compatible version of"] #[doc = " Tree-sitter."] #[doc = ""] #[doc = " See also `ts_parser_set_language`."] pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } extern "C" { #[doc = " Set the allocation functions used by the library."] #[doc = ""] #[doc = " By default, Tree-sitter uses the standard libc allocation functions,"] #[doc = " but aborts the process when an allocation fails. This function lets"] #[doc = " you supply alternative allocation functions at runtime."] #[doc = ""] #[doc = " If you pass `NULL` for any parameter, Tree-sitter will switch back to"] #[doc = " its default implementation of that function."] #[doc = ""] #[doc = " If you call this function after the library has already been used, then"] #[doc = " you must ensure that either:"] #[doc = " 1. All the existing objects have been freed."] #[doc = " 2. The new allocator shares its state with the old one, so it is capable"] #[doc = " of freeing memory that was allocated by the old allocator."] pub fn ts_set_allocator( new_malloc: ::std::option::Option< unsafe extern "C" fn(arg1: usize) -> *mut ::std::os::raw::c_void, >, new_calloc: ::std::option::Option< unsafe extern "C" fn(arg1: usize, arg2: usize) -> *mut ::std::os::raw::c_void, >, new_realloc: ::std::option::Option< unsafe extern "C" fn( arg1: *mut ::std::os::raw::c_void, arg2: usize, ) -> *mut ::std::os::raw::c_void, >, new_free: ::std::option::Option, ); } pub const TREE_SITTER_LANGUAGE_VERSION: usize = 14; pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; tree-sitter-0.20.10/binding_rust/build.rs000064400000000000000000000027341046102023000163750ustar 00000000000000use std::path::{Path, PathBuf}; use std::{env, fs}; fn main() { println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS"); if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() { if let (Some(clang_path), Some(scan_build_path)) = (which("clang"), 
which("scan-build")) { let clang_path = clang_path.to_str().unwrap(); let scan_build_path = scan_build_path.to_str().unwrap(); env::set_var( "CC", &format!( "{} -analyze-headers --use-analyzer={} cc", scan_build_path, clang_path ), ); } } let src_path = Path::new("src"); for entry in fs::read_dir(&src_path).unwrap() { let entry = entry.unwrap(); let path = src_path.join(entry.file_name()); println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); } cc::Build::new() .flag_if_supported("-std=c99") .flag_if_supported("-Wno-unused-parameter") .include(src_path) .include("include") .file(src_path.join("lib.c")) .compile("tree-sitter"); } fn which(exe_name: impl AsRef) -> Option { env::var_os("PATH").and_then(|paths| { env::split_paths(&paths).find_map(|dir| { let full_path = dir.join(&exe_name); if full_path.is_file() { Some(full_path) } else { None } }) }) } tree-sitter-0.20.10/binding_rust/ffi.rs000064400000000000000000000003121046102023000160300ustar 00000000000000#![allow(dead_code)] #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] include!("./bindings.rs"); extern "C" { pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; } tree-sitter-0.20.10/binding_rust/lib.rs000064400000000000000000002463401046102023000160470ustar 00000000000000mod ffi; mod util; #[cfg(unix)] use std::os::unix::io::AsRawFd; use std::{ char, error, ffi::CStr, fmt, hash, iter, marker::PhantomData, mem::MaybeUninit, ops, os::raw::{c_char, c_void}, ptr::{self, NonNull}, slice, str, sync::atomic::AtomicUsize, u16, }; /// The latest ABI version that is supported by the current version of the /// library. /// /// When Languages are generated by the Tree-sitter CLI, they are /// assigned an ABI version number that corresponds to the current CLI version. /// The Tree-sitter library is generally backwards-compatible with languages /// generated using older CLI versions, but is not forwards-compatible. 
#[doc(alias = "TREE_SITTER_LANGUAGE_VERSION")] pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION; /// The earliest ABI version that is supported by the current version of the /// library. #[doc(alias = "TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION")] pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION; pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); /// An opaque object that defines how to parse a particular language. The code for each /// `Language` is generated by the Tree-sitter CLI. #[doc(alias = "TSLanguage")] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[repr(transparent)] pub struct Language(*const ffi::TSLanguage); /// A tree that represents the syntactic structure of a source code file. #[doc(alias = "TSTree")] pub struct Tree(NonNull); /// A position in a multi-line text document, in terms of rows and columns. /// /// Rows and columns are zero-based. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Point { pub row: usize, pub column: usize, } /// A range of positions in a multi-line text document, both in terms of bytes and of /// rows and columns. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Range { pub start_byte: usize, pub end_byte: usize, pub start_point: Point, pub end_point: Point, } /// A summary of a change to a text document. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct InputEdit { pub start_byte: usize, pub old_end_byte: usize, pub new_end_byte: usize, pub start_position: Point, pub old_end_position: Point, pub new_end_position: Point, } /// A single node within a syntax `Tree`. #[doc(alias = "TSNode")] #[derive(Clone, Copy)] #[repr(transparent)] pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); /// A stateful object that this is used to produce a `Tree` based on some source code. 
#[doc(alias = "TSParser")] pub struct Parser(NonNull); /// A type of log message. #[derive(Debug, PartialEq, Eq)] pub enum LogType { Parse, Lex, } /// A callback that receives log messages during parser. type Logger<'a> = Box; /// A stateful object for walking a syntax `Tree` efficiently. #[doc(alias = "TSTreeCursor")] pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); /// A set of patterns that match nodes in a syntax tree. #[doc(alias = "TSQuery")] #[derive(Debug)] pub struct Query { ptr: NonNull, capture_names: Vec, capture_quantifiers: Vec>, text_predicates: Vec>, property_settings: Vec>, property_predicates: Vec>, general_predicates: Vec>, } /// A quantifier for captures #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum CaptureQuantifier { Zero, ZeroOrOne, ZeroOrMore, One, OneOrMore, } impl From for CaptureQuantifier { fn from(value: ffi::TSQuantifier) -> Self { match value { ffi::TSQuantifier_TSQuantifierZero => CaptureQuantifier::Zero, ffi::TSQuantifier_TSQuantifierZeroOrOne => CaptureQuantifier::ZeroOrOne, ffi::TSQuantifier_TSQuantifierZeroOrMore => CaptureQuantifier::ZeroOrMore, ffi::TSQuantifier_TSQuantifierOne => CaptureQuantifier::One, ffi::TSQuantifier_TSQuantifierOneOrMore => CaptureQuantifier::OneOrMore, _ => panic!("Unrecognized quantifier: {}", value), } } } /// A stateful object for executing a `Query` on a syntax `Tree`. #[doc(alias = "TSQueryCursor")] pub struct QueryCursor { ptr: NonNull, } /// A key-value pair associated with a particular pattern in a `Query`. #[derive(Debug, PartialEq, Eq)] pub struct QueryProperty { pub key: Box, pub value: Option>, pub capture_id: Option, } #[derive(Debug, PartialEq, Eq)] pub enum QueryPredicateArg { Capture(u32), String(Box), } /// A key-value pair associated with a particular pattern in a `Query`. #[derive(Debug, PartialEq, Eq)] pub struct QueryPredicate { pub operator: Box, pub args: Vec, } /// A match of a `Query` to a particular set of `Node`s. 
pub struct QueryMatch<'cursor, 'tree> { pub pattern_index: usize, pub captures: &'cursor [QueryCapture<'tree>], id: u32, cursor: *mut ffi::TSQueryCursor, } /// A sequence of `QueryMatch`es associated with a given `QueryCursor`. pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<'a>> { ptr: *mut ffi::TSQueryCursor, query: &'a Query, text_provider: T, buffer1: Vec, buffer2: Vec, _tree: PhantomData<&'tree ()>, } /// A sequence of `QueryCapture`s associated with a given `QueryCursor`. pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<'a>> { ptr: *mut ffi::TSQueryCursor, query: &'a Query, text_provider: T, buffer1: Vec, buffer2: Vec, _tree: PhantomData<&'tree ()>, } pub trait TextProvider<'a> { type I: Iterator + 'a; fn text(&mut self, node: Node) -> Self::I; } /// A particular `Node` that has been captured with a particular name within a `Query`. #[derive(Clone, Copy, Debug)] #[repr(C)] pub struct QueryCapture<'a> { pub node: Node<'a>, pub index: u32, } /// An error that occurred when trying to assign an incompatible `Language` to a `Parser`. #[derive(Debug, PartialEq, Eq)] pub struct LanguageError { version: usize, } /// An error that occurred in `Parser::set_included_ranges`. #[derive(Debug, PartialEq, Eq)] pub struct IncludedRangesError(pub usize); /// An error that occurred when trying to create a `Query`. #[derive(Debug, PartialEq, Eq)] pub struct QueryError { pub row: usize, pub column: usize, pub offset: usize, pub message: String, pub kind: QueryErrorKind, } #[derive(Debug, PartialEq, Eq)] pub enum QueryErrorKind { Syntax, NodeType, Field, Capture, Predicate, Structure, Language, } #[derive(Debug)] enum TextPredicate { CaptureEqString(u32, String, bool), CaptureEqCapture(u32, u32, bool), CaptureMatchString(u32, regex::bytes::Regex, bool), } // TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` // is ever stabilized. 
pub struct LossyUtf8<'a> { bytes: &'a [u8], in_replacement: bool, } impl Language { /// Get the ABI version number that indicates which version of the Tree-sitter CLI /// that was used to generate this `Language`. #[doc(alias = "ts_language_version")] pub fn version(&self) -> usize { unsafe { ffi::ts_language_version(self.0) as usize } } /// Get the number of distinct node types in this language. #[doc(alias = "ts_language_symbol_count")] pub fn node_kind_count(&self) -> usize { unsafe { ffi::ts_language_symbol_count(self.0) as usize } } /// Get the name of the node kind for the given numerical id. #[doc(alias = "ts_language_symbol_name")] pub fn node_kind_for_id(&self, id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_symbol_name(self.0, id) }; if ptr.is_null() { None } else { Some(unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } } /// Get the numeric id for the given node kind. #[doc(alias = "ts_language_symbol_for_name")] pub fn id_for_node_kind(&self, kind: &str, named: bool) -> u16 { unsafe { ffi::ts_language_symbol_for_name( self.0, kind.as_bytes().as_ptr() as *const c_char, kind.len() as u32, named, ) } } /// Check if the node type for the given numerical id is named (as opposed /// to an anonymous node type). pub fn node_kind_is_named(&self, id: u16) -> bool { unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } } #[doc(alias = "ts_language_symbol_type")] pub fn node_kind_is_visible(&self, id: u16) -> bool { unsafe { ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolType_TSSymbolTypeAnonymous } } /// Get the number of distinct field names in this language. #[doc(alias = "ts_language_field_count")] pub fn field_count(&self) -> usize { unsafe { ffi::ts_language_field_count(self.0) as usize } } /// Get the field names for the given numerical id. 
#[doc(alias = "ts_language_field_name_for_id")] pub fn field_name_for_id(&self, field_id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_field_name_for_id(self.0, field_id) }; if ptr.is_null() { None } else { Some(unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } } /// Get the numerical id for the given field name. #[doc(alias = "ts_language_field_id_for_name")] pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { let field_name = field_name.as_ref(); let id = unsafe { ffi::ts_language_field_id_for_name( self.0, field_name.as_ptr() as *const c_char, field_name.len() as u32, ) }; if id == 0 { None } else { Some(id) } } } impl Parser { /// Create a new parser. pub fn new() -> Parser { unsafe { let parser = ffi::ts_parser_new(); Parser(NonNull::new_unchecked(parser)) } } /// Set the language that the parser should use for parsing. /// /// Returns a Result indicating whether or not the language was successfully /// assigned. True means assignment succeeded. False means there was a version /// mismatch: the language was generated with an incompatible version of the /// Tree-sitter CLI. Check the language's version using [Language::version] /// and compare it to this library's [LANGUAGE_VERSION](LANGUAGE_VERSION) and /// [MIN_COMPATIBLE_LANGUAGE_VERSION](MIN_COMPATIBLE_LANGUAGE_VERSION) constants. #[doc(alias = "ts_parser_set_language")] pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> { let version = language.version(); if version < MIN_COMPATIBLE_LANGUAGE_VERSION || version > LANGUAGE_VERSION { Err(LanguageError { version }) } else { unsafe { ffi::ts_parser_set_language(self.0.as_ptr(), language.0); } Ok(()) } } /// Get the parser's current language. #[doc(alias = "ts_parser_language")] pub fn language(&self) -> Option { let ptr = unsafe { ffi::ts_parser_language(self.0.as_ptr()) }; if ptr.is_null() { None } else { Some(Language(ptr)) } } /// Get the parser's current logger. 
#[doc(alias = "ts_parser_logger")] pub fn logger(&self) -> Option<&Logger> { let logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; unsafe { (logger.payload as *mut Logger).as_ref() } } /// Set the logging callback that a parser should use during parsing. #[doc(alias = "ts_parser_set_logger")] pub fn set_logger(&mut self, logger: Option) { let prev_logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; if !prev_logger.payload.is_null() { drop(unsafe { Box::from_raw(prev_logger.payload as *mut Logger) }); } let c_logger; if let Some(logger) = logger { let container = Box::new(logger); unsafe extern "C" fn log( payload: *mut c_void, c_log_type: ffi::TSLogType, c_message: *const c_char, ) { let callback = (payload as *mut Logger).as_mut().unwrap(); if let Ok(message) = CStr::from_ptr(c_message).to_str() { let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { LogType::Parse } else { LogType::Lex }; callback(log_type, message); } } let raw_container = Box::into_raw(container); c_logger = ffi::TSLogger { payload: raw_container as *mut c_void, log: Some(log), }; } else { c_logger = ffi::TSLogger { payload: ptr::null_mut(), log: None, }; } unsafe { ffi::ts_parser_set_logger(self.0.as_ptr(), c_logger) }; } /// Set the destination to which the parser should write debugging graphs /// during parsing. The graphs are formatted in the DOT language. You may want /// to pipe these graphs directly to a `dot(1)` process in order to generate /// SVG output. #[cfg(unix)] #[doc(alias = "ts_parser_print_dot_graphs")] pub fn print_dot_graphs(&mut self, file: &impl AsRawFd) { let fd = file.as_raw_fd(); unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::dup(fd)) } } /// Stop the parser from printing debugging graphs while parsing. #[doc(alias = "ts_parser_print_dot_graphs")] pub fn stop_printing_dot_graphs(&mut self) { unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), -1) } } /// Parse a slice of UTF8 text. 
/// /// # Arguments: /// * `text` The UTF8-encoded text to parse. /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using /// [Tree::edit]. /// /// Returns a [Tree] if parsing succeeded, or `None` if: /// * The parser has not yet had a language assigned with [Parser::set_language] /// * The timeout set with [Parser::set_timeout_micros] expired /// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped #[doc(alias = "ts_parser_parse")] pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { let bytes = text.as_ref(); let len = bytes.len(); self.parse_with( &mut |i, _| if i < len { &bytes[i..] } else { &[] }, old_tree, ) } /// Parse a slice of UTF16 text. /// /// # Arguments: /// * `text` The UTF16-encoded text to parse. /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using /// [Tree::edit]. pub fn parse_utf16( &mut self, input: impl AsRef<[u16]>, old_tree: Option<&Tree>, ) -> Option { let code_points = input.as_ref(); let len = code_points.len(); self.parse_utf16_with( &mut |i, _| if i < len { &code_points[i..] } else { &[] }, old_tree, ) } /// Parse UTF8 text provided in chunks by a callback. /// /// # Arguments: /// * `callback` A function that takes a byte offset and position and /// returns a slice of UTF8-encoded text starting at that byte offset /// and position. The slices can be of any length. If the given position /// is at the end of the text, the callback should return an empty slice. /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using /// [Tree::edit]. 
pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, ) -> Option { // A pointer to this payload is passed on every call to the `read` C function. // The payload contains two things: // 1. A reference to the rust `callback`. // 2. The text that was returned from the previous call to `callback`. // This allows the callback to return owned values like vectors. let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); *text = Some(callback(byte_offset as usize, position.into())); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32; return slice.as_ptr() as *const c_char; } let c_input = ffi::TSInput { payload: &mut payload as *mut (&mut F, Option) as *mut c_void, read: Some(read::), encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); unsafe { let c_new_tree = ffi::ts_parser_parse(self.0.as_ptr(), c_old_tree, c_input); NonNull::new(c_new_tree).map(Tree) } } /// Parse UTF16 text provided in chunks by a callback. /// /// # Arguments: /// * `callback` A function that takes a code point offset and position and /// returns a slice of UTF16-encoded text starting at that byte offset /// and position. The slices can be of any length. If the given position /// is at the end of the text, the callback should return an empty slice. /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using /// [Tree::edit]. 
pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, ) -> Option { // A pointer to this payload is passed on every call to the `read` C function. // The payload contains two things: // 1. A reference to the rust `callback`. // 2. The text that was returned from the previous call to `callback`. // This allows the callback to return owned values like vectors. let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); *text = Some(callback( (byte_offset / 2) as usize, Point { row: position.row as usize, column: position.column as usize / 2, }, )); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32 * 2; slice.as_ptr() as *const c_char } let c_input = ffi::TSInput { payload: &mut payload as *mut (&mut F, Option) as *mut c_void, read: Some(read::), encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); unsafe { let c_new_tree = ffi::ts_parser_parse(self.0.as_ptr(), c_old_tree, c_input); NonNull::new(c_new_tree).map(Tree) } } /// Instruct the parser to start the next parse from the beginning. /// /// If the parser previously failed because of a timeout or a cancellation, then /// by default, it will resume where it left off on the next call to `parse` or /// other parsing functions. If you don't want to resume, and instead intend to /// use this parser to parse some other document, you must call `reset` first. 
#[doc(alias = "ts_parser_reset")] pub fn reset(&mut self) { unsafe { ffi::ts_parser_reset(self.0.as_ptr()) } } /// Get the duration in microseconds that parsing is allowed to take. /// /// This is set via [set_timeout_micros](Parser::set_timeout_micros). #[doc(alias = "ts_parser_timeout_micros")] pub fn timeout_micros(&self) -> u64 { unsafe { ffi::ts_parser_timeout_micros(self.0.as_ptr()) } } /// Set the maximum duration in microseconds that parsing should be allowed to /// take before halting. /// /// If parsing takes longer than this, it will halt early, returning `None`. /// See `parse` for more information. #[doc(alias = "ts_parser_set_timeout_micros")] pub fn set_timeout_micros(&mut self, timeout_micros: u64) { unsafe { ffi::ts_parser_set_timeout_micros(self.0.as_ptr(), timeout_micros) } } /// Set the ranges of text that the parser should include when parsing. /// /// By default, the parser will always include entire documents. This function /// allows you to parse only a *portion* of a document but still return a syntax /// tree whose ranges match up with the document as a whole. You can also pass /// multiple disjoint ranges. /// /// If `ranges` is empty, then the entire document will be parsed. Otherwise, /// the given ranges must be ordered from earliest to latest in the document, /// and they must not overlap. That is, the following must hold for all /// `i` < `length - 1`: /// ```text /// ranges[i].end_byte <= ranges[i + 1].start_byte /// ``` /// If this requirement is not satisfied, method will return IncludedRangesError /// error with an offset in the passed ranges slice pointing to a first incorrect range. 
#[doc(alias = "ts_parser_set_included_ranges")] pub fn set_included_ranges<'a>( &mut self, ranges: &'a [Range], ) -> Result<(), IncludedRangesError> { let ts_ranges: Vec = ranges.iter().cloned().map(|range| range.into()).collect(); let result = unsafe { ffi::ts_parser_set_included_ranges( self.0.as_ptr(), ts_ranges.as_ptr(), ts_ranges.len() as u32, ) }; if result { Ok(()) } else { let mut prev_end_byte = 0; for (i, range) in ranges.iter().enumerate() { if range.start_byte < prev_end_byte || range.end_byte < range.start_byte { return Err(IncludedRangesError(i)); } prev_end_byte = range.end_byte; } Err(IncludedRangesError(0)) } } /// Get the parser's current cancellation flag pointer. #[doc(alias = "ts_parser_cancellation_flag")] pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> { (ffi::ts_parser_cancellation_flag(self.0.as_ptr()) as *const AtomicUsize).as_ref() } /// Set the parser's current cancellation flag pointer. /// /// If a pointer is assigned, then the parser will periodically read from /// this pointer during parsing. If it reads a non-zero value, it will halt early, /// returning `None`. See [parse](Parser::parse) for more information. #[doc(alias = "ts_parser_set_cancellation_flag")] pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) { if let Some(flag) = flag { ffi::ts_parser_set_cancellation_flag( self.0.as_ptr(), flag as *const AtomicUsize as *const usize, ); } else { ffi::ts_parser_set_cancellation_flag(self.0.as_ptr(), ptr::null()); } } } impl Drop for Parser { fn drop(&mut self) { self.stop_printing_dot_graphs(); self.set_logger(None); unsafe { ffi::ts_parser_delete(self.0.as_ptr()) } } } impl Tree { /// Get the root node of the syntax tree. #[doc(alias = "ts_tree_root_node")] pub fn root_node(&self) -> Node { Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap() } /// Get the root node of the syntax tree, but with its position shifted /// forward by the given offset. 
    #[doc(alias = "ts_tree_root_node_with_offset")]
    pub fn root_node_with_offset(&self, offset_bytes: usize, offset_extent: Point) -> Node {
        Node::new(unsafe {
            ffi::ts_tree_root_node_with_offset(
                self.0.as_ptr(),
                offset_bytes as u32,
                offset_extent.into(),
            )
        })
        .unwrap()
    }

    /// Get the language that was used to parse the syntax tree.
    #[doc(alias = "ts_tree_language")]
    pub fn language(&self) -> Language {
        Language(unsafe { ffi::ts_tree_language(self.0.as_ptr()) })
    }

    /// Edit the syntax tree to keep it in sync with source code that has been
    /// edited.
    ///
    /// You must describe the edit both in terms of byte offsets and in terms of
    /// row/column coordinates.
    #[doc(alias = "ts_tree_edit")]
    pub fn edit(&mut self, edit: &InputEdit) {
        let edit = edit.into();
        unsafe { ffi::ts_tree_edit(self.0.as_ptr(), &edit) };
    }

    /// Create a new [TreeCursor] starting from the root of the tree.
    pub fn walk(&self) -> TreeCursor {
        self.root_node().walk()
    }

    /// Compare this old edited syntax tree to a new syntax tree representing the same
    /// document, returning a sequence of ranges whose syntactic structure has changed.
    ///
    /// For this to work correctly, this syntax tree must have been edited such that its
    /// ranges match up to the new tree. Generally, you'll want to call this method right
    /// after calling one of the [Parser::parse] functions. Call it on the old tree that
    /// was passed to parse, and pass the new tree that was returned from `parse`.
    #[doc(alias = "ts_tree_get_changed_ranges")]
    pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator<Item = Range> {
        let mut count = 0u32;
        unsafe {
            let ptr = ffi::ts_tree_get_changed_ranges(
                self.0.as_ptr(),
                other.0.as_ptr(),
                &mut count as *mut u32,
            );
            // NOTE(review): CBufferIter presumably releases the C-allocated
            // buffer when dropped — confirm in the `util` module.
            util::CBufferIter::new(ptr, count as usize).map(|r| r.into())
        }
    }

    /// Get the included ranges that were used to parse the syntax tree.
    pub fn included_ranges(&self) -> Vec<Range> {
        let mut count = 0u32;
        unsafe {
            let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), &mut count as *mut u32);
            let ranges = slice::from_raw_parts(ptr, count as usize);
            let result = ranges.iter().copied().map(|range| range.into()).collect();
            // The buffer is allocated by the C library; free it with the
            // library's allocator after copying the ranges out.
            (FREE_FN)(ptr as *mut c_void);
            result
        }
    }

    /// Print a graph of the tree to the given file descriptor.
    /// The graph is formatted in the DOT language. You may want to pipe this graph
    /// directly to a `dot(1)` process in order to generate SVG output.
    #[cfg(unix)]
    #[doc(alias = "ts_tree_print_dot_graph")]
    pub fn print_dot_graph(&self, file: &impl AsRawFd) {
        let fd = file.as_raw_fd();
        unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) }
    }
}

impl fmt::Debug for Tree {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "{{Tree {:?}}}", self.root_node())
    }
}

impl Drop for Tree {
    fn drop(&mut self) {
        unsafe { ffi::ts_tree_delete(self.0.as_ptr()) }
    }
}

impl Clone for Tree {
    fn clone(&self) -> Tree {
        // ts_tree_copy returns a non-null pointer for a valid tree.
        unsafe { Tree(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) }
    }
}

impl<'tree> Node<'tree> {
    // Internal constructor: a null `id` marks the C library's "no node" value.
    fn new(node: ffi::TSNode) -> Option<Self> {
        if node.id.is_null() {
            None
        } else {
            Some(Node(node, PhantomData))
        }
    }

    /// Get a numeric id for this node that is unique.
    ///
    /// Within a given syntax tree, no two nodes have the same id. However, if
    /// a new tree is created based on an older tree, and a node from the old
    /// tree is reused in the process, then that node will have the same id in
    /// both trees.
    pub fn id(&self) -> usize {
        self.0.id as usize
    }

    /// Get this node's type as a numerical id.
    #[doc(alias = "ts_node_symbol")]
    pub fn kind_id(&self) -> u16 {
        unsafe { ffi::ts_node_symbol(self.0) }
    }

    /// Get this node's type as a string.
    #[doc(alias = "ts_node_type")]
    pub fn kind(&self) -> &'static str {
        // Node type names are static strings owned by the language, so the
        // `'static` lifetime is sound and the UTF-8 conversion cannot fail.
        unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) }
            .to_str()
            .unwrap()
    }

    /// Get the [Language] that was used to parse this node's syntax tree.
    #[doc(alias = "ts_tree_language")]
    pub fn language(&self) -> Language {
        Language(unsafe { ffi::ts_tree_language(self.0.tree) })
    }

    /// Check if this node is *named*.
    ///
    /// Named nodes correspond to named rules in the grammar, whereas *anonymous* nodes
    /// correspond to string literals in the grammar.
    #[doc(alias = "ts_node_is_named")]
    pub fn is_named(&self) -> bool {
        unsafe { ffi::ts_node_is_named(self.0) }
    }

    /// Check if this node is *extra*.
    ///
    /// Extra nodes represent things like comments, which are not required by the
    /// grammar, but can appear anywhere.
    #[doc(alias = "ts_node_is_extra")]
    pub fn is_extra(&self) -> bool {
        unsafe { ffi::ts_node_is_extra(self.0) }
    }

    /// Check if this node has been edited.
    #[doc(alias = "ts_node_has_changes")]
    pub fn has_changes(&self) -> bool {
        unsafe { ffi::ts_node_has_changes(self.0) }
    }

    /// Check if this node represents a syntax error or contains any syntax errors anywhere
    /// within it.
    #[doc(alias = "ts_node_has_error")]
    pub fn has_error(&self) -> bool {
        unsafe { ffi::ts_node_has_error(self.0) }
    }

    /// Check if this node represents a syntax error.
    ///
    /// Syntax errors represent parts of the code that could not be incorporated into a
    /// valid syntax tree.
    pub fn is_error(&self) -> bool {
        // The error node uses the maximum symbol value as a sentinel
        // (presumably `ts_builtin_sym_error` in the C library — confirm).
        self.kind_id() == u16::MAX
    }

    /// Check if this node is *missing*.
    ///
    /// Missing nodes are inserted by the parser in order to recover from certain kinds of
    /// syntax errors.
    #[doc(alias = "ts_node_is_missing")]
    pub fn is_missing(&self) -> bool {
        unsafe { ffi::ts_node_is_missing(self.0) }
    }

    /// Get the byte offset where this node starts.
    #[doc(alias = "ts_node_start_byte")]
    pub fn start_byte(&self) -> usize {
        unsafe { ffi::ts_node_start_byte(self.0) as usize }
    }

    /// Get the byte offset where this node ends.
    #[doc(alias = "ts_node_end_byte")]
    pub fn end_byte(&self) -> usize {
        unsafe { ffi::ts_node_end_byte(self.0) as usize }
    }

    /// Get the byte range of source code that this node represents.
    pub fn byte_range(&self) -> std::ops::Range<usize> {
        self.start_byte()..self.end_byte()
    }

    /// Get the range of source code that this node represents, both in terms of raw bytes
    /// and of row/column coordinates.
    pub fn range(&self) -> Range {
        Range {
            start_byte: self.start_byte(),
            end_byte: self.end_byte(),
            start_point: self.start_position(),
            end_point: self.end_position(),
        }
    }

    /// Get this node's start position in terms of rows and columns.
    #[doc(alias = "ts_node_start_point")]
    pub fn start_position(&self) -> Point {
        let result = unsafe { ffi::ts_node_start_point(self.0) };
        result.into()
    }

    /// Get this node's end position in terms of rows and columns.
    #[doc(alias = "ts_node_end_point")]
    pub fn end_position(&self) -> Point {
        let result = unsafe { ffi::ts_node_end_point(self.0) };
        result.into()
    }

    /// Get the node's child at the given index, where zero represents the first
    /// child.
    ///
    /// This method is fairly fast, but its cost is technically log(i), so if
    /// you might be iterating over a long list of children, you should use
    /// [Node::children] instead.
    #[doc(alias = "ts_node_child")]
    pub fn child(&self, i: usize) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) })
    }

    /// Get this node's number of children.
    #[doc(alias = "ts_node_child_count")]
    pub fn child_count(&self) -> usize {
        unsafe { ffi::ts_node_child_count(self.0) as usize }
    }

    /// Get this node's *named* child at the given index.
    ///
    /// See also [Node::is_named].
    /// This method is fairly fast, but its cost is technically log(i), so if
    /// you might be iterating over a long list of children, you should use
    /// [Node::named_children] instead.
    #[doc(alias = "ts_node_named_child")]
    pub fn named_child<'a>(&'a self, i: usize) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) })
    }

    /// Get this node's number of *named* children.
    ///
    /// See also [Node::is_named].
    #[doc(alias = "ts_node_named_child_count")]
    pub fn named_child_count(&self) -> usize {
        unsafe { ffi::ts_node_named_child_count(self.0) as usize }
    }

    /// Get the first child with the given field name.
    ///
    /// If multiple children may have the same field name, access them using
    /// [children_by_field_name](Node::children_by_field_name)
    #[doc(alias = "ts_node_child_by_field_name")]
    pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option<Self> {
        let field_name = field_name.as_ref();
        // The field name is passed as a pointer + length pair, so it does not
        // need to be NUL-terminated.
        Self::new(unsafe {
            ffi::ts_node_child_by_field_name(
                self.0,
                field_name.as_ptr() as *const c_char,
                field_name.len() as u32,
            )
        })
    }

    /// Get this node's child with the given numerical field id.
    ///
    /// See also [child_by_field_name](Node::child_by_field_name). You can convert a field name to
    /// an id using [Language::field_id_for_name].
    #[doc(alias = "ts_node_child_by_field_id")]
    pub fn child_by_field_id(&self, field_id: u16) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) })
    }

    /// Get the field name of this node's child at the given index.
    #[doc(alias = "ts_node_field_name_for_child")]
    pub fn field_name_for_child(&self, child_index: u32) -> Option<&'static str> {
        unsafe {
            let ptr = ffi::ts_node_field_name_for_child(self.0, child_index);
            if ptr.is_null() {
                None
            } else {
                // Field names are static strings owned by the language.
                Some(CStr::from_ptr(ptr).to_str().unwrap())
            }
        }
    }

    /// Iterate over this node's children.
    ///
    /// A [TreeCursor] is used to retrieve the children efficiently. Obtain
    /// a [TreeCursor] by calling [Tree::walk] or [Node::walk]. To avoid unnecessary
    /// allocations, you should reuse the same cursor for subsequent calls to
    /// this method.
    ///
    /// If you're walking the tree recursively, you may want to use the `TreeCursor`
    /// APIs directly instead.
pub fn children<'a>( &self, cursor: &'a mut TreeCursor<'tree>, ) -> impl ExactSizeIterator> + 'a { cursor.reset(*self); cursor.goto_first_child(); (0..self.child_count()).into_iter().map(move |_| { let result = cursor.node(); cursor.goto_next_sibling(); result }) } /// Iterate over this node's named children. /// /// See also [Node::children]. pub fn named_children<'a>( &self, cursor: &'a mut TreeCursor<'tree>, ) -> impl ExactSizeIterator> + 'a { cursor.reset(*self); cursor.goto_first_child(); (0..self.named_child_count()).into_iter().map(move |_| { while !cursor.node().is_named() { if !cursor.goto_next_sibling() { break; } } let result = cursor.node(); cursor.goto_next_sibling(); result }) } /// Iterate over this node's children with a given field name. /// /// See also [Node::children]. pub fn children_by_field_name<'a>( &self, field_name: &str, cursor: &'a mut TreeCursor<'tree>, ) -> impl Iterator> + 'a { let field_id = self.language().field_id_for_name(field_name); self.children_by_field_id(field_id.unwrap_or(0), cursor) } /// Iterate over this node's children with a given field id. /// /// See also [Node::children_by_field_name]. pub fn children_by_field_id<'a>( &self, field_id: u16, cursor: &'a mut TreeCursor<'tree>, ) -> impl Iterator> + 'a { cursor.reset(*self); cursor.goto_first_child(); let mut done = false; iter::from_fn(move || { while !done { while cursor.field_id() != Some(field_id) { if !cursor.goto_next_sibling() { return None; } } let result = cursor.node(); if !cursor.goto_next_sibling() { done = true; } return Some(result); } None }) } /// Get this node's immediate parent. #[doc(alias = "ts_node_parent")] pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } /// Get this node's next sibling. #[doc(alias = "ts_node_next_sibling")] pub fn next_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) } /// Get this node's previous sibling. 
    #[doc(alias = "ts_node_prev_sibling")]
    pub fn prev_sibling(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) })
    }

    /// Get this node's next named sibling.
    #[doc(alias = "ts_node_next_named_sibling")]
    pub fn next_named_sibling(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) })
    }

    /// Get this node's previous named sibling.
    #[doc(alias = "ts_node_prev_named_sibling")]
    pub fn prev_named_sibling(&self) -> Option<Self> {
        Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) })
    }

    /// Get the smallest node within this node that spans the given byte range.
    #[doc(alias = "ts_node_descendant_for_byte_range")]
    pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
        Self::new(unsafe {
            ffi::ts_node_descendant_for_byte_range(self.0, start as u32, end as u32)
        })
    }

    /// Get the smallest named node within this node that spans the given byte range.
    #[doc(alias = "ts_node_named_descendant_for_byte_range")]
    pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
        Self::new(unsafe {
            ffi::ts_node_named_descendant_for_byte_range(self.0, start as u32, end as u32)
        })
    }

    /// Get the smallest node within this node that spans the given point range.
    #[doc(alias = "ts_node_descendant_for_point_range")]
    pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option<Self> {
        Self::new(unsafe {
            ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into())
        })
    }

    /// Get the smallest named node within this node that spans the given range.
#[doc(alias = "ts_node_named_descendant_for_point_range")] pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option { Self::new(unsafe { ffi::ts_node_named_descendant_for_point_range(self.0, start.into(), end.into()) }) } #[doc(alias = "ts_node_string")] pub fn to_sexp(&self) -> String { let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) } .to_str() .unwrap() .to_string(); unsafe { (FREE_FN)(c_string as *mut c_void) }; result } pub fn utf8_text<'a>(&self, source: &'a [u8]) -> Result<&'a str, str::Utf8Error> { str::from_utf8(&source[self.start_byte()..self.end_byte()]) } pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { &source.as_ref()[self.start_byte()..self.end_byte()] } /// Create a new [TreeCursor] starting from this node. #[doc(alias = "ts_tree_cursor_new")] pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) } /// Edit this node to keep it in-sync with source code that has been edited. /// /// This function is only rarely needed. When you edit a syntax tree with the /// [Tree::edit] method, all of the nodes that you retrieve from the tree /// afterward will already reflect the edit. You only need to use [Node::edit] /// when you have a specific [Node] instance that you want to keep and continue /// to use after an edit. 
#[doc(alias = "ts_node_edit")] pub fn edit(&mut self, edit: &InputEdit) { let edit = edit.into(); unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) } } } impl<'a> PartialEq for Node<'a> { fn eq(&self, other: &Self) -> bool { self.0.id == other.0.id } } impl<'a> Eq for Node<'a> {} impl<'a> hash::Hash for Node<'a> { fn hash(&self, state: &mut H) { self.0.id.hash(state); self.0.context[0].hash(state); self.0.context[1].hash(state); self.0.context[2].hash(state); self.0.context[3].hash(state); } } impl<'a> fmt::Debug for Node<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!( f, "{{Node {} {} - {}}}", self.kind(), self.start_position(), self.end_position() ) } } impl<'a> TreeCursor<'a> { /// Get the tree cursor's current [Node]. #[doc(alias = "ts_tree_cursor_current_node")] pub fn node(&self) -> Node<'a> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, ) } /// Get the numerical field id of this tree cursor's current node. /// /// See also [field_name](TreeCursor::field_name). #[doc(alias = "ts_tree_cursor_current_field_id")] pub fn field_id(&self) -> Option { unsafe { let id = ffi::ts_tree_cursor_current_field_id(&self.0); if id == 0 { None } else { Some(id) } } } /// Get the field name of this tree cursor's current node. #[doc(alias = "ts_tree_cursor_current_field_name")] pub fn field_name(&self) -> Option<&'static str> { unsafe { let ptr = ffi::ts_tree_cursor_current_field_name(&self.0); if ptr.is_null() { None } else { Some(CStr::from_ptr(ptr).to_str().unwrap()) } } } /// Move this cursor to the first child of its current node. /// /// This returns `true` if the cursor successfully moved, and returns `false` /// if there were no children. #[doc(alias = "ts_tree_cursor_goto_first_child")] pub fn goto_first_child(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; } /// Move this cursor to the parent of its current node. 
/// /// This returns `true` if the cursor successfully moved, and returns `false` /// if there was no parent node (the cursor was already on the root node). #[doc(alias = "ts_tree_cursor_goto_parent")] pub fn goto_parent(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }; } /// Move this cursor to the next sibling of its current node. /// /// This returns `true` if the cursor successfully moved, and returns `false` /// if there was no next sibling node. #[doc(alias = "ts_tree_cursor_goto_next_sibling")] pub fn goto_next_sibling(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; } /// Move this cursor to the first child of its current node that extends beyond /// the given byte offset. /// /// This returns the index of the child node if one was found, and returns `None` /// if no such child was found. #[doc(alias = "ts_tree_cursor_goto_first_child_for_byte")] pub fn goto_first_child_for_byte(&mut self, index: usize) -> Option { let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; if result < 0 { None } else { Some(result as usize) } } /// Move this cursor to the first child of its current node that extends beyond /// the given byte offset. /// /// This returns the index of the child node if one was found, and returns `None` /// if no such child was found. #[doc(alias = "ts_tree_cursor_goto_first_child_for_point")] pub fn goto_first_child_for_point(&mut self, point: Point) -> Option { let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_point(&mut self.0, point.into()) }; if result < 0 { None } else { Some(result as usize) } } /// Re-initialize this tree cursor to start at a different node. 
#[doc(alias = "ts_tree_cursor_reset")] pub fn reset(&mut self, node: Node<'a>) { unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; } } impl<'a> Clone for TreeCursor<'a> { fn clone(&self) -> Self { TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData) } } impl<'a> Drop for TreeCursor<'a> { fn drop(&mut self) { unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } } } impl Query { /// Create a new query from a string containing one or more S-expression /// patterns. /// /// The query is associated with a particular language, and can only be run /// on syntax nodes parsed with that language. References to Queries can be /// shared between multiple threads. pub fn new(language: Language, source: &str) -> Result { let mut error_offset = 0u32; let mut error_type: ffi::TSQueryError = 0; let bytes = source.as_bytes(); // Compile the query. let ptr = unsafe { ffi::ts_query_new( language.0, bytes.as_ptr() as *const c_char, bytes.len() as u32, &mut error_offset as *mut u32, &mut error_type as *mut ffi::TSQueryError, ) }; // On failure, build an error based on the error code and offset. 
if ptr.is_null() { if error_type == ffi::TSQueryError_TSQueryErrorLanguage { return Err(QueryError { row: 0, column: 0, offset: 0, message: LanguageError { version: language.version(), } .to_string(), kind: QueryErrorKind::Language, }); } let offset = error_offset as usize; let mut line_start = 0; let mut row = 0; let mut line_containing_error = None; for line in source.split("\n") { let line_end = line_start + line.len() + 1; if line_end > offset { line_containing_error = Some(line); break; } line_start = line_end; row += 1; } let column = offset - line_start; let kind; let message; match error_type { // Error types that report names ffi::TSQueryError_TSQueryErrorNodeType | ffi::TSQueryError_TSQueryErrorField | ffi::TSQueryError_TSQueryErrorCapture => { let suffix = source.split_at(offset).1; let end_offset = suffix .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') .unwrap_or(source.len()); message = suffix.split_at(end_offset).0.to_string(); kind = match error_type { ffi::TSQueryError_TSQueryErrorNodeType => QueryErrorKind::NodeType, ffi::TSQueryError_TSQueryErrorField => QueryErrorKind::Field, ffi::TSQueryError_TSQueryErrorCapture => QueryErrorKind::Capture, _ => unreachable!(), }; } // Error types that report positions _ => { message = if let Some(line) = line_containing_error { line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" } else { "Unexpected EOF".to_string() }; kind = match error_type { ffi::TSQueryError_TSQueryErrorStructure => QueryErrorKind::Structure, _ => QueryErrorKind::Syntax, }; } }; return Err(QueryError { row, column, offset, kind, message, }); } let string_count = unsafe { ffi::ts_query_string_count(ptr) }; let capture_count = unsafe { ffi::ts_query_capture_count(ptr) }; let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr) as usize }; let mut result = Query { ptr: unsafe { NonNull::new_unchecked(ptr) }, capture_names: Vec::with_capacity(capture_count as usize), capture_quantifiers: 
Vec::with_capacity(pattern_count as usize), text_predicates: Vec::with_capacity(pattern_count), property_predicates: Vec::with_capacity(pattern_count), property_settings: Vec::with_capacity(pattern_count), general_predicates: Vec::with_capacity(pattern_count), }; // Build a vector of strings to store the capture names. for i in 0..capture_count { unsafe { let mut length = 0u32; let name = ffi::ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); result.capture_names.push(name.to_string()); } } // Build a vector to store capture qunatifiers. for i in 0..pattern_count { let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); for j in 0..capture_count { unsafe { let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); capture_quantifiers.push(quantifier.into()); } } result.capture_quantifiers.push(capture_quantifiers); } // Build a vector of strings to represent literal values used in predicates. let string_values = (0..string_count) .map(|i| unsafe { let mut length = 0u32; let value = ffi::ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) as *const u8; let value = slice::from_raw_parts(value, length as usize); let value = str::from_utf8_unchecked(value); value.to_string() }) .collect::>(); // Build a vector of predicates for each pattern. 
for i in 0..pattern_count { let predicate_steps = unsafe { let mut length = 0u32; let raw_predicates = ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); if length > 0 { slice::from_raw_parts(raw_predicates, length as usize) } else { &[] } }; let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; let row = source .char_indices() .take_while(|(i, _)| *i < byte_offset as usize) .filter(|(_, c)| *c == '\n') .count(); let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; let mut text_predicates = Vec::new(); let mut property_predicates = Vec::new(); let mut property_settings = Vec::new(); let mut general_predicates = Vec::new(); for p in predicate_steps.split(|s| s.type_ == type_done) { if p.is_empty() { continue; } if p[0].type_ != type_string { return Err(predicate_error( row, format!( "Expected predicate to start with a function name. Got @{}.", result.capture_names[p[0].value_id as usize], ), )); } // Build a predicate for each of the known predicate function names. let operator_name = &string_values[p[0].value_id as usize]; match operator_name.as_str() { "eq?" | "not-eq?" => { if p.len() != 3 { return Err(predicate_error( row, format!( "Wrong number of arguments to #eq? predicate. Expected 2, got {}.", p.len() - 1 ), )); } if p[1].type_ != type_capture { return Err(predicate_error(row, format!( "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } let is_positive = operator_name == "eq?"; text_predicates.push(if p[2].type_ == type_capture { TextPredicate::CaptureEqCapture( p[1].value_id, p[2].value_id, is_positive, ) } else { TextPredicate::CaptureEqString( p[1].value_id, string_values[p[2].value_id as usize].clone(), is_positive, ) }); } "match?" 
| "not-match?" => { if p.len() != 3 { return Err(predicate_error(row, format!( "Wrong number of arguments to #match? predicate. Expected 2, got {}.", p.len() - 1 ))); } if p[1].type_ != type_capture { return Err(predicate_error(row, format!( "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } if p[2].type_ == type_capture { return Err(predicate_error(row, format!( "Second argument to #match? predicate must be a literal. Got capture @{}.", result.capture_names[p[2].value_id as usize], ))); } let is_positive = operator_name == "match?"; let regex = &string_values[p[2].value_id as usize]; text_predicates.push(TextPredicate::CaptureMatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { predicate_error(row, format!("Invalid regex '{}'", regex)) })?, is_positive, )); } "set!" => property_settings.push(Self::parse_property( row, &operator_name, &result.capture_names, &string_values, &p[1..], )?), "is?" | "is-not?" => property_predicates.push(( Self::parse_property( row, &operator_name, &result.capture_names, &string_values, &p[1..], )?, operator_name == "is?", )), _ => general_predicates.push(QueryPredicate { operator: operator_name.clone().into_boxed_str(), args: p[1..] .iter() .map(|a| { if a.type_ == type_capture { QueryPredicateArg::Capture(a.value_id) } else { QueryPredicateArg::String( string_values[a.value_id as usize].clone().into_boxed_str(), ) } }) .collect(), }), } } result .text_predicates .push(text_predicates.into_boxed_slice()); result .property_predicates .push(property_predicates.into_boxed_slice()); result .property_settings .push(property_settings.into_boxed_slice()); result .general_predicates .push(general_predicates.into_boxed_slice()); } Ok(result) } /// Get the byte offset where the given pattern starts in the query's source. 
#[doc(alias = "ts_query_start_byte_for_pattern")] pub fn start_byte_for_pattern(&self, pattern_index: usize) -> usize { if pattern_index >= self.text_predicates.len() { panic!( "Pattern index is {} but the pattern count is {}", pattern_index, self.text_predicates.len(), ); } unsafe { ffi::ts_query_start_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize } } /// Get the number of patterns in the query. #[doc(alias = "ts_query_pattern_count")] pub fn pattern_count(&self) -> usize { unsafe { ffi::ts_query_pattern_count(self.ptr.as_ptr()) as usize } } /// Get the names of the captures used in the query. pub fn capture_names(&self) -> &[String] { &self.capture_names } /// Get the quantifiers of the captures used in the query. pub fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { &self.capture_quantifiers[index] } /// Get the index for a given capture name. pub fn capture_index_for_name(&self, name: &str) -> Option { self.capture_names .iter() .position(|n| n == name) .map(|ix| ix as u32) } /// Get the properties that are checked for the given pattern index. /// /// This includes predicates with the operators `is?` and `is-not?`. pub fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] { &self.property_predicates[index] } /// Get the properties that are set for the given pattern index. /// /// This includes predicates with the operator `set!`. pub fn property_settings(&self, index: usize) -> &[QueryProperty] { &self.property_settings[index] } /// Get the other user-defined predicates associated with the given index. /// /// This includes predicate with operators other than: /// * `match?` /// * `eq?` and `not-eq?` /// * `is?` and `is-not?` /// * `set!` pub fn general_predicates(&self, index: usize) -> &[QueryPredicate] { &self.general_predicates[index] } /// Disable a certain capture within a query. 
/// /// This prevents the capture from being returned in matches, and also avoids any /// resource usage associated with recording the capture. #[doc(alias = "ts_query_disable_capture")] pub fn disable_capture(&mut self, name: &str) { unsafe { ffi::ts_query_disable_capture( self.ptr.as_ptr(), name.as_bytes().as_ptr() as *const c_char, name.len() as u32, ); } } /// Disable a certain pattern within a query. /// /// This prevents the pattern from matching, and also avoids any resource usage /// associated with the pattern. #[doc(alias = "ts_query_disable_pattern")] pub fn disable_pattern(&mut self, index: usize) { unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } } /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_rooted")] pub fn is_pattern_rooted(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_rooted(self.ptr.as_ptr(), index as u32) } } /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_non_local")] pub fn is_pattern_non_local(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_non_local(self.ptr.as_ptr(), index as u32) } } /// Check if a given step in a query is 'definite'. /// /// A query step is 'definite' if its parent pattern will be guaranteed to match /// successfully once it reaches the step. #[doc(alias = "ts_query_is_pattern_guaranteed_at_step")] pub fn is_pattern_guaranteed_at_step(&self, byte_offset: usize) -> bool { unsafe { ffi::ts_query_is_pattern_guaranteed_at_step(self.ptr.as_ptr(), byte_offset as u32) } } fn parse_property( row: usize, function_name: &str, capture_names: &[String], string_values: &[String], args: &[ffi::TSQueryPredicateStep], ) -> Result { if args.len() == 0 || args.len() > 3 { return Err(predicate_error( row, format!( "Wrong number of arguments to {} predicate. 
Expected 1 to 3, got {}.", function_name, args.len(), ), )); } let mut capture_id = None; let mut key = None; let mut value = None; for arg in args { if arg.type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture { if capture_id.is_some() { return Err(predicate_error( row, format!( "Invalid arguments to {} predicate. Unexpected second capture name @{}", function_name, capture_names[arg.value_id as usize] ), )); } capture_id = Some(arg.value_id as usize); } else if key.is_none() { key = Some(&string_values[arg.value_id as usize]); } else if value.is_none() { value = Some(string_values[arg.value_id as usize].as_str()); } else { return Err(predicate_error( row, format!( "Invalid arguments to {} predicate. Unexpected third argument @{}", function_name, string_values[arg.value_id as usize] ), )); } } if let Some(key) = key { Ok(QueryProperty::new(key, value, capture_id)) } else { return Err(predicate_error( row, format!( "Invalid arguments to {} predicate. Missing key argument", function_name, ), )); } } } impl QueryCursor { /// Create a new cursor for executing a given query. /// /// The cursor stores the state that is needed to iteratively search for matches. #[doc(alias = "ts_query_cursor_new")] pub fn new() -> Self { QueryCursor { ptr: unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }, } } /// Return the maximum number of in-progress matches for this cursor. #[doc(alias = "ts_query_cursor_match_limit")] pub fn match_limit(&self) -> u32 { unsafe { ffi::ts_query_cursor_match_limit(self.ptr.as_ptr()) } } /// Set the maximum number of in-progress matches for this cursor. The limit must be > 0 and /// <= 65536. #[doc(alias = "ts_query_cursor_set_match_limit")] pub fn set_match_limit(&mut self, limit: u32) { unsafe { ffi::ts_query_cursor_set_match_limit(self.ptr.as_ptr(), limit); } } /// Check if, on its last execution, this cursor exceeded its maximum number of /// in-progress matches. 
#[doc(alias = "ts_query_cursor_did_exceed_match_limit")] pub fn did_exceed_match_limit(&self) -> bool { unsafe { ffi::ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) } } /// Iterate over all of the matches in the order that they were found. /// /// Each match contains the index of the pattern that matched, and a list of captures. /// Because multiple patterns can match the same set of nodes, one match may contain /// captures that appear *before* some of the captures from a previous match. #[doc(alias = "ts_query_cursor_exec")] pub fn matches<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( &'a mut self, query: &'a Query, node: Node<'tree>, text_provider: T, ) -> QueryMatches<'a, 'tree, T> { let ptr = self.ptr.as_ptr(); unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; QueryMatches { ptr, query, text_provider, buffer1: Default::default(), buffer2: Default::default(), _tree: PhantomData, } } /// Iterate over all of the individual captures in the order that they appear. /// /// This is useful if you don't care about which pattern matched, and just want a single, /// ordered sequence of captures. #[doc(alias = "ts_query_cursor_exec")] pub fn captures<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( &'a mut self, query: &'a Query, node: Node<'tree>, text_provider: T, ) -> QueryCaptures<'a, 'tree, T> { let ptr = self.ptr.as_ptr(); unsafe { ffi::ts_query_cursor_exec(self.ptr.as_ptr(), query.ptr.as_ptr(), node.0) }; QueryCaptures { ptr, query, text_provider, buffer1: Default::default(), buffer2: Default::default(), _tree: PhantomData, } } /// Set the range in which the query will be executed, in terms of byte offsets. #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) -> &mut Self { unsafe { ffi::ts_query_cursor_set_byte_range( self.ptr.as_ptr(), range.start as u32, range.end as u32, ); } self } /// Set the range in which the query will be executed, in terms of rows and columns. 
#[doc(alias = "ts_query_cursor_set_point_range")] pub fn set_point_range(&mut self, range: ops::Range) -> &mut Self { unsafe { ffi::ts_query_cursor_set_point_range( self.ptr.as_ptr(), range.start.into(), range.end.into(), ); } self } } impl<'a, 'tree> QueryMatch<'a, 'tree> { pub fn id(&self) -> u32 { self.id } #[doc(alias = "ts_query_cursor_remove_match")] pub fn remove(self) { unsafe { ffi::ts_query_cursor_remove_match(self.cursor, self.id) } } pub fn nodes_for_capture_index( &self, capture_ix: u32, ) -> impl Iterator> + '_ { self.captures.iter().filter_map(move |capture| { if capture.index == capture_ix { Some(capture.node) } else { None } }) } fn new(m: ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { QueryMatch { cursor, id: m.id, pattern_index: m.pattern_index as usize, captures: if m.capture_count > 0 { unsafe { slice::from_raw_parts( m.captures as *const QueryCapture<'tree>, m.capture_count as usize, ) } } else { &[] }, } } fn satisfies_text_predicates( &self, query: &Query, buffer1: &mut Vec, buffer2: &mut Vec, text_provider: &mut impl TextProvider<'a>, ) -> bool { fn get_text<'a, 'b: 'a, I: Iterator>( buffer: &'a mut Vec, mut chunks: I, ) -> &'a [u8] { let first_chunk = chunks.next().unwrap_or(&[]); if let Some(next_chunk) = chunks.next() { buffer.clear(); buffer.extend_from_slice(first_chunk); buffer.extend_from_slice(next_chunk); for chunk in chunks { buffer.extend_from_slice(chunk); } buffer.as_slice() } else { first_chunk } } query.text_predicates[self.pattern_index] .iter() .all(|predicate| match predicate { TextPredicate::CaptureEqCapture(i, j, is_positive) => { let node1 = self.nodes_for_capture_index(*i).next(); let node2 = self.nodes_for_capture_index(*j).next(); match (node1, node2) { (Some(node1), Some(node2)) => { let text1 = get_text(buffer1, text_provider.text(node1)); let text2 = get_text(buffer2, text_provider.text(node2)); (text1 == text2) == *is_positive } _ => true, } } TextPredicate::CaptureEqString(i, s, is_positive) => { 
let node = self.nodes_for_capture_index(*i).next(); match node { Some(node) => { let text = get_text(buffer1, text_provider.text(node)); (text == s.as_bytes()) == *is_positive } None => true, } } TextPredicate::CaptureMatchString(i, r, is_positive) => { let node = self.nodes_for_capture_index(*i).next(); match node { Some(node) => { let text = get_text(buffer1, text_provider.text(node)); r.is_match(text) == *is_positive } None => true, } } }) } } impl QueryProperty { pub fn new(key: &str, value: Option<&str>, capture_id: Option) -> Self { QueryProperty { capture_id, key: key.to_string().into_boxed_str(), value: value.map(|s| s.to_string().into_boxed_str()), } } } impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> { type Item = QueryMatch<'a, 'tree>; fn next(&mut self) -> Option { unsafe { loop { let mut m = MaybeUninit::::uninit(); if ffi::ts_query_cursor_next_match(self.ptr, m.as_mut_ptr()) { let result = QueryMatch::new(m.assume_init(), self.ptr); if result.satisfies_text_predicates( self.query, &mut self.buffer1, &mut self.buffer2, &mut self.text_provider, ) { return Some(result); } } else { return None; } } } } } impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { type Item = (QueryMatch<'a, 'tree>, usize); fn next(&mut self) -> Option { unsafe { loop { let mut capture_index = 0u32; let mut m = MaybeUninit::::uninit(); if ffi::ts_query_cursor_next_capture( self.ptr, m.as_mut_ptr(), &mut capture_index as *mut u32, ) { let result = QueryMatch::new(m.assume_init(), self.ptr); if result.satisfies_text_predicates( self.query, &mut self.buffer1, &mut self.buffer2, &mut self.text_provider, ) { return Some((result, capture_index as usize)); } else { result.remove(); } } else { return None; } } } } } impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> { #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) { unsafe { 
ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32); } } #[doc(alias = "ts_query_cursor_set_point_range")] pub fn set_point_range(&mut self, range: ops::Range) { unsafe { ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into()); } } } impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> { #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) { unsafe { ffi::ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32); } } #[doc(alias = "ts_query_cursor_set_point_range")] pub fn set_point_range(&mut self, range: ops::Range) { unsafe { ffi::ts_query_cursor_set_point_range(self.ptr, range.start.into(), range.end.into()); } } } impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "QueryMatch {{ id: {}, pattern_index: {}, captures: {:?} }}", self.id, self.pattern_index, self.captures ) } } impl<'a, F, I> TextProvider<'a> for F where F: FnMut(Node) -> I, I: Iterator + 'a, { type I = I; fn text(&mut self, node: Node) -> Self::I { (self)(node) } } impl<'a> TextProvider<'a> for &'a [u8] { type I = iter::Once<&'a [u8]>; fn text(&mut self, node: Node) -> Self::I { iter::once(&self[node.byte_range()]) } } impl PartialEq for Query { fn eq(&self, other: &Self) -> bool { self.ptr == other.ptr } } impl Drop for Query { fn drop(&mut self) { unsafe { ffi::ts_query_delete(self.ptr.as_ptr()) } } } impl Drop for QueryCursor { fn drop(&mut self) { unsafe { ffi::ts_query_cursor_delete(self.ptr.as_ptr()) } } } impl Point { pub fn new(row: usize, column: usize) -> Self { Point { row, column } } } impl fmt::Display for Point { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(f, "({}, {})", self.row, self.column) } } impl Into for Point { fn into(self) -> ffi::TSPoint { ffi::TSPoint { row: self.row as u32, column: self.column as u32, } } } impl From for 
Point { fn from(point: ffi::TSPoint) -> Self { Self { row: point.row as usize, column: point.column as usize, } } } impl Into for Range { fn into(self) -> ffi::TSRange { ffi::TSRange { start_byte: self.start_byte as u32, end_byte: self.end_byte as u32, start_point: self.start_point.into(), end_point: self.end_point.into(), } } } impl From for Range { fn from(range: ffi::TSRange) -> Self { Self { start_byte: range.start_byte as usize, end_byte: range.end_byte as usize, start_point: range.start_point.into(), end_point: range.end_point.into(), } } } impl<'a> Into for &'a InputEdit { fn into(self) -> ffi::TSInputEdit { ffi::TSInputEdit { start_byte: self.start_byte as u32, old_end_byte: self.old_end_byte as u32, new_end_byte: self.new_end_byte as u32, start_point: self.start_position.into(), old_end_point: self.old_end_position.into(), new_end_point: self.new_end_position.into(), } } } impl<'a> LossyUtf8<'a> { pub fn new(bytes: &'a [u8]) -> Self { LossyUtf8 { bytes, in_replacement: false, } } } impl<'a> Iterator for LossyUtf8<'a> { type Item = &'a str; fn next(&mut self) -> Option<&'a str> { if self.bytes.is_empty() { return None; } if self.in_replacement { self.in_replacement = false; return Some("\u{fffd}"); } match std::str::from_utf8(self.bytes) { Ok(valid) => { self.bytes = &[]; Some(valid) } Err(error) => { if let Some(error_len) = error.error_len() { let error_start = error.valid_up_to(); if error_start > 0 { let result = unsafe { std::str::from_utf8_unchecked(&self.bytes[..error_start]) }; self.bytes = &self.bytes[(error_start + error_len)..]; self.in_replacement = true; Some(result) } else { self.bytes = &self.bytes[error_len..]; Some("\u{fffd}") } } else { None } } } } } fn predicate_error(row: usize, message: String) -> QueryError { QueryError { kind: QueryErrorKind::Predicate, row, column: 0, offset: 0, message, } } impl fmt::Display for IncludedRangesError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Incorrect range by index: {}", 
self.0) } } impl fmt::Display for LanguageError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "Incompatible language version {}. Expected minimum {}, maximum {}", self.version, MIN_COMPATIBLE_LANGUAGE_VERSION, LANGUAGE_VERSION, ) } } impl fmt::Display for QueryError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let msg = match self.kind { QueryErrorKind::Field => "Invalid field name ", QueryErrorKind::NodeType => "Invalid node type ", QueryErrorKind::Capture => "Invalid capture name ", QueryErrorKind::Predicate => "Invalid predicate: ", QueryErrorKind::Structure => "Impossible pattern:\n", QueryErrorKind::Syntax => "Invalid syntax:\n", QueryErrorKind::Language => "", }; if msg.len() > 0 { write!( f, "Query error at {}:{}. {}{}", self.row + 1, self.column + 1, msg, self.message ) } else { write!(f, "{}", self.message) } } } extern "C" { fn free(ptr: *mut c_void); } static mut FREE_FN: unsafe extern "C" fn(ptr: *mut c_void) = free; #[doc(alias = "ts_set_allocator")] pub unsafe fn set_allocator( new_malloc: Option *mut c_void>, new_calloc: Option *mut c_void>, new_realloc: Option *mut c_void>, new_free: Option, ) { FREE_FN = new_free.unwrap_or(free); ffi::ts_set_allocator(new_malloc, new_calloc, new_realloc, new_free); } impl error::Error for IncludedRangesError {} impl error::Error for LanguageError {} impl error::Error for QueryError {} unsafe impl Send for Language {} unsafe impl Send for Parser {} unsafe impl Send for Query {} unsafe impl Send for QueryCursor {} unsafe impl Send for Tree {} unsafe impl Sync for Language {} unsafe impl Sync for Parser {} unsafe impl Sync for Query {} unsafe impl Sync for QueryCursor {} unsafe impl Sync for Tree {} tree-sitter-0.20.10/binding_rust/util.rs000064400000000000000000000016401046102023000162460ustar 00000000000000use super::FREE_FN; use std::os::raw::c_void; /// A raw pointer and a length, exposed as an iterator. 
pub struct CBufferIter { ptr: *mut T, count: usize, i: usize, } impl CBufferIter { pub unsafe fn new(ptr: *mut T, count: usize) -> Self { Self { ptr, count, i: 0 } } } impl Iterator for CBufferIter { type Item = T; fn next(&mut self) -> Option { let i = self.i; if i >= self.count { None } else { self.i += 1; Some(unsafe { *self.ptr.offset(i as isize) }) } } fn size_hint(&self) -> (usize, Option) { let remaining = self.count - self.i; (remaining, Some(remaining)) } } impl ExactSizeIterator for CBufferIter {} impl Drop for CBufferIter { fn drop(&mut self) { unsafe { (FREE_FN)(self.ptr as *mut c_void) }; } }