Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tree-sitter: Bump to 0.22.6 #16481

Merged
merged 5 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/extractor/tsg-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ authors = ["Taus Brock-Nannestad <tausbn@github.com>"]
edition = "2018"

# When changing/updating these, the `Cargo.Bazel.lock` file has to be regenerated.
# Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=py_deps ./build --bazel sync --only=py_deps`
# Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=py_deps ./tools/bazel sync --only=py_deps`
# in the `semmle-code` repository to do so.
# For more information, check out the documentation at
# https://bazelbuild.github.io/rules_rust/crate_universe.html#repinning--updating-dependencies
Expand Down
18 changes: 9 additions & 9 deletions ql/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion ql/buramu/tree-sitter-blame/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ include = [
path = "bindings/rust/lib.rs"

[dependencies]
tree-sitter = "~0.20.3"
tree-sitter = ">= 0.22.6"

[build-dependencies]
cc = "1.0"

[patch.crates-io]
tree-sitter = {git = "https://github.com/redsun82/tree-sitter.git", rev = "1f5c1112ceaa8fc6aff61d1852690407670d2a96"}
13 changes: 8 additions & 5 deletions ql/extractor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,22 @@
name = "codeql-extractor-ql"
version = "0.1.0"
authors = ["GitHub"]
edition = "2018"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
tree-sitter = ">= 0.20, < 0.21"
tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", rev = "d08db734f8dc52f6bc04db53a966603122bc6985"}
tree-sitter-ql-dbscheme = { git = "https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git", rev = "63e1344353f63931e88bfbc2faa2e78e1421b213"}
tree-sitter = ">= 0.22.6"
tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", rev = "fa5c3821dd2161f5c8528a8cbdb258daa6dc4de6"}
tree-sitter-ql-dbscheme = { git = "https://github.com/tree-sitter/tree-sitter-ql-dbscheme.git", rev = "5f770f57fa415607ff50e3d237d47c8f11440eb3"}
tree-sitter-blame = {path = "../buramu/tree-sitter-blame"}
tree-sitter-json = {git = "https://github.com/tausbn/tree-sitter-json.git", rev = "745663ee997f1576fe1e7187e6347e0db36ec7a9"}
tree-sitter-json = {git = "https://github.com/tree-sitter/tree-sitter-json.git", rev = "94f5c527b2965465956c2000ed6134dd24daf2a7"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this will break structured log extraction. Some of the commits on my fork added support for having multiple JSON objects/arrays in the same file (as in JSONL), and some added support for trailing commas in objects/arrays (as lenient JSONC-style parsers allow). Both of these are still missing on mainline tree-sitter-json, as far as I know.

This is not to say that this PR should be blocked, however. The structured log parsing is not in active use (and to be honest, it should be replaced with a better solution than (ab)using the tree-sitter interface). I just wanted to point out that this is a breaking change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Perhaps worth submitting an upstream PR to tree-sitter/tree-sitter-json?

clap = { version = "4.2", features = ["derive"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
rayon = "1.9.0"
regex = "1.10.4"
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }

[patch.crates-io]
tree-sitter = {git = "https://github.com/redsun82/tree-sitter.git", rev = "1f5c1112ceaa8fc6aff61d1852690407670d2a96"}
18 changes: 14 additions & 4 deletions ql/ql/src/codeql_ql/StructuredLogs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,25 @@ private Predicate getPredicateFromPosition(string s) {
)
}

/**
 * Gets the value of the `i`th child of the JSON string node `s`, provided
 * that child is a `JSON::Token`.
 *
 * A string node can have several indexed children, so this predicate exposes
 * them one at a time, keyed by index.
 */
pragma[nomagic]
private string getJsonStringComponent(JSON::String s, int i) {
result = s.getChild(i).(JSON::Token).getValue()
}

/**
 * Gets the text of the JSON string node `s`, built by concatenating the
 * values of its token children (see `getJsonStringComponent`) in increasing
 * child-index order.
 *
 * NOTE(review): `getValue()` presumably yields each token's raw source text,
 * in which case escape sequences are included verbatim rather than decoded —
 * confirm before relying on the result for keys that contain escapes.
 */
pragma[nomagic]
private string getJsonString(JSON::String s) {
// `order by i` keeps the pieces in their original left-to-right order.
result = concat(string c, int i | c = getJsonStringComponent(s, i) | c order by i)
}

class Object extends JSON::Object {
JSON::Value getValue(string key) {
JSON::UnderscoreValue getValue(string key) {
exists(JSON::Pair p | p = this.getChild(_) |
key = p.getKey().(JSON::String).getChild().getValue() and
key = getJsonString(p.getKey()) and
result = p.getValue()
)
}

string getString(string key) { result = this.getValue(key).(JSON::String).getChild().getValue() }
string getString(string key) { result = getJsonString(this.getValue(key)) }

int getNumber(string key) { result = this.getValue(key).(JSON::Number).getValue().toInt() }

Expand All @@ -61,7 +71,7 @@ class Object extends JSON::Object {
class Array extends JSON::Array {
Object getObject(int i) { result = this.getChild(i) }

string getString(int i) { result = this.getChild(i).(JSON::String).getChild().getValue() }
string getString(int i) { result = getJsonString(this.getChild(i)) }

int getNumber(int i) { result = this.getChild(i).(JSON::Number).getValue().toInt() }

Expand Down
24 changes: 15 additions & 9 deletions ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1767,13 +1767,15 @@ module JSON {
final override string getAPrimaryQlClass() { result = "ReservedWord" }
}

class UnderscoreValue extends @json_underscore_value, AstNode { }

/** A class representing `array` nodes. */
class Array extends @json_array, AstNode {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Array" }

/** Gets the `i`th child of this node. */
final Value getChild(int i) { json_array_child(this, i, result) }
final UnderscoreValue getChild(int i) { json_array_child(this, i, result) }

/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() { json_array_child(this, _, result) }
Expand All @@ -1791,12 +1793,18 @@ module JSON {
final override string getAPrimaryQlClass() { result = "Document" }

/** Gets the `i`th child of this node. */
final Value getChild(int i) { json_document_child(this, i, result) }
final UnderscoreValue getChild(int i) { json_document_child(this, i, result) }

/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() { json_document_child(this, _, result) }
}

/** A class representing `escape_sequence` tokens. */
class EscapeSequence extends @json_token_escape_sequence, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "EscapeSequence" }
}

/** A class representing `false` tokens. */
class False extends @json_token_false, Token {
/** Gets the name of the primary QL class for this element. */
Expand Down Expand Up @@ -1833,10 +1841,10 @@ module JSON {
final override string getAPrimaryQlClass() { result = "Pair" }

/** Gets the node corresponding to the field `key`. */
final AstNode getKey() { json_pair_def(this, result, _) }
final String getKey() { json_pair_def(this, result, _) }

/** Gets the node corresponding to the field `value`. */
final Value getValue() { json_pair_def(this, _, result) }
final UnderscoreValue getValue() { json_pair_def(this, _, result) }

/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() {
Expand All @@ -1849,11 +1857,11 @@ module JSON {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "String" }

/** Gets the child of this node. */
final StringContent getChild() { json_string_child(this, result) }
/** Gets the `i`th child of this node. */
final AstNode getChild(int i) { json_string_child(this, i, result) }

/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() { json_string_child(this, result) }
final override AstNode getAFieldOrChild() { json_string_child(this, _, result) }
}

/** A class representing `string_content` tokens. */
Expand All @@ -1867,6 +1875,4 @@ module JSON {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "True" }
}

class Value extends @json_value, AstNode { }
}
33 changes: 18 additions & 15 deletions ql/ql/src/ql.dbscheme
Original file line number Diff line number Diff line change
Expand Up @@ -1239,11 +1239,13 @@ blame_ast_node_parent(
);

/*- JSON dbscheme -*/
@json_underscore_value = @json_array | @json_object | @json_string__ | @json_token_false | @json_token_null | @json_token_number | @json_token_true

#keyset[json_array, index]
json_array_child(
int json_array: @json_array ref,
int index: int ref,
unique int child: @json_value ref
unique int child: @json_underscore_value ref
);

json_array_def(
Expand All @@ -1254,7 +1256,7 @@ json_array_def(
json_document_child(
int json_document: @json_document ref,
int index: int ref,
unique int child: @json_value ref
unique int child: @json_underscore_value ref
);

json_document_def(
Expand All @@ -1272,25 +1274,25 @@ json_object_def(
unique int id: @json_object
);

@json_pair_key_type = @json_string__ | @json_token_number

json_pair_def(
unique int id: @json_pair,
int key__: @json_pair_key_type ref,
int value: @json_value ref
int key__: @json_string__ ref,
int value: @json_underscore_value ref
);

@json_string_child_type = @json_token_escape_sequence | @json_token_string_content

#keyset[json_string__, index]
json_string_child(
unique int json_string__: @json_string__ ref,
unique int child: @json_token_string_content ref
int json_string__: @json_string__ ref,
int index: int ref,
unique int child: @json_string_child_type ref
);

json_string_def(
unique int id: @json_string__
);

@json_value = @json_array | @json_object | @json_string__ | @json_token_false | @json_token_null | @json_token_number | @json_token_true

json_tokeninfo(
unique int id: @json_token,
int kind: int ref,
Expand All @@ -1300,11 +1302,12 @@ json_tokeninfo(
case @json_token.kind of
0 = @json_reserved_word
| 1 = @json_token_comment
| 2 = @json_token_false
| 3 = @json_token_null
| 4 = @json_token_number
| 5 = @json_token_string_content
| 6 = @json_token_true
| 2 = @json_token_escape_sequence
| 3 = @json_token_false
| 4 = @json_token_null
| 5 = @json_token_number
| 6 = @json_token_string_content
| 7 = @json_token_true
;


Expand Down
4 changes: 2 additions & 2 deletions ql/rust-toolchain.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
# extractor. It is set to the lowest version of Rust we want to support.

[toolchain]
channel = "1.68"
channel = "1.70"
profile = "minimal"
components = [ "rustfmt" ]
components = [ "rustfmt" ]
Loading
Loading