Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Implement basic semantic highlighting for functions and variables #8

Merged
merged 4 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions client/src/BitBakeDocumentSemanticTokensProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import { SemanticTokensLegend, SemanticTokensBuilder } from 'vscode'
import type { TextDocument, CancellationToken, SemanticTokens, DocumentSemanticTokensProvider } from 'vscode'

const tokenTypes = new Map<string, number>()
const tokenModifiers = new Map<string, number>()

interface IParsedToken {
line: number
startCharacter: number
length: number
tokenType: string
tokenModifiers: string[]
}

const generateSemanticTokensLegend = (): SemanticTokensLegend => {
const tokenTypesLegend = [
'function', 'variable'
]
tokenTypesLegend.forEach((tokenType, index) => tokenTypes.set(tokenType, index))

const tokenModifiersLegend = [
'declaration'
]
tokenModifiersLegend.forEach((tokenModifier, index) => tokenModifiers.set(tokenModifier, index))

return new SemanticTokensLegend(tokenTypesLegend, tokenModifiersLegend)
}

export const legend = generateSemanticTokensLegend()

export class BitBakeDocumentSemanticTokensProvider implements DocumentSemanticTokensProvider {
async provideDocumentSemanticTokens (document: TextDocument, token: CancellationToken): Promise<SemanticTokens> {
const allTokens = this._parseText(document.getText())
const builder = new SemanticTokensBuilder(legend)
allTokens.forEach((token) => {
builder.push(token.line, token.startCharacter, token.length, this._encodeTokenType(token.tokenType), this._encodeTokenModifiers(token.tokenModifiers))
})
return builder.build()
}

// Check node_modules/@types/vscode/index.d.ts for more encoding details
private _encodeTokenType (tokenType: string): number {
if (tokenTypes.has(tokenType)) {
return Number(tokenTypes.get(tokenType))
} else if (tokenType === 'notInLegend') {
return tokenTypes.size + 2
}
return 0
}

private _encodeTokenModifiers (strTokenModifiers: string[]): number {
let result = 0
for (let i = 0; i < strTokenModifiers.length; i++) {
const tokenModifier = strTokenModifiers[i]
if (tokenModifiers.has(tokenModifier)) {
result = result | (1 << Number(tokenModifiers.get(tokenModifier)))
} else if (tokenModifier === 'notInLegend') {
result = result | (1 << tokenModifiers.size + 2)
}
}
return result
}

private _parseText (text: string): IParsedToken[] {
/**
* This function traverse the document twice. In the first traverse, the function and variable declarations will be matched and stored in an array which will be used to construct another array of regex for the second traverse. In the second traverse, the array of regex is used to match the function and variable references.
*/

const resultTokens: IParsedToken[] = []
const lines = text.split(/\r\n|\r|\n/)
let declaredFunctions: string[] = []
let declaredVariables: string[] = []

const functionStartsWithDefRegex = /(?<!(#.*)|'.*|".*)(?<=def\s+)(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*\()/
const functionStartsWithPythonRegex = /(?<!(#.*)|'.*|".*)(?<=python\s+)(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*\()/
const shellFunctionsRegex = /(?<!(#.*)|'.*|".*)(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*\(.*\)\s*\{)/
const anonymousFunctionsRegex = /(\bpython)(?=\s*\(.*\)\s*\{)/
const variableDeclarationRegex = /(?<name>[a-zA-Z_][\w_]*)(?<operator>:(append|prepend|remove))?(?=\s*=\s*)(?!\s*\()/

// First traverse: Match functions and variables declarations
for (let i = 0; i < lines.length; i++) {
const line = lines[i]

const matchFunctionStartsWithDef = functionStartsWithDefRegex.exec(line)
const matchFunctionStartsWithPython = functionStartsWithPythonRegex.exec(line)
const matchShellFunction = shellFunctionsRegex.exec(line)
const matchVariableDeclaration = variableDeclarationRegex.exec(line)
const matchAnonymousFunction = anonymousFunctionsRegex.exec(line)

if (matchAnonymousFunction !== null) {
// Avoid mistakening anonymous functions as shell function declaration
continue
}
if (matchFunctionStartsWithDef?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchFunctionStartsWithDef.index,
length: matchFunctionStartsWithDef?.groups?.name.length,
tokenType: 'function',
tokenModifiers: ['declaration']
})
declaredFunctions.push(matchFunctionStartsWithDef?.groups?.name)
} else if (matchFunctionStartsWithPython?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchFunctionStartsWithPython.index,
length: matchFunctionStartsWithPython?.groups?.name.length,
tokenType: 'function',
tokenModifiers: ['declaration']
})

declaredFunctions.push(matchFunctionStartsWithPython?.groups?.name)
} else if (matchShellFunction?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchShellFunction.index,
length: matchShellFunction.groups?.name.length,
tokenType: 'function',
tokenModifiers: ['declaration']
})

declaredFunctions.push(matchShellFunction.groups?.name)
} else if (matchVariableDeclaration?.groups?.name.length !== undefined) {
resultTokens.push({
line: i,
startCharacter: matchVariableDeclaration.index,
length: matchVariableDeclaration.groups?.name.length,
tokenType: 'variable',
tokenModifiers: ['declaration']
})

declaredVariables.push(matchVariableDeclaration.groups?.name)
}
}
// Remove duplicates
declaredFunctions = [...new Set(declaredFunctions)]
declaredVariables = [...new Set(declaredVariables)]
// Sort the regex from longest to shortest for precise matching
declaredFunctions.sort((prev, next) => next.length - prev.length)
declaredVariables.sort((prev, next) => next.length - prev.length)

const declaredObjectsRegex: Array<{ regex: RegExp, type: string }> = []

declaredFunctions.forEach(declaredFunction => {
declaredObjectsRegex.push({ regex: new RegExp(`(?<!(#.*)|'.*|".*)(?<!python|def\\s+)${declaredFunction}\\b`, 'g'), type: 'function' })
})
declaredVariables.forEach(declaredVariable => {
declaredObjectsRegex.push({ regex: new RegExp(`(?<!(#.*)|'.*|".*)\\b${declaredVariable}\\b(?!\\s*=\\s*)`, 'g'), type: 'variable' })
})
// Second traverse: Match functions and variables references
for (let i = 0; i < lines.length; i++) {
const line = lines[i]

for (let j = 0; j < declaredObjectsRegex.length; ++j) {
const match = declaredObjectsRegex[j].regex.exec(line)
if (match !== null) {
resultTokens.push({
line: i,
startCharacter: match.index,
length: match[0].length,
tokenType: declaredObjectsRegex[j].type,
tokenModifiers: []
})
}
}
}
return resultTokens
}
}
6 changes: 5 additions & 1 deletion client/src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@

import * as path from 'path'

import { workspace } from 'vscode'
import { workspace, languages } from 'vscode'
import { LanguageClient, TransportKind } from 'vscode-languageclient/node'

import type { ExtensionContext } from 'vscode'
import type { LanguageClientOptions, ServerOptions } from 'vscode-languageclient/node'

import { legend, BitBakeDocumentSemanticTokensProvider } from './BitBakeDocumentSemanticTokensProvider'

let client: LanguageClient
export async function activate (context: ExtensionContext): Promise<void> {
console.log('Congratulations, your extension "BitBake" is now active!')
Expand Down Expand Up @@ -48,6 +50,8 @@ export async function activate (context: ExtensionContext): Promise<void> {

// Start the client and launch the server
await client.start()

context.subscriptions.push(languages.registerDocumentSemanticTokensProvider({ language: 'bitbake', scheme: 'file' }, new BitBakeDocumentSemanticTokensProvider(), legend))
}

export function deactivate (): Thenable<void> | undefined {
Expand Down
15 changes: 13 additions & 2 deletions client/syntaxes/bitbake.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@
}
},
"functions": {
"begin": "\\s*([a-zA-Z0-9_-][\\w-]*)(:(append|prepend|remove))?(?=\\s*\\()",
"begin": "\\s*([a-zA-Z_][\\w_]*)(:(append|prepend|remove))?(?=\\s*\\()",
"beginCaptures": {
"1": {
"name": "entity.name.function.python.bb"
Expand Down Expand Up @@ -189,6 +189,17 @@
{
"include": "#keywords"
},
{
"match": "(\\[)",
"name": "support.function.bracket-open.bb"
},
{
"match": "(\\])",
"name": "support.function.bracket-close.bb"
},
{
"include": "#numeric"
},
{
"include": "#functions"
},
Expand All @@ -207,7 +218,7 @@
]
},
"variable-name": {
"match": "([a-zA-Z0-9_-][\\w-]*)(?!\\s*\\()",
"match": "([a-zA-Z_][\\w_]*)",
"captures": {
"1": {
"name": "variable.other.names.bb"
Expand Down