Skip to content

Commit

Permalink
Add language detection via "auto" language
Browse files Browse the repository at this point in the history
Fixes #103.
  • Loading branch information
valentjn committed Sep 6, 2021
1 parent 3fc5239 commit e39ec34
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 5 deletions.
3 changes: 3 additions & 0 deletions changelog.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
<action type="update">
Refactor CLI into `ltex-cli`; `--input-documents` and `--settings-file` are deprecated and will be removed in a future release
</action>
<action type="add" issue="#103">
Add support for automatic language detection via language short code `auto`; only generic languages like `en` are detected, not language variants like `en-US`; as a result, spelling errors will not be reported for automatically detected languages
</action>
<action type="add">
Provide ID of LanguageTool rule via diagnostics code, not as part of diagnostics message
</action>
Expand Down
18 changes: 17 additions & 1 deletion src/main/kotlin/org/bsplines/ltexls/server/DocumentChecker.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import org.bsplines.ltexls.tools.I18n
import org.bsplines.ltexls.tools.Logging
import org.bsplines.ltexls.tools.Tools
import org.eclipse.lsp4j.Range
import org.languagetool.Language
import org.languagetool.language.LanguageIdentifier
import org.languagetool.markup.AnnotatedText
import org.languagetool.markup.TextPart
import java.time.Duration
Expand All @@ -33,6 +35,8 @@ class DocumentChecker(
var lastCheckedDocument: LtexTextDocumentItem? = null
private set

private val languageIdentifier = LanguageIdentifier()

private fun fragmentizeDocument(
document: LtexTextDocumentItem,
range: Range?,
Expand Down Expand Up @@ -86,8 +90,20 @@ class DocumentChecker(
rangeOffset: Int,
): List<LanguageToolRuleMatch> {
val codeFragment: CodeFragment = annotatedTextFragment.codeFragment
val settings: Settings = codeFragment.settings
var settings: Settings = codeFragment.settings

if (settings.languageShortCode == "auto") {
val cleanText: String = this.languageIdentifier.cleanAndShortenText(
annotatedTextFragment.annotatedText.plainText,
)
val language: Language? = this.languageIdentifier.detectLanguage(cleanText)
settings = settings.copy(
_languageShortCode = language?.shortCodeWithCountryAndVariant ?: "en-US",
)
}

this.settingsManager.settings = settings

val languageToolInterface: LanguageToolInterface =
this.settingsManager.languageToolInterface ?: run {
Logging.logger.warning(I18n.format(
Expand Down
21 changes: 17 additions & 4 deletions src/test/kotlin/org/bsplines/ltexls/server/DocumentCheckerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,28 @@ class DocumentCheckerTest {
"""
This is an **test.**
[comment]: <> "LTeX: language=de-DE"
<!-- LTeX: language=de-DE -->
Dies ist eine **Test**.
""".trimIndent()
)
val checkingResult: Pair<List<LanguageToolRuleMatch>, List<AnnotatedTextFragment>> =
checkDocument(document)
assertMatches(checkingResult.first, 8, 10, 69, 80)
assertMatches(checkDocument(document).first, 8, 10, 62, 73)
}

/**
 * Checks a Markdown document in which a magic comment sets the language to `auto`
 * between an English sentence and a German sentence, and asserts the resulting matches.
 */
@Test
fun testLanguageDetection() {
  // Raw string lines are kept at column 0 so the checked text is exactly the original one.
  val markdownCode: String = """
This is an **test.**
<!-- LTeX: language=auto -->
Dies ist eine **Test**.
""".trimIndent()
  val autoLanguageDocument: LtexTextDocumentItem = createDocument("markdown", markdownCode)
  val matches = checkDocument(autoLanguageDocument).first

  // NOTE(review): the trailing integers are presumably (from, to) positions of the expected
  // matches, one pair per sentence; confirm against assertMatches.
  assertMatches(matches, 8, 10, 61, 72)
}

@Test
Expand Down

0 comments on commit e39ec34

Please sign in to comment.