From 1537df88eb51278a921925f2908ba6a6b3bd98bd Mon Sep 17 00:00:00 2001 From: Sad Ellie Date: Mon, 6 Nov 2023 23:55:07 +0300 Subject: [PATCH] Lexicon fixer improvements --- .../io/github/sadellie/evaluatto/Tokenizer.kt | 58 ++++++++++--------- .../sadellie/evaluatto/FixLexiconTest.kt | 6 +- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/data/evaluatto/src/main/java/io/github/sadellie/evaluatto/Tokenizer.kt b/data/evaluatto/src/main/java/io/github/sadellie/evaluatto/Tokenizer.kt index 03da1154..87bdd5d9 100644 --- a/data/evaluatto/src/main/java/io/github/sadellie/evaluatto/Tokenizer.kt +++ b/data/evaluatto/src/main/java/io/github/sadellie/evaluatto/Tokenizer.kt @@ -76,8 +76,8 @@ class Tokenizer(private val streamOfTokens: String) { .missingClosingBrackets() .missingMultiply() .unpackAlPercents() - // input like 80%80% should be treated as 80%-80%. - // After unpacking we get (80/100)(80/100), the multiply is missing + // input like 80%80% should be treated as 80%*80%. + // After unpacking we get (80/100)(80/100), the multiply is missing (!!!) // No, we can't unpack before fixing missing multiply. // Ideally we we need to add missing multiply for 80%80% // In that case unpackAlPercents gets input with all operators 80%*80% in this case @@ -100,37 +100,39 @@ class Tokenizer(private val streamOfTokens: String) { } private fun List.missingMultiply(): List { - val results = this.toMutableList() - val insertIndexes = mutableListOf() + val result = this.toMutableList() + val original = this + var offset = 0 - // Records the index if it needs a multiply symbol - fun needsMultiply(index: Int) { - val tokenInFront = results.getOrNull(index - 1) ?: return + fun addTokenAfter(index: Int) { + result.add(index + 1 + offset, Token.Operator.multiply) + offset += 1 + } + original.forEachIndexed { index, token -> when { - tokenInFront.first().toString() in Token.Digit.allWithDot || - tokenInFront == Token.Operator.rightBracket || - tokenInFront in Token.Const.all -> { - // Can't add token now, it will modify tokens list (we are looping over it) - insertIndexes.add(index + insertIndexes.size) + // This will not insert multiply between digits because they are grouped into a + // single token. It's not possible to get separate digit tokens near each other + // Things like ["123", "456"] are impossible, will be ["123456"] + token.isDigitToken() || + token in Token.Const.all || + token == Token.Operator.rightBracket -> { + val tokenInFront = original.tokenInFront(index) ?: return@forEachIndexed + + when { + tokenInFront == Token.Operator.leftBracket || + tokenInFront in Token.Func.all || + tokenInFront in Token.Const.all || + tokenInFront == Token.Operator.sqrt || + tokenInFront.isDigitToken() -> { + addTokenAfter(index) + } + } } } } - results.forEachIndexed { index, s -> - when (s) { - Token.Operator.leftBracket, - Token.Operator.sqrt, - in Token.Const.all, - in Token.Func.all -> needsMultiply(index) - } - } - - insertIndexes.forEach { - results.add(it, Token.Operator.multiply) - } - - return results + return result } private fun List.unpackAlPercents(): List { @@ -241,4 +243,8 @@ class Tokenizer(private val streamOfTokens: String) { return this.subList(cursor, pos) } + + private fun String.isDigitToken(): Boolean = first().toString() in Token.Digit.allWithDot + + private fun List.tokenInFront(index: Int): String? = getOrNull(index + 1) } diff --git a/data/evaluatto/src/test/java/io/github/sadellie/evaluatto/FixLexiconTest.kt b/data/evaluatto/src/test/java/io/github/sadellie/evaluatto/FixLexiconTest.kt index 279744bb..80987f0b 100644 --- a/data/evaluatto/src/test/java/io/github/sadellie/evaluatto/FixLexiconTest.kt +++ b/data/evaluatto/src/test/java/io/github/sadellie/evaluatto/FixLexiconTest.kt @@ -48,6 +48,10 @@ class FixLexiconTest { assertLex( "e×e+π", "ee+π" ) + + assertLex( + "(69)×420", "(69)420" + ) } @Test @@ -61,7 +65,7 @@ class FixLexiconTest { ) assertLex( - "123)))12+4", "123)))12+4" + "123)))×12+4", "123)))12+4" ) assertLex(