Refactor Tokenizer

This commit is contained in:
Sad Ellie 2024-02-06 18:07:54 +03:00
parent 32eb7422d5
commit 67a4852741
3 changed files with 216 additions and 223 deletions

View File

@ -35,7 +35,7 @@ class Expression(
private val radianMode: Boolean = true, private val radianMode: Boolean = true,
private val roundingMode: RoundingMode = RoundingMode.HALF_EVEN private val roundingMode: RoundingMode = RoundingMode.HALF_EVEN
) { ) {
private val tokens = Tokenizer(input).tokenize() private val tokens = input.tokenize()
private var cursorPosition = 0 private var cursorPosition = 0
/** /**

View File

@ -26,14 +26,12 @@ sealed class TokenizerException(message: String) : Exception(message) {
class BadScientificNotation : TokenizerException("Expected plus or minus symbol after \"E\"") class BadScientificNotation : TokenizerException("Expected plus or minus symbol after \"E\"")
} }
class Tokenizer(private val streamOfTokens: String) { fun String.tokenize(): List<String> {
// Don't create object at all?
fun tokenize(): List<String> {
var cursor = 0 var cursor = 0
val tokens: MutableList<String> = mutableListOf() val tokens: MutableList<String> = mutableListOf()
while (cursor != streamOfTokens.length) { while (cursor != this.length) {
val nextToken = peekTokenAfter(cursor) val nextToken = peekTokenAfter(this, cursor)
if (nextToken != null) { if (nextToken != null) {
tokens.add(nextToken) tokens.add(nextToken)
@ -45,9 +43,12 @@ class Tokenizer(private val streamOfTokens: String) {
} }
return tokens.repairLexicon() return tokens.repairLexicon()
} }
private fun peekTokenAfter(cursor: Int): String? { private fun peekTokenAfter(
streamOfTokens: String,
cursor: Int
): String? {
Token.expressionTokens.forEach { token -> Token.expressionTokens.forEach { token ->
val subs = streamOfTokens val subs = streamOfTokens
.substring( .substring(
@ -71,9 +72,9 @@ class Tokenizer(private val streamOfTokens: String) {
} }
} }
return null return null
} }
private fun List<String>.repairLexicon(): List<String> { private fun MutableList<String>.repairLexicon(): List<String> {
return this return this
.missingClosingBrackets() .missingClosingBrackets()
.unpackNotation() .unpackNotation()
@ -86,100 +87,95 @@ class Tokenizer(private val streamOfTokens: String) {
// In that case unpackAllPercents gets input with all operators 80%*80% in this case // In that case unpackAllPercents gets input with all operators 80%*80% in this case
// Can't be done right now since missingMultiply checks for tokens in front only // Can't be done right now since missingMultiply checks for tokens in front only
.missingMultiply() .missingMultiply()
} }
private fun List<String>.missingClosingBrackets(): List<String> { private fun MutableList<String>.missingClosingBrackets(): MutableList<String> {
val leftBracket = this.count { it == Token.Operator.leftBracket } val leftBracket = this.count { it == Token.Operator.leftBracket }
val rightBrackets = this.count { it == Token.Operator.rightBracket } val rightBrackets = this.count { it == Token.Operator.rightBracket }
val neededBrackets = leftBracket - rightBrackets val neededBrackets = leftBracket - rightBrackets
if (neededBrackets <= 0) return this if (neededBrackets <= 0) return this
var fixed = this
repeat(neededBrackets) { repeat(neededBrackets) {
fixed = fixed + Token.Operator.rightBracket this.add(Token.Operator.rightBracket)
}
return fixed
} }
return this
}
private fun List<String>.missingMultiply(): List<String> { private fun MutableList<String>.missingMultiply(): MutableList<String> {
val result = this.toMutableList() val iterator = this.listIterator()
val original = this
var offset = 0
fun addTokenAfter(index: Int) { while (iterator.hasNext()) {
result.add(index + 1 + offset, Token.Operator.multiply) val currentToken = iterator.next()
offset += 1
}
original.forEachIndexed { index, token -> // Need two tokens for checks
when { if (!iterator.hasNext()) break
// This will not insert multiply between digits because they are grouped into a
// single token. It's not possible to get separate digit tokens near each other
// Things like ["123", "456"] are impossible, will be ["123456"]
token.isDigitToken() ||
token in Token.Const.all ||
token == Token.Operator.rightBracket -> {
val tokenInFront = original.tokenInFront(index) ?: return@forEachIndexed
when { val isDigit = currentToken.isDigitToken()
tokenInFront == Token.Operator.leftBracket || val isConst = currentToken in Token.Const.all
tokenInFront in Token.Func.all || val isRightBracket = currentToken == Token.Operator.rightBracket
tokenInFront in Token.Const.all ||
tokenInFront == Token.Operator.sqrt || // may need a multiplication after
tokenInFront.isDigitToken() -> { if (isDigit || isConst || isRightBracket) {
addTokenAfter(index) // Peek next, but then go back
} val tokenAfter = iterator.next()
} iterator.previous()
if (tokenAfter == Token.Operator.leftBracket ||
tokenAfter in Token.Func.all ||
tokenAfter in Token.Const.all ||
tokenAfter == Token.Operator.sqrt ||
tokenAfter.isDigitToken()) {
iterator.add(Token.Operator.multiply)
} }
} }
} }
return result return this
}
private fun MutableList<String>.unpackNotation(): MutableList<String> {
// Transform 1E+7 ==> 1*10^7
// Transform 1E-7 ==> 1/10^7
val iterator = this.listIterator()
while (iterator.hasNext()) {
if (iterator.next() == Token.DisplayOnly.engineeringE) {
iterator.remove()
val tokenAfterE = try {
iterator.next()
} catch (e: Exception) {
throw TokenizerException.BadScientificNotation()
} }
private fun List<String>.unpackAllPercents(): List<String> { iterator.remove()
when (tokenAfterE) {
Token.Operator.minus -> iterator.add(Token.Operator.divide)
Token.Operator.plus -> iterator.add(Token.Operator.multiply)
else -> throw TokenizerException.BadScientificNotation()
}
iterator.add("10")
iterator.add(Token.Operator.power)
}
}
return this
}
private fun MutableList<String>.unpackAllPercents(): MutableList<String> {
var result = this var result = this
while (result.contains(Token.Operator.percent)) { while (result.contains(Token.Operator.percent)) {
val percIndex = result.indexOf(Token.Operator.percent) val percIndex = result.indexOf(Token.Operator.percent)
result = result.unpackPercentAt(percIndex) result = result.unpackPercentAt(percIndex)
} }
return result return result
} }
private fun List<String>.unpackNotation(): List<String> { private fun MutableList<String>.unpackPercentAt(percentIndex: Int): MutableList<String> {
// Transform 1E+7 ==> 1*10^7
// Transform 1E-7 ==> 1/10^7
val result = this.toMutableList()
val listIterator = result.listIterator()
while (listIterator.hasNext()) {
if (listIterator.next() == Token.DisplayOnly.engineeringE) {
listIterator.remove()
val tokenAfterE = try {
listIterator.next()
} catch (e: Exception) {
throw TokenizerException.BadScientificNotation()
}
listIterator.remove()
when (tokenAfterE) {
Token.Operator.minus -> listIterator.add(Token.Operator.divide)
Token.Operator.plus -> listIterator.add(Token.Operator.multiply)
else -> throw TokenizerException.BadScientificNotation()
}
listIterator.add("10")
listIterator.add(Token.Operator.power)
}
}
return result
}
private fun List<String>.unpackPercentAt(percentIndex: Int): List<String> {
var cursor = percentIndex var cursor = percentIndex
// get whatever is the percentage // get whatever is the percentage
@ -231,9 +227,9 @@ class Tokenizer(private val streamOfTokens: String) {
) )
return mutList return mutList
} }
private fun List<String>.getNumberOrExpressionBefore(pos: Int): List<String> { private fun MutableList<String>.getNumberOrExpressionBefore(pos: Int): List<String> {
val digits = Token.Digit.allWithDot.map { it[0] } val digits = Token.Digit.allWithDot.map { it[0] }
val tokenInFront = this[pos - 1] val tokenInFront = this[pos - 1]
@ -258,9 +254,9 @@ class Tokenizer(private val streamOfTokens: String) {
} }
return this.subList(cursor, pos) return this.subList(cursor, pos)
} }
private fun List<String>.getBaseBefore(pos: Int): List<String> { private fun List<String>.getBaseBefore(pos: Int): List<String> {
var cursor = pos var cursor = pos
var leftBrackets = 0 var leftBrackets = 0
var rightBrackets = 0 var rightBrackets = 0
@ -278,9 +274,6 @@ class Tokenizer(private val streamOfTokens: String) {
cursor += 1 cursor += 1
return this.subList(cursor, pos) return this.subList(cursor, pos)
}
private fun String.isDigitToken(): Boolean = first().toString() in Token.Digit.allWithDot
private fun List<String>.tokenInFront(index: Int): String? = getOrNull(index + 1)
} }
private fun String.isDigitToken(): Boolean = first().toString() in Token.Digit.allWithDot

View File

@ -40,7 +40,7 @@ fun <T : Throwable?> assertExprFail(
} }
fun assertLex(expected: List<String>, actual: String) = fun assertLex(expected: List<String>, actual: String) =
assertEquals(expected, Tokenizer(actual).tokenize()) assertEquals(expected, actual.tokenize())
fun assertLex(expected: String, actual: String) = fun assertLex(expected: String, actual: String) =
assertEquals(expected, Tokenizer(actual).tokenize().joinToString("")) assertEquals(expected, actual.tokenize().joinToString(""))