Refactor Tokenizer

This commit is contained in:
Sad Ellie 2024-02-06 18:07:54 +03:00
parent 32eb7422d5
commit 67a4852741
3 changed files with 216 additions and 223 deletions

View File

@ -35,7 +35,7 @@ class Expression(
private val radianMode: Boolean = true,
private val roundingMode: RoundingMode = RoundingMode.HALF_EVEN
) {
private val tokens = Tokenizer(input).tokenize()
private val tokens = input.tokenize()
private var cursorPosition = 0
/**

View File

@ -26,14 +26,12 @@ sealed class TokenizerException(message: String) : Exception(message) {
class BadScientificNotation : TokenizerException("Expected plus or minus symbol after \"E\"")
}
class Tokenizer(private val streamOfTokens: String) {
// Don't create object at all?
fun tokenize(): List<String> {
fun String.tokenize(): List<String> {
var cursor = 0
val tokens: MutableList<String> = mutableListOf()
while (cursor != streamOfTokens.length) {
val nextToken = peekTokenAfter(cursor)
while (cursor != this.length) {
val nextToken = peekTokenAfter(this, cursor)
if (nextToken != null) {
tokens.add(nextToken)
@ -45,9 +43,12 @@ class Tokenizer(private val streamOfTokens: String) {
}
return tokens.repairLexicon()
}
}
private fun peekTokenAfter(cursor: Int): String? {
private fun peekTokenAfter(
streamOfTokens: String,
cursor: Int
): String? {
Token.expressionTokens.forEach { token ->
val subs = streamOfTokens
.substring(
@ -71,9 +72,9 @@ class Tokenizer(private val streamOfTokens: String) {
}
}
return null
}
}
private fun List<String>.repairLexicon(): List<String> {
private fun MutableList<String>.repairLexicon(): List<String> {
return this
.missingClosingBrackets()
.unpackNotation()
@ -86,100 +87,95 @@ class Tokenizer(private val streamOfTokens: String) {
// In that case unpackAllPercents gets input with all operators 80%*80% in this case
// Can't be done right now since missingMultiply checks for tokens in front only
.missingMultiply()
}
}
private fun List<String>.missingClosingBrackets(): List<String> {
private fun MutableList<String>.missingClosingBrackets(): MutableList<String> {
val leftBracket = this.count { it == Token.Operator.leftBracket }
val rightBrackets = this.count { it == Token.Operator.rightBracket }
val neededBrackets = leftBracket - rightBrackets
if (neededBrackets <= 0) return this
var fixed = this
repeat(neededBrackets) {
fixed = fixed + Token.Operator.rightBracket
}
return fixed
this.add(Token.Operator.rightBracket)
}
return this
}
private fun List<String>.missingMultiply(): List<String> {
val result = this.toMutableList()
val original = this
var offset = 0
private fun MutableList<String>.missingMultiply(): MutableList<String> {
val iterator = this.listIterator()
fun addTokenAfter(index: Int) {
result.add(index + 1 + offset, Token.Operator.multiply)
offset += 1
}
while (iterator.hasNext()) {
val currentToken = iterator.next()
original.forEachIndexed { index, token ->
when {
// This will not insert multiply between digits because they are grouped into a
// single token. It's not possible to get separate digit tokens near each other
// Things like ["123", "456"] are impossible, will be ["123456"]
token.isDigitToken() ||
token in Token.Const.all ||
token == Token.Operator.rightBracket -> {
val tokenInFront = original.tokenInFront(index) ?: return@forEachIndexed
// Need two tokens for the checks below
if (!iterator.hasNext()) break
when {
tokenInFront == Token.Operator.leftBracket ||
tokenInFront in Token.Func.all ||
tokenInFront in Token.Const.all ||
tokenInFront == Token.Operator.sqrt ||
tokenInFront.isDigitToken() -> {
addTokenAfter(index)
}
}
val isDigit = currentToken.isDigitToken()
val isConst = currentToken in Token.Const.all
val isRightBracket = currentToken == Token.Operator.rightBracket
// may need a multiplication after
if (isDigit || isConst || isRightBracket) {
// Peek next, but then go back
val tokenAfter = iterator.next()
iterator.previous()
if (tokenAfter == Token.Operator.leftBracket ||
tokenAfter in Token.Func.all ||
tokenAfter in Token.Const.all ||
tokenAfter == Token.Operator.sqrt ||
tokenAfter.isDigitToken()) {
iterator.add(Token.Operator.multiply)
}
}
}
return result
return this
}
/**
 * Expands scientific ("E") notation into explicit arithmetic tokens.
 *
 * The rewrite is done through the receiver's own list iterator, so the receiver is
 * modified and then returned.
 *
 * @return the receiver, with every [Token.DisplayOnly.engineeringE] and the sign token
 * after it replaced by an operator, "10" and the power operator.
 * @throws TokenizerException.BadScientificNotation when "E" is the last token or the token
 * after it is neither plus nor minus.
 */
private fun MutableList<String>.unpackNotation(): MutableList<String> {
// Transform 1E+7 ==> 1*10^7
// Transform 1E-7 ==> 1/10^7
val iterator = this.listIterator()
while (iterator.hasNext()) {
if (iterator.next() == Token.DisplayOnly.engineeringE) {
// Drop the "E" token itself
iterator.remove()
// next() fails when "E" was the last token; that is reported as bad notation
val tokenAfterE = try {
iterator.next()
} catch (e: Exception) {
throw TokenizerException.BadScientificNotation()
}
// NOTE(review): the next line looks like a leftover header from the previous version of
// the file (it opens a brace that is never closed in this function) - confirm and drop it
private fun List<String>.unpackAllPercents(): List<String> {
// Drop the sign token as well; the matching operator is inserted in its place below
iterator.remove()
when (tokenAfterE) {
Token.Operator.minus -> iterator.add(Token.Operator.divide)
Token.Operator.plus -> iterator.add(Token.Operator.multiply)
else -> throw TokenizerException.BadScientificNotation()
}
// Inserted after the operator, giving: <operator> 10 ^
iterator.add("10")
iterator.add(Token.Operator.power)
}
}
return this
}
/**
 * Rewrites percent tokens one at a time until none are left.
 *
 * Each pass looks up the first [Token.Operator.percent] and hands its index to
 * [unpackPercentAt]; the list returned by that call is the one searched on the next pass.
 *
 * @return the list produced by the last [unpackPercentAt] call, or the receiver itself
 * when it holds no percent token.
 */
private fun MutableList<String>.unpackAllPercents(): MutableList<String> {
    var current = this
    var percentAt = current.indexOf(Token.Operator.percent)

    // indexOf yields -1 once no percent token remains
    while (percentAt != -1) {
        current = current.unpackPercentAt(percentAt)
        percentAt = current.indexOf(Token.Operator.percent)
    }

    return current
}
}
/**
 * Expands scientific ("E") notation into explicit arithmetic tokens.
 *
 * - `1E+7` becomes `1*10^7`
 * - `1E-7` becomes `1/10^7`
 *
 * The receiver is not modified; the rewrite is done on a mutable copy.
 *
 * @return a new list in which every [Token.DisplayOnly.engineeringE] and the sign token
 * after it are replaced by an operator, "10" and the power operator.
 * @throws TokenizerException.BadScientificNotation when "E" is the last token or the token
 * after it is neither plus nor minus.
 */
private fun List<String>.unpackNotation(): List<String> {
    val result = this.toMutableList()
    val listIterator = result.listIterator()

    while (listIterator.hasNext()) {
        if (listIterator.next() != Token.DisplayOnly.engineeringE) continue

        // Drop the "E" token itself
        listIterator.remove()

        // "E" must be followed by a sign. Check for it explicitly instead of catching
        // whatever exception next() happens to throw at the end of the list.
        if (!listIterator.hasNext()) throw TokenizerException.BadScientificNotation()
        val tokenAfterE = listIterator.next()

        // Drop the sign token as well; the matching operator is inserted in its place
        listIterator.remove()
        when (tokenAfterE) {
            Token.Operator.minus -> listIterator.add(Token.Operator.divide)
            Token.Operator.plus -> listIterator.add(Token.Operator.multiply)
            else -> throw TokenizerException.BadScientificNotation()
        }

        // Inserted after the operator, giving: <operator> 10 ^
        listIterator.add("10")
        listIterator.add(Token.Operator.power)
    }

    return result
}
private fun List<String>.unpackPercentAt(percentIndex: Int): List<String> {
private fun MutableList<String>.unpackPercentAt(percentIndex: Int): MutableList<String> {
var cursor = percentIndex
// get whatever is the percentage
@ -231,9 +227,9 @@ class Tokenizer(private val streamOfTokens: String) {
)
return mutList
}
}
private fun List<String>.getNumberOrExpressionBefore(pos: Int): List<String> {
private fun MutableList<String>.getNumberOrExpressionBefore(pos: Int): List<String> {
val digits = Token.Digit.allWithDot.map { it[0] }
val tokenInFront = this[pos - 1]
@ -258,9 +254,9 @@ class Tokenizer(private val streamOfTokens: String) {
}
return this.subList(cursor, pos)
}
}
private fun List<String>.getBaseBefore(pos: Int): List<String> {
private fun List<String>.getBaseBefore(pos: Int): List<String> {
var cursor = pos
var leftBrackets = 0
var rightBrackets = 0
@ -278,9 +274,6 @@ class Tokenizer(private val streamOfTokens: String) {
cursor += 1
return this.subList(cursor, pos)
}
/**
 * Tells whether this token starts with a character listed in [Token.Digit.allWithDot].
 *
 * Only the first character is inspected.
 */
private fun String.isDigitToken(): Boolean {
    val leading = first().toString()
    return leading in Token.Digit.allWithDot
}
/**
 * Returns the token located right after [index], or `null` when that position is
 * outside the list.
 */
private fun List<String>.tokenInFront(index: Int): String? {
    val nextIndex = index + 1
    return if (nextIndex in indices) this[nextIndex] else null
}
}
/**
 * Tells whether this token starts with a character listed in [Token.Digit.allWithDot].
 *
 * Only the first character is inspected.
 */
private fun String.isDigitToken(): Boolean {
    val leading = first().toString()
    return leading in Token.Digit.allWithDot
}

View File

@ -40,7 +40,7 @@ fun <T : Throwable?> assertExprFail(
}
fun assertLex(expected: List<String>, actual: String) =
assertEquals(expected, Tokenizer(actual).tokenize())
assertEquals(expected, actual.tokenize())
fun assertLex(expected: String, actual: String) =
assertEquals(expected, Tokenizer(actual).tokenize().joinToString(""))
assertEquals(expected, actual.tokenize().joinToString(""))