mirror of
https://github.com/Myzel394/NumberHub.git
synced 2025-06-18 16:25:27 +02:00
Refactor Tokenizer
This commit is contained in:
parent
32eb7422d5
commit
67a4852741
@ -35,7 +35,7 @@ class Expression(
|
|||||||
private val radianMode: Boolean = true,
|
private val radianMode: Boolean = true,
|
||||||
private val roundingMode: RoundingMode = RoundingMode.HALF_EVEN
|
private val roundingMode: RoundingMode = RoundingMode.HALF_EVEN
|
||||||
) {
|
) {
|
||||||
private val tokens = Tokenizer(input).tokenize()
|
private val tokens = input.tokenize()
|
||||||
private var cursorPosition = 0
|
private var cursorPosition = 0
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -26,188 +26,169 @@ sealed class TokenizerException(message: String) : Exception(message) {
|
|||||||
class BadScientificNotation : TokenizerException("Expected plus or minus symbol after \"E\"")
|
class BadScientificNotation : TokenizerException("Expected plus or minus symbol after \"E\"")
|
||||||
}
|
}
|
||||||
|
|
||||||
class Tokenizer(private val streamOfTokens: String) {
|
fun String.tokenize(): List<String> {
|
||||||
// Don't create object at all?
|
var cursor = 0
|
||||||
fun tokenize(): List<String> {
|
val tokens: MutableList<String> = mutableListOf()
|
||||||
var cursor = 0
|
|
||||||
val tokens: MutableList<String> = mutableListOf()
|
|
||||||
|
|
||||||
while (cursor != streamOfTokens.length) {
|
while (cursor != this.length) {
|
||||||
val nextToken = peekTokenAfter(cursor)
|
val nextToken = peekTokenAfter(this, cursor)
|
||||||
|
|
||||||
if (nextToken != null) {
|
if (nextToken != null) {
|
||||||
tokens.add(nextToken)
|
tokens.add(nextToken)
|
||||||
cursor += nextToken.length
|
cursor += nextToken.length
|
||||||
} else {
|
} else {
|
||||||
// Didn't find any token, advance the cursor slowly (by 1 symbol)
|
// Didn't find any token, advance the cursor slowly (by 1 symbol)
|
||||||
cursor++
|
cursor++
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return tokens.repairLexicon()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun peekTokenAfter(cursor: Int): String? {
|
return tokens.repairLexicon()
|
||||||
Token.expressionTokens.forEach { token ->
|
}
|
||||||
val subs = streamOfTokens
|
|
||||||
.substring(
|
|
||||||
cursor,
|
|
||||||
(cursor + token.length).coerceAtMost(streamOfTokens.length)
|
|
||||||
)
|
|
||||||
if (subs == token) {
|
|
||||||
// Got a digit, see if there are other digits coming after
|
|
||||||
if (token in Token.Digit.allWithDot) {
|
|
||||||
val number = streamOfTokens
|
|
||||||
.substring(cursor)
|
|
||||||
.takeWhile { Token.Digit.allWithDot.contains(it.toString()) }
|
|
||||||
|
|
||||||
if (number.count { it.toString() == Token.Digit.dot } > 1) {
|
private fun peekTokenAfter(
|
||||||
throw TokenizerException.TooManyFractionSymbols()
|
streamOfTokens: String,
|
||||||
}
|
cursor: Int
|
||||||
|
): String? {
|
||||||
|
Token.expressionTokens.forEach { token ->
|
||||||
|
val subs = streamOfTokens
|
||||||
|
.substring(
|
||||||
|
cursor,
|
||||||
|
(cursor + token.length).coerceAtMost(streamOfTokens.length)
|
||||||
|
)
|
||||||
|
if (subs == token) {
|
||||||
|
// Got a digit, see if there are other digits coming after
|
||||||
|
if (token in Token.Digit.allWithDot) {
|
||||||
|
val number = streamOfTokens
|
||||||
|
.substring(cursor)
|
||||||
|
.takeWhile { Token.Digit.allWithDot.contains(it.toString()) }
|
||||||
|
|
||||||
return number
|
if (number.count { it.toString() == Token.Digit.dot } > 1) {
|
||||||
}
|
throw TokenizerException.TooManyFractionSymbols()
|
||||||
return token
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun List<String>.repairLexicon(): List<String> {
|
|
||||||
return this
|
|
||||||
.missingClosingBrackets()
|
|
||||||
.unpackNotation()
|
|
||||||
.missingMultiply()
|
|
||||||
.unpackAllPercents()
|
|
||||||
// input like 80%80% should be treated as 80%*80%.
|
|
||||||
// After unpacking we get (80/100)(80/100), the multiply is missing (!!!)
|
|
||||||
// No, we can't unpack before fixing missing multiply.
|
|
||||||
// Ideally we need to add missing multiply for 80%80%
|
|
||||||
// In that case unpackAllPercents gets input with all operators 80%*80% in this case
|
|
||||||
// Can't be done right now since missingMultiply checks for tokens in front only
|
|
||||||
.missingMultiply()
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun List<String>.missingClosingBrackets(): List<String> {
|
|
||||||
val leftBracket = this.count { it == Token.Operator.leftBracket }
|
|
||||||
val rightBrackets = this.count { it == Token.Operator.rightBracket }
|
|
||||||
val neededBrackets = leftBracket - rightBrackets
|
|
||||||
|
|
||||||
if (neededBrackets <= 0) return this
|
|
||||||
|
|
||||||
var fixed = this
|
|
||||||
repeat(neededBrackets) {
|
|
||||||
fixed = fixed + Token.Operator.rightBracket
|
|
||||||
}
|
|
||||||
return fixed
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun List<String>.missingMultiply(): List<String> {
|
|
||||||
val result = this.toMutableList()
|
|
||||||
val original = this
|
|
||||||
var offset = 0
|
|
||||||
|
|
||||||
fun addTokenAfter(index: Int) {
|
|
||||||
result.add(index + 1 + offset, Token.Operator.multiply)
|
|
||||||
offset += 1
|
|
||||||
}
|
|
||||||
|
|
||||||
original.forEachIndexed { index, token ->
|
|
||||||
when {
|
|
||||||
// This will not insert multiply between digits because they are grouped into a
|
|
||||||
// single token. It's not possible to get separate digit tokens near each other
|
|
||||||
// Things like ["123", "456"] are impossible, will be ["123456"]
|
|
||||||
token.isDigitToken() ||
|
|
||||||
token in Token.Const.all ||
|
|
||||||
token == Token.Operator.rightBracket -> {
|
|
||||||
val tokenInFront = original.tokenInFront(index) ?: return@forEachIndexed
|
|
||||||
|
|
||||||
when {
|
|
||||||
tokenInFront == Token.Operator.leftBracket ||
|
|
||||||
tokenInFront in Token.Func.all ||
|
|
||||||
tokenInFront in Token.Const.all ||
|
|
||||||
tokenInFront == Token.Operator.sqrt ||
|
|
||||||
tokenInFront.isDigitToken() -> {
|
|
||||||
addTokenAfter(index)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun List<String>.unpackAllPercents(): List<String> {
|
|
||||||
var result = this
|
|
||||||
while (result.contains(Token.Operator.percent)) {
|
|
||||||
val percIndex = result.indexOf(Token.Operator.percent)
|
|
||||||
result = result.unpackPercentAt(percIndex)
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun List<String>.unpackNotation(): List<String> {
|
|
||||||
// Transform 1E+7 ==> 1*10^7
|
|
||||||
// Transform 1E-7 ==> 1/10^7
|
|
||||||
val result = this.toMutableList()
|
|
||||||
val listIterator = result.listIterator()
|
|
||||||
|
|
||||||
while (listIterator.hasNext()) {
|
|
||||||
if (listIterator.next() == Token.DisplayOnly.engineeringE) {
|
|
||||||
listIterator.remove()
|
|
||||||
|
|
||||||
val tokenAfterE = try {
|
|
||||||
listIterator.next()
|
|
||||||
} catch (e: Exception) {
|
|
||||||
throw TokenizerException.BadScientificNotation()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
listIterator.remove()
|
return number
|
||||||
|
}
|
||||||
|
return token
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
when (tokenAfterE) {
|
private fun MutableList<String>.repairLexicon(): List<String> {
|
||||||
Token.Operator.minus -> listIterator.add(Token.Operator.divide)
|
return this
|
||||||
Token.Operator.plus -> listIterator.add(Token.Operator.multiply)
|
.missingClosingBrackets()
|
||||||
else -> throw TokenizerException.BadScientificNotation()
|
.unpackNotation()
|
||||||
}
|
.missingMultiply()
|
||||||
|
.unpackAllPercents()
|
||||||
|
// input like 80%80% should be treated as 80%*80%.
|
||||||
|
// After unpacking we get (80/100)(80/100), the multiply is missing (!!!)
|
||||||
|
// No, we can't unpack before fixing missing multiply.
|
||||||
|
// Ideally we need to add missing multiply for 80%80%
|
||||||
|
// In that case unpackAllPercents gets input with all operators 80%*80% in this case
|
||||||
|
// Can't be done right now since missingMultiply checks for tokens in front only
|
||||||
|
.missingMultiply()
|
||||||
|
}
|
||||||
|
|
||||||
listIterator.add("10")
|
private fun MutableList<String>.missingClosingBrackets(): MutableList<String> {
|
||||||
listIterator.add(Token.Operator.power)
|
val leftBracket = this.count { it == Token.Operator.leftBracket }
|
||||||
|
val rightBrackets = this.count { it == Token.Operator.rightBracket }
|
||||||
|
val neededBrackets = leftBracket - rightBrackets
|
||||||
|
|
||||||
|
if (neededBrackets <= 0) return this
|
||||||
|
|
||||||
|
repeat(neededBrackets) {
|
||||||
|
this.add(Token.Operator.rightBracket)
|
||||||
|
}
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun MutableList<String>.missingMultiply(): MutableList<String> {
|
||||||
|
val iterator = this.listIterator()
|
||||||
|
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
val currentToken = iterator.next()
|
||||||
|
|
||||||
|
// Need two tokens for checks
|
||||||
|
if (!iterator.hasNext()) break
|
||||||
|
|
||||||
|
val isDigit = currentToken.isDigitToken()
|
||||||
|
val isConst = currentToken in Token.Const.all
|
||||||
|
val isRightBracket = currentToken == Token.Operator.rightBracket
|
||||||
|
|
||||||
|
// may need a multiplication after
|
||||||
|
if (isDigit || isConst || isRightBracket) {
|
||||||
|
// Peek next, but then go back
|
||||||
|
val tokenAfter = iterator.next()
|
||||||
|
iterator.previous()
|
||||||
|
|
||||||
|
if (tokenAfter == Token.Operator.leftBracket ||
|
||||||
|
tokenAfter in Token.Func.all ||
|
||||||
|
tokenAfter in Token.Const.all ||
|
||||||
|
tokenAfter == Token.Operator.sqrt ||
|
||||||
|
tokenAfter.isDigitToken()) {
|
||||||
|
|
||||||
|
iterator.add(Token.Operator.multiply)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun List<String>.unpackPercentAt(percentIndex: Int): List<String> {
|
return this
|
||||||
var cursor = percentIndex
|
}
|
||||||
|
|
||||||
// get whatever is the percentage
|
private fun MutableList<String>.unpackNotation(): MutableList<String> {
|
||||||
val percentage = this.getNumberOrExpressionBefore(percentIndex)
|
// Transform 1E+7 ==> 1*10^7
|
||||||
// Move cursor
|
// Transform 1E-7 ==> 1/10^7
|
||||||
cursor -= percentage.size
|
val iterator = this.listIterator()
|
||||||
|
|
||||||
// get the operator in front
|
while (iterator.hasNext()) {
|
||||||
cursor -= 1
|
if (iterator.next() == Token.DisplayOnly.engineeringE) {
|
||||||
val operator = this.getOrNull(cursor)
|
iterator.remove()
|
||||||
|
|
||||||
// Don't go further
|
val tokenAfterE = try {
|
||||||
if ((operator == null) or (operator !in listOf(Token.Operator.plus, Token.Operator.minus))) {
|
iterator.next()
|
||||||
val mutList = this.toMutableList()
|
} catch (e: Exception) {
|
||||||
|
throw TokenizerException.BadScientificNotation()
|
||||||
|
}
|
||||||
|
|
||||||
// Remove percentage
|
iterator.remove()
|
||||||
mutList.removeAt(percentIndex)
|
|
||||||
|
|
||||||
//Add opening bracket before percentage
|
when (tokenAfterE) {
|
||||||
mutList.add(percentIndex - percentage.size, Token.Operator.leftBracket)
|
Token.Operator.minus -> iterator.add(Token.Operator.divide)
|
||||||
|
Token.Operator.plus -> iterator.add(Token.Operator.multiply)
|
||||||
|
else -> throw TokenizerException.BadScientificNotation()
|
||||||
|
}
|
||||||
|
|
||||||
// Add "/ 100" and closing bracket
|
iterator.add("10")
|
||||||
mutList.addAll(percentIndex + 1, listOf(Token.Operator.divide, "100", Token.Operator.rightBracket))
|
iterator.add(Token.Operator.power)
|
||||||
|
|
||||||
return mutList
|
|
||||||
}
|
}
|
||||||
// Get the base
|
}
|
||||||
val base = this.getBaseBefore(cursor)
|
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun MutableList<String>.unpackAllPercents(): MutableList<String> {
|
||||||
|
var result = this
|
||||||
|
while (result.contains(Token.Operator.percent)) {
|
||||||
|
val percIndex = result.indexOf(Token.Operator.percent)
|
||||||
|
result = result.unpackPercentAt(percIndex)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun MutableList<String>.unpackPercentAt(percentIndex: Int): MutableList<String> {
|
||||||
|
var cursor = percentIndex
|
||||||
|
|
||||||
|
// get whatever is the percentage
|
||||||
|
val percentage = this.getNumberOrExpressionBefore(percentIndex)
|
||||||
|
// Move cursor
|
||||||
|
cursor -= percentage.size
|
||||||
|
|
||||||
|
// get the operator in front
|
||||||
|
cursor -= 1
|
||||||
|
val operator = this.getOrNull(cursor)
|
||||||
|
|
||||||
|
// Don't go further
|
||||||
|
if ((operator == null) or (operator !in listOf(Token.Operator.plus, Token.Operator.minus))) {
|
||||||
val mutList = this.toMutableList()
|
val mutList = this.toMutableList()
|
||||||
|
|
||||||
// Remove percentage
|
// Remove percentage
|
||||||
@ -216,71 +197,83 @@ class Tokenizer(private val streamOfTokens: String) {
|
|||||||
//Add opening bracket before percentage
|
//Add opening bracket before percentage
|
||||||
mutList.add(percentIndex - percentage.size, Token.Operator.leftBracket)
|
mutList.add(percentIndex - percentage.size, Token.Operator.leftBracket)
|
||||||
|
|
||||||
// Add "/ 100" and other stuff
|
// Add "/ 100" and closing bracket
|
||||||
mutList.addAll(
|
mutList.addAll(percentIndex + 1, listOf(Token.Operator.divide, "100", Token.Operator.rightBracket))
|
||||||
percentIndex + 1,
|
|
||||||
listOf(
|
|
||||||
Token.Operator.divide,
|
|
||||||
"100",
|
|
||||||
Token.Operator.multiply,
|
|
||||||
Token.Operator.leftBracket,
|
|
||||||
*base.toTypedArray(),
|
|
||||||
Token.Operator.rightBracket,
|
|
||||||
Token.Operator.rightBracket
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return mutList
|
return mutList
|
||||||
}
|
}
|
||||||
|
// Get the base
|
||||||
|
val base = this.getBaseBefore(cursor)
|
||||||
|
val mutList = this.toMutableList()
|
||||||
|
|
||||||
private fun List<String>.getNumberOrExpressionBefore(pos: Int): List<String> {
|
// Remove percentage
|
||||||
val digits = Token.Digit.allWithDot.map { it[0] }
|
mutList.removeAt(percentIndex)
|
||||||
|
|
||||||
val tokenInFront = this[pos - 1]
|
//Add opening bracket before percentage
|
||||||
|
mutList.add(percentIndex - percentage.size, Token.Operator.leftBracket)
|
||||||
|
|
||||||
// Just number
|
// Add "/ 100" and other stuff
|
||||||
if (tokenInFront.all { it in digits }) return listOf(tokenInFront)
|
mutList.addAll(
|
||||||
|
percentIndex + 1,
|
||||||
|
listOf(
|
||||||
|
Token.Operator.divide,
|
||||||
|
"100",
|
||||||
|
Token.Operator.multiply,
|
||||||
|
Token.Operator.leftBracket,
|
||||||
|
*base.toTypedArray(),
|
||||||
|
Token.Operator.rightBracket,
|
||||||
|
Token.Operator.rightBracket
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
// For cases like "100+(2+5)|%". The check above won't pass, so the next expected thing is
|
return mutList
|
||||||
// a number in brackets. Anything else is not expected.
|
|
||||||
if (tokenInFront != Token.Operator.rightBracket) throw TokenizerException.FailedToUnpackNumber()
|
|
||||||
|
|
||||||
// Start walking left until we get balanced brackets
|
|
||||||
var cursor = pos - 1
|
|
||||||
var leftBrackets = 0
|
|
||||||
var rightBrackets = 1 // We set 1 because we start with closing bracket
|
|
||||||
|
|
||||||
while (leftBrackets != rightBrackets) {
|
|
||||||
cursor--
|
|
||||||
val currentToken = this[cursor]
|
|
||||||
if (currentToken == Token.Operator.leftBracket) leftBrackets++
|
|
||||||
if (currentToken == Token.Operator.rightBracket) rightBrackets++
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.subList(cursor, pos)
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun List<String>.getBaseBefore(pos: Int): List<String> {
|
|
||||||
var cursor = pos
|
|
||||||
var leftBrackets = 0
|
|
||||||
var rightBrackets = 0
|
|
||||||
|
|
||||||
while ((--cursor >= 0)) {
|
|
||||||
val currentToken = this[cursor]
|
|
||||||
|
|
||||||
if (currentToken == Token.Operator.leftBracket) leftBrackets++
|
|
||||||
if (currentToken == Token.Operator.rightBracket) rightBrackets++
|
|
||||||
|
|
||||||
if (leftBrackets > rightBrackets) break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return cursor back to last token
|
|
||||||
cursor += 1
|
|
||||||
|
|
||||||
return this.subList(cursor, pos)
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun String.isDigitToken(): Boolean = first().toString() in Token.Digit.allWithDot
|
|
||||||
|
|
||||||
private fun List<String>.tokenInFront(index: Int): String? = getOrNull(index + 1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private fun MutableList<String>.getNumberOrExpressionBefore(pos: Int): List<String> {
|
||||||
|
val digits = Token.Digit.allWithDot.map { it[0] }
|
||||||
|
|
||||||
|
val tokenInFront = this[pos - 1]
|
||||||
|
|
||||||
|
// Just number
|
||||||
|
if (tokenInFront.all { it in digits }) return listOf(tokenInFront)
|
||||||
|
|
||||||
|
// For cases like "100+(2+5)|%". The check above won't pass, so the next expected thing is
|
||||||
|
// a number in brackets. Anything else is not expected.
|
||||||
|
if (tokenInFront != Token.Operator.rightBracket) throw TokenizerException.FailedToUnpackNumber()
|
||||||
|
|
||||||
|
// Start walking left until we get balanced brackets
|
||||||
|
var cursor = pos - 1
|
||||||
|
var leftBrackets = 0
|
||||||
|
var rightBrackets = 1 // We set 1 because we start with closing bracket
|
||||||
|
|
||||||
|
while (leftBrackets != rightBrackets) {
|
||||||
|
cursor--
|
||||||
|
val currentToken = this[cursor]
|
||||||
|
if (currentToken == Token.Operator.leftBracket) leftBrackets++
|
||||||
|
if (currentToken == Token.Operator.rightBracket) rightBrackets++
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.subList(cursor, pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun List<String>.getBaseBefore(pos: Int): List<String> {
|
||||||
|
var cursor = pos
|
||||||
|
var leftBrackets = 0
|
||||||
|
var rightBrackets = 0
|
||||||
|
|
||||||
|
while ((--cursor >= 0)) {
|
||||||
|
val currentToken = this[cursor]
|
||||||
|
|
||||||
|
if (currentToken == Token.Operator.leftBracket) leftBrackets++
|
||||||
|
if (currentToken == Token.Operator.rightBracket) rightBrackets++
|
||||||
|
|
||||||
|
if (leftBrackets > rightBrackets) break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return cursor back to last token
|
||||||
|
cursor += 1
|
||||||
|
|
||||||
|
return this.subList(cursor, pos)
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun String.isDigitToken(): Boolean = first().toString() in Token.Digit.allWithDot
|
||||||
|
@ -40,7 +40,7 @@ fun <T : Throwable?> assertExprFail(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fun assertLex(expected: List<String>, actual: String) =
|
fun assertLex(expected: List<String>, actual: String) =
|
||||||
assertEquals(expected, Tokenizer(actual).tokenize())
|
assertEquals(expected, actual.tokenize())
|
||||||
|
|
||||||
fun assertLex(expected: String, actual: String) =
|
fun assertLex(expected: String, actual: String) =
|
||||||
assertEquals(expected, Tokenizer(actual).tokenize().joinToString(""))
|
assertEquals(expected, actual.tokenize().joinToString(""))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user