mirror of
https://github.com/Myzel394/NumberHub.git
synced 2025-06-19 08:45:27 +02:00
Levenshtein distance improvements
This commit is contained in:
parent
63332d9055
commit
033e783e4c
@ -452,13 +452,7 @@ class MainViewModel @Inject constructor(
|
|||||||
} else {
|
} else {
|
||||||
// We are searching for a specific unit, we don't care about popularity
|
// We are searching for a specific unit, we don't care about popularity
|
||||||
// We need search accuracy
|
// We need search accuracy
|
||||||
basicFilteredUnits
|
basicFilteredUnits.sortByLev(query)
|
||||||
.sortedBy {
|
|
||||||
it.renderedName
|
|
||||||
.substring(0, minOf(query.length, it.renderedName.length))
|
|
||||||
.lev(query)
|
|
||||||
}
|
|
||||||
.sortedByDescending { it.renderedName.contains(query) }
|
|
||||||
}
|
}
|
||||||
// Group by unit group
|
// Group by unit group
|
||||||
.groupBy { it.group }
|
.groupBy { it.group }
|
||||||
|
@ -8,6 +8,7 @@ import com.sadellie.unitto.data.KEY_COMMA
|
|||||||
import com.sadellie.unitto.data.KEY_DOT
|
import com.sadellie.unitto.data.KEY_DOT
|
||||||
import com.sadellie.unitto.data.KEY_E
|
import com.sadellie.unitto.data.KEY_E
|
||||||
import com.sadellie.unitto.data.preferences.Separator
|
import com.sadellie.unitto.data.preferences.Separator
|
||||||
|
import com.sadellie.unitto.data.units.AbstractUnit
|
||||||
import java.math.BigDecimal
|
import java.math.BigDecimal
|
||||||
import java.math.RoundingMode
|
import java.math.RoundingMode
|
||||||
import java.text.NumberFormat
|
import java.text.NumberFormat
|
||||||
@ -75,12 +76,12 @@ object Formatter {
|
|||||||
* @param[prefScale] Is the preferred scale, the one which will be compared against
|
* @param[prefScale] Is the preferred scale, the one which will be compared against
|
||||||
*/
|
*/
|
||||||
fun BigDecimal.setMinimumRequiredScale(prefScale: Int): BigDecimal {
|
fun BigDecimal.setMinimumRequiredScale(prefScale: Int): BigDecimal {
|
||||||
/* Here we are getting the amount of zeros in fractional part before non zero value
|
/**
|
||||||
|
* Here we are getting the amount of zeros in fractional part before non zero value
|
||||||
* For example, for 0.00000123456 we need the length of 00000
|
* For example, for 0.00000123456 we need the length of 00000
|
||||||
* Next we add one to get the position of the first non zero value
|
* Next we add one to get the position of the first non zero value
|
||||||
*
|
|
||||||
* Also, this block is only for VERY small numbers
|
* Also, this block is only for VERY small numbers
|
||||||
* */
|
*/
|
||||||
return this.setScale(
|
return this.setScale(
|
||||||
max(
|
max(
|
||||||
prefScale,
|
prefScale,
|
||||||
@ -102,31 +103,33 @@ fun openLink(mContext: Context, url: String) {
|
|||||||
mContext.startActivity(Intent(Intent.ACTION_VIEW).setData(Uri.parse(url)))
|
mContext.startActivity(Intent(Intent.ACTION_VIEW).setData(Uri.parse(url)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute Levenshtein Distance. Doesn't really matter which string goes first
|
* Compute Levenshtein Distance. Doesn't really matter which string goes first
|
||||||
*
|
*
|
||||||
* @param stringB Second string
|
* @param stringToCompare Second string
|
||||||
* @return The amount of changes that are needed to transform one string into another
|
* @return The amount of changes that are needed to transform one string into another
|
||||||
*/
|
*/
|
||||||
fun CharSequence.lev(stringB: String): Int {
|
fun String.lev(stringToCompare: String): Int {
|
||||||
// Skipping computation for this cases
|
val stringA = this.lowercase()
|
||||||
if (this == stringB) return 0
|
val stringB = stringToCompare.lowercase()
|
||||||
if (this.isEmpty()) return stringB.length
|
|
||||||
// This case is basically unreal in this app, because stringB is a unit name and are never empty
|
|
||||||
if (stringB.isEmpty()) return this.length
|
|
||||||
|
|
||||||
var cost = IntArray(this.length + 1) { it }
|
// Skipping computation for these cases
|
||||||
var newCost = IntArray(this.length + 1)
|
if (stringA == stringB) return 0
|
||||||
|
if (stringA.isEmpty()) return stringB.length
|
||||||
|
// This case is basically unreal in this app, because stringToCompare is a unit name and they are never empty
|
||||||
|
if (stringB.isEmpty()) return stringA.length
|
||||||
|
|
||||||
|
var cost = IntArray(stringA.length + 1) { it }
|
||||||
|
var newCost = IntArray(stringA.length + 1)
|
||||||
|
|
||||||
for (i in 1..stringB.length) {
|
for (i in 1..stringB.length) {
|
||||||
// basically shifting this to the right by 1 each time
|
// basically shifting this to the right by 1 each time
|
||||||
newCost[0] = i
|
newCost[0] = i
|
||||||
|
|
||||||
for (j in 1..this.length) {
|
for (j in 1..stringA.length) {
|
||||||
newCost[j] = minOf(
|
newCost[j] = minOf(
|
||||||
// Adding 1 if they don't match, i.e. need to replace
|
// Adding 1 if they don't match, i.e. need to replace
|
||||||
cost[j - 1] + if (this[j - 1] == stringB[i - 1]) 0 else 1,
|
cost[j - 1] + if (stringA[j - 1] == stringB[i - 1]) 0 else 1,
|
||||||
// Insert
|
// Insert
|
||||||
cost[j] + 1,
|
cost[j] + 1,
|
||||||
// Delete
|
// Delete
|
||||||
@ -140,3 +143,58 @@ fun CharSequence.lev(stringB: String): Int {
|
|||||||
|
|
||||||
return cost[this.length]
|
return cost[this.length]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sorts sequence of units by Levenshtein distance
|
||||||
|
*
|
||||||
|
* @param stringA String for Levenshtein distance
|
||||||
|
* @return Sorted sequence of units. Units with lower Levenshtein distance are higher
|
||||||
|
*/
|
||||||
|
fun Sequence<AbstractUnit>.sortByLev(stringA: String): Sequence<AbstractUnit> {
|
||||||
|
// We don't need units whose name is too different: a distance of half the query length or more means at least half of the symbols are wrong
|
||||||
|
val threshold = stringA.length / 2
|
||||||
|
val unitsWithDist = mutableListOf<Pair<AbstractUnit, Int>>()
|
||||||
|
this.forEach {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* There is a chance that the unit name doesn't need any edits (it contains the whole query)
|
||||||
|
* So computing levDist is a waste of resources
|
||||||
|
*
|
||||||
|
* We just add this unit and assume that levDist is '1'. Not '0' so that such units
|
||||||
|
* will not always be on top of the list (maybe unit contains query in its name
|
||||||
|
* but it's not the desired one so it will compete with other units that are possibly
|
||||||
|
* the ones user needs)
|
||||||
|
*/
|
||||||
|
if (it.renderedName.contains(stringA)) {
|
||||||
|
unitsWithDist.add(Pair(it, 1))
|
||||||
|
return@forEach
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Levenshtein Distance for this specific name of this unit
|
||||||
|
*
|
||||||
|
* We use substring so that we compare not the whole unit name, but only part of it
|
||||||
|
* It's required because without it levDist will be too high for units with longer
|
||||||
|
* names than the search query
|
||||||
|
*
|
||||||
|
* For example:
|
||||||
|
* Search query is 'Kelometer' and unit name is 'Kilometer per hour'
|
||||||
|
* Without substring levDist will be 10 (1 replacement + 9 insertions) which means that this unit will be skipped
|
||||||
|
*
|
||||||
|
* With substring levDist will be 1 (only 'e' -> 'i' differs) so unit will be included
|
||||||
|
*/
|
||||||
|
val levDist = it.renderedName
|
||||||
|
.substring(0, minOf(stringA.length, it.renderedName.length))
|
||||||
|
.lev(stringA)
|
||||||
|
|
||||||
|
// Threshold
|
||||||
|
if (levDist < threshold) {
|
||||||
|
unitsWithDist.add(Pair(it, levDist))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Sorting by levDist and getting units
|
||||||
|
return unitsWithDist
|
||||||
|
.sortedBy { it.second }
|
||||||
|
.map { it.first }
|
||||||
|
.asSequence()
|
||||||
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
package com.sadellie.unitto.screens
|
||||||
|
|
||||||
|
import com.sadellie.unitto.data.units.AbstractUnit
|
||||||
|
import com.sadellie.unitto.data.units.MyUnit
|
||||||
|
import com.sadellie.unitto.data.units.UnitGroup
|
||||||
|
import org.junit.Assert.assertEquals
|
||||||
|
import org.junit.Test
|
||||||
|
import java.math.BigDecimal
|
||||||
|
|
||||||
|
val baseList: List<AbstractUnit> = listOf(
|
||||||
|
"Attometer",
|
||||||
|
"Nanometer",
|
||||||
|
"Millimeter",
|
||||||
|
"Meter",
|
||||||
|
"Kilometer",
|
||||||
|
"Mile",
|
||||||
|
"Pound",
|
||||||
|
"Kilometer per square"
|
||||||
|
).map { name ->
|
||||||
|
MyUnit("", BigDecimal.ONE, UnitGroup.ANGLE, 0, 0)
|
||||||
|
.also { it.renderedName = name }
|
||||||
|
}
|
||||||
|
|
||||||
|
class LevenshteinFilterAndSortTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun testOneEdit() {
|
||||||
|
val searchQuery = "Kelometer"
|
||||||
|
val result = baseList.asSequence().sortByLev(searchQuery).map { it.renderedName }.toList()
|
||||||
|
println(result)
|
||||||
|
assertEquals(
|
||||||
|
listOf("Kilometer", "Kilometer per square", "Attometer", "Nanometer"),
|
||||||
|
result
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun testLongQuery() {
|
||||||
|
val searchQuery = "Kelometers per"
|
||||||
|
val result = baseList.asSequence().sortByLev(searchQuery).map { it.renderedName }.toList()
|
||||||
|
println(result)
|
||||||
|
assertEquals(
|
||||||
|
listOf("Kilometer per square", "Kilometer"),
|
||||||
|
result
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun testMultipleMatches() {
|
||||||
|
val searchQuery = "meter"
|
||||||
|
val result = baseList.asSequence().sortByLev(searchQuery).map { it.renderedName }.toList()
|
||||||
|
println(result)
|
||||||
|
assertEquals(
|
||||||
|
listOf("Meter", "Attometer", "Nanometer", "Millimeter", "Kilometer","Kilometer per square"),
|
||||||
|
result
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun testNone() {
|
||||||
|
val searchQuery = "Very long unit name that doesn't exist"
|
||||||
|
val result = baseList.asSequence().sortByLev(searchQuery).map { it.renderedName }.toList()
|
||||||
|
println(result)
|
||||||
|
assertEquals(
|
||||||
|
listOf<String>(),
|
||||||
|
result
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
@ -40,4 +40,9 @@ class LevenshteinTest {
|
|||||||
fun levEmptyB() {
|
fun levEmptyB() {
|
||||||
assertEquals(9, "red truck".lev(""))
|
assertEquals(9, "red truck".lev(""))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun levDifferentCases() {
|
||||||
|
assertEquals(0, "red truck".lev("red TRUCK"))
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user