mirror of
https://github.com/vbatts/go-mtree.git
synced 2025-07-24 07:10:27 +00:00
go: update modules
Signed-off-by: Vincent Batts <vbatts@hashbangbash.com>
This commit is contained in:
parent
00deb3ada6
commit
9e437eee80
166 changed files with 9373 additions and 3493 deletions
86
vendor/github.com/xrash/smetrics/jaro.go
generated
vendored
Normal file
86
vendor/github.com/xrash/smetrics/jaro.go
generated
vendored
Normal file
|
@ -0,0 +1,86 @@
|
|||
package smetrics
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// The Jaro distance. The result is 1 for equal strings, and 0 for completely different strings.
|
||||
func Jaro(a, b string) float64 {
|
||||
// If both strings are zero-length, they are completely equal,
|
||||
// therefore return 1.
|
||||
if len(a) == 0 && len(b) == 0 {
|
||||
return 1
|
||||
}
|
||||
|
||||
// If one string is zero-length, strings are completely different,
|
||||
// therefore return 0.
|
||||
if len(a) == 0 || len(b) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Define the necessary variables for the algorithm.
|
||||
la := float64(len(a))
|
||||
lb := float64(len(b))
|
||||
matchRange := int(math.Max(0, math.Floor(math.Max(la, lb)/2.0)-1))
|
||||
matchesA := make([]bool, len(a))
|
||||
matchesB := make([]bool, len(b))
|
||||
var matches float64 = 0
|
||||
|
||||
// Step 1: Matches
|
||||
// Loop through each character of the first string,
|
||||
// looking for a matching character in the second string.
|
||||
for i := 0; i < len(a); i++ {
|
||||
start := int(math.Max(0, float64(i-matchRange)))
|
||||
end := int(math.Min(lb-1, float64(i+matchRange)))
|
||||
|
||||
for j := start; j <= end; j++ {
|
||||
if matchesB[j] {
|
||||
continue
|
||||
}
|
||||
|
||||
if a[i] == b[j] {
|
||||
matchesA[i] = true
|
||||
matchesB[j] = true
|
||||
matches++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there are no matches, strings are completely different,
|
||||
// therefore return 0.
|
||||
if matches == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Step 2: Transpositions
|
||||
// Loop through the matches' arrays, looking for
|
||||
// unaligned matches. Count the number of unaligned matches.
|
||||
unaligned := 0
|
||||
j := 0
|
||||
for i := 0; i < len(a); i++ {
|
||||
if !matchesA[i] {
|
||||
continue
|
||||
}
|
||||
|
||||
for !matchesB[j] {
|
||||
j++
|
||||
}
|
||||
|
||||
if a[i] != b[j] {
|
||||
unaligned++
|
||||
}
|
||||
|
||||
j++
|
||||
}
|
||||
|
||||
// The number of unaligned matches divided by two, is the number of _transpositions_.
|
||||
transpositions := math.Floor(float64(unaligned / 2))
|
||||
|
||||
// Jaro distance is the average between these three numbers:
|
||||
// 1. matches / length of string A
|
||||
// 2. matches / length of string B
|
||||
// 3. (matches - transpositions/matches)
|
||||
// So, all that divided by three is the final result.
|
||||
return ((matches / la) + (matches / lb) + ((matches - transpositions) / matches)) / 3.0
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue