Skip to content

Commit f80057c

Browse files
committed
Checked, marked as Immutable and code clean
1 parent 21f36df commit f80057c

File tree

2 files changed

+48
-16
lines changed

2 files changed

+48
-16
lines changed

src/main/java/info/debatty/java/stringsimilarity/NGram.java

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package info.debatty.java.stringsimilarity;
22

33
import info.debatty.java.stringsimilarity.interfaces.NormalizedStringDistance;
4+
import net.jcip.annotations.Immutable;
45

56
/**
67
* N-Gram Similarity as defined by Kondrak, "N-Gram Similarity and Distance",
@@ -13,20 +14,34 @@
1314
*
1415
* http://webdocs.cs.ualberta.ca/~kondrak/papers/spire05.pdf
1516
*/
17+
@Immutable
1618
public class NGram implements NormalizedStringDistance {
1719

20+
private static final int DEFAULT_N = 2;
1821
private final int n;
1922

20-
public NGram(int n) {
23+
/**
24+
* Instantiate with given value for n-gram length.
25+
* @param n length of the n-grams, i.e. the number of characters per gram
26+
*/
27+
public NGram(final int n) {
2128
this.n = n;
2229
}
2330

31+
/**
32+
* Instantiate with default value for n-gram length (2).
33+
*/
2434
public NGram() {
25-
this.n = 2;
35+
this.n = DEFAULT_N;
2636
}
2737

28-
@Override
29-
public double distance(String s0, String s1) {
38+
/**
39+
* Compute n-gram distance.
40+
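* @param s0 first string to compare; must not be null (its length is read)
41+
* @param s1 second string to compare; must not be null (its length is read)
42+
* @return the n-gram distance between s0 and s1 (presumably in [0, 1], since the class implements NormalizedStringDistance; confirm)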
43+
*/
44+
public final double distance(final String s0, final String s1) {
3045
final char special = '\n';
3146
final int sl = s0.length();
3247
final int tl = s1.length();
@@ -50,9 +65,9 @@ public double distance(String s0, String s1) {
5065
}
5166

5267
char[] sa = new char[sl + n - 1];
53-
float p[]; //'previous' cost array, horizontally
54-
float d[]; // cost array, horizontally
55-
float _d[]; //placeholder to assist in swapping p and d
68+
float[] p; //'previous' cost array, horizontally
69+
float[] d; // cost array, horizontally
70+
float[] d2; //placeholder to assist in swapping p and d
5671

5772
//construct sa with prefix
5873
for (int i = 0; i < sa.length; i++) {
@@ -76,7 +91,7 @@ public double distance(String s0, String s1) {
7691
}
7792

7893
for (j = 1; j <= tl; j++) {
79-
//construct t_j n-gram
94+
//construct t_j n-gram
8095
if (j < n) {
8196
for (int ti = 0; ti < n - j; ti++) {
8297
t_j[ti] = special; //add prefix
@@ -95,18 +110,21 @@ public double distance(String s0, String s1) {
95110
for (int ni = 0; ni < n; ni++) {
96111
if (sa[i - 1 + ni] != t_j[ni]) {
97112
cost++;
98-
} else if (sa[i - 1 + ni] == special) { //discount matches on prefix
113+
} else if (sa[i - 1 + ni] == special) {
114+
//discount matches on prefix
99115
tn--;
100116
}
101117
}
102118
float ec = (float) cost / tn;
103-
// minimum of cell to the left+1, to the top+1, diagonally left and up +cost
104-
d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + ec);
119+
// minimum of cell to the left+1, to the top+1,
120+
// diagonally left and up +cost
121+
d[i] = Math.min(
122+
Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + ec);
105123
}
106124
// copy current distance counts to 'previous row' distance counts
107-
_d = p;
125+
d2 = p;
108126
p = d;
109-
d = _d;
127+
d = d2;
110128
}
111129

112130
// our last action in the above loop was to switch d and p, so p now

src/main/java/info/debatty/java/stringsimilarity/NormalizedLevenshtein.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
import info.debatty.java.stringsimilarity.interfaces.NormalizedStringSimilarity;
2727
import info.debatty.java.stringsimilarity.interfaces.NormalizedStringDistance;
28+
import net.jcip.annotations.Immutable;
2829

2930
/**
3031
* This distance is computed as levenshtein distance divided by the length of
@@ -34,16 +35,29 @@
3435
*
3536
* @author Thibault Debatty
3637
*/
37-
public class NormalizedLevenshtein implements
38+
@Immutable
39+
public class NormalizedLevenshtein implements
3840
NormalizedStringDistance, NormalizedStringSimilarity {
3941

4042
private final Levenshtein l = new Levenshtein();
4143

42-
public double distance(String s1, String s2) {
44+
/**
45+
* Compute distance as Levenshtein(s1, s2) / max(|s1|, |s2|).
46+
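* @param s1 first string to compare
47+
* @param s2 second string to compare
48+
* @return the Levenshtein distance divided by the length of the longer string; note that the divisor is zero when both strings are empty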
49+
*/
50+
public final double distance(final String s1, final String s2) {
4351
return l.distance(s1, s2) / Math.max(s1.length(), s2.length());
4452
}
4553

46-
public double similarity(String s1, String s2) {
54+
/**
55+
* Return 1 - distance.
56+
* @param s1 first string to compare
57+
* @param s2 second string to compare
58+
* @return 1.0 minus the value of distance(s1, s2)
59+
*/
60+
public final double similarity(final String s1, final String s2) {
4761
return 1.0 - distance(s1, s2);
4862
}
4963

0 commit comments

Comments
 (0)