30
30
import java .util .TreeSet ;
31
31
32
32
/**
33
- * Sparse vector of int, implemented using two arrays
33
+ * Sparse vector of int, implemented using two arrays.
34
34
* @author Thibault Debatty
35
35
*/
36
36
public class SparseIntegerVector implements Serializable {
37
-
38
- protected int [] keys ;
39
- protected int [] values ;
40
- protected int size = 0 ;
41
37
42
- public SparseIntegerVector (int size ) {
38
+ private int [] keys ;
39
+ private int [] values ;
40
+ private int size = 0 ;
41
+
42
+ private static final int DEFAULT_SIZE = 20 ;
43
+
44
+ /**
45
+ * Create a sparse vector whose key and value arrays are allocated with the given length.
46
+ * @param size length of the allocated key and value arrays (the element count itself is not changed here)
47
+ */
48
+ public SparseIntegerVector (final int size ) {
43
49
keys = new int [size ];
44
50
values = new int [size ];
45
51
}
46
-
52
+
53
+ /**
54
+ * Create a sparse vector with the default array length.
55
+ * Delegates to the sized constructor with DEFAULT_SIZE (20).
56
+ */
47
57
public SparseIntegerVector () {
48
- this (20 );
58
+ this (DEFAULT_SIZE );
49
59
}
50
-
51
- public SparseIntegerVector (HashMap <Integer , Integer > hashmap ) {
60
+
61
+ /**
62
+ * Sparse vector of int, implemented using two arrays.
63
+ * @param hashmap map from key (index) to value; its keys are visited in ascending order
64
+ */
65
+ public SparseIntegerVector (final HashMap <Integer , Integer > hashmap ) {
52
66
this (hashmap .size ());
53
67
SortedSet <Integer > sorted_keys = new TreeSet <Integer >(hashmap .keySet ());
54
68
for (int key : sorted_keys ) {
@@ -59,17 +73,17 @@ public SparseIntegerVector(HashMap<Integer, Integer> hashmap) {
59
73
}
60
74
61
75
/**
62
- *
63
- * @param array
76
+ * Sparse vector of int, implemented using two arrays.
77
+ * @param array
64
78
*/
65
- public SparseIntegerVector (int [] array ) {
66
-
79
+ public SparseIntegerVector (final int [] array ) {
80
+
67
81
for (int i = 0 ; i < array .length ; i ++) {
68
82
if (array [i ] != 0 ) {
69
83
size ++;
70
84
}
71
85
}
72
-
86
+
73
87
keys = new int [size ];
74
88
values = new int [size ];
75
89
int j = 0 ;
@@ -81,8 +95,14 @@ public SparseIntegerVector(int[] array) {
81
95
}
82
96
}
83
97
}
84
-
85
- public double cosineSimilarity (SparseIntegerVector other ) {
98
+
99
+ /**
100
+ * Compute and return the cosine similarity (cosine of angle between both
101
+ * vectors).
102
+ * @param other
103
+ * @return
104
+ */
105
+ public final double cosineSimilarity (final SparseIntegerVector other ) {
86
106
double den = this .norm () * other .norm ();
87
107
double agg = 0 ;
88
108
int i = 0 ;
@@ -92,7 +112,7 @@ public double cosineSimilarity(SparseIntegerVector other) {
92
112
int k2 = other .keys [j ];
93
113
94
114
if (k1 == k2 ) {
95
- agg += this .values [i ] * other .values [j ] / den ;
115
+ agg += 1.0 * this .values [i ] * other .values [j ] / den ;
96
116
i ++;
97
117
j ++;
98
118
@@ -104,13 +124,13 @@ public double cosineSimilarity(SparseIntegerVector other) {
104
124
}
105
125
return agg ;
106
126
}
107
-
127
+
108
128
/**
109
- *
129
+ * Compute and return the dot product.
110
130
* @param other
111
- * @return
131
+ * @return
112
132
*/
113
- public double dotProduct (SparseIntegerVector other ) {
133
+ public final double dotProduct (final SparseIntegerVector other ) {
114
134
double agg = 0 ;
115
135
int i = 0 ;
116
136
int j = 0 ;
@@ -119,7 +139,7 @@ public double dotProduct(SparseIntegerVector other) {
119
139
int k2 = other .keys [j ];
120
140
121
141
if (k1 == k2 ) {
122
- agg += this .values [i ] * other .values [j ];
142
+ agg += 1.0 * this .values [i ] * other .values [j ];
123
143
i ++;
124
144
j ++;
125
145
@@ -131,55 +151,61 @@ public double dotProduct(SparseIntegerVector other) {
131
151
}
132
152
return agg ;
133
153
}
134
-
135
- public double dotProduct (double [] other ) {
154
+
155
+ /**
156
+ * Compute and return the dot product with a dense vector given as an array.
157
+ * @param other dense vector; it is indexed with the keys stored in this vector
158
+ * @return sum of other[keys[i]] * values[i] over every slot of the key array (keys.length, not size)
159
+ */
160
+ public final double dotProduct (final double [] other ) {
136
161
double agg = 0 ;
137
162
for (int i = 0 ; i < keys .length ; i ++) {
138
- agg += other [keys [i ]] * values [i ];
163
+ agg += 1.0 * other [keys [i ]] * values [i ];
139
164
}
140
165
return agg ;
141
166
}
142
-
167
+
143
168
/**
144
- * Compute and return the L2 norm of the vector
145
- * @return
169
+ * Compute and return the L2 (Euclidean) norm of the vector.
170
+ * @return square root of the sum of the squared entries of the value array
146
171
*/
147
- public double norm () {
172
+ public final double norm () {
148
173
double agg = 0 ;
149
174
for (int i = 0 ; i < values .length ; i ++) {
150
- agg += values [i ] * values [i ];
175
+ agg += 1.0 * values [i ] * values [i ]; // 1.0 * promotes to double before multiplying, so the square cannot overflow int
151
176
}
152
177
return Math .sqrt (agg );
153
178
}
154
-
179
+
155
180
/**
156
181
* Compute and return the Jaccard index with the other SparseIntegerVector.
157
182
* |A inter B| / |A union B|
158
183
* It is actually computed as |A inter B| / (|A| + |B| - |A inter B|),
159
184
* where |A inter B| comes from intersection(other) and |A|, |B| are the size fields.
160
185
* @param other the vector to compare with
161
- * @return
186
+ * @return the Jaccard index, computed with a double division
162
187
*/
163
- public double jaccard (SparseIntegerVector other ) {
188
+ public final double jaccard (final SparseIntegerVector other ) {
164
189
int intersection = this .intersection (other );
165
190
return (double ) intersection / (this .size + other .size - intersection );
166
191
}
167
-
192
+
168
193
/**
169
- *
194
+ * Compute the size of the union of these two vectors, as |A| + |B| - |A inter B|.
170
195
* @param other the vector to combine with
171
- * @return
196
+ * @return this.size + other.size minus the result of intersection(other)
172
197
*/
173
- public int union (SparseIntegerVector other ) {
198
+ public final int union (final SparseIntegerVector other ) {
174
199
return this .size + other .size - this .intersection (other );
175
200
}
176
-
201
+
177
202
/**
178
- *
203
+ * Compute the number of values that are present in both vectors (used to
204
+ * compute jaccard index).
179
205
* @param other
180
- * @return
206
+ * @return
181
207
*/
182
- public int intersection (SparseIntegerVector other ) {
208
+ public final int intersection (final SparseIntegerVector other ) {
183
209
int agg = 0 ;
184
210
int i = 0 ;
185
211
int j = 0 ;
@@ -194,35 +220,35 @@ public int intersection(SparseIntegerVector other) {
194
220
195
221
} else if (k1 < k2 ) {
196
222
i ++;
197
-
223
+
198
224
} else {
199
225
j ++;
200
226
}
201
227
}
202
228
return agg ;
203
229
}
204
-
230
+
205
231
@ Override
206
- public String toString () {
232
+ public final String toString () { // renders the first size entries as key:value pairs, each followed by one space
207
233
String r = "" ;
208
234
for (int i = 0 ; i < size ; i ++) {
209
235
r += keys [i ] + ":" + values [i ] + " " ;
210
236
}
211
-
237
+
212
238
return r ;
213
239
}
214
240
215
241
/**
216
242
* Compute and return the q-gram distance to the other vector, i.e. the L1 distance:
217
243
* Sum(|a_i - b_i|)
218
244
* @param other
219
- * @return
245
+ * @return
220
246
*/
221
- public double qgram (SparseIntegerVector other ) {
247
+ public final double qgram (final SparseIntegerVector other ) {
222
248
double agg = 0 ;
223
249
int i = 0 , j = 0 ;
224
250
int k1 , k2 ;
225
-
251
+
226
252
while (i < this .keys .length && j < other .keys .length ) {
227
253
k1 = this .keys [i ];
228
254
k2 = other .keys [j ];
@@ -235,19 +261,19 @@ public double qgram(SparseIntegerVector other) {
235
261
} else if (k1 < k2 ) {
236
262
agg += Math .abs (this .values [i ]);
237
263
i ++;
238
-
264
+
239
265
} else {
240
266
agg += Math .abs (other .values [j ]);
241
267
j ++;
242
268
}
243
269
}
244
-
270
+
245
271
// Maybe one of the two vectors was not completely walked...
246
272
while (i < this .keys .length ) {
247
273
agg += Math .abs (this .values [i ]);
248
274
i ++;
249
275
}
250
-
276
+
251
277
while (j < other .keys .length ) {
252
278
agg += Math .abs (other .values [j ]);
253
279
j ++;
@@ -257,17 +283,27 @@ public double qgram(SparseIntegerVector other) {
257
283
258
284
/**
259
285
* Return the number of (non-zero) elements in this vector.
260
- * @return
286
+ * @return the current value of the size field
261
287
*/
262
- public int size () {
288
+ public final int size () {
263
289
return this .size ;
264
290
}
265
291
266
- public int getKey (int i ) {
292
+ /**
293
+ * Get the key stored at position i of the key array.
294
+ * @param i position in the key array (not bounds-checked against size here)
295
+ * @return the key stored at that position
296
+ */
297
+ public final int getKey (final int i ) {
267
298
return this .keys [i ];
268
299
}
269
300
270
- public int getValue (int i ) {
301
+ /**
302
+ * Get the value stored at position i of the value array.
303
+ * @param i position in the value array (not bounds-checked against size here)
304
+ * @return the value stored at that position
305
+ */
306
+ public final int getValue (final int i ) {
271
307
return this .values [i ];
272
308
}
273
309
}
0 commit comments