19
19
20
20
import com .google .common .base .Charsets ;
21
21
import com .google .common .primitives .Longs ;
22
+ import com .google .common .primitives .UnsignedBytes ;
22
23
23
24
import org .apache .spark .annotation .Private ;
24
25
import org .apache .spark .unsafe .types .UTF8String ;
@@ -35,36 +36,33 @@ private PrefixComparators() {}
35
36
public static final class StringPrefixComparator extends PrefixComparator {
36
37
@ Override
37
38
public int compare (long aPrefix , long bPrefix ) {
38
- // TODO: this can certainly be done more efficiently
39
+ // TODO: this can be done more efficiently
39
40
byte [] a = Longs .toByteArray (aPrefix );
40
41
byte [] b = Longs .toByteArray (bPrefix );
41
42
for (int i = 0 ; i < 8 ; i ++) {
42
- if (a [i ] == b [i ]) continue ;
43
- if (a [i ] > b [i ]) return -1 ;
44
- else if (a [i ] < b [i ]) return 1 ;
43
+ int c = UnsignedBytes .compare (a [i ], b [i ]);
44
+ if (c != 0 ) return c ;
45
45
}
46
46
return 0 ;
47
47
}
48
48
49
- public long computePrefix (UTF8String value ) {
50
- // TODO: this can certainly be done more efficiently
51
- return value == null ? 0L : computePrefix (value .toString ());
52
- }
53
-
54
- public long computePrefix (String value ) {
55
- // TODO: this can certainly be done more efficiently
56
- if (value == null || value .length () == 0 ) {
49
+ public long computePrefix (byte [] bytes ) {
50
+ if (bytes == null ) {
57
51
return 0L ;
58
52
} else {
59
- String first4Chars = value .substring (0 , Math .min (3 , value .length () - 1 ));
60
- byte [] utf16Bytes = first4Chars .getBytes (Charsets .UTF_16 );
61
53
byte [] padded = new byte [8 ];
62
- if (utf16Bytes .length < 8 ) {
63
- System .arraycopy (utf16Bytes , 0 , padded , 0 , utf16Bytes .length );
64
- }
54
+ System .arraycopy (bytes , 0 , padded , 0 , Math .min (bytes .length , 8 ));
65
55
return Longs .fromByteArray (padded );
66
56
}
67
57
}
58
+
59
+ public long computePrefix (String value ) {
60
+ return value == null ? 0L : computePrefix (value .getBytes (Charsets .UTF_8 ));
61
+ }
62
+
63
+ public long computePrefix (UTF8String value ) {
64
+ return value == null ? 0L : computePrefix (value .getBytes ());
65
+ }
68
66
}
69
67
70
68
/**
0 commit comments