Skip to content

Commit 721f853

Browse files
committed
Support reading UTF-8 encoded strings
1 parent c78e302 commit 721f853

File tree

2 files changed

+34
-6
lines changed

2 files changed

+34
-6
lines changed

src/main/java/io/tiledb/java/api/NativeArray.java

Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
package io.tiledb.java.api;
3434

3535
import io.tiledb.libtiledb.*;
36+
import java.nio.charset.Charset;
3637
import java.nio.charset.StandardCharsets;
3738

3839
public class NativeArray implements AutoCloseable {
@@ -197,9 +198,15 @@ private int getSize(Object buffer) throws TileDBError {
197198
return ((long[]) buffer).length;
198199
}
199200
case TILEDB_STRING_ASCII:
201+
{
202+
Charset charset = StandardCharsets.ISO_8859_1;
203+
return stringToBytes((String) buffer, charset).length;
204+
}
205+
case TILEDB_STRING_UTF8:
200206
case TILEDB_CHAR:
201207
{
202-
return stringToBytes(buffer).length;
208+
Charset charset = StandardCharsets.UTF_8;
209+
return stringToBytes((String) buffer, charset).length;
203210
}
204211
case TILEDB_DATETIME_YEAR:
205212
case TILEDB_DATETIME_MONTH:
@@ -277,10 +284,16 @@ private void createNativeArrayFromBuffer(Object buffer) throws TileDBError {
277284
break;
278285
}
279286
case TILEDB_STRING_ASCII:
287+
{
288+
Charset charset = StandardCharsets.ISO_8859_1;
289+
int8_tArray = Utils.newInt8_tArray(stringToBytes((String) buffer, charset));
290+
break;
291+
}
292+
case TILEDB_STRING_UTF8:
280293
case TILEDB_CHAR:
281294
{
282-
byte[] bytes = stringToBytes(buffer);
283-
int8_tArray = Utils.newInt8_tArray(bytes);
295+
Charset charset = StandardCharsets.UTF_8;
296+
int8_tArray = Utils.newInt8_tArray(stringToBytes((String) buffer, charset));
284297
break;
285298
}
286299
case TILEDB_DATETIME_YEAR:
@@ -532,9 +545,19 @@ public void setItem(int index, Object value) throws ArrayIndexOutOfBoundsExcepti
532545
break;
533546
}
534547
case TILEDB_STRING_ASCII:
548+
{
549+
Charset charset = StandardCharsets.ISO_8859_1;
550+
for (byte b : stringToBytes((String) value, charset)) {
551+
int8_tArray.setitem(index, b);
552+
index++;
553+
}
554+
break;
555+
}
556+
case TILEDB_STRING_UTF8:
535557
case TILEDB_CHAR:
536558
{
537-
for (byte b : stringToBytes(value)) {
559+
Charset charset = StandardCharsets.UTF_8;
560+
for (byte b : stringToBytes((String) value, charset)) {
538561
int8_tArray.setitem(index, b);
539562
index++;
540563
}
@@ -607,6 +630,7 @@ public SWIGTYPE_p_void toVoidPointer() throws TileDBError {
607630
return PointerUtils.toVoid(uint64_tArray);
608631
}
609632
case TILEDB_STRING_ASCII:
633+
case TILEDB_STRING_UTF8:
610634
case TILEDB_CHAR:
611635
{
612636
return PointerUtils.toVoid(int8_tArray);
@@ -778,6 +802,7 @@ public Object toJavaArray(int position, int elements) throws TileDBError {
778802
return Utils.int64ArrayGet(uint64_tArray, position, elements);
779803
}
780804
case TILEDB_STRING_ASCII:
805+
case TILEDB_STRING_UTF8:
781806
case TILEDB_CHAR:
782807
{
783808
return Utils.int8ArrayGet(int8_tArray, position, elements);
@@ -872,6 +897,8 @@ private void createNativeArrayFromVoidPointer(SWIGTYPE_p_p_void pointer) throws
872897
int64_tArray = PointerUtils.int64_tArrayFromVoid(pointer);
873898
break;
874899
}
900+
case TILEDB_STRING_ASCII:
901+
case TILEDB_STRING_UTF8:
875902
case TILEDB_CHAR:
876903
{
877904
int8_tArray = PointerUtils.int8_tArrayFromVoid(pointer);
@@ -959,8 +986,8 @@ private void createNativeArrayFromVoidPointer(SWIGTYPE_p_void pointer) throws Ti
959986
}
960987
}
961988

962-
private byte[] stringToBytes(Object buffer) {
963-
return ((String) buffer).getBytes(StandardCharsets.UTF_8);
989+
private byte[] stringToBytes(String buffer, Charset charset) {
990+
return buffer.getBytes(charset);
964991
}
965992

966993
protected Datatype getNativeType() {

src/main/java/io/tiledb/java/api/Types.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ public static Class getJavaType(Datatype type) throws TileDBError {
111111
{
112112
return Long.class;
113113
}
114+
case TILEDB_STRING_UTF8:
114115
case TILEDB_STRING_ASCII:
115116
case TILEDB_CHAR:
116117
{

0 commit comments

Comments
 (0)