Skip to content

Commit f30ca88

Browse files
committed
Interning for short strings, keywords and symbols
1 parent b7c9653 commit f30ca88

File tree

8 files changed

+209
-20
lines changed

8 files changed

+209
-20
lines changed

convex-core/src/main/java/convex/core/data/ASymbolic.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public final StringShort getName() {
4949
return name;
5050
}
5151

52-
protected static boolean validateName(AString name) {
52+
public static boolean validateName(AString name) {
5353
if (name == null) return false;
5454
long n = name.count();
5555
if ((n < 1) || (n > (Constants.MAX_NAME_LENGTH))) {
@@ -116,6 +116,10 @@ public long longValue() {
116116
public ABlob toBlob() {
117117
return name.toBlob();
118118
}
119+
120+
public Blob toFlatBlob() {
121+
return name.toFlatBlob();
122+
}
119123

120124
@Override
121125
public boolean equalsBytes(ABlob b) {

convex-core/src/main/java/convex/core/data/Blobs.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package convex.core.data;
22

3-
import java.io.ByteArrayOutputStream;
43
import java.io.IOException;
54
import java.io.InputStream;
65
import java.util.Random;

convex-core/src/main/java/convex/core/data/Keyword.java

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package convex.core.data;
22

33
import convex.core.Constants;
4+
import convex.core.data.impl.StringStore;
45
import convex.core.data.type.AType;
56
import convex.core.data.type.Types;
67
import convex.core.data.util.BlobBuilder;
@@ -42,9 +43,27 @@ public AType getType() {
4243
*/
4344
public static Keyword create(String name) {
4445
if (name==null) return null;
46+
StringStore.Entry e=StringStore.get(name);
47+
if (e!=null) return e.getKeyword();
4548
return create(Strings.create(name));
4649
}
4750

51+
/**
52+
* Creates a Keyword with the given name
53+
*
54+
* @param name A String to use as the keyword name
55+
* @return The new Keyword, or null if the name is invalid for a Keyword
56+
*/
57+
public static Keyword create(AString name) {
58+
if (name==null) return null;
59+
StringStore.Entry e=StringStore.get(name);
60+
if (e!=null) return e.getKeyword();
61+
if (!validateName(name)) {
62+
return null;
63+
}
64+
return new Keyword((StringShort)name);
65+
}
66+
4867
/**
4968
* Creates a Keyword with the given name
5069
*
@@ -62,13 +81,15 @@ public static Keyword create(Object o) {
6281
return null;
6382
}
6483

84+
85+
6586
/**
6687
* Creates an interned Keyword. Use only for internal constants, won't get GC'd
6788
* @param name Symbolic name for keyword
6889
* @return Interned Keyword
6990
*/
7091
public static Keyword intern(String name) {
71-
return Cells.intern(Keyword.create(name));
92+
return Cells.intern(Keyword.create(Strings.intern(name)));
7293
}
7394

7495
/**
@@ -89,19 +110,7 @@ public static Keyword unsafeCreate(StringShort rawName) {
89110
return new Keyword(rawName);
90111
}
91112

92-
/**
93-
* Creates a Keyword with the given name
94-
*
95-
* @param name A String to use as the keyword name
96-
* @return The new Keyword, or null if the name is invalid for a Keyword
97-
*/
98-
public static Keyword create(AString name) {
99-
if (name==null) return null;
100-
if (!validateName(name)) {
101-
return null;
102-
}
103-
return new Keyword((StringShort)name);
104-
}
113+
105114

106115
/**
107116
* Creates a Keyword with the given name, throwing an exception if name is not

convex-core/src/main/java/convex/core/data/StringShort.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ protected StringShort(byte[] data, int offset, int length) {
5151
super(length);
5252
this.data = Blob.wrap(data, offset, length);
5353
}
54+
55+
/**
56+
* Creates a StringShort, wrapping a Blob. Warning: might not be valid UTF-8
57+
* @throws IllegalArgumentException if the wrapped Blob is too large for a StringShort
58+
*/
59+
public static StringShort wrap(Blob b) {
60+
if (b.count()>MAX_LENGTH) throw new IllegalArgumentException("Invalid Blob length for StringShort");
61+
return new StringShort(b);
62+
}
5463

5564
/**
5665
* Creates a StringShort instance from a regular Java String
@@ -182,6 +191,10 @@ public static StringShort read(long length, Blob blob, int pos) {
182191
public Blob toBlob() {
183192
return data;
184193
}
194+
195+
public Blob toFlatBlob() {
196+
return data;
197+
}
185198

186199
@Override
187200
public boolean equals(AString b) {

convex-core/src/main/java/convex/core/data/Strings.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import java.util.Comparator;
1212

1313
import convex.core.Constants;
14+
import convex.core.data.impl.StringStore;
1415
import convex.core.data.prim.CVMBool;
1516
import convex.core.data.prim.CVMChar;
1617
import convex.core.data.util.BlobBuilder;
@@ -140,13 +141,13 @@ public static AString create(Object o) {
140141
return create(o.toString());
141142
}
142143

143-
public static <T extends AString> T intern(T value) {
144-
return Cells.intern(value);
144+
public static StringShort intern(AString value) {
145+
return StringStore.intern(value);
145146
}
146147

147148
@SuppressWarnings("unchecked")
148149
public static <T extends AString> T intern(String value) {
149-
return (T) intern(create(value));
150+
return (T) StringStore.intern(value);
150151
}
151152

152153
public static AString create(CVMChar c) {

convex-core/src/main/java/convex/core/data/Symbol.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.util.WeakHashMap;
44

5+
import convex.core.data.impl.StringStore;
56
import convex.core.data.type.AType;
67
import convex.core.data.type.Types;
78
import convex.core.data.util.BlobBuilder;
@@ -45,6 +46,8 @@ public AType getType() {
4546
*/
4647
public static Symbol create(String name) {
4748
if (name==null) return null;
49+
StringStore.Entry e=StringStore.get(name);
50+
if (e!=null) return e.getSymbol();
4851
return create(Strings.create(name));
4952
}
5053

@@ -70,7 +73,12 @@ public static Symbol create(AString name) {
7073
}
7174

7275
public static Symbol intern(AString name) {
73-
Symbol sym=create(name);
76+
Symbol sym=create(Strings.intern(name));
77+
return Cells.intern(sym);
78+
}
79+
80+
public static Symbol intern(String name) {
81+
Symbol sym=create(Strings.intern(name));
7482
return Cells.intern(sym);
7583
}
7684

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
package convex.core.data.impl;
2+
3+
import java.nio.charset.StandardCharsets;
4+
import java.util.HashMap;
5+
6+
import convex.core.data.ABlob;
7+
import convex.core.data.AString;
8+
import convex.core.data.ASymbolic;
9+
import convex.core.data.Blob;
10+
import convex.core.data.Cells;
11+
import convex.core.data.Keyword;
12+
import convex.core.data.StringShort;
13+
import convex.core.data.Symbol;
14+
15+
/**
16+
* Internal caching for permanently interned Strings.
17+
*
18+
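* Don't use this for anything sent in externally! Interned entries are kept permanently, so externally supplied values could grow the store without bound.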
19+
*/
20+
public class StringStore {
21+
22+
public static class Entry {
23+
String string=null;
24+
StringShort astring=null;
25+
Keyword keyword=null;
26+
Symbol symbol = null;
27+
Blob blob;
28+
29+
public Entry(Blob b) {
30+
this.blob=b;
31+
}
32+
33+
/**
34+
* Gets the StringShort version of an interned String
35+
* @return StringShort
36+
*/
37+
public StringShort getStringShort() {
38+
StringShort result=astring;
39+
if (result==null) {
40+
result=Cells.intern(StringShort.wrap(blob));
41+
astring = result;
42+
}
43+
return result;
44+
}
45+
46+
/**
47+
* Gets the Keyword version of an interned String
48+
* @return Keyword instance, or null if not a valid Keyword
49+
*/
50+
public Keyword getKeyword() {
51+
Keyword result=keyword;
52+
if (result==null) {
53+
StringShort ss=getStringShort();
54+
if (!ASymbolic.validateName(ss)) return null;
55+
result=Cells.intern(Keyword.unsafeCreate(ss));
56+
keyword = result;
57+
}
58+
return result;
59+
}
60+
61+
/**
62+
* Gets the Symbol version of an interned String
63+
* @return Symbol instance, or null if not a valid Symbol
64+
*/
65+
public Symbol getSymbol() {
66+
Symbol result=symbol;
67+
if (result==null) {
68+
StringShort ss=getStringShort();
69+
if (!ASymbolic.validateName(ss)) return null;
70+
result=Cells.intern(Symbol.unsafeCreate(ss));
71+
symbol = result;
72+
}
73+
return result;
74+
}
75+
}
76+
77+
static HashMap<String,Entry> stringIndex=new HashMap<>();
78+
79+
static HashMap<Blob,Entry> blobIndex=new HashMap<>();
80+
81+
82+
public static Entry get(String string) {
83+
Entry e=stringIndex.get(string);
84+
return e;
85+
}
86+
87+
public static Entry get(AString name) {
88+
return get(name.toBlob());
89+
}
90+
91+
public static Entry get(ABlob blob) {
92+
Entry e=blobIndex.get(blob);
93+
return e;
94+
}
95+
96+
public static StringShort intern(String s) {
97+
Entry e=get(s);
98+
if (e==null) {
99+
Blob b=Blob.wrap(s.getBytes(StandardCharsets.UTF_8));
100+
if (b.count()>StringShort.MAX_LENGTH) throw new IllegalArgumentException("String too large to intern");
101+
e=new Entry(b);
102+
e.string=s;
103+
104+
StringShort astring=StringShort.wrap(b);
105+
astring=Cells.intern(astring);
106+
e.astring=astring;
107+
108+
stringIndex.put(s, e);
109+
blobIndex.put(b, e);
110+
111+
return astring;
112+
} else {
113+
return e.getStringShort();
114+
}
115+
}
116+
117+
public static StringShort intern(AString s) {
118+
if (s.count()>StringShort.MAX_LENGTH) throw new IllegalArgumentException("String too large to intern");
119+
Blob b=s.toFlatBlob();
120+
Entry e=get(b);
121+
if (e==null) {
122+
123+
e=new Entry(b);
124+
StringShort astring=(s instanceof StringShort ss)?ss:StringShort.wrap(b);
125+
astring=Cells.intern(astring);
126+
e.astring=astring;
127+
128+
String js=astring.toString();
129+
e.string=js;
130+
131+
stringIndex.put(js, e);
132+
blobIndex.put(b, e);
133+
134+
return astring;
135+
} else {
136+
return e.getStringShort();
137+
}
138+
}
139+
140+
141+
}

convex-core/src/test/java/convex/core/data/StringsTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
import static org.junit.jupiter.api.Assertions.assertEquals;
44
import static org.junit.jupiter.api.Assertions.assertFalse;
55
import static org.junit.jupiter.api.Assertions.assertNull;
6+
import static org.junit.jupiter.api.Assertions.assertSame;
67
import static org.junit.jupiter.api.Assertions.assertTrue;
78

89
import org.junit.jupiter.api.Test;
910

1011
import convex.core.Constants;
12+
import convex.core.ErrorCodes;
1113
import convex.core.cvm.Keywords;
1214
import convex.core.cvm.Symbols;
1315
import convex.core.data.prim.CVMChar;
@@ -209,6 +211,18 @@ public void testIntAt() {
209211
assertEquals(0xffffffff, s.intAt(6)); // 0xff beyond end of string
210212
assertEquals(0xffffffff, s.intAt(-6)); // 0xff before start of string
211213
}
214+
215+
@Test public void testIntern() {
216+
AString s1=Strings.intern("interned");
217+
AString s2=Strings.intern("interned");
218+
assertSame(s1,s2);
219+
AString s3=Strings.intern(s1);
220+
assertSame(s1,s3);
221+
222+
assertTrue(s1.getRef().isInternal());
223+
224+
assertSame(ErrorCodes.TIMEOUT,Keyword.create("TIMEOUT"));
225+
}
212226

213227
@Test
214228
public void testCharAt() {

0 commit comments

Comments
 (0)