diff --git a/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java b/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java index e37b48528..7a5ea5918 100644 --- a/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java +++ b/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java @@ -24,5 +24,5 @@ * @author Michele Rastelli */ public enum AnalyzerType { - identity, delimiter, stem, norm, ngram, text, pipeline, stopwords, aql, geojson, geopoint + identity, delimiter, stem, norm, ngram, text, pipeline, stopwords, aql, geojson, geopoint, segmentation, collation } diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzer.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzer.java new file mode 100644 index 000000000..e07d820a3 --- /dev/null +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzer.java @@ -0,0 +1,64 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + +package com.arangodb.entity.arangosearch.analyzer; + + +import com.arangodb.entity.arangosearch.AnalyzerType; + +import java.util.Objects; + +/** + * An Analyzer capable of converting the input into a set of language-specific tokens. 
This makes comparisons follow the + * rules of the respective language, most notably in range queries against Views. + * + * @author Michele Rastelli + * @see API Documentation + * @since ArangoDB 3.9 + */ +public class CollationAnalyzer extends SearchAnalyzer { + public CollationAnalyzer() { + setType(AnalyzerType.collation); + } + + private CollationAnalyzerProperties properties; + + public CollationAnalyzerProperties getProperties() { + return properties; + } + + public void setProperties(CollationAnalyzerProperties properties) { + this.properties = properties; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + CollationAnalyzer that = (CollationAnalyzer) o; + return Objects.equals(properties, that.properties); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), properties); + } +} diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzerProperties.java new file mode 100644 index 000000000..d056dbaa2 --- /dev/null +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/CollationAnalyzerProperties.java @@ -0,0 +1,60 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + +package com.arangodb.entity.arangosearch.analyzer; + + +import java.util.Objects; + +/** + * @author Michele Rastelli + * @since ArangoDB 3.9 + */ +public class CollationAnalyzerProperties { + + private String locale; + + /** + * @return a locale in the format `language[_COUNTRY][.encoding][@variant]` (square brackets denote optional parts), + * e.g. `de.utf-8` or `en_US.utf-8`. Only UTF-8 encoding is meaningful in ArangoDB. + * The locale is forwarded to ICU without checks. An invalid locale does not prevent the creation of the Analyzer. + * @see Supported Languages + */ + public String getLocale() { + return locale; + } + + public void setLocale(String locale) { + this.locale = locale; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CollationAnalyzerProperties that = (CollationAnalyzerProperties) o; + return Objects.equals(locale, that.locale); + } + + @Override + public int hashCode() { + return Objects.hash(locale); + } +} diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/NormAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/NormAnalyzerProperties.java index 4ea42db17..cb1bcfbb8 100644 --- a/src/main/java/com/arangodb/entity/arangosearch/analyzer/NormAnalyzerProperties.java +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/NormAnalyzerProperties.java @@ -66,6 +66,9 @@ public SearchAnalyzerCase getAnalyzerCase() { return analyzerCase; } + /** + * @param analyzerCase defaults to {@link SearchAnalyzerCase#none} + */ public void setAnalyzerCase(SearchAnalyzerCase analyzerCase) { this.analyzerCase = analyzerCase; } diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzerCase.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzerCase.java index b01d35dbf..32049f882 100644 --- 
a/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzerCase.java +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzerCase.java @@ -36,7 +36,7 @@ public enum SearchAnalyzerCase { upper, /** - * to not change character case (default) + * to not change character case */ none } diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzer.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzer.java new file mode 100644 index 000000000..70ce2f4cf --- /dev/null +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzer.java @@ -0,0 +1,66 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + +package com.arangodb.entity.arangosearch.analyzer; + + +import com.arangodb.entity.arangosearch.AnalyzerType; + +import java.util.Objects; + +/** + * An Analyzer capable of breaking up the input text into tokens in a language-agnostic manner, making it suitable for + * mixed language strings. + * It can optionally preserve all non-whitespace or all characters instead of keeping alphanumeric characters only, as + * well as apply case conversion. 
+ * + * @author Michele Rastelli + * @see API Documentation + * @since ArangoDB 3.9 + */ +public class SegmentationAnalyzer extends SearchAnalyzer { + public SegmentationAnalyzer() { + setType(AnalyzerType.segmentation); + } + + private SegmentationAnalyzerProperties properties; + + public SegmentationAnalyzerProperties getProperties() { + return properties; + } + + public void setProperties(SegmentationAnalyzerProperties properties) { + this.properties = properties; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + SegmentationAnalyzer that = (SegmentationAnalyzer) o; + return Objects.equals(properties, that.properties); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), properties); + } +} diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzerProperties.java new file mode 100644 index 000000000..221cda81e --- /dev/null +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/SegmentationAnalyzerProperties.java @@ -0,0 +1,78 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + +package com.arangodb.entity.arangosearch.analyzer; + + +import com.arangodb.velocypack.annotations.SerializedName; + +import java.util.Objects; + +/** + * @author Michele Rastelli + * @since ArangoDB 3.9 + */ +public class SegmentationAnalyzerProperties { + + @SerializedName("break") + private BreakMode breakMode; + + @SerializedName("case") + private SearchAnalyzerCase analyzerCase; + + public BreakMode getBreakMode() { + return breakMode; + } + + /** + * @param breakMode defaults to {@link BreakMode#alpha} + */ + public void setBreakMode(BreakMode breakMode) { + this.breakMode = breakMode; + } + + public SearchAnalyzerCase getAnalyzerCase() { + return analyzerCase; + } + + /** + * @param analyzerCase defaults to {@link SearchAnalyzerCase#lower} + */ + public void setAnalyzerCase(SearchAnalyzerCase analyzerCase) { + this.analyzerCase = analyzerCase; + } + + public enum BreakMode { + all, alpha, graphic + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SegmentationAnalyzerProperties that = (SegmentationAnalyzerProperties) o; + return breakMode == that.breakMode && analyzerCase == that.analyzerCase; + } + + @Override + public int hashCode() { + return Objects.hash(breakMode, analyzerCase); + } +} diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzer.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzer.java index 060d6743f..264b48d54 100644 --- a/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzer.java +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzer.java @@ -26,14 +26,6 @@ import java.util.Objects; /** - * WARNING: - * The implementation of Stopwords analyzer is not final in ArangoDB 3.8.0, so using it might result in unpredictable behavior. 
- * This will be fixed in ArangoDB 3.8.1 and will have a different API. - * Any usage of the current Java driver API related to it is therefore discouraged. - * See related bug report - *
- *
- *
* An Analyzer capable of removing specified tokens from the input.
*
* @author Michele Rastelli
diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/TextAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/TextAnalyzerProperties.java
index 8e1ee63fc..b22f7bc34 100644
--- a/src/main/java/com/arangodb/entity/arangosearch/analyzer/TextAnalyzerProperties.java
+++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/TextAnalyzerProperties.java
@@ -80,6 +80,9 @@ public SearchAnalyzerCase getAnalyzerCase() {
return analyzerCase;
}
+ /**
+ * @param analyzerCase character case conversion to apply (see {@link SearchAnalyzerCase}); defaults to {@link SearchAnalyzerCase#lower}
+ */
public void setAnalyzerCase(SearchAnalyzerCase analyzerCase) {
this.analyzerCase = analyzerCase;
}
diff --git a/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java b/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java
index da6ff99f4..3a5a32084 100644
--- a/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java
+++ b/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java
@@ -43,18 +43,7 @@
import com.arangodb.entity.arangosearch.PrimarySort;
import com.arangodb.entity.arangosearch.StoreValuesType;
import com.arangodb.entity.arangosearch.StoredValue;
-import com.arangodb.entity.arangosearch.analyzer.AQLAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.DelimiterAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.GeoJSONAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.GeoPointAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.IdentityAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.NGramAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.NormAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.PipelineAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.SearchAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.StemAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.StopwordsAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.TextAnalyzer;
+import com.arangodb.entity.arangosearch.analyzer.*;
import com.arangodb.model.CollectionSchema;
import com.arangodb.model.ZKDIndexOptions;
import com.arangodb.velocypack.VPackDeserializer;
@@ -120,6 +109,10 @@ public class VPackDeserializers {
return context.deserialize(vpack, GeoJSONAnalyzer.class);
case geopoint:
return context.deserialize(vpack, GeoPointAnalyzer.class);
+ case segmentation:
+ return context.deserialize(vpack, SegmentationAnalyzer.class);
+ case collation:
+ return context.deserialize(vpack, CollationAnalyzer.class);
default:
throw new IllegalArgumentException("Unknown analyzer type: " + type);
}
diff --git a/src/test/java/com/arangodb/ArangoSearchTest.java b/src/test/java/com/arangodb/ArangoSearchTest.java
index 83b5f2f1b..75d49f19d 100644
--- a/src/test/java/com/arangodb/ArangoSearchTest.java
+++ b/src/test/java/com/arangodb/ArangoSearchTest.java
@@ -34,32 +34,7 @@
import com.arangodb.entity.arangosearch.PrimarySort;
import com.arangodb.entity.arangosearch.StoreValuesType;
import com.arangodb.entity.arangosearch.StoredValue;
-import com.arangodb.entity.arangosearch.analyzer.AQLAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.AQLAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.DelimiterAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.DelimiterAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.EdgeNgram;
-import com.arangodb.entity.arangosearch.analyzer.GeoJSONAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.GeoAnalyzerOptions;
-import com.arangodb.entity.arangosearch.analyzer.GeoJSONAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.GeoPointAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.GeoPointAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.IdentityAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.NGramAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.NGramAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.NormAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.NormAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.PipelineAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.PipelineAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.SearchAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.SearchAnalyzerCase;
-import com.arangodb.entity.arangosearch.analyzer.StemAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.StemAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.StopwordsAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.StopwordsAnalyzerProperties;
-import com.arangodb.entity.arangosearch.analyzer.StreamType;
-import com.arangodb.entity.arangosearch.analyzer.TextAnalyzer;
-import com.arangodb.entity.arangosearch.analyzer.TextAnalyzerProperties;
+import com.arangodb.entity.arangosearch.analyzer.*;
import com.arangodb.model.arangosearch.AnalyzerDeleteOptions;
import com.arangodb.model.arangosearch.ArangoSearchCreateOptions;
import com.arangodb.model.arangosearch.ArangoSearchPropertiesOptions;
@@ -967,4 +942,46 @@ public void geoPointAnalyzer() {
}
+ @Test
+ public void segmentationAnalyzer() {
+ assumeTrue(isAtLeastVersion(3, 9));
+
+ SegmentationAnalyzerProperties properties = new SegmentationAnalyzerProperties();
+ properties.setBreakMode(SegmentationAnalyzerProperties.BreakMode.graphic);
+ properties.setAnalyzerCase(SearchAnalyzerCase.upper);
+
+ Set