Skip to content

Commit b50bb6b

Browse files
Samiul-TheSoccerFan, elasticsearchmachine, and elasticmachine
authored
Adding support to exclude semantic_text subfields (#127664)
* Adding support to exclude semantic_text subfields * Update docs/changelog/127664.yaml * Updating changelog file * remove duplicate test from yaml file * Adding support to exclude semantic_text subfields from mapper builders * Adding support for generic field types * refactoring to use builder and setting exclude value from semantic_text mapper * update in semantic_text mapper and fetcher to incorporate the support functionality * Fix code style issue * adding node feature for yaml tests * Adding more restrictive checks on yaml tests and few refactoring * Returns metadata fields from metadata mappers * returns all source fields for fieldcaps * gather all fields and iterate to process for fieldcaps api * revert back all changes from MappedFieldType and subclasses * revert back exclude logic from semantic_text mapper * fix lint issues * fix lint issues * Adding runtime fields into fieldCaps * Fix linting issue * removing unused functions that were used in previous implementation * fix multifield tests failure * getting alias fields for field caps * adding support for query time runtime fields * [CI] Auto commit changes from spotless * Fix empty mapping fieldCaps call * Address passthrough behavior for mappers * Fix SearchAsYouType mapper failures * rename abstract method to have more meaningful name * Rename mapper function to match its functionality * Adding filtering for inference subfields * revert back previous implementation changes * Adding yaml test for field caps not filtering multi-field * Fixing yaml test * Adding comment why .inference filter is added --------- Co-authored-by: elasticsearchmachine <[email protected]> Co-authored-by: Elastic Machine <[email protected]>
1 parent 6370d60 commit b50bb6b

File tree

6 files changed

+117
-1
lines changed

6 files changed

+117
-1
lines changed

docs/changelog/127664.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127664
2+
summary: Exclude `semantic_text` subfields from field capabilities API
3+
area: "Mapping"
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/action/fieldcaps/FieldCapabilitiesFetcher.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.action.fieldcaps;
1111

12+
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
1213
import org.elasticsearch.cluster.metadata.MappingMetadata;
1314
import org.elasticsearch.core.Booleans;
1415
import org.elasticsearch.core.Nullable;
@@ -30,6 +31,7 @@
3031
import org.elasticsearch.tasks.CancellableTask;
3132

3233
import java.io.IOException;
34+
import java.util.Collection;
3335
import java.util.Collections;
3436
import java.util.HashMap;
3537
import java.util.Map;
@@ -256,6 +258,14 @@ private static Predicate<MappedFieldType> buildFilter(String[] filters, String[]
256258
Set<String> acceptedTypes = Set.of(fieldTypes);
257259
fcf = ft -> acceptedTypes.contains(ft.familyTypeName());
258260
}
261+
262+
// Exclude internal ".inference" subfields of semantic_text fields from the field capabilities response
263+
Collection<InferenceFieldMetadata> inferenceFields = context.getMappingLookup().inferenceFields().values();
264+
for (InferenceFieldMetadata inferenceField : inferenceFields) {
265+
Predicate<MappedFieldType> next = ft -> ft.name().startsWith(inferenceField.getName() + ".inference") == false;
266+
fcf = fcf == null ? next : fcf.and(next);
267+
}
268+
259269
for (String filter : filters) {
260270
if ("parent".equals(filter) || "-parent".equals(filter)) {
261271
continue;

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import java.util.Set;
1717

18+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS;
1819
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG;
1920
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX;
2021
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
@@ -59,7 +60,8 @@ public Set<NodeFeature> getTestFeatures() {
5960
SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT,
6061
TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS,
6162
SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG,
62-
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER
63+
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER,
64+
SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS
6365
);
6466
}
6567
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
134134
public static final NodeFeature SEMANTIC_TEXT_SKIP_INFERENCE_FIELDS = new NodeFeature("semantic_text.skip_inference_fields");
135135
public static final NodeFeature SEMANTIC_TEXT_BIT_VECTOR_SUPPORT = new NodeFeature("semantic_text.bit_vector_support");
136136
public static final NodeFeature SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG = new NodeFeature("semantic_text.support_chunking_config");
137+
public static final NodeFeature SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS = new NodeFeature(
138+
"semantic_text.exclude_sub_fields_from_field_caps"
139+
);
137140

138141
public static final String CONTENT_TYPE = "semantic_text";
139142
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,76 @@ setup:
359359
index: test-always-include-inference-id-index
360360

361361
- exists: test-always-include-inference-id-index.mappings.properties.semantic_field.inference_id
362+
363+
---
364+
"Field caps exclude chunks and embedding fields":
365+
- requires:
366+
cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
367+
reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0
368+
369+
- do:
370+
field_caps:
371+
include_empty_fields: true
372+
index: test-index
373+
fields: "*"
374+
375+
- match: { indices: [ "test-index" ] }
376+
- exists: fields.sparse_field
377+
- exists: fields.dense_field
378+
- not_exists: fields.sparse_field.inference.chunks.embeddings
379+
- not_exists: fields.sparse_field.inference.chunks.offset
380+
- not_exists: fields.sparse_field.inference.chunks
381+
- not_exists: fields.sparse_field.inference
382+
- not_exists: fields.dense_field.inference.chunks.embeddings
383+
- not_exists: fields.dense_field.inference.chunks.offset
384+
- not_exists: fields.dense_field.inference.chunks
385+
- not_exists: fields.dense_field.inference
386+
387+
---
388+
"Field caps does not exclude multi-fields under semantic_text":
389+
- requires:
390+
cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
391+
reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0
392+
- do:
393+
indices.create:
394+
index: test-multi-field-index
395+
body:
396+
settings:
397+
index:
398+
mapping:
399+
semantic_text:
400+
use_legacy_format: false
401+
mappings:
402+
properties:
403+
sparse_field:
404+
type: semantic_text
405+
inference_id: sparse-inference-id
406+
fields:
407+
sparse_keyword_field:
408+
type: keyword
409+
dense_field:
410+
type: semantic_text
411+
inference_id: dense-inference-id
412+
fields:
413+
dense_keyword_field:
414+
type: keyword
415+
416+
- do:
417+
field_caps:
418+
include_empty_fields: true
419+
index: test-multi-field-index
420+
fields: "*"
421+
422+
- match: { indices: [ "test-multi-field-index" ] }
423+
- exists: fields.sparse_field
424+
- exists: fields.dense_field
425+
- exists: fields.sparse_field\.sparse_keyword_field
426+
- exists: fields.dense_field\.dense_keyword_field
427+
- not_exists: fields.sparse_field.inference.chunks.embeddings
428+
- not_exists: fields.sparse_field.inference.chunks.offset
429+
- not_exists: fields.sparse_field.inference.chunks
430+
- not_exists: fields.sparse_field.inference
431+
- not_exists: fields.dense_field.inference.chunks.embeddings
432+
- not_exists: fields.dense_field.inference.chunks.offset
433+
- not_exists: fields.dense_field.inference.chunks
434+
- not_exists: fields.dense_field.inference

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping_bwc.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,3 +307,26 @@ setup:
307307
another_field:
308308
type: keyword
309309

310+
---
311+
"Field caps exclude chunks embedding and text fields":
312+
- requires:
313+
cluster_features: "semantic_text.exclude_sub_fields_from_field_caps"
314+
reason: field caps api exclude semantic_text subfields from 9.1.0 & 8.19.0
315+
316+
- do:
317+
field_caps:
318+
include_empty_fields: true
319+
index: test-index
320+
fields: "*"
321+
322+
- match: { indices: [ "test-index" ] }
323+
- exists: fields.sparse_field
324+
- exists: fields.dense_field
325+
- not_exists: fields.sparse_field.inference.chunks.embeddings
326+
- not_exists: fields.sparse_field.inference.chunks.text
327+
- not_exists: fields.sparse_field.inference.chunks
328+
- not_exists: fields.sparse_field.inference
329+
- not_exists: fields.dense_field.inference.chunks.embeddings
330+
- not_exists: fields.dense_field.inference.chunks.text
331+
- not_exists: fields.dense_field.inference.chunks
332+
- not_exists: fields.dense_field.inference

0 commit comments

Comments
 (0)