chanzuckerberg
diff --git a/apiv2/db_import/importers/annotation.py b/apiv2/db_import/importers/annotation.py
Lines changed: 1 addition & 1 deletion
diff --git a/apiv2/db_import/tests/conftest.py b/apiv2/db_import/tests/conftest.py
Lines changed: 1 addition & 1 deletion
diff --git a/apiv2/db_import/tests/test_db_annotation_import.py b/apiv2/db_import/tests/test_db_annotation_import.py
Lines changed: 9 additions & 0 deletions
diff --git a/apiv2/graphql_api/types/annotation_shape.py b/apiv2/graphql_api/types/annotation_shape.py
Lines changed: 3 additions & 3 deletions
diff --git a/apiv2/schema/schema.yaml b/apiv2/schema/schema.yaml
Lines changed: 5 additions & 2 deletions
diff --git a/apiv2/support/enums.py b/apiv2/support/enums.py
Lines changed: 1 addition & 0 deletions
diff --git a/apiv2/test_infra/factories/annotation_shape.py b/apiv2/test_infra/factories/annotation_shape.py
Lines changed: 1 addition & 0 deletions
diff --git a/ingestion_tools/scripts/importers/annotation.py b/ingestion_tools/scripts/importers/annotation.py
Lines changed: 50 additions & 1 deletion
diff --git a/ingestion_tools/scripts/tests/fixtures/annotations/annotation_caption.json b/ingestion_tools/scripts/tests/fixtures/annotations/annotation_caption.json
Lines changed: 29 additions & 0 deletions
diff --git a/ingestion_tools/scripts/tests/s3_import/test_annotations.py b/ingestion_tools/scripts/tests/s3_import/test_annotations.py
Lines changed: 68 additions & 0 deletions
@@ -107,7 +107,7 @@ def get_finder_args(self) -> dict[str, Any]:
         return {
             "path": os.path.join(self.tomogram_voxel_spacing.s3_prefix, "Annotations/"),
             # Use *[0-9].json to match only annotation metadata files (e.g., foo-1.0.json)
-            # and exclude annotation data files which have a _{shape} suffix (e.g., foo-1.0_globalcaption.json)
+            # and exclude annotation data files which have a _{shape} suffix (e.g., foo-1.0_globalcaption.json, foo-1.0_point_caption.json)
             "file_glob": "*/*[0-9].json",
         }
 
 
@@ -133,5 +133,5 @@ def expected_dataset(http_prefix: str) -> dict[str, Any]:
         "key_photo_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/snapshot.png",
         "key_photo_thumbnail_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/thumbnail.png",
         "deposition_id": 300,
-        "file_size": 1375370.0,
+        "file_size": 1375590.0,
     }
@@ -54,6 +54,15 @@ def expected_annotations(http_prefix: str) -> list[dict[str, Any]]:
 def expected_annotation_files(http_prefix: str) -> list[dict[str, Any]]:
     path = f"{DATASET_ID}/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/"
     return [
+        {
+            "tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1,
+            "s3_path": f"s3://test-public-bucket/{path}100-foo-1.0_point_caption.json",
+            "https_path": f"{http_prefix}/{path}100-foo-1.0_point_caption.json",
+            "source": "community",
+            "format": "saber",
+            "is_visualization_default": False,
+            "file_size": 0,
+        },
         {
             "tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1,
             "s3_path": f"s3://test-public-bucket/{path}100-foo-1.0_globalcaption.json",
 
@@ -288,6 +288,9 @@ enums:
       GlobalCaption:
         text: GlobalCaption
         description: A text caption for the tomogram
+      AnnotationCaption:
+        text: AnnotationCaption
+        description: Per-instance text captions for an annotation shape
   annotation_method_link_type_enum:
     name: annotation_method_link_type_enum
     description: Describes the type of link associated to the annotation method.
@@ -928,11 +931,11 @@ classes:
         annotations:
           cascade_delete: true
       shape_type:
-        description: The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask, GlobalCaption)
+        description: The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask, GlobalCaption, AnnotationCaption)
         name: shape_type
         from_schema: cdp-dataset-config
         range: annotation_file_shape_type_enum
-        pattern: (^SegmentationMask$)|(^OrientedPoint$)|(^Point$)|(^InstanceSegmentation$)|(^Mesh$)|(^InstanceSegmentationMask$)|(^GlobalCaption$)
+        pattern: (^SegmentationMask$)|(^OrientedPoint$)|(^Point$)|(^InstanceSegmentation$)|(^Mesh$)|(^InstanceSegmentationMask$)|(^GlobalCaption$)|(^AnnotationCaption$)
   Annotation:
     name: Annotation
     annotations:
 
@@ -33,7 +33,7 @@ def _get_metadata_glob(cls, config: DepositionImportConfig, parents: dict[str, A
         vs = parents["voxel_spacing"]
         anno_dir_path = config.resolve_output_path("annotation", vs, {"annotation_id": "*"})
         # Use *[0-9].json to match only annotation metadata files (e.g., some_protein-1.0.json)
-        # and exclude annotation data files which have a _{shape} suffix (e.g., some_protein-1.0_globalcaption.json)
+        # and exclude annotation data files which have a _{shape} suffix (e.g., some_protein-1.0_globalcaption.json, some_protein-1.0_point_caption.json)
         return os.path.join(anno_dir_path, "*[0-9].json")
 
     @classmethod
@@ -117,6 +117,8 @@ def _instantiate(
             anno = TriangularMeshAnnotationGroup(**instance_args)
         if shape == "GlobalCaption":
             anno = GlobalCaptionAnnotation(**instance_args)
+        if shape == "AnnotationCaption":
+            anno = AnnotationCaptionAnnotation(**instance_args)
         if not anno:
             raise NotImplementedError(f"Unknown shape {shape}")
         if anno.is_valid():
@@ -719,3 +721,50 @@ def get_object_count(self, output_prefix: str) -> int:
         with self.config.fs.open(output_file, "r") as f:
             data = json.load(f)
         return len(data.get("captions", []))
+
+
+class AnnotationCaptionAnnotation(BaseAnnotationSource):
+    """Annotation source for per-instance annotation captions tied to a companion shape."""
+
+    shape = "AnnotationCaption"
+    output_format: str = "json"
+    companion_shape: str
+    # TODO: Implement converter functions when the JSON structure changes or the input format is not JSON
+    map_functions = {
+        "saber": shutil.copy,
+    }
+    valid_file_formats = list(map_functions.keys())
+
+    def __init__(self, companion_shape: str, *args, **kwargs) -> None:
+        self.companion_shape = companion_shape
+        super().__init__(*args, **kwargs)
+
+    def get_output_filename(self, output_prefix: str, extension: str | None = None) -> str:
+        filename = f"{output_prefix}_{self.companion_shape.lower()}_caption"
+        if extension:
+            filename = f"{filename}.{extension}"
+        return filename
+
+    def convert(self, output_prefix: str):
+        output_file_name = self.get_output_filename(output_prefix, self.output_format)
+        input_file = self.config.fs.localreadable(self.path)
+        output_file = self.config.fs.localwritable(output_file_name)
+        self.map_functions[self.file_format](input_file, output_file)
+        self.config.fs.push(output_file)
+
+    def get_metadata(self, output_prefix: str) -> list[dict[str, Any]]:
+        metadata = [
+            {
+                "format": self.output_format,
+                "path": self.get_output_filename(output_prefix, self.output_format),
+                "shape": self.shape,
+                "is_visualization_default": False,
+            },
+        ]
+        return metadata
+
+    def get_object_count(self, output_prefix: str) -> int:
+        output_file = self.get_output_filename(output_prefix, self.output_format)
+        with self.config.fs.open(output_file, "r") as f:
+            data = json.load(f)
+        return len(data.get("objects", []))
@@ -0,0 +1,29 @@
+{
+    "objects": [
+        {
+            "id": 0,
+            "annotation_label": 1,
+            "text": "A caption for instance with label 1."
+        },
+        {
+            "id": 1,
+            "annotation_label": 1,
+            "text": "A 2nd caption for instance with label 1."
+        },
+        {
+            "id": 2,
+            "annotation_label": 2,
+            "text": "A caption for instance with label 2."
+        },
+        {
+            "id": 3,
+            "annotation_label": 2,
+            "text": "A 2nd caption for instance with label 2."
+        },
+        {
+            "id": 4,
+            "annotation_label": 2,
+            "text": "A 3rd caption for instance with label 2."
+        }
+    ]
+}
@@ -8,6 +8,7 @@
 import pytest
 import trimesh
 from importers.annotation import (
+    AnnotationCaptionAnnotation,
     GlobalCaptionAnnotation,
     InstanceSegmentationAnnotation,
     InstanceSegmentationMaskAnnotation,
@@ -1843,3 +1844,70 @@ def test_ingest_global_caption(
         input_data = json.load(fh)
     assert output_data == input_data
     assert len(output_data["captions"]) == 3
+
+
+def test_ingest_annotation_caption(
+    voxel_spacing_importer_local,
+    deposition_config_local: DepositionImportConfig,
+    local_test_data_dir: str,
+):
+    # Arrange
+    glob_string = "annotations/annotation_caption.json"
+    deposition_config_local._set_object_configs(
+        "annotation",
+        [
+            {
+                "metadata": default_anno_metadata,
+                "sources": [
+                    {
+                        "AnnotationCaption": {
+                            "file_format": "saber",
+                            "glob_string": glob_string,
+                            "is_visualization_default": False,
+                            "companion_shape": "InstanceSegmentation",
+                        },
+                    },
+                ],
+            },
+        ],
+    )
+    fixtures_dir = os.path.join(local_test_data_dir, "fixtures")
+
+    # Action
+    anno = AnnotationCaptionAnnotation(
+        config=deposition_config_local,
+        metadata=default_anno_metadata,
+        path=os.path.join(fixtures_dir, glob_string),
+        parents={"voxel_spacing": voxel_spacing_importer_local, **voxel_spacing_importer_local.parents},
+        file_format="saber",
+        identifier=100,
+        alignment_metadata_path="foo",
+        companion_shape="InstanceSegmentation",
+    )
+    anno.import_item()
+    anno.import_metadata()
+
+    # Assert - verify local_metadata
+    path = "dataset1/run1/Reconstructions/VoxelSpacing1.123/Annotations/100/some_protein-1.0_instancesegmentation_caption.json"
+    expected_local_metadata = {
+        "object_count": 5,
+        "alignment_metadata_path": "foo",
+        "files": [
+            {
+                "format": "json",
+                "path": path,
+                "shape": "AnnotationCaption",
+                "is_visualization_default": False,
+            },
+        ],
+    }
+    assert anno.local_metadata == expected_local_metadata
+
+    # Verify the output file content matches the input
+    output_file = anno.get_output_filename(anno.get_output_path(), "json")
+    with open(output_file, "r") as fh:
+        output_data = json.load(fh)
+    with open(os.path.join(fixtures_dir, glob_string), "r") as fh:
+        input_data = json.load(fh)
+    assert output_data == input_data
+    assert len(output_data["objects"]) == 5
Original file line number	Diff line number	Diff line change
`@@ -107,7 +107,7 @@ def get_finder_args(self) -> dict[str, Any]:`
`107`	`107`	`return {`
`108`	`108`	`"path": os.path.join(self.tomogram_voxel_spacing.s3_prefix, "Annotations/"),`
`109`	`109`	`# Use *[0-9].json to match only annotation metadata files (e.g., foo-1.0.json)`
`110`		`- # and exclude annotation data files which have a _{shape} suffix (e.g., foo-1.0_globalcaption.json)`
	`110`	`+ # and exclude annotation data files which have a _{shape} suffix (e.g., foo-1.0_globalcaption.json, foo-1.0_point_caption.json)`
`111`	`111`	`"file_glob": "*/*[0-9].json",`
`112`	`112`	`}`
`113`	`113`
Original file line number	Diff line number	Diff line change
`@@ -133,5 +133,5 @@ def expected_dataset(http_prefix: str) -> dict[str, Any]:`
`133`	`133`	`"key_photo_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/snapshot.png",`
`134`	`134`	`"key_photo_thumbnail_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/thumbnail.png",`
`135`	`135`	`"deposition_id": 300,`
`136`		`- "file_size": 1375370.0,`
	`136`	`+ "file_size": 1375590.0,`
`137`	`137`	`}`