Skip to content

Commit b06b786

Browse files
surygeng and Jinghui Geng authored
feat: annotation caption for s3 and db/api ingestion (#629)
* add annotation caption for s3 and db/api ingestion
* validate config
* clean up

---------

Co-authored-by: Jinghui Geng <jgeng@CZIMACOS5722.local>
1 parent 2e1f63f commit b06b786

File tree

22 files changed

+1047
-93
lines changed

22 files changed

+1047
-93
lines changed

apiv2/db_import/importers/annotation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def get_finder_args(self) -> dict[str, Any]:
107107
return {
108108
"path": os.path.join(self.tomogram_voxel_spacing.s3_prefix, "Annotations/"),
109109
# Use *[0-9].json to match only annotation metadata files (e.g., foo-1.0.json)
110-
# and exclude annotation data files which have a _{shape} suffix (e.g., foo-1.0_globalcaption.json)
110+
# and exclude annotation data files which have a _{shape} suffix (e.g., foo-1.0_globalcaption.json, foo-1.0_point_caption.json)
111111
"file_glob": "*/*[0-9].json",
112112
}
113113

apiv2/db_import/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,5 +133,5 @@ def expected_dataset(http_prefix: str) -> dict[str, Any]:
133133
"key_photo_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/snapshot.png",
134134
"key_photo_thumbnail_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/thumbnail.png",
135135
"deposition_id": 300,
136-
"file_size": 1375370.0,
136+
"file_size": 1375590.0,
137137
}

apiv2/db_import/tests/test_db_annotation_import.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ def expected_annotations(http_prefix: str) -> list[dict[str, Any]]:
5454
def expected_annotation_files(http_prefix: str) -> list[dict[str, Any]]:
5555
path = f"{DATASET_ID}/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/"
5656
return [
57+
{
58+
"tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1,
59+
"s3_path": f"s3://test-public-bucket/{path}100-foo-1.0_point_caption.json",
60+
"https_path": f"{http_prefix}/{path}100-foo-1.0_point_caption.json",
61+
"source": "community",
62+
"format": "saber",
63+
"is_visualization_default": False,
64+
"file_size": 0,
65+
},
5766
{
5867
"tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1,
5968
"s3_path": f"s3://test-public-bucket/{path}100-foo-1.0_globalcaption.json",

apiv2/graphql_api/types/annotation_shape.py

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apiv2/schema/schema.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,9 @@ enums:
288288
GlobalCaption:
289289
text: GlobalCaption
290290
description: A text caption for the tomogram
291+
AnnotationCaption:
292+
text: AnnotationCaption
293+
description: Per-instance text captions for an annotation shape
291294
annotation_method_link_type_enum:
292295
name: annotation_method_link_type_enum
293296
description: Describes the type of link associated to the annotation method.
@@ -928,11 +931,11 @@ classes:
928931
annotations:
929932
cascade_delete: true
930933
shape_type:
931-
description: The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask, GlobalCaption)
934+
description: The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask, GlobalCaption, AnnotationCaption)
932935
name: shape_type
933936
from_schema: cdp-dataset-config
934937
range: annotation_file_shape_type_enum
935-
pattern: (^SegmentationMask$)|(^OrientedPoint$)|(^Point$)|(^InstanceSegmentation$)|(^Mesh$)|(^InstanceSegmentationMask$)|(^GlobalCaption$)
938+
pattern: (^SegmentationMask$)|(^OrientedPoint$)|(^Point$)|(^InstanceSegmentation$)|(^Mesh$)|(^InstanceSegmentationMask$)|(^GlobalCaption$)|(^AnnotationCaption$)
936939
Annotation:
937940
name: Annotation
938941
annotations:

apiv2/support/enums.py

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apiv2/test_infra/factories/annotation_shape.py

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ingestion_tools/scripts/importers/annotation.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def _get_metadata_glob(cls, config: DepositionImportConfig, parents: dict[str, A
3333
vs = parents["voxel_spacing"]
3434
anno_dir_path = config.resolve_output_path("annotation", vs, {"annotation_id": "*"})
3535
# Use *[0-9].json to match only annotation metadata files (e.g., some_protein-1.0.json)
36-
# and exclude annotation data files which have a _{shape} suffix (e.g., some_protein-1.0_globalcaption.json)
36+
# and exclude annotation data files which have a _{shape} suffix (e.g., some_protein-1.0_globalcaption.json, some_protein-1.0_point_caption.json)
3737
return os.path.join(anno_dir_path, "*[0-9].json")
3838

3939
@classmethod
@@ -117,6 +117,8 @@ def _instantiate(
117117
anno = TriangularMeshAnnotationGroup(**instance_args)
118118
if shape == "GlobalCaption":
119119
anno = GlobalCaptionAnnotation(**instance_args)
120+
if shape == "AnnotationCaption":
121+
anno = AnnotationCaptionAnnotation(**instance_args)
120122
if not anno:
121123
raise NotImplementedError(f"Unknown shape {shape}")
122124
if anno.is_valid():
@@ -719,3 +721,50 @@ def get_object_count(self, output_prefix: str) -> int:
719721
with self.config.fs.open(output_file, "r") as f:
720722
data = json.load(f)
721723
return len(data.get("captions", []))
724+
725+
726+
class AnnotationCaptionAnnotation(BaseAnnotationSource):
    """Source for per-instance caption files that accompany another annotation shape.

    The output file name embeds the lower-cased ``companion_shape`` so the
    caption file sits next to the shape it describes, e.g.
    ``<prefix>_instancesegmentation_caption.json``.
    """

    shape = "AnnotationCaption"
    output_format: str = "json"
    companion_shape: str
    # TODO: Implement converter functions when the JSON structure changes or the input format is not JSON
    # Each supported input format maps to a callable taking (input_path, output_path).
    map_functions = {"saber": shutil.copy}
    valid_file_formats = list(map_functions)

    def __init__(self, companion_shape: str, *args, **kwargs) -> None:
        """Store the companion shape name, then defer to the base initializer."""
        # Assigned before the base initializer runs, mirroring the original ordering.
        self.companion_shape = companion_shape
        super().__init__(*args, **kwargs)

    def get_output_filename(self, output_prefix: str, extension: str | None = None) -> str:
        """Return ``<output_prefix>_<companion shape, lower-cased>_caption[.<extension>]``."""
        stem = f"{output_prefix}_{self.companion_shape.lower()}_caption"
        return f"{stem}.{extension}" if extension else stem

    def convert(self, output_prefix: str):
        """Run the converter registered for ``self.file_format`` and push the result."""
        destination = self.get_output_filename(output_prefix, self.output_format)
        fs = self.config.fs
        local_source = fs.localreadable(self.path)
        local_target = fs.localwritable(destination)
        converter = self.map_functions[self.file_format]
        converter(local_source, local_target)
        fs.push(local_target)

    def get_metadata(self, output_prefix: str) -> list[dict[str, Any]]:
        """Return the single file-metadata entry describing the caption output file."""
        return [
            {
                "format": self.output_format,
                "path": self.get_output_filename(output_prefix, self.output_format),
                "shape": self.shape,
                "is_visualization_default": False,
            },
        ]

    def get_object_count(self, output_prefix: str) -> int:
        """Return the number of entries under ``"objects"`` in the output file (0 if absent)."""
        caption_path = self.get_output_filename(output_prefix, self.output_format)
        with self.config.fs.open(caption_path, "r") as handle:
            payload = json.load(handle)
        return len(payload.get("objects", []))
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{
2+
"objects": [
3+
{
4+
"id": 0,
5+
"annotation_label": 1,
6+
"text": "A caption for instance with label 1."
7+
},
8+
{
9+
"id": 1,
10+
"annotation_label": 1,
11+
"text": "A 2nd caption for instance with label 1."
12+
},
13+
{
14+
"id": 2,
15+
"annotation_label": 2,
16+
"text": "A caption for instance with label 2."
17+
},
18+
{
19+
"id": 3,
20+
"annotation_label": 2,
21+
"text": "A 2nd caption for instance with label 2."
22+
},
23+
{
24+
"id": 4,
25+
"annotation_label": 2,
26+
"text": "A 3rd caption for instance with label 2."
27+
}
28+
]
29+
}

ingestion_tools/scripts/tests/s3_import/test_annotations.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pytest
99
import trimesh
1010
from importers.annotation import (
11+
AnnotationCaptionAnnotation,
1112
GlobalCaptionAnnotation,
1213
InstanceSegmentationAnnotation,
1314
InstanceSegmentationMaskAnnotation,
@@ -1843,3 +1844,70 @@ def test_ingest_global_caption(
18431844
input_data = json.load(fh)
18441845
assert output_data == input_data
18451846
assert len(output_data["captions"]) == 3
1847+
1848+
1849+
def test_ingest_annotation_caption(
    voxel_spacing_importer_local,
    deposition_config_local: DepositionImportConfig,
    local_test_data_dir: str,
):
    """Ingest an AnnotationCaption source and check its metadata and copied output file."""
    # Arrange: register one annotation config whose only source is an AnnotationCaption.
    glob_string = "annotations/annotation_caption.json"
    caption_source = {
        "AnnotationCaption": {
            "file_format": "saber",
            "glob_string": glob_string,
            "is_visualization_default": False,
            "companion_shape": "InstanceSegmentation",
        },
    }
    deposition_config_local._set_object_configs(
        "annotation",
        [{"metadata": default_anno_metadata, "sources": [caption_source]}],
    )
    fixtures_dir = os.path.join(local_test_data_dir, "fixtures")
    input_path = os.path.join(fixtures_dir, glob_string)

    # Act: build the annotation source directly and run both import steps.
    parents = {"voxel_spacing": voxel_spacing_importer_local, **voxel_spacing_importer_local.parents}
    anno = AnnotationCaptionAnnotation(
        config=deposition_config_local,
        metadata=default_anno_metadata,
        path=input_path,
        parents=parents,
        file_format="saber",
        identifier=100,
        alignment_metadata_path="foo",
        companion_shape="InstanceSegmentation",
    )
    anno.import_item()
    anno.import_metadata()

    # Assert: the recorded metadata points at the companion-shape-specific caption file.
    path = "dataset1/run1/Reconstructions/VoxelSpacing1.123/Annotations/100/some_protein-1.0_instancesegmentation_caption.json"
    caption_file_entry = {
        "format": "json",
        "path": path,
        "shape": "AnnotationCaption",
        "is_visualization_default": False,
    }
    expected_local_metadata = {
        "object_count": 5,
        "alignment_metadata_path": "foo",
        "files": [caption_file_entry],
    }
    assert anno.local_metadata == expected_local_metadata

    # Assert: the written output is a faithful copy of the input fixture.
    output_file = anno.get_output_filename(anno.get_output_path(), "json")
    with open(output_file, "r") as written:
        output_data = json.load(written)
    with open(input_path, "r") as original:
        input_data = json.load(original)
    assert output_data == input_data
    assert len(output_data["objects"]) == 5

0 commit comments

Comments
 (0)