Skip to content

Commit bc93463

Browse files
committed
Add EventLinkingType, change format of Event.colocated_ids
1 parent 3065042 commit bc93463

File tree

10 files changed

+103
-61
lines changed

10 files changed

+103
-61
lines changed

python/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
### Changed
1414

15+
- Event: `colocated_ids` has changed from listing VolumeIDs to listing tuples of `(VolumeID, EventLinkingType)`, where `EventLinkingType` indicates whether the association between the VolumeID and the Event was inferred or explicitly defined in the XML (in a `<colocated>` block).
1516
- MarkupText: Typographic quotes now convert to/from LaTeX quotes more consistently.
1617
- Names: Fixed some inconsistencies where `<first/>`, `<first></first>`, and a missing "first" tag would not be considered fully equivalent (within `Name` and `utils.xml.assert_equals`).
1718
- Paper attachments without a type attribute in the XML now give their type as an empty string (instead of defaulting to "attachment"), in order to be able to reconstruct whether there was an explicit type attribute or not.

python/acl_anthology/collections/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from .eventindex import EventIndex
1919
from .event import Event, Talk
2020
from .volume import Volume
21-
from .types import VolumeType
21+
from .types import EventLinkingType, PaperDeletionType, PaperType, VolumeType
2222
from .paper import Paper
2323

2424

@@ -28,7 +28,10 @@
2828
"CollectionIndex",
2929
"Event",
3030
"EventIndex",
31+
"EventLinkingType",
3132
"Paper",
33+
"PaperDeletionType",
34+
"PaperType",
3235
"Talk",
3336
"Volume",
3437
"VolumeType",

python/acl_anthology/collections/collection.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from ..utils.logging import get_logger
3434
from ..utils import xml
3535
from .event import Event
36-
from .types import VolumeType
36+
from .types import EventLinkingType, VolumeType
3737
from .volume import Volume
3838
from .paper import Paper
3939

@@ -284,11 +284,12 @@ def load(self) -> None:
284284
if self.event is not None:
285285
# Events are implicitly linked to volumes defined in the same collection
286286
self.event.colocated_ids = [
287-
volume.full_id_tuple
287+
(volume.full_id_tuple, EventLinkingType.INFERRED)
288288
for volume in self.data.values()
289289
# Edge case: in case the <colocated> block lists a volume in
290290
# the same collection, don't add it twice
291-
if volume.full_id_tuple not in self.event.colocated_ids
291+
if (volume.full_id_tuple, EventLinkingType.EXPLICIT)
292+
not in self.event.colocated_ids
292293
] + self.event.colocated_ids
293294

294295
self.is_data_loaded = True

python/acl_anthology/collections/event.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from lxml.builder import E
2121
from typing import Any, Iterator, Optional, TYPE_CHECKING
2222

23+
from .types import EventLinkingType
2324
from ..constants import RE_EVENT_ID
2425
from ..files import EventFileReference
2526
from ..people import NameSpecification
@@ -99,7 +100,7 @@ class Event:
99100
is_explicit: True if this event was defined explicitly in the XML.
100101
101102
Attributes: List Attributes:
102-
colocated_ids: Volume IDs of proceedings that were colocated with this event.
103+
colocated_ids: Tuples of `(volume ID, linking type)` for the proceedings volumes that are colocated with this event; the linking type is an [`EventLinkingType`][acl_anthology.collections.types.EventLinkingType].
103104
links: Links to materials for this event. The dictionary key specifies the type of link (e.g., "handbook" or "website").
104105
talks: Zero or more references to talks belonging to this event.
105106
@@ -113,9 +114,9 @@ class Event:
113114
parent: Collection = field(repr=False, eq=False)
114115
is_explicit: bool = field(default=False, converter=bool)
115116

116-
colocated_ids: list[AnthologyIDTuple] = field(
117+
colocated_ids: list[tuple[AnthologyIDTuple, EventLinkingType]] = field(
117118
factory=list,
118-
repr=lambda x: f"<list of {len(x)} AnthologyIDTuple objects>",
119+
repr=lambda x: f"<list of {len(x)} tuples>",
119120
)
120121
links: dict[str, EventFileReference] = field(factory=dict, repr=False)
121122
talks: list[Talk] = field(
@@ -143,7 +144,7 @@ def root(self) -> Anthology:
143144

144145
def volumes(self) -> Iterator[Volume]:
145146
"""Returns an iterator over all volumes co-located with this event."""
146-
for anthology_id in self.colocated_ids:
147+
for anthology_id, _ in self.colocated_ids:
147148
volume = self.root.get_volume(anthology_id)
148149
if volume is None:
149150
raise ValueError(
@@ -152,13 +153,18 @@ def volumes(self) -> Iterator[Volume]:
152153
)
153154
yield volume
154155

155-
def add_colocated(self, volume: Volume | AnthologyID) -> None:
156+
def add_colocated(
157+
self,
158+
volume: Volume | AnthologyID,
159+
type_: EventLinkingType = EventLinkingType.EXPLICIT,
160+
) -> None:
156161
"""Add a co-located volume to this event.
157162
158-
Will do nothing if the given volume is already co-located with this event.
163+
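If the given volume is already co-located with this event, the only possible change is that an 'inferred' link is upgraded to 'explicit' when type_ is 'explicit'; otherwise, this will do nothing. If the volume is not yet co-located, it is added with the given type_.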
159164
160165
Parameters:
161166
volume: The ID or Volume object to co-locate with this event.
167+
type_: Whether this volume is/should be explicitly linked in the XML or is inferred. (Defaults to 'explicit'.)
162168
"""
163169
from .volume import Volume
164170

@@ -167,10 +173,16 @@ def add_colocated(self, volume: Volume | AnthologyID) -> None:
167173
else:
168174
volume_id = parse_id(volume)
169175

170-
if volume_id in self.colocated_ids:
171-
return
176+
for idx, (existing_id, existing_type) in enumerate(self.colocated_ids):
177+
if volume_id == existing_id:
178+
if (
179+
existing_type == EventLinkingType.INFERRED
180+
and type_ == EventLinkingType.EXPLICIT
181+
):
182+
self.colocated_ids[idx] = (volume_id, type_)
183+
return
172184

173-
self.colocated_ids.append(volume_id)
185+
self.colocated_ids.append((volume_id, type_))
174186

175187
# Update the event index as well
176188
if self.root.events.is_data_loaded:
@@ -201,7 +213,7 @@ def from_xml(cls, parent: Collection, event: etree._Element) -> Event:
201213
kwargs["talks"].append(Talk.from_xml(element))
202214
elif element.tag == "colocated":
203215
kwargs["colocated_ids"] = [
204-
parse_id(str(volume_id.text))
216+
(parse_id(str(volume_id.text)), EventLinkingType.EXPLICIT)
205217
for volume_id in element
206218
if volume_id.tag == "volume-id"
207219
]
@@ -239,10 +251,11 @@ def to_xml(self) -> etree._Element:
239251
# <colocated>
240252
if self.colocated_ids:
241253
colocated = E.colocated()
242-
for id_tuple in self.colocated_ids:
243-
if id_tuple[0] != self.parent.id:
254+
for id_tuple, el_type in self.colocated_ids:
255+
if el_type == EventLinkingType.EXPLICIT:
244256
colocated.append(
245257
getattr(E, "volume-id")(build_id_from_tuple(id_tuple))
246258
)
247-
elem.append(colocated)
259+
if len(colocated):
260+
elem.append(colocated)
248261
return elem

python/acl_anthology/collections/eventindex.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2023-2024 Marcel Bollmann <[email protected]>
1+
# Copyright 2023-2025 Marcel Bollmann <[email protected]>
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@
2424
from ..utils.ids import AnthologyID, AnthologyIDTuple, parse_id
2525
from ..utils.logging import get_logger
2626
from .event import Event
27+
from .types import EventLinkingType
2728
from .volume import Volume
2829

2930
if TYPE_CHECKING:
@@ -79,11 +80,10 @@ def _add_to_index(self, event: Event) -> None:
7980
return
8081

8182
if event.id in self.data:
82-
for co_id in self.data[event.id].colocated_ids:
83-
if co_id not in event.colocated_ids:
84-
event.colocated_ids.append(co_id)
83+
for co_id, co_type in self.data[event.id].colocated_ids:
84+
event.add_colocated(co_id, co_type)
8585
self.data[event.id] = event
86-
for volume_fid in event.colocated_ids:
86+
for volume_fid, _ in event.colocated_ids:
8787
self.reverse[volume_fid].add(event.id)
8888

8989
def load(self) -> None:
@@ -104,11 +104,10 @@ def load(self) -> None:
104104
if explicit_event.id in self.data:
105105
# This event has already been implicitly created in another file
106106
# See https://github.com/acl-org/acl-anthology/issues/2743#issuecomment-2453501562
107-
for co_id in self.data[explicit_event.id].colocated_ids:
108-
if co_id not in explicit_event.colocated_ids:
109-
explicit_event.colocated_ids.append(co_id)
107+
for co_id, co_type in self.data[explicit_event.id].colocated_ids:
108+
explicit_event.add_colocated(co_id, co_type)
110109
self.data[explicit_event.id] = explicit_event
111-
for volume_fid in explicit_event.colocated_ids:
110+
for volume_fid, _ in explicit_event.colocated_ids:
112111
self.reverse[volume_fid].add(explicit_event.id)
113112

114113
for volume in collection.volumes():
@@ -125,12 +124,12 @@ def load(self) -> None:
125124
event_id,
126125
collection,
127126
is_explicit=False,
128-
colocated_ids=[volume_fid],
127+
colocated_ids=[(volume_fid, EventLinkingType.INFERRED)],
129128
title=MarkupText.from_string(event_name),
130129
)
131-
elif volume_fid not in event.colocated_ids:
130+
else:
132131
# Add implicit connection to existing event
133-
event.colocated_ids.append(volume_fid)
132+
event.add_colocated(volume_fid, EventLinkingType.INFERRED)
134133
self.reverse[volume_fid].add(event_id)
135134
except Exception as exc:
136135
log.exception(exc)

python/acl_anthology/collections/types.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@
1515
from enum import Enum
1616

1717

18+
class EventLinkingType(Enum):
19+
"""How a volume ID was connected to an Event."""
20+
21+
EXPLICIT = "explicit"
22+
"""Volume ID is explicitly listed in <colocated> block in XML."""
23+
24+
INFERRED = "inferred"
25+
"""Volume ID was inferred to belong to Event (e.g., through venue association or by being defined in the same collection) rather than listed in a <colocated> block."""
26+
27+
1828
class PaperDeletionType(Enum):
1929
"""Type of deletion of a paper."""
2030

python/docs/guide/getting-started.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ belong to the conference or to colocated workshops:
148148
Event(
149149
id='acl-2022',
150150
is_explicit=True,
151-
colocated_ids=<list of 34 AnthologyIDTuple objects>,
151+
colocated_ids=<list of 34 tuples>,
152152
title=MarkupText('60th Annual Meeting of the Association for Computational Linguistics'),
153153
location='Dublin, Ireland',
154154
dates='May 22–27, 2022'
@@ -177,9 +177,9 @@ event (`ws-2020`), as well as the EMNLP 2020 event.
177177
>>> volume = anthology.get("2020.blackboxnlp-1")
178178
>>> volume.get_events()
179179
[
180-
Event(id='blackboxnlp-2020', colocated_ids=<list of 1 AnthologyIDTuple objects>, ...),
181-
Event(id='ws-2020', colocated_ids=<list of 105 AnthologyIDTuple objects>, ...),
182-
Event(id='emnlp-2020', colocated_ids=<list of 27 AnthologyIDTuple objects>, ...)
180+
Event(id='blackboxnlp-2020', colocated_ids=<list of 1 tuples>, ...),
181+
Event(id='ws-2020', colocated_ids=<list of 105 tuples>, ...),
182+
Event(id='emnlp-2020', colocated_ids=<list of 27 tuples>, ...)
183183
]
184184
```
185185

python/tests/collections/collection_test.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@
1717
from pathlib import Path
1818

1919
from acl_anthology import Anthology
20-
from acl_anthology.collections import Collection, CollectionIndex, VolumeType
20+
from acl_anthology.collections import (
21+
Collection,
22+
CollectionIndex,
23+
EventLinkingType,
24+
VolumeType,
25+
)
2126
from acl_anthology.people import NameSpecification
2227
from acl_anthology.utils import xml
2328
from acl_anthology.text import MarkupText
@@ -231,7 +236,9 @@ def test_collection_create_volume_should_create_event(anthology, pre_load, reset
231236

232237
# New implicit event should exist in the event index
233238
assert "acl-2000" in anthology.events
234-
assert volume.full_id_tuple in anthology.events["acl-2000"].colocated_ids
239+
assert (volume.full_id_tuple, EventLinkingType.INFERRED) in anthology.events[
240+
"acl-2000"
241+
].colocated_ids
235242
assert volume.full_id_tuple in anthology.events.reverse
236243
assert anthology.events.reverse[volume.full_id_tuple] == {"acl-2000"}
237244

@@ -262,7 +269,9 @@ def test_collection_create_volume_should_update_event(anthology, pre_load, reset
262269

263270
# New volume should be added to existing event
264271
assert "acl-2022" in anthology.events
265-
assert volume.full_id_tuple in anthology.events["acl-2022"].colocated_ids
272+
assert (volume.full_id_tuple, EventLinkingType.INFERRED) in anthology.events[
273+
"acl-2022"
274+
].colocated_ids
266275
assert volume.full_id_tuple in anthology.events.reverse
267276
assert anthology.events.reverse[volume.full_id_tuple] == {"acl-2022"}
268277

@@ -331,7 +340,9 @@ def test_collection_create_event_should_update_eventindex(pre_load, anthology):
331340

332341
if pre_load:
333342
# Volume should automatically have been added
334-
assert event.colocated_ids == [collection.get("1").full_id_tuple]
343+
assert event.colocated_ids == [
344+
(collection.get("1").full_id_tuple, EventLinkingType.INFERRED)
345+
]
335346
else:
336347
# If event index wasn't loaded, it's not
337348
assert event.colocated_ids == []

python/tests/collections/event_test.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2023-2024 Marcel Bollmann <[email protected]>
1+
# Copyright 2023-2025 Marcel Bollmann <[email protected]>
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
1616
from attrs import define
1717
from lxml import etree
1818

19-
from acl_anthology.collections import Event, Talk
19+
from acl_anthology.collections import Event, EventLinkingType, Talk
2020
from acl_anthology.files import EventFileReference
2121
from acl_anthology.text import MarkupText
2222
from acl_anthology.utils.xml import indent
@@ -91,9 +91,9 @@ def test_event_all_attribs():
9191
location="Online",
9292
dates="August 17-19, 2023",
9393
colocated_ids=[
94-
("2023.foobar", "1", None),
95-
("2023.baz", "1", None),
96-
("2023.asdf", "1", None),
94+
(("2023.foobar", "1", None), EventLinkingType.EXPLICIT),
95+
(("2023.baz", "1", None), EventLinkingType.EXPLICIT),
96+
(("2023.asdf", "1", None), EventLinkingType.EXPLICIT),
9797
],
9898
talks=[Talk(MarkupText.from_string("Invited talk"))],
9999
links={"Website": EventFileReference("http://foobar.com")},
@@ -108,10 +108,10 @@ def test_event_to_xml_dont_list_colocated_volumes_of_parent():
108108
id="li-2023",
109109
parent=CollectionStub("2023.li"),
110110
colocated_ids=[
111-
("2023.baz", "1", None),
112-
("2023.li", "main", None),
113-
("2023.li", "side", None),
114-
("2023.ling", "1", None),
111+
(("2023.baz", "1", None), EventLinkingType.EXPLICIT),
112+
(("2023.li", "main", None), EventLinkingType.INFERRED),
113+
(("2023.li", "side", None), EventLinkingType.INFERRED),
114+
(("2023.ling", "1", None), EventLinkingType.EXPLICIT),
115115
],
116116
)
117117
out = event.to_xml()
@@ -143,7 +143,9 @@ def test_event_volumes(anthology):
143143
assert len(event.colocated_ids) == 4
144144
volumes = list(event.volumes())
145145
assert len(volumes) == 4
146-
assert {vol.full_id_tuple for vol in volumes} == set(event.colocated_ids)
146+
assert {vol.full_id_tuple for vol in volumes} == set(
147+
x[0] for x in event.colocated_ids
148+
)
147149
with pytest.raises(ValueError):
148150
# acl-2022 lists co-located volumes that we don't have in the toy
149151
# dataset, so trying to access them should raise an error
@@ -158,7 +160,7 @@ def test_event_add_colocated(anthology):
158160
# Adding colocated volume should update Event & EventIndex
159161
event.add_colocated(volume)
160162
assert len(event.colocated_ids) == 2
161-
assert volume.full_id_tuple in event.colocated_ids
163+
assert (volume.full_id_tuple, EventLinkingType.EXPLICIT) in event.colocated_ids
162164
assert event in anthology.events.by_volume(volume)
163165

164166
# Adding the same volume a second time shouldn't change anything

0 commit comments

Comments
 (0)