Skip to content
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
0fe9795
feat: rsync rfc content, store in blob, rebuild references
rjsparks Jan 13, 2026
817ed54
fix: isolate subprocess. Guard against missing file
rjsparks Jan 13, 2026
59345b1
fix: correct variable initialization. guard against unnecessary call
rjsparks Jan 13, 2026
2477d21
test: mock rsync task calls
rjsparks Jan 13, 2026
b944af5
fix: use list for typing rather than List
rjsparks Jan 14, 2026
e76b6fb
fix: string formatting
rjsparks Jan 14, 2026
350cbff
fix: generalize error string when there are no files to parse
rjsparks Jan 14, 2026
50f7b46
fix: use delete_on_close with NamedTemporaryFile
rjsparks Jan 14, 2026
98922b8
fix: mtime is less distracting than m_time
rjsparks Jan 14, 2026
d398c9f
fix: store the notprepped file on the fs
rjsparks Jan 14, 2026
cf8e25b
fix: typo
rjsparks Jan 14, 2026
451d50f
fix: fetch json, remove unneeded unlink
rjsparks Jan 14, 2026
98a5995
chore: ruff
rjsparks Jan 14, 2026
eb9e458
fix: use list for typing
rjsparks Jan 14, 2026
bb123b5
fix: typo
rjsparks Jan 14, 2026
814b147
feat: bulk load rfcs into blob storage
rjsparks Jan 14, 2026
cb023ad
fix: restrict the rsync_helper to rsync
rjsparks Jan 14, 2026
4a45d4f
test: test ietf.sync.utils
rjsparks Jan 14, 2026
df0afbf
chore: honor typing choices
rjsparks Jan 14, 2026
1f7ba5e
test: sync task tests
rjsparks Jan 14, 2026
897fb33
refactor: isolate the rsync from-file construction and test it
rjsparks Jan 14, 2026
f1f89ca
chore: ruff
rjsparks Jan 14, 2026
56e6499
ci: merge ietf-tools/main
rjsparks Jan 14, 2026
22c7169
fix: reflect current changes in older test
rjsparks Jan 14, 2026
09324c9
fix: address incorrect test assumption
rjsparks Jan 14, 2026
a7fe0f9
chore: adhere to task naming conventions
rjsparks Jan 15, 2026
f3dfb9e
ci: merge ietf-tools/main
rjsparks Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions ietf/doc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .utils import (
generate_idnits2_rfc_status,
generate_idnits2_rfcs_obsoleted,
rebuild_reference_relations,
update_or_create_draft_bibxml_file,
ensure_draft_bibxml_path_exists,
investigate_fragment,
Expand Down Expand Up @@ -128,3 +129,23 @@ def investigate_fragment_task(name_fragment: str):
"name_fragment": name_fragment,
"results": investigate_fragment(name_fragment),
}

@shared_task
def rebuild_reference_relations_task(doc_names: list[str]):
    """Rebuild reference relations for the named RFCs and drafts from files on disk.

    Each Document whose name is in ``doc_names`` and whose type is "rfc" or
    "draft" is processed; other names are ignored by the queryset filter.
    For an RFC the files ``<name>.xml`` / ``<name>.txt`` are looked up under
    ``settings.RFC_PATH``; for a draft ``<name>-<rev>.xml`` / ``.txt`` are
    looked up under ``settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR``. Whatever
    exists is handed to ``rebuild_reference_relations``.

    Args:
        doc_names: Document names to process.
    """
    log.log(f"Task: Rebuilding reference relations for {doc_names}")
    for doc in Document.objects.filter(name__in=doc_names, type__in=["rfc", "draft"]):
        if doc.type_id == "rfc":
            base = Path(settings.RFC_PATH)
            stem = doc.name
        else:
            base = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)
            stem = f"{doc.name}-{doc.rev}"

        # Keep the xml-before-txt ordering of the original lookup.
        candidates = ((ext, base / f"{stem}.{ext}") for ext in ("xml", "txt"))
        filenames = {ext: str(path) for ext, path in candidates if path.is_file()}

        if not filenames:
            log.log(f"Found no content for {stem}")
            continue

        result = rebuild_reference_relations(doc, filenames)
        # The helper reports problems through an "errors" list in its return
        # value rather than by raising; log them so they are not lost when
        # this runs unattended as a task.
        if result:
            for error in result.get("errors", []):
                log.log(
                    f"Error rebuilding reference relations for {doc.name}: {error}"
                )
4 changes: 2 additions & 2 deletions ietf/doc/tests_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,13 +389,13 @@ def test_requires_txt_or_xml(self):
result = rebuild_reference_relations(self.doc, {})
self.assertCountEqual(result.keys(), ['errors'])
self.assertEqual(len(result['errors']), 1)
self.assertIn('No Internet-Draft text available', result['errors'][0],
self.assertIn('No file available', result['errors'][0],
'Error should be reported if no Internet-Draft file is given')

result = rebuild_reference_relations(self.doc, {'md': 'cant-do-this.md'})
self.assertCountEqual(result.keys(), ['errors'])
self.assertEqual(len(result['errors']), 1)
self.assertIn('No Internet-Draft text available', result['errors'][0],
self.assertIn('No file available', result['errors'][0],
'Error should be reported if no XML or plaintext file is given')

@patch.object(XMLDraft, 'get_refs')
Expand Down
48 changes: 32 additions & 16 deletions ietf/doc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,50 +941,66 @@ def rebuild_reference_relations(doc, filenames):

filenames should be a dict mapping file ext (i.e., type) to the full path of each file.
"""
if doc.type.slug != 'draft':
if doc.type.slug not in ["draft", "rfc"]:
return None

log.log(f"Rebuilding reference relations for {doc.name}")

# try XML first
if 'xml' in filenames:
refs = XMLDraft(filenames['xml']).get_refs()
elif 'txt' in filenames:
filename = filenames['txt']
if "xml" in filenames:
refs = XMLDraft(filenames["xml"]).get_refs()
elif "txt" in filenames:
filename = filenames["txt"]
try:
refs = draft.PlaintextDraft.from_file(filename).get_refs()
except IOError as e:
return { 'errors': ["%s :%s" % (e.strerror, filename)] }
return {"errors": [f"{e.strerror}: {filename}"]}
else:
return {'errors': ['No Internet-Draft text available for rebuilding reference relations. Need XML or plaintext.']}
return {
"errors": [
"No file available for rebuilding reference relations. Need XML or plaintext."
]
}

doc.relateddocument_set.filter(relationship__slug__in=['refnorm','refinfo','refold','refunk']).delete()
doc.relateddocument_set.filter(
relationship__slug__in=["refnorm", "refinfo", "refold", "refunk"]
).delete()

warnings = []
errors = []
unfound = set()
for ( ref, refType ) in refs.items():
for ref, refType in refs.items():
refdoc = Document.objects.filter(name=ref)
if not refdoc and re.match(r"^draft-.*-\d{2}$", ref):
refdoc = Document.objects.filter(name=ref[:-3])
count = refdoc.count()
if count == 0:
unfound.add( "%s" % ref )
unfound.add("%s" % ref)
continue
elif count > 1:
errors.append("Too many Document objects found for %s"%ref)
errors.append("Too many Document objects found for %s" % ref)
else:
# Don't add references to ourself
if doc != refdoc[0]:
RelatedDocument.objects.get_or_create( source=doc, target=refdoc[ 0 ], relationship=DocRelationshipName.objects.get( slug='ref%s' % refType ) )
RelatedDocument.objects.get_or_create(
source=doc,
target=refdoc[0],
relationship=DocRelationshipName.objects.get(
slug="ref%s" % refType
),
)
if unfound:
warnings.append('There were %d references with no matching Document'%len(unfound))
warnings.append(
"There were %d references with no matching Document" % len(unfound)
)

ret = {}
if errors:
ret['errors']=errors
ret["errors"] = errors
if warnings:
ret['warnings']=warnings
ret["warnings"] = warnings
if unfound:
ret['unfound']=list(unfound)
ret["unfound"] = list(unfound)

return ret

Expand Down
1 change: 1 addition & 0 deletions ietf/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,7 @@ def skip_unreadable_post(record):
"polls",
"procmaterials",
"review",
"rfc",
"slides",
"staging",
"statchg",
Expand Down
50 changes: 50 additions & 0 deletions ietf/sync/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#
import datetime
import io
from pathlib import Path
from tempfile import NamedTemporaryFile
import requests

from celery import shared_task
Expand All @@ -12,9 +14,11 @@
from django.utils import timezone

from ietf.doc.models import DocEvent, RelatedDocument
from ietf.doc.tasks import rebuild_reference_relations_task
from ietf.sync import iana
from ietf.sync import rfceditor
from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue
from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper
from ietf.utils import log
from ietf.utils.timezone import date_today

Expand Down Expand Up @@ -65,11 +69,16 @@ def rfc_editor_index_update_task(full_index=False):
if len(errata_data) < rfceditor.MIN_ERRATA_RESULTS:
log.log("Not enough errata entries, only %s" % len(errata_data))
return # failed
newly_published = set()
for rfc_number, changes, doc, rfc_published in rfceditor.update_docs_from_rfc_index(
index_data, errata_data, skip_older_than_date=skip_date
):
for c in changes:
log.log("RFC%s, %s: %s" % (rfc_number, doc.name, c))
if rfc_published:
newly_published.add(rfc_number)
if len(newly_published) > 0:
rsync_rfcs_from_rfceditor.delay(list(newly_published))


@shared_task
Expand Down Expand Up @@ -222,3 +231,44 @@ def fix_subseries_docevents_task():
DocEvent.objects.filter(type="sync_from_rfc_editor", desc=desc).update(
time=obsoleting_time
)

@shared_task
def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]):
    """Fetch the given RFCs from the RFC Editor, then store and index them.

    The rsync include rules produced by ``build_from_file_content`` are
    written to a temporary file and passed to ``rsync_helper``, which copies
    from ``rsync.rfc-editor.org::rfcs/`` into ``settings.RFC_PATH`` with
    ``--ignore-existing``. Afterwards the RFCs are loaded into the blob
    store and a reference-relation rebuild task is queued for them.

    Args:
        rfc_numbers: RFC numbers to fetch.
    """
    log.log(f"Rsyncing rfcs from rfc-editor: {rfc_numbers}")
    with NamedTemporaryFile(mode="w", delete_on_close=False) as include_file:
        include_file.write(build_from_file_content(rfc_numbers))
        # Close to flush the rules to disk before rsync reads them;
        # delete_on_close=False keeps the file until the with block exits.
        include_file.close()
        include_path = Path(include_file.name)
        rsync_args = [
            "-a",
            "--ignore-existing",
            f"--include-from={include_path}",
            "--exclude=*",
            "rsync.rfc-editor.org::rfcs/",
            f"{settings.RFC_PATH}",
        ]
        rsync_helper(rsync_args)
    load_rfcs_into_blobdb(rfc_numbers)

    doc_names = [f"rfc{num}" for num in rfc_numbers]
    rebuild_reference_relations_task.delay(doc_names)


@shared_task
def load_rfcs_into_blobdb_task(start: int, end: int):
    """Move file content for rfcs from rfc{start} to rfc{end} inclusive

    As this is expected to be removed once the blobdb is populated, it
    will truncate its work to a coded max end.
    This will not overwrite any existing blob content, and will only
    log a small complaint if asked to load a non-existing RFC.

    Args:
        start: First RFC number to load; values below 1 are raised to 1.
        end: Last RFC number to load; values above the coded max are
            lowered to it.
    """
    max_end = 11000  # Arbitrarily chosen

    # Protect us from ourselves. Clamp before checking the ordering so that
    # a range lying entirely outside 1..max_end (e.g. 20000-30000) is
    # rejected instead of reaching the loader as an empty list.
    start = max(start, 1)
    end = min(end, max_end)
    if end < start:
        return
    load_rfcs_into_blobdb(list(range(start, end + 1)))
82 changes: 81 additions & 1 deletion ietf/sync/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,8 +889,9 @@ class TaskTests(TestCase):
@mock.patch("ietf.sync.tasks.rfceditor.update_docs_from_rfc_index")
@mock.patch("ietf.sync.tasks.rfceditor.parse_index")
@mock.patch("ietf.sync.tasks.requests.get")
@mock.patch("ietf.sync.tasks.rsync_rfcs_from_rfceditor.delay")
def test_rfc_editor_index_update_task(
self, requests_get_mock, parse_index_mock, update_docs_mock
self, rsync_task_mock, requests_get_mock, parse_index_mock, update_docs_mock
) -> None: # the annotation here prevents mypy from complaining about annotation-unchecked
"""rfc_editor_index_update_task calls helpers correctly

Expand Down Expand Up @@ -922,6 +923,7 @@ def json(self):
rfc = RfcFactory()

# Test with full_index = False
rsync_task_mock.return_value = None
requests_get_mock.side_effect = (index_response, errata_response) # will step through these
parse_index_mock.return_value = MockIndexData(length=rfceditor.MIN_INDEX_RESULTS)
update_docs_mock.return_value = (
Expand All @@ -947,10 +949,13 @@ def json(self):
)
self.assertIsNotNone(update_docs_kwargs["skip_older_than_date"])

self.assertFalse(rsync_task_mock.called)

# Test again with full_index = True
requests_get_mock.reset_mock()
parse_index_mock.reset_mock()
update_docs_mock.reset_mock()
rsync_task_mock.reset_mock()
requests_get_mock.side_effect = (index_response, errata_response) # will step through these
tasks.rfc_editor_index_update_task(full_index=True)

Expand All @@ -971,40 +976,67 @@ def json(self):
)
self.assertIsNone(update_docs_kwargs["skip_older_than_date"])

self.assertFalse(rsync_task_mock.called)

# Test again where the index would cause a new RFC to come into existence
requests_get_mock.reset_mock()
parse_index_mock.reset_mock()
update_docs_mock.reset_mock()
rsync_task_mock.reset_mock()
requests_get_mock.side_effect = (
index_response,
errata_response,
) # will step through these
update_docs_mock.return_value = (
(rfc.rfc_number, ("something changed",), rfc, True),
)
tasks.rfc_editor_index_update_task(full_index=True)
self.assertTrue(rsync_task_mock.called)
rsync_task_args, rsync_task_kwargs = rsync_task_mock.call_args
self.assertEqual((([rfc.rfc_number],), {}), (rsync_task_args, rsync_task_kwargs))

# Test error handling
requests_get_mock.reset_mock()
parse_index_mock.reset_mock()
update_docs_mock.reset_mock()
rsync_task_mock.reset_mock()
requests_get_mock.side_effect = requests.Timeout # timeout on every get()
tasks.rfc_editor_index_update_task(full_index=False)
self.assertFalse(parse_index_mock.called)
self.assertFalse(update_docs_mock.called)
self.assertFalse(rsync_task_mock.called)

requests_get_mock.reset_mock()
parse_index_mock.reset_mock()
update_docs_mock.reset_mock()
rsync_task_mock.reset_mock()
requests_get_mock.side_effect = [index_response, requests.Timeout] # timeout second get()
tasks.rfc_editor_index_update_task(full_index=False)
self.assertFalse(update_docs_mock.called)
self.assertFalse(rsync_task_mock.called)

requests_get_mock.reset_mock()
parse_index_mock.reset_mock()
update_docs_mock.reset_mock()
rsync_task_mock.reset_mock()
requests_get_mock.side_effect = [index_response, errata_response]
# feed in an index that is too short
parse_index_mock.return_value = MockIndexData(length=rfceditor.MIN_INDEX_RESULTS - 1)
tasks.rfc_editor_index_update_task(full_index=False)
self.assertTrue(parse_index_mock.called)
self.assertFalse(update_docs_mock.called)
self.assertFalse(rsync_task_mock.called)

requests_get_mock.reset_mock()
parse_index_mock.reset_mock()
update_docs_mock.reset_mock()
rsync_task_mock.reset_mock()
requests_get_mock.side_effect = [index_response, errata_response]
errata_response.json_length = rfceditor.MIN_ERRATA_RESULTS - 1 # too short
parse_index_mock.return_value = MockIndexData(length=rfceditor.MIN_INDEX_RESULTS)
tasks.rfc_editor_index_update_task(full_index=False)
self.assertFalse(update_docs_mock.called)
self.assertFalse(rsync_task_mock.called)

@override_settings(RFC_EDITOR_QUEUE_URL="https://rfc-editor.example.com/queue/")
@mock.patch("ietf.sync.tasks.update_drafts_from_queue")
Expand Down Expand Up @@ -1134,3 +1166,51 @@ def test_iana_protocols_update_task(
self.assertTrue(requests_get_mock.called)
self.assertFalse(parse_protocols_mock.called)
self.assertFalse(update_rfc_log_mock.called)

@mock.patch("ietf.sync.tasks.rsync_helper")
@mock.patch("ietf.sync.tasks.load_rfcs_into_blobdb")
@mock.patch("ietf.sync.tasks.rebuild_reference_relations_task.delay")
def test_rsync_rfcs_from_rfceditor(
    self,
    rebuild_relations_mock,
    load_blobs_mock,
    rsync_helper_mock,
):
    """rsync_rfcs_from_rfceditor rsyncs, loads blobs, and queues the rebuild"""
    tasks.rsync_rfcs_from_rfceditor([12345, 54321])

    # The rsync itself is mocked out; only check that it was attempted.
    self.assertTrue(rsync_helper_mock.called)

    # The blob loader receives the RFC numbers unchanged, positionally.
    self.assertTrue(load_blobs_mock.called)
    self.assertEqual(load_blobs_mock.call_args, mock.call([12345, 54321]))

    # The rebuild task is queued with the matching document names.
    self.assertTrue(rebuild_relations_mock.called)
    self.assertEqual(
        rebuild_relations_mock.call_args, mock.call(["rfc12345", "rfc54321"])
    )

@mock.patch("ietf.sync.tasks.load_rfcs_into_blobdb")
def test_load_rfcs_into_blobdb_task(
    self,
    load_blobs_mock,
):
    """load_rfcs_into_blobdb_task checks and clamps its range before loading"""
    # An inverted range never reaches the loader.
    tasks.load_rfcs_into_blobdb_task(5, 3)
    self.assertFalse(load_blobs_mock.called)

    # (start, end, RFC numbers expected to be handed to the loader)
    cases = (
        (-1, 1, [1]),  # start below 1 is raised to 1
        (10999, 50000, [10999, 11000]),  # end above the cap is lowered
        (3261, 3263, [3261, 3262, 3263]),  # in-range values pass through
    )
    for start, end, expected_numbers in cases:
        load_blobs_mock.reset_mock()
        tasks.load_rfcs_into_blobdb_task(start, end)
        self.assertTrue(load_blobs_mock.called)
        positional, keyword = load_blobs_mock.call_args
        self.assertEqual(positional, (expected_numbers,))
        self.assertEqual(keyword, {})


Loading
Loading