Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
0fe9795
feat: rsync rfc content, store in blob, rebuild references
rjsparks Jan 13, 2026
817ed54
fix: isolate subprocess. Guard against missing file
rjsparks Jan 13, 2026
59345b1
fix: correct variable initialization. guard against unnecessary call
rjsparks Jan 13, 2026
2477d21
test: mock rsync task calls
rjsparks Jan 13, 2026
b944af5
fix: use list for typing rather than List
rjsparks Jan 14, 2026
e76b6fb
fix: string formatting
rjsparks Jan 14, 2026
350cbff
fix: generalize error string when there are no files to parse
rjsparks Jan 14, 2026
50f7b46
fix: use delete_on_close with NamedTemporaryFile
rjsparks Jan 14, 2026
98922b8
fix: mtime is less distracting than m_time
rjsparks Jan 14, 2026
d398c9f
fix: store the notprepped file on the fs
rjsparks Jan 14, 2026
cf8e25b
fix: typo
rjsparks Jan 14, 2026
451d50f
fix: fetch json, remove unneeded unlink
rjsparks Jan 14, 2026
98a5995
chore: ruff
rjsparks Jan 14, 2026
eb9e458
fix: use list for typing
rjsparks Jan 14, 2026
bb123b5
fix: typo
rjsparks Jan 14, 2026
814b147
feat: bulk load rfcs into blob storage
rjsparks Jan 14, 2026
cb023ad
fix: restrict the rsync_helper to rsync
rjsparks Jan 14, 2026
4a45d4f
test: test ietf.sync.utils
rjsparks Jan 14, 2026
df0afbf
chore: honor typing choices
rjsparks Jan 14, 2026
1f7ba5e
test: sync task tests
rjsparks Jan 14, 2026
897fb33
refactor: isolate the rsync from-file construction and test it
rjsparks Jan 14, 2026
f1f89ca
chore: ruff
rjsparks Jan 14, 2026
56e6499
ci: merge ietf-tools/main
rjsparks Jan 14, 2026
22c7169
fix: reflect current changes in older test
rjsparks Jan 14, 2026
09324c9
fix: address incorrect test assumption
rjsparks Jan 14, 2026
a7fe0f9
chore: adhere to task naming conventions
rjsparks Jan 15, 2026
f3dfb9e
ci: merge ietf-tools/main
rjsparks Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions ietf/doc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Celery task definitions
#
import datetime
from typing import List
import debug # pyflakes:ignore

from celery import shared_task
Expand All @@ -29,6 +30,7 @@
from .utils import (
generate_idnits2_rfc_status,
generate_idnits2_rfcs_obsoleted,
rebuild_reference_relations,
update_or_create_draft_bibxml_file,
ensure_draft_bibxml_path_exists,
investigate_fragment,
Expand Down Expand Up @@ -128,3 +130,23 @@ def investigate_fragment_task(name_fragment: str):
"name_fragment": name_fragment,
"results": investigate_fragment(name_fragment),
}

@shared_task
def rebuild_reference_relations_task(doc_names: list[str]):
    """Rebuild reference relations for the named rfc/draft documents.

    For each matching Document, look for its xml and txt source on disk
    (RFCs under settings.RFC_PATH, drafts under the all-drafts archive)
    and rebuild reference relations from whichever files exist. Documents
    with no on-disk content are logged and skipped.
    """
    log.log(f"Task: Rebuilding reference relations for {doc_names}")
    for doc in Document.objects.filter(name__in=doc_names, type__in=["rfc", "draft"]):
        filenames: dict[str, str] = {}
        base = (
            settings.RFC_PATH
            if doc.type_id == "rfc"
            else settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR
        )
        # RFC filenames are unversioned; draft filenames carry the revision.
        stem = doc.name if doc.type_id == "rfc" else f"{doc.name}-{doc.rev}"
        for ext in ["xml", "txt"]:
            path = Path(base) / f"{stem}.{ext}"
            if path.is_file():
                filenames[ext] = str(path)
        if filenames:
            rebuild_reference_relations(doc, filenames)
        else:
            log.log(f"Found no content for {stem}")
48 changes: 32 additions & 16 deletions ietf/doc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,50 +816,66 @@ def rebuild_reference_relations(doc, filenames):

filenames should be a dict mapping file ext (i.e., type) to the full path of each file.
"""
if doc.type.slug != 'draft':
if doc.type.slug not in ["draft", "rfc"]:
return None

log.log(f"Rebuilding reference relations for {doc.name}")

# try XML first
if 'xml' in filenames:
refs = XMLDraft(filenames['xml']).get_refs()
elif 'txt' in filenames:
filename = filenames['txt']
if "xml" in filenames:
refs = XMLDraft(filenames["xml"]).get_refs()
elif "txt" in filenames:
filename = filenames["txt"]
try:
refs = draft.PlaintextDraft.from_file(filename).get_refs()
except IOError as e:
return { 'errors': ["%s :%s" % (e.strerror, filename)] }
return {"errors": ["%s :%s" % (e.strerror, filename)]}
else:
return {'errors': ['No Internet-Draft text available for rebuilding reference relations. Need XML or plaintext.']}
return {
"errors": [
"No Internet-Draft text available for rebuilding reference relations. Need XML or plaintext."
]
}

doc.relateddocument_set.filter(relationship__slug__in=['refnorm','refinfo','refold','refunk']).delete()
doc.relateddocument_set.filter(
relationship__slug__in=["refnorm", "refinfo", "refold", "refunk"]
).delete()

warnings = []
errors = []
unfound = set()
for ( ref, refType ) in refs.items():
for ref, refType in refs.items():
refdoc = Document.objects.filter(name=ref)
if not refdoc and re.match(r"^draft-.*-\d{2}$", ref):
refdoc = Document.objects.filter(name=ref[:-3])
count = refdoc.count()
if count == 0:
unfound.add( "%s" % ref )
unfound.add("%s" % ref)
continue
elif count > 1:
errors.append("Too many Document objects found for %s"%ref)
errors.append("Too many Document objects found for %s" % ref)
else:
# Don't add references to ourself
if doc != refdoc[0]:
RelatedDocument.objects.get_or_create( source=doc, target=refdoc[ 0 ], relationship=DocRelationshipName.objects.get( slug='ref%s' % refType ) )
RelatedDocument.objects.get_or_create(
source=doc,
target=refdoc[0],
relationship=DocRelationshipName.objects.get(
slug="ref%s" % refType
),
)
if unfound:
warnings.append('There were %d references with no matching Document'%len(unfound))
warnings.append(
"There were %d references with no matching Document" % len(unfound)
)

ret = {}
if errors:
ret['errors']=errors
ret["errors"] = errors
if warnings:
ret['warnings']=warnings
ret["warnings"] = warnings
if unfound:
ret['unfound']=list(unfound)
ret["unfound"] = list(unfound)

return ret

Expand Down
1 change: 1 addition & 0 deletions ietf/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,7 @@ def skip_unreadable_post(record):
"polls",
"procmaterials",
"review",
"rfc",
"slides",
"staging",
"statchg",
Expand Down
3 changes: 2 additions & 1 deletion ietf/sync/rfceditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,8 @@ def update_docs_from_rfc_index(
doc.set_state(rfc_published_state)
if draft:
doc.formal_languages.set(draft.formal_languages.all())
for author in draft.documentauthor_set.all():
# This is known broken - it leaves RFCs with the authors of the draft that became the RFC
for author in draft.documentauthor_set.all():
# Copy the author but point at the new doc.
# See https://docs.djangoproject.com/en/4.2/topics/db/queries/#copying-model-instances
author.pk = None
Expand Down
87 changes: 87 additions & 0 deletions ietf/sync/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
#
import datetime
import io
from pathlib import Path
import subprocess
from tempfile import NamedTemporaryFile, TemporaryDirectory
from typing import List
import requests

from celery import shared_task
Expand All @@ -12,6 +16,8 @@
from django.utils import timezone

from ietf.doc.models import DocEvent, RelatedDocument
from ietf.doc.storage_utils import AlreadyExistsError, store_bytes
from ietf.doc.tasks import rebuild_reference_relations_task
from ietf.sync import iana
from ietf.sync import rfceditor
from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue
Expand Down Expand Up @@ -65,11 +71,15 @@ def rfc_editor_index_update_task(full_index=False):
if len(errata_data) < rfceditor.MIN_ERRATA_RESULTS:
log.log("Not enough errata entries, only %s" % len(errata_data))
return # failed
# Collect RFC numbers that were newly published so their files can be fetched.
# NOTE: this must be a list — the original `{}` is a dict, so `.add()` would
# raise AttributeError; a set would also not survive celery's JSON serializer.
newly_published = []
for rfc_number, changes, doc, rfc_published in rfceditor.update_docs_from_rfc_index(
    index_data, errata_data, skip_older_than_date=skip_date
):
    for c in changes:
        log.log("RFC%s, %s: %s" % (rfc_number, doc.name, c))
    if rfc_published:
        newly_published.append(rfc_number)
if newly_published:
    # Only spawn the rsync task when there is something to fetch.
    rsync_rfcs_from_rfceditor.delay(newly_published)


@shared_task
Expand Down Expand Up @@ -222,3 +232,80 @@ def fix_subseries_docevents_task():
DocEvent.objects.filter(type="sync_from_rfc_editor", desc=desc).update(
time=obsoleting_time
)

@shared_task
def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]):
    """Fetch the given RFCs from the rfc-editor via rsync and store them as blobs.

    Rsyncs the published files (one per extension in settings.RFC_FILE_TYPES)
    into settings.RFC_PATH, stores each fetched file in the "rfc" blob store,
    also fetches and stores the not-prepped prerelease XML, then schedules a
    rebuild of reference relations for all the RFCs.
    """
    log.log(f"Rsyncing rfcs from rfc-editor: {rfc_numbers}")
    # Build an --include-from file naming exactly the files we want;
    # the trailing --exclude=* keeps rsync from pulling anything else.
    with NamedTemporaryFile(mode="w", delete=False) as fp:
        from_file = Path(fp.name)
        for num in rfc_numbers:
            for ext in settings.RFC_FILE_TYPES:
                fp.write(f"rfc{num}.{ext}\n")
    try:
        result = subprocess.run(
            [
                "/usr/bin/rsync",
                "-a",
                "--ignore-existing",
                f"--include-from={str(from_file)}",
                "--exclude=*",
                "rsync.rfc-editor.org::rfcs/",
                f"{settings.RFC_PATH}",
            ]
        )
        if result.returncode != 0:
            # Best-effort: missing files are handled below, but leave a trace.
            log.log(f"rsync of rfc files returned {result.returncode}")
    finally:
        # Remove the temp include-from file even if rsync raised.
        from_file.unlink()
    for num in rfc_numbers:
        for ext in settings.RFC_FILE_TYPES:
            fs_path = Path(settings.RFC_PATH) / f"rfc{num}.{ext}"
            if fs_path.is_file():
                _store_rfc_blob(fs_path, num, f"{ext}/rfc{num}.{ext}")

        # Also fetch and store the not-prepped xml
        with TemporaryDirectory() as td:
            name = f"rfc{num}.notprepped.xml"
            subprocess.run(
                [
                    "/usr/bin/rsync",
                    "-a",
                    f"rsync.rfc-editor.org::rfcs/prerelease/{name}",
                    f"{td}/",
                ]
            )
            source = Path(td) / name
            # Guard: the prerelease file may not exist for every RFC; without
            # this check open() would raise and abort the whole task.
            if source.is_file():
                _store_rfc_blob(source, num, f"notprepped/{name}")
            else:
                log.log(f"Found no notprepped content for rfc{num}")

    rebuild_reference_relations_task.delay([f"rfc{num}" for num in rfc_numbers])


def _store_rfc_blob(path: Path, rfc_number: int, blob_name: str):
    """Store one on-disk RFC file in the "rfc" blob store.

    Overwrite is intentionally disallowed; an already-existing blob is logged
    verbosely but is not treated as a failure.
    """
    content = path.read_bytes()  # avoid shadowing the builtin `bytes`
    mtime = path.stat().st_mtime
    try:
        store_bytes(
            kind="rfc",
            name=blob_name,
            content=content,
            allow_overwrite=False,  # Intentionally not allowing overwrite.
            doc_name=f"rfc{rfc_number}",
            doc_rev=None,
            # Not setting content_type
            mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC),
        )
    except AlreadyExistsError as e:
        # This condition will just log verbosely but not otherwise fail
        log.log(str(e))

Loading