Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker/configs/nginx-proxy.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ server {

proxy_read_timeout 1d;
proxy_send_timeout 1d;
client_max_body_size 0; # disable checking

root /var/www/html;
index index.html index.htm index.nginx-debian.html;
Expand Down
7 changes: 7 additions & 0 deletions ietf/api/serializers_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from django.db import transaction
from django.urls import reverse as urlreverse
from django.utils import timezone
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import extend_schema_field
from rest_framework import serializers
Expand Down Expand Up @@ -571,6 +572,12 @@ class RfcFileSerializer(serializers.Serializer):
"file types, but filenames are otherwise ignored."
),
)
mtime = serializers.DateTimeField(
required=False,
default=timezone.now,
default_timezone=datetime.UTC,
help_text="Modification timestamp to apply to uploaded files",
)
replace = serializers.BooleanField(
required=False,
default=False,
Expand Down
63 changes: 56 additions & 7 deletions ietf/api/tests_views_rpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
from django.conf import settings
from django.core.files.base import ContentFile
from django.db.models import Max
from django.db.models.functions import Coalesce
from django.test.utils import override_settings
from django.urls import reverse as urlreverse

from ietf.blobdb.models import Blob
from ietf.doc.factories import IndividualDraftFactory, WgDraftFactory, WgRfcFactory
from ietf.doc.models import RelatedDocument, Document
from ietf.group.factories import RoleFactory, GroupFactory
Expand All @@ -22,7 +24,9 @@ def test_draftviewset_references(self):
viewname = "ietf.api.purple_api.draft-references"

# non-existent draft
bad_id = Document.objects.aggregate(unused_id=Max("id") + 100)["unused_id"]
bad_id = Document.objects.aggregate(unused_id=Coalesce(Max("id"), 0) + 100)[
"unused_id"
]
url = urlreverse(viewname, kwargs={"doc_id": bad_id})
# Without credentials
r = self.client.get(url)
Expand Down Expand Up @@ -256,6 +260,31 @@ def _valid_post_data():
)
self.assertEqual(r.status_code, 400)

# Put a file in the way. Post should fail because replace = False
file_in_the_way = (rfc_path / f"rfc{unused_rfc_number}.txt")
file_in_the_way.touch()
r = self.client.post(
url,
_valid_post_data(),
format="multipart",
headers={"X-Api-Key": "valid-token"},
)
self.assertEqual(r.status_code, 409) # conflict
file_in_the_way.unlink()

# Put a blob in the way. Post should fail because replace = False
blob_in_the_way = Blob.objects.create(
bucket="rfc", name=f"txt/rfc{unused_rfc_number}.txt", content=b""
)
r = self.client.post(
url,
_valid_post_data(),
format="multipart",
headers={"X-Api-Key": "valid-token"},
)
self.assertEqual(r.status_code, 409) # conflict
blob_in_the_way.delete()

# valid post
r = self.client.post(
url,
Expand All @@ -264,21 +293,41 @@ def _valid_post_data():
headers={"X-Api-Key": "valid-token"},
)
self.assertEqual(r.status_code, 200)
for suffix in [".xml", ".txt", ".html", ".pdf", ".json"]:
for extension in ["xml", "txt", "html", "pdf", "json"]:
filename = f"rfc{unused_rfc_number}.{extension}"
self.assertEqual(
(rfc_path / f"rfc{unused_rfc_number}")
.with_suffix(suffix)
(rfc_path / filename)
.read_text(),
f"This is {suffix}",
f"{suffix} file should contain the expected content",
f"This is .{extension}",
f"{extension} file should contain the expected content",
)
self.assertEqual(
bytes(
Blob.objects.get(
bucket="rfc", name=f"{extension}/{filename}"
).content
),
f"This is .{extension}".encode("utf-8"),
f"{extension} blob should contain the expected content",
)
# special case for notprepped
notprepped_fn = f"rfc{unused_rfc_number}.notprepped.xml"
self.assertEqual(
(
rfc_path / "prerelease" / f"rfc{unused_rfc_number}.notprepped.xml"
rfc_path / "prerelease" / notprepped_fn
).read_text(),
"This is .notprepped.xml",
".notprepped.xml file should contain the expected content",
)
self.assertEqual(
bytes(
Blob.objects.get(
bucket="rfc", name=f"notprepped/{notprepped_fn}"
).content
),
b"This is .notprepped.xml",
".notprepped.xml blob should contain the expected content",
)

# re-post with replace = False should now fail
r = self.client.post(
Expand Down
63 changes: 58 additions & 5 deletions ietf/api/views_rpc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright The IETF Trust 2023-2026, All Rights Reserved
import os
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -35,6 +36,7 @@
)
from ietf.doc.models import Document, DocHistory, RfcAuthor
from ietf.doc.serializers import RfcAuthorSerializer
from ietf.doc.storage_utils import remove_from_storage, store_file, exists_in_storage
from ietf.person.models import Email, Person


Expand Down Expand Up @@ -366,8 +368,8 @@ class RfcPubFilesView(APIView):
api_key_endpoint = "ietf.api.views_rpc"
parser_classes = [parsers.MultiPartParser]

def _destination(self, filename: str | Path) -> Path:
"""Destination for an uploaded RFC file
def _fs_destination(self, filename: str | Path) -> Path:
"""Destination for an uploaded RFC file in the filesystem

Strips any path components in filename and returns an absolute Path.
"""
Expand All @@ -378,6 +380,23 @@ def _destination(self, filename: str | Path) -> Path:
return rfc_path / "prerelease" / filename.name
return rfc_path / filename.name

def _blob_destination(self, filename: str | Path) -> str:
"""Destination name for an uploaded RFC file in the blob store

Strips any path components in filename and returns an absolute Path.
"""
filename = Path(filename) # could potentially have directory components
extension = "".join(filename.suffixes)
if extension == ".notprepped.xml":
file_type = "notprepped"
elif extension[0] == ".":
file_type = extension[1:]
else:
raise serializers.ValidationError(
f"Extension does not begin with '.'!? ({filename})",
)
return f"{file_type}/{filename.name}"

@extend_schema(
operation_id="upload_rfc_files",
summary="Upload files for a published RFC",
Expand All @@ -394,10 +413,17 @@ def post(self, request):
uploaded_files = serializer.validated_data["contents"] # list[UploadedFile]
replace = serializer.validated_data["replace"]
dest_stem = f"rfc{rfc.rfc_number}"
mtime = serializer.validated_data["mtime"]
mtimestamp = mtime.timestamp()
blob_kind = "rfc"

# List of files that might exist for an RFC
possible_rfc_files = [
self._destination(dest_stem + ext)
self._fs_destination(dest_stem + ext)
for ext in serializer.allowed_extensions
]
possible_rfc_blobs = [
self._blob_destination(dest_stem + ext)
for ext in serializer.allowed_extensions
]
if not replace:
Expand All @@ -408,6 +434,14 @@ def post(self, request):
"File(s) already exist for this RFC",
code="files-exist",
)
for possible_existing_blob in possible_rfc_blobs:
if exists_in_storage(
kind=blob_kind, name=possible_existing_blob
):
raise Conflict(
"Blob(s) already exist for this RFC",
code="blobs-exist",
)

with TemporaryDirectory() as tempdir:
# Save files in a temporary directory. Use the uploaded filename
Expand All @@ -421,14 +455,33 @@ def post(self, request):
with tempfile_path.open("wb") as dest:
for chunk in upfile.chunks():
dest.write(chunk)
os.utime(tempfile_path, (mtimestamp, mtimestamp))
files_to_move.append(tempfile_path)
# copy files to final location, removing any existing ones first if the
# replace flag was set
if replace:
for possible_existing_file in possible_rfc_files:
possible_existing_file.unlink(missing_ok=True)
for possible_existing_blob in possible_rfc_blobs:
remove_from_storage(
blob_kind, possible_existing_blob, warn_if_missing=False
)
for ftm in files_to_move:
shutil.move(ftm, self._destination(ftm))
# todo store in blob storage as well (need a bucket for RFCs)
with ftm.open("rb") as f:
store_file(
kind=blob_kind,
name=self._blob_destination(ftm),
file=f,
doc_name=rfc.name,
doc_rev=rfc.rev, # expect None, but match whatever it is
mtime=mtime,
)
destination = self._fs_destination(ftm)
if (
settings.SERVER_MODE != "production"
and not destination.parent.exists()
):
destination.parent.mkdir()
shutil.move(ftm, destination)

return Response(NotificationAckSerializer().data)
3 changes: 3 additions & 0 deletions ietf/blobdb/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ class Meta:
),
]

def __str__(self):
    """Return the blob's identity as "<bucket>:<name>"."""
    return "{}:{}".format(self.bucket, self.name)

def save(self, **kwargs):
db = get_blobdb()
with transaction.atomic(using=db):
Expand Down
21 changes: 21 additions & 0 deletions ietf/doc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .utils import (
generate_idnits2_rfc_status,
generate_idnits2_rfcs_obsoleted,
rebuild_reference_relations,
update_or_create_draft_bibxml_file,
ensure_draft_bibxml_path_exists,
investigate_fragment,
Expand Down Expand Up @@ -128,3 +129,23 @@ def investigate_fragment_task(name_fragment: str):
"name_fragment": name_fragment,
"results": investigate_fragment(name_fragment),
}

@shared_task
def rebuild_reference_relations_task(doc_names: list[str]):
    """Rebuild reference relations for the named RFCs and Internet-Drafts.

    For every Document whose name is in doc_names and whose type is "rfc" or
    "draft", looks for "<stem>.xml" and "<stem>.txt" files under the base
    directory for that type and passes whichever exist to
    rebuild_reference_relations(). Logs a message when neither file exists.
    """
    log.log(f"Task: Rebuilding reference relations for {doc_names}")
    matching_docs = Document.objects.filter(
        name__in=doc_names, type__in=["rfc", "draft"]
    )
    for doc in matching_docs:
        # RFC files are named by document name alone; draft files also
        # carry the revision.
        if doc.type_id == "rfc":
            base_dir = Path(settings.RFC_PATH)
            stem = doc.name
        else:
            base_dir = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)
            stem = f"{doc.name}-{doc.rev}"
        found = {}
        for ext in ("xml", "txt"):
            candidate = base_dir / f"{stem}.{ext}"
            if candidate.is_file():
                found[ext] = str(candidate)
        if not found:
            log.log(f"Found no content for {stem}")
            continue
        rebuild_reference_relations(doc, found)
4 changes: 2 additions & 2 deletions ietf/doc/tests_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,13 +389,13 @@ def test_requires_txt_or_xml(self):
result = rebuild_reference_relations(self.doc, {})
self.assertCountEqual(result.keys(), ['errors'])
self.assertEqual(len(result['errors']), 1)
self.assertIn('No Internet-Draft text available', result['errors'][0],
self.assertIn('No file available', result['errors'][0],
'Error should be reported if no Internet-Draft file is given')

result = rebuild_reference_relations(self.doc, {'md': 'cant-do-this.md'})
self.assertCountEqual(result.keys(), ['errors'])
self.assertEqual(len(result['errors']), 1)
self.assertIn('No Internet-Draft text available', result['errors'][0],
self.assertIn('No file available', result['errors'][0],
'Error should be reported if no XML or plaintext file is given')

@patch.object(XMLDraft, 'get_refs')
Expand Down
48 changes: 32 additions & 16 deletions ietf/doc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,50 +941,66 @@ def rebuild_reference_relations(doc, filenames):

filenames should be a dict mapping file ext (i.e., type) to the full path of each file.
"""
if doc.type.slug != 'draft':
if doc.type.slug not in ["draft", "rfc"]:
return None

log.log(f"Rebuilding reference relations for {doc.name}")

# try XML first
if 'xml' in filenames:
refs = XMLDraft(filenames['xml']).get_refs()
elif 'txt' in filenames:
filename = filenames['txt']
if "xml" in filenames:
refs = XMLDraft(filenames["xml"]).get_refs()
elif "txt" in filenames:
filename = filenames["txt"]
try:
refs = draft.PlaintextDraft.from_file(filename).get_refs()
except IOError as e:
return { 'errors': ["%s :%s" % (e.strerror, filename)] }
return {"errors": [f"{e.strerror}: {filename}"]}
else:
return {'errors': ['No Internet-Draft text available for rebuilding reference relations. Need XML or plaintext.']}
return {
"errors": [
"No file available for rebuilding reference relations. Need XML or plaintext."
]
}

doc.relateddocument_set.filter(relationship__slug__in=['refnorm','refinfo','refold','refunk']).delete()
doc.relateddocument_set.filter(
relationship__slug__in=["refnorm", "refinfo", "refold", "refunk"]
).delete()

warnings = []
errors = []
unfound = set()
for ( ref, refType ) in refs.items():
for ref, refType in refs.items():
refdoc = Document.objects.filter(name=ref)
if not refdoc and re.match(r"^draft-.*-\d{2}$", ref):
refdoc = Document.objects.filter(name=ref[:-3])
count = refdoc.count()
if count == 0:
unfound.add( "%s" % ref )
unfound.add("%s" % ref)
continue
elif count > 1:
errors.append("Too many Document objects found for %s"%ref)
errors.append("Too many Document objects found for %s" % ref)
else:
# Don't add references to ourself
if doc != refdoc[0]:
RelatedDocument.objects.get_or_create( source=doc, target=refdoc[ 0 ], relationship=DocRelationshipName.objects.get( slug='ref%s' % refType ) )
RelatedDocument.objects.get_or_create(
source=doc,
target=refdoc[0],
relationship=DocRelationshipName.objects.get(
slug="ref%s" % refType
),
)
if unfound:
warnings.append('There were %d references with no matching Document'%len(unfound))
warnings.append(
"There were %d references with no matching Document" % len(unfound)
)

ret = {}
if errors:
ret['errors']=errors
ret["errors"] = errors
if warnings:
ret['warnings']=warnings
ret["warnings"] = warnings
if unfound:
ret['unfound']=list(unfound)
ret["unfound"] = list(unfound)

return ret

Expand Down
Loading