Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions ietf/doc/templatetags/ietf_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from django import template
from django.conf import settings
from django.utils.html import escape
from django.template.defaultfilters import truncatewords_html, linebreaksbr, stringfilter, striptags
from django.template.defaultfilters import truncatewords_html, linebreaksbr, stringfilter, striptags, urlize
from django.utils.safestring import mark_safe, SafeData
from django.utils.html import strip_tags
from django.utils.encoding import force_str
Expand All @@ -29,7 +29,7 @@
from ietf.utils import log
from ietf.doc.utils import prettify_std_name
from ietf.utils.html import clean_html
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped, linkify
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped
from ietf.utils.validators import validate_url

register = template.Library()
Expand Down Expand Up @@ -448,14 +448,14 @@ def ad_area(user):
def format_history_text(text, trunc_words=25):
"""Run history text through some cleaning and add ellipsis if it's too long."""
full = mark_safe(clean_html(text))
full = linkify(urlize_ietf_docs(full))
full = urlize(urlize_ietf_docs(full))

return format_snippet(full, trunc_words)

@register.filter
def format_snippet(text, trunc_words=25):
# urlize if there aren't already links present
text = linkify(text)
text = urlize(text)
full = keep_spacing(collapsebr(linebreaksbr(mark_safe(clean_html(text)))))
snippet = truncatewords_html(full, trunc_words)
if snippet != full:
Expand Down Expand Up @@ -714,10 +714,6 @@ def rfcbis(s):
m = re.search(r'^.*-rfc(\d+)-?bis(-.*)?$', s)
return None if m is None else 'rfc' + m.group(1)

@register.filter
@stringfilter
def urlize(value):
raise RuntimeError("Use linkify from textfilters instead of urlize")

@register.filter
@stringfilter
Expand Down
3 changes: 0 additions & 3 deletions ietf/group/tests_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import calendar
import datetime
import io
import bleach
from unittest import mock

from unittest.mock import call, patch
Expand Down Expand Up @@ -2136,12 +2135,10 @@ def test_see_status_update(self):

def test_view_status_update(self):
chair = RoleFactory(name_id='chair',group__type_id='wg')
event = GroupEventFactory(type='status_update',group=chair.group)
for url in group_urlreverse_list(chair.group, 'ietf.group.views.group_about_status'):
response = self.client.get(url)
self.assertEqual(response.status_code,200)
q=PyQuery(response.content)
self.assertTrue(bleach.linkify(escape(event.desc), parse_email=True) in str(q('pre')))
self.assertFalse(q('a#edit_button'))
self.client.login(username=chair.person.user.username,password='%s+password'%chair.person.user.username)
response = self.client.get(url)
Expand Down
5 changes: 2 additions & 3 deletions ietf/templates/doc/shepherd_writeup_template.html
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
{% extends "base.html" %}
{# Copyright The IETF Trust 2015, All Rights Reserved #}
{% load origin %}
{% load ietf_filters %}
{% load textfilters htmlfilters %}
{% load ietf_filters static textfilters htmlfilters %}
{% block title %}Document Shepherd Write-Up{% if type == "group" %} for Group Documents{% elif type == "individual" %} for Individual Documents{% endif %}{% endblock %}
{% block content %}
{% origin %}
{{ writeup|urlize_ietf_docs|linkify }}
{{ writeup|urlize_ietf_docs }}
{% endblock %}
2 changes: 1 addition & 1 deletion ietf/templates/meeting/important-dates.html
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ <h2 class="mt-5" id="IETF{{ meeting.number }}">
{% endif %}
</td>
<td>
{{ d.name.desc|urlize_ietf_docs|markdown|linkify }}{% if d.name.desc|slice:"-1:" != "." %}.{% endif %}
{{ d.name.desc|urlize_ietf_docs|markdown }}{% if d.name.desc|slice:"-1:" != "." %}.{% endif %}
{% if d.name.slug == 'draftwgagenda' or d.name.slug == 'revwgagenda' or d.name.slug == 'procsub' or d.name.slug == 'revslug' %}
Upload using the
<a href="{% url 'ietf.meeting.views.materials' num=meeting.number %}">Meeting Materials Management Tool</a>.
Expand Down
47 changes: 19 additions & 28 deletions ietf/utils/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
"""Utilities for working with HTML."""


import bleach
import nh3
import html2text

import debug # pyflakes:ignore

from django import forms
Expand All @@ -15,63 +14,57 @@
from ietf.utils.mime import get_mime_type


# Allow the protocols/tags/attributes we specifically want, plus anything that bleach declares
# to be safe. As of 2025-01-27, the explicit lists for protocols and tags are a strict superset
# of bleach's defaults.
acceptable_protocols = bleach.sanitizer.ALLOWED_PROTOCOLS.union(
{"http", "https", "mailto", "ftp", "xmpp"}
)
acceptable_tags = bleach.sanitizer.ALLOWED_TAGS.union(
# Allow the protocols/tags/attributes we specifically want, plus anything that nh3 declares
# to be safe.

acceptable_protocols = {"http", "https", "mailto", "tel", "xmpp"}
acceptable_tags = nh3.ALLOWED_TAGS.union(
{
# fmt: off
"a", "abbr", "acronym", "address", "b", "big",
"blockquote", "body", "br", "caption", "center", "cite", "code", "col",
"colgroup", "dd", "del", "dfn", "dir", "div", "dl", "dt", "em", "font",
"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i", "ins", "kbd",
"li", "ol", "p", "pre", "q", "s", "samp", "small", "span", "strike", "style",
"li", "ol", "p", "pre", "q", "s", "samp", "small", "span", "strike",
"strong", "sub", "sup", "table", "title", "tbody", "td", "tfoot", "th", "thead",
"tr", "tt", "u", "ul", "var"
"tr", "tt", "u", "ul", "var", "xmp"
# fmt: on
}
)
acceptable_attributes = bleach.sanitizer.ALLOWED_ATTRIBUTES | {
"*": ["id"],
"ol": ["start"],
acceptable_attributes = nh3.ALLOWED_ATTRIBUTES | {
"*": {"id"},
"ol": {"start"},
}


# Instantiate sanitizer classes
_bleach_cleaner = bleach.sanitizer.Cleaner(
_nh3_cleaner = nh3.Cleaner(
tags=acceptable_tags,
attributes=acceptable_attributes,
protocols=acceptable_protocols,
strip=True,
url_schemes=acceptable_protocols,
)


_liberal_bleach_cleaner = bleach.sanitizer.Cleaner(
_liberal_nh3_cleaner = nh3.Cleaner(
tags=acceptable_tags.union({"img", "figure", "figcaption"}),
attributes=acceptable_attributes | {"img": ["src", "alt"]},
protocols=acceptable_protocols,
strip=True,
attributes=acceptable_attributes | {"img": {"src", "alt"}},
url_schemes=acceptable_protocols,
)


def clean_html(text: str):
"""Clean the HTML in a string"""
return _bleach_cleaner.clean(text)
return _nh3_cleaner.clean(text)


def liberal_clean_html(text: str):
"""More permissively clean the HTML in a string"""
return _liberal_bleach_cleaner.clean(text)
return _liberal_nh3_cleaner.clean(text)


@keep_lazy(str)
def remove_tags(html, tags):
"""Returns the given HTML sanitized, and with the given tags removed."""
allowed = acceptable_tags - set(t.lower() for t in tags)
return bleach.clean(html, tags=allowed, strip=True)
return nh3.clean(html, tags=allowed)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to audit invocations of this function.



# ----------------------------------------------------------------------
Expand All @@ -96,5 +89,3 @@ def unescape(text):
This function undoes what django.utils.html.escape() does
"""
return text.replace('&amp;', '&').replace('&#39;', "'").replace('&quot;', '"').replace('&gt;', '>').replace('&lt;', '<' )


94 changes: 84 additions & 10 deletions ietf/utils/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,104 @@
from markdown.postprocessors import Postprocessor

from django.utils.safestring import mark_safe
from django.utils.regex_helper import _lazy_re_compile
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator, EmailValidator

from ietf.doc.templatetags.ietf_filters import urlize_ietf_docs
from .html import clean_html, liberal_clean_html
from .text import linkify

import re
import xml


_validate_url = URLValidator()
_validate_email = EmailValidator()

linkable_protocols = ["http", "https", "ftp", "xmpp"]

# Simple Markdown extension inspired by https://github.com/django-wiki/django-wiki/blob/main/src/wiki/plugins/links/mdx/urlize.py

# Pattern for a bare URL together with the text around it. Named groups:
#   begin    - empty, or leading text that ends in whitespace, "(" or "<"
#   url      - the whole candidate URL (protocol + host + optional port/path)
#   protocol - letters/colons followed by "//", or empty when no scheme is given
#   host     - dotted-quad IPv4, bracketed IPv6, or a dotted domain name
#   port     - optional digits after ":"
#   path     - optional, stops at whitespace, brackets, parentheses or angle brackets
#   end      - empty, or trailing text that starts with whitespace, ")" or ">"
# The domain-name alternative only lists upper-case letters, so the pattern has
# to be compiled with re.IGNORECASE to match lower-case host names.
URL_RE = (
r"^(?P<begin>|.*?[\s\(\<])"
r"(?P<url>"
r"(?P<protocol>([a-zA-Z:]+\/{2}|))"
r"(?P<host>"
r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}|" # IPv4
r"\[[a-zA-Z0-9:]+\]|" # IPv6
r"([A-Z0-9]([A-Z0-9-]{0,61}[A-Z0-9])?\.)+([A-Z]{2,6}\.?|[A-Z]{2,}\.?)" # FQDN
r")"
r"(:(?P<port>[0-9]+))?"
r"(/(?P<path>[^\s\[\(\]\)\<\>]*))?"
r")"
r"(?P<end>[\s\)\>].*?|)$"
)

# Pattern for a bare email address together with the text around it. Named groups:
#   begin - empty, or leading text that ends in whitespace, "(" or "<"
#   email - local part, "@", domain labels, and a 2-4 letter final label
#   end   - empty, or trailing text that starts with whitespace, ")" or ">"
# The domain class previously read "0-0" (a typo for "0-9") and lacked "-", so
# domains containing the digits 1-9 or a hyphen were never matched.
EMAIL_RE = (
    r"^(?P<begin>|.*?[\s\(\<])"
    r"(?P<email>"
    r"[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z]{2,4}"
    r")"
    r"(?P<end>[\s\)\>].*?|)$"
)

class Linker(python_markdown.inlinepatterns.Pattern):
    """Inline pattern that turns a bare URL or email address into an ``<a>`` element.

    ``linker`` selects the mode. ``"url"`` expects the pattern to define the
    named groups ``url`` and ``protocol``; any other value is handled as email
    mode and expects a named group ``email``. Both modes also read the
    ``begin`` and ``end`` groups holding the text around the candidate.
    """

    def __init__(self, pattern, md, linker="url"):
        super().__init__(pattern, md)
        self.linker = linker

    def getCompiledRegExp(self):
        """Return ``self.pattern`` compiled with DOTALL, UNICODE and IGNORECASE."""
        return _lazy_re_compile(self.pattern, re.DOTALL | re.UNICODE | re.IGNORECASE)

    def handleMatch(self, m):
        """Build the link element for match ``m``.

        Returns an ``<a>`` element, or ``None`` when the candidate should be
        left as plain text (unsupported or missing scheme, failed validation,
        or angle-bracket markup in the surrounding text).
        """
        if self.linker == "url":
            text = m.group("url")
            protocol = m.group("protocol")
            # The pattern matches case-insensitively, so the scheme may arrive
            # in any case; normalise it before comparing with the lower-case
            # allow-list. ``[:-3]`` drops the trailing "://".
            if protocol == "" or protocol[:-3].lower() not in linkable_protocols:
                return None
            href = text
            try:
                _validate_url(text)
            except ValidationError:
                return None

        else:
            text = m.group("email")
            href = "mailto:" + text
            try:
                _validate_email(text)
            except ValidationError:
                return None

        # Give up when the text before and after the candidate, taken together,
        # contains "<" followed by ">" -- presumably the candidate then sits
        # inside or next to existing markup (TODO confirm intent).
        delimiter = m.group("begin") + m.group("end")
        if re.search(r"(\<([\s\S])+?\>)", delimiter):
            return None

        element = xml.etree.ElementTree.Element("a")
        element.set("href", href)
        element.set("rel", "noopener noreferrer")
        # AtomicString keeps the link text from being processed by further
        # inline patterns.
        element.text = python_markdown.util.AtomicString(text)

        return element



class LinkifyExtension(Extension):
"""
Simple Markdown extension inspired by https://github.com/daGrevis/mdx_linkify,
but using our own linker directly. Doing the linkification on the converted
Markdown output introduces artifacts.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def extendMarkdown(self, md):
md.postprocessors.register(LinkifyPostprocessor(md), "linkify", 50)
md.inlinePatterns.register(Linker(URL_RE, md, linker="url"), "linkify_url", 91)
md.inlinePatterns.register(Linker(EMAIL_RE, md, linker="email"), "linkify_email", 92)
md.postprocessors.register(LinkifyPostprocessor(md), "linkify", 93)
# disable automatic links via angle brackets for email addresses
md.inlinePatterns.deregister("automail")
# "autolink" for URLs does not seem to cause issues, so leave it on


class LinkifyPostprocessor(Postprocessor):
def run(self, text):
return urlize_ietf_docs(linkify(text))
return urlize_ietf_docs(text)


def markdown(text):
Expand Down
15 changes: 9 additions & 6 deletions ietf/utils/templatetags/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,16 @@ def test_linkify(self):
)
self.assertEqual(
linkify("https://www.ietf.org"),
'<a href="https://www.ietf.org">https://www.ietf.org</a>',
'<a href="https://www.ietf.org" rel="nofollow">https://www.ietf.org</a>',
)
self.assertEqual(
linkify("https://mailman3.ietf.org/mailman3/lists/tls@ietf.org/"),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixes #10120

Image

'<a href="https://mailman3.ietf.org/mailman3/lists/tls@ietf.org/" rel="nofollow">https://mailman3.ietf.org/mailman3/lists/tls@ietf.org/</a>',
)
self.assertEqual(
linkify('<a href="https://www.ietf.org">IETF</a>'),
(
'&lt;a href=&quot;<a href="https://www.ietf.org">https://www.ietf.org</a>&quot;&gt;IETF&lt;/a&gt;'
'&lt;a href=&quot;<a href="https://www.ietf.org" rel="nofollow">https://www.ietf.org</a>&quot;&gt;IETF&lt;/a&gt;'
),
)
self.assertEqual(
Expand All @@ -83,22 +87,21 @@ def test_linkify(self):
)
self.assertEqual(
linkify("https://www.ietf.org", autoescape=False),
'<a href="https://www.ietf.org">https://www.ietf.org</a>',
'https://www.ietf.org',
)
self.assertEqual(
linkify('<a href="https://www.ietf.org">IETF</a>', autoescape=False),
'<a href="https://www.ietf.org">IETF</a>',
)
self.assertEqual(
linkify("somebody@example.com", autoescape=False),
'<a href="mailto:somebody@example.com">somebody@example.com</a>',
'somebody@example.com',
)
# bleach.Linkifier translates the < -> &lt; and > -> &gt; on this one
self.assertEqual(
linkify("Some Body <somebody@example.com>", autoescape=False),
(
'Some Body &lt;<a href="mailto:somebody@example.com">'
'somebody@example.com</a>&gt;'
'Some Body <somebody@example.com>'
),
)
self.assertEqual(
Expand Down
11 changes: 5 additions & 6 deletions ietf/utils/templatetags/textfilters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

from django import template
from django.conf import settings
from django.template.defaultfilters import stringfilter
from django.utils.html import conditional_escape
from django.template.defaultfilters import stringfilter, urlize
# from django.utils.html import conditional_escape
from django.utils.safestring import mark_safe

import debug # pyflakes:ignore

from ietf.utils.text import linkify as _linkify, xslugify as _xslugify, texescape
from ietf.utils.text import xslugify as _xslugify, texescape

register = template.Library()

Expand Down Expand Up @@ -76,9 +76,8 @@ def texescape_filter(value):
@stringfilter
def linkify(value, autoescape=True):
if autoescape:
# Escape unless the input was already a SafeString
value = conditional_escape(value)
text = mark_safe(_linkify(value)) # _linkify is a safe operation
value = urlize(value, autoescape=True) # _linkify is a safe operation
text = mark_safe(value)
return text

@register.filter
Expand Down
Loading
Loading