11# -*- coding: utf-8 -*-
22from AccessControl import ClassSecurityInfo
33from App .class_init import InitializeClass
4+ from HTMLParser import HTMLParser
45from plone .registry .interfaces import IRegistry
56from posixpath import normpath
67from Products .CMFCore .URLTool import URLTool as BaseTool
1112import re
1213
1314
15+ hp = HTMLParser ()  # module-level parser; isURLInPortal calls hp.unescape() on the url
16+ # URL schemes that a full url may use and still be considered in the portal.
17+ # Any other scheme (for example mailto:) marks the url as not in the portal.
18+ # This is a whitelist.
19+ ALLOWED_SCHEMAS = [
20+ 'https' ,
21+ 'http' ,
22+ ]
23+ # Substrings that must not occur anywhere in a url that is in the portal;
24+ # they are matched against the lower-cased url. This is a blacklist.
25+ BAD_URL_PARTS = [
26+ '\\ \\ ' ,
27+ '<script' ,
28+ '%3cscript' ,
29+ 'javascript:' ,
30+ 'javascript%3a' ,
31+ ]
32+
33+
1434class URLTool (PloneBaseTool , BaseTool ):
1535
1636 meta_type = 'Plone URL Tool'
@@ -34,16 +54,24 @@ def isURLInPortal(self, url, context=None):
3454 # sanitize url
3555 url = re .sub ('^[\x00 -\x20 ]+' , '' , url ).strip ()
3656 cmp_url = url .lower ()
37- if ('\\ \\ ' in cmp_url or
38- '<script' in cmp_url or
39- '%3cscript' in cmp_url or
40- 'javascript:' in cmp_url or
41- 'javascript%3a' in cmp_url ):
42- return False
57+ for bad in BAD_URL_PARTS :
58+ if bad in cmp_url :
59+ return False
4360
4461 p_url = self ()
4562
46- _ , u_host , u_path , _ , _ , _ = urlparse (url )
63+ schema , u_host , u_path , _ , _ , _ = urlparse (url )
64+ if schema and schema not in ALLOWED_SCHEMAS :
65+ # Redirecting to 'data:' may be harmful,
66+ # and redirecting to 'mailto:' or 'ftp:' is silly.
67+ return False
68+
69+ # Someone may be doing tricks with escaped html code.
70+ unescaped_url = hp .unescape (url )
71+ if unescaped_url != url :
72+ if not self .isURLInPortal (unescaped_url ):
73+ return False
74+
4775 if not u_host and not u_path .startswith ('/' ):
4876 if context is None :
4977 return True # old behavior
0 commit comments