Skip to content

Commit da75bdb

Browse files
authored
Fix new DSL issues (#421)
* Fix new DSL issues * Fix ambiguous out of bounds check in hash-detector * Fix empty href case in `runSelector` * Fix logging for `storage.es` * Set all the logger levels to `debug` for the test build * Set logging level to `debug` in module prefs * Disable logs for module config * Make isSuspiciousUrl checks more specific Check if a URL is suspicious or not instead of checking if a safeUrl may be constructed from the initial URL * Fix wdp module log level for sandbox * Strictify safe checks for URLs * Fix unit tests * Fix remaining isSuspiciousURL checks * Resolve Remi's reviews
1 parent 6bf2f89 commit da75bdb

File tree

6 files changed

+52
-53
lines changed

6 files changed

+52
-53
lines changed

configs/sandbox.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ module.exports = {
1212
default_prefs: {
1313
...base.default_prefs,
1414
"logger.hpnv2.level": "debug",
15-
"logger.web-discovery-project.level": "debug",
15+
"logger.wdp.level": "debug",
16+
"showConsoleLogs": true,
1617
},
1718
};

modules/core/sources/helpers/hash-detector.es

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1499,7 +1499,7 @@ function isHashProb(str) {
14991499
for (let i = 0; i < str.length - 1; i += 1) {
15001500
const pos1 = probHashChars[str[i]];
15011501
const pos2 = probHashChars[str[i + 1]];
1502-
if (pos1 && pos2) {
1502+
if (pos1 !== undefined && pos2 !== undefined) {
15031503
logProb += probHashLogM[pos1][pos2];
15041504
transC += 1;
15051505
}

modules/web-discovery-project/sources/content-extractor.es

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,12 @@ function runSelector(item, selector, attr, baseURI) {
4747
// * links should be as close to the original page as possible
4848
// * extensions IDs must not leak into the output
4949
const rawLink = elem.getAttribute("href");
50-
return rawLink ? new URL(rawLink, baseURI).href : null;
50+
if (!rawLink) return null;
51+
try {
52+
return new URL(rawLink, baseURI).href;
53+
} catch (e) {
54+
return null;
55+
}
5156
}
5257
if (elem.hasAttribute(attr)) {
5358
return elem.getAttribute(attr);

modules/web-discovery-project/sources/web-discovery-project.es

Lines changed: 41 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -961,8 +961,9 @@ const WebDiscoveryProject = {
961961
if (clean_url != url) {
962962
// they are different, sanity checks
963963
if (
964-
!sanitizeUrl(clean_url, { testMode: WebDiscoveryProject.testMode })
965-
.safeUrl ||
964+
sanitizeUrl(clean_url, {
965+
testMode: WebDiscoveryProject.testMode,
966+
}).result !== "safe" ||
966967
WebDiscoveryProject.dropLongURL(clean_url)
967968
)
968969
return url;
@@ -1026,7 +1027,10 @@ const WebDiscoveryProject = {
10261027
}
10271028

10281029
// the url is suspicious, this should never be the case here but better safe
1029-
if (!sanitizeUrl(url, { testMode: WebDiscoveryProject.testMode }).safeUrl) {
1030+
if (
1031+
sanitizeUrl(url, { testMode: WebDiscoveryProject.testMode }).result !==
1032+
"safe"
1033+
) {
10301034
return discard("URL failed the isSuspiciousURL check");
10311035
}
10321036

@@ -1725,8 +1729,9 @@ const WebDiscoveryProject = {
17251729

17261730
//Check if the URL is know to be bad: private, about:, odd ports, etc.
17271731
if (
1728-
!sanitizeUrl(activeURL, { testMode: WebDiscoveryProject.testMode })
1729-
.safeUrl
1732+
sanitizeUrl(activeURL, {
1733+
testMode: WebDiscoveryProject.testMode,
1734+
}).result !== "safe"
17301735
) {
17311736
logger.debug("[onLocationChange] isSuspiciousURL", activeURL);
17321737
return;
@@ -2330,28 +2335,22 @@ const WebDiscoveryProject = {
23302335
}
23312336
}
23322337

2338+
const sanitizedUrl = sanitizeUrl(linkURL, {
2339+
testMode: WebDiscoveryProject.testMode,
2340+
});
23332341
if (
2334-
sanitizeUrl(linkURL, { testMode: WebDiscoveryProject.testMode })
2335-
.safeUrl &&
2342+
sanitizedUrl.result === "safe" &&
23362343
!WebDiscoveryProject.dropLongURL(linkURL)
23372344
) {
23382345
WebDiscoveryProject.isAlreadyMarkedPrivate(linkURL, function (_res) {
23392346
if (_res && _res["private"] == 0) {
23402347
WebDiscoveryProject.state["v"][activeURL]["c"].push({
2341-
l:
2342-
"" +
2343-
sanitizeUrl(linkURL, {
2344-
testMode: WebDiscoveryProject.testMode,
2345-
}).safeUrl,
2348+
l: "" + sanitizedUrl.safeUrl,
23462349
t: WebDiscoveryProject.counter,
23472350
});
23482351
} else if (!_res) {
23492352
WebDiscoveryProject.state["v"][activeURL]["c"].push({
2350-
l:
2351-
"" +
2352-
sanitizeUrl(linkURL, {
2353-
testMode: WebDiscoveryProject.testMode,
2354-
}).safeUrl,
2353+
l: "" + sanitizedUrl.safeUrl,
23552354
t: WebDiscoveryProject.counter,
23562355
});
23572356
}
@@ -2437,7 +2436,7 @@ const WebDiscoveryProject = {
24372436
init: function () {
24382437
return Promise.resolve().then(() => {
24392438
logger.debug("Init function called:");
2440-
WebDiscoveryProject.log = logger.debug;
2439+
WebDiscoveryProject.logger = logger;
24412440
return Promise.resolve()
24422441
.then(() => {
24432442
if (WebDiscoveryProject.db) {
@@ -2571,18 +2570,9 @@ const WebDiscoveryProject = {
25712570
// Check if they are suspicious.
25722571
// Check if they are marked private.
25732572
if (msg.payload.ref) {
2574-
if (
2575-
!sanitizeUrl(msg.payload["ref"], {
2576-
testMode: WebDiscoveryProject.testMode,
2577-
}).safeUrl
2578-
) {
2579-
msg.payload["ref"] = null;
2580-
} else {
2581-
msg.payload["ref"] = WebDiscoveryProject.sanitizeUrl(
2582-
msg.payload["ref"],
2583-
{ testMode: WebDiscoveryProject.testMode },
2584-
);
2585-
}
2573+
msg.payload["ref"] = sanitizeUrl(msg.payload["ref"], {
2574+
testMode: WebDiscoveryProject.testMode,
2575+
}).safeUrl;
25862576

25872577
// Check if ref. exists in bloom filter, then turn ref to null.
25882578
WebDiscoveryProject.isAlreadyMarkedPrivate(
@@ -2659,9 +2649,9 @@ const WebDiscoveryProject = {
26592649

26602650
// check if suspiciousURL
26612651
if (
2662-
!sanitizeUrl(msg.payload.url, {
2652+
sanitizeUrl(msg.payload.url, {
26632653
testMode: WebDiscoveryProject.testMode,
2664-
}).safeUrl
2654+
}).result !== "safe"
26652655
)
26662656
return null;
26672657

@@ -2670,9 +2660,9 @@ const WebDiscoveryProject = {
26702660
msg.payload.x.canonical_url != ""
26712661
) {
26722662
if (
2673-
!sanitizeUrl(msg.payload.x.canonical_url, {
2663+
sanitizeUrl(msg.payload.x.canonical_url, {
26742664
testMode: WebDiscoveryProject.testMode,
2675-
}).safeUrl
2665+
}).result !== "safe"
26762666
)
26772667
return null;
26782668
}
@@ -2681,13 +2671,11 @@ const WebDiscoveryProject = {
26812671
if (msg.payload.red) {
26822672
var cleanRed = [];
26832673
msg.payload.red.forEach(function (e) {
2684-
if (
2685-
(sanitizeUrl(e).safeUrl, { testMode: WebDiscoveryProject.testMode })
2686-
) {
2687-
cleanRed.push(
2688-
sanitizeUrl(e, { testMode: WebDiscoveryProject.testMode })
2689-
.safeUrl,
2690-
);
2674+
const safeUrl = sanitizeUrl(e, {
2675+
testMode: WebDiscoveryProject.testMode,
2676+
}).safeUrl;
2677+
if (safeUrl !== null) {
2678+
cleanRed.push(safeUrl);
26912679
}
26922680
});
26932681
msg.payload.red = cleanRed;
@@ -2775,7 +2763,7 @@ const WebDiscoveryProject = {
27752763
if (
27762764
sanitizeUrl(msg.payload.r[eachResult].u, {
27772765
testMode: WebDiscoveryProject.testMode,
2778-
}).safeUrl
2766+
}).result === "safe"
27792767
) {
27802768
cleanR.push(msg.payload.r[eachResult]);
27812769
}
@@ -2989,7 +2977,10 @@ const WebDiscoveryProject = {
29892977
if (!source || source === "openTabs") {
29902978
const allOpenPages = await WebDiscoveryProject.getAllOpenPages();
29912979
pages = allOpenPages
2992-
.map((url) => ({ url, page_doc: WebDiscoveryProject.state.v[url] }))
2980+
.map((url) => ({
2981+
url,
2982+
page_doc: WebDiscoveryProject.state.v[url],
2983+
}))
29932984
.filter(({ url, page_doc }) => page_doc && isRelevantUrl(url));
29942985
} else if (source === "unprocessed") {
29952986
pages = await new Promise((resolve, reject) => {
@@ -3461,8 +3452,8 @@ const WebDiscoveryProject = {
34613452

34623453
if (
34633454
queryLikeURL &&
3464-
(!sanitizeUrl(query, { testMode: WebDiscoveryProject.testMode })
3465-
.safeUrl ||
3455+
(sanitizeUrl(query, { testMode: WebDiscoveryProject.testMode }).result !==
3456+
"safe" ||
34663457
WebDiscoveryProject.dropLongURL(query))
34673458
) {
34683459
logger.debug("Query is dangerous");
@@ -3486,7 +3477,8 @@ const WebDiscoveryProject = {
34863477

34873478
// Check URL is suspicious
34883479
if (
3489-
!sanitizeUrl(url, { testMode: WebDiscoveryProject.testMode }).safeUrl
3480+
sanitizeUrl(url, { testMode: WebDiscoveryProject.testMode }).result !==
3481+
"safe"
34903482
) {
34913483
logger.debug("Url is suspicious");
34923484
url = "(PROTECTED)";
@@ -3859,7 +3851,8 @@ const WebDiscoveryProject = {
38593851
setPrivate = true;
38603852
logger.debug("Setting private because empty page data");
38613853
} else if (
3862-
!sanitizeUrl(url, { testMode: WebDiscoveryProject.testMode }).safeUrl
3854+
sanitizeUrl(url, { testMode: WebDiscoveryProject.testMode })
3855+
.result !== "safe"
38633856
) {
38643857
// if the url looks private already add it already as checked and private
38653858
let reason = "susp. url";
@@ -3965,7 +3958,7 @@ const WebDiscoveryProject = {
39653958
var page_struct_before = page_doc["x"];
39663959
url_pagedocPair[url] = page_doc;
39673960

3968-
WebDiscoveryProject.log(
3961+
logger.debug(
39693962
"Going for double fetch (url:",
39703963
url,
39713964
", page_doc:",

modules/web-discovery-project/tests/unit/web-discovery-project-test.es

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ export default describeModule(
524524

525525
it("should accept pages with a long URL, but a short canonical URL", function () {
526526
const longUrl =
527-
"https://example.test/this/is/a/very/long/url/with/ids/like/123456789012345678";
527+
"https://example.test/this/is/very/long/url?foo=foo&bar=bar&baz=baz&qux=qux";
528528
assumeFailsUrlChecks(longUrl);
529529

530530
const shortUrl = "https://example.test/short-link";

platforms/webextension/web-discovery-project/storage.es

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ const webDiscoveryProjectChromeDB = {
5454
size: (callback) => {
5555
chrome.storage.local.getBytesInUse(null, (a) => {
5656
const res = [a, a / chrome.storage.local.QUOTA_BYTES];
57-
this.WebDiscoveryProject.log("Current size: ", res[0], res[1]);
57+
this.WebDiscoveryProject.logger.debug("Current size: ", res[0], res[1]);
5858
if (callback) callback(res);
5959
});
6060
},

0 commit comments

Comments
 (0)