Skip to content

Commit 706c3d1

Browse files
authored
Extract author name from itemprop='name'. (#943)
* Extract author name from itemprop='name'. Fixes #935
* De-dupe textContent.trim()
1 parent af54155 commit 706c3d1

File tree

6 files changed

+23
-6
lines changed

6 files changed

+23
-6
lines changed

Readability.js

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,10 @@ Readability.prototype = {
940940
* (and its kids) are going away, and we want the next node over.
941941
*
942942
* Calling this in a loop will traverse the DOM depth-first.
943+
*
944+
* @param {Element} node
945+
* @param {boolean} ignoreSelfAndKids
946+
* @return {Element}
943947
*/
944948
_getNextNode(node, ignoreSelfAndKids) {
945949
// First check for kids if those aren't being ignored
@@ -1079,7 +1083,20 @@ Readability.prototype = {
10791083
!this._metadata.byline &&
10801084
this._isValidByline(node, matchString)
10811085
) {
1082-
this._articleByline = node.textContent.trim();
1086+
// Find child node matching [itemprop="name"] and use that if it exists for a more accurate author name byline
1087+
var endOfSearchMarkerNode = this._getNextNode(node, true);
1088+
var next = this._getNextNode(node);
1089+
var itemPropNameNode = null;
1090+
while (next && next != endOfSearchMarkerNode) {
1091+
var itemprop = next.getAttribute("itemprop");
1092+
if (itemprop && itemprop.includes("name")) {
1093+
itemPropNameNode = next;
1094+
break;
1095+
} else {
1096+
next = this._getNextNode(next);
1097+
}
1098+
}
1099+
this._articleByline = (itemPropNameNode ?? node).textContent.trim();
10831100
node = this._removeAndGetNext(node);
10841101
continue;
10851102
}

test/test-pages/001/expected-metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"title": "Get your Frontend JavaScript Code Covered | Code",
3-
"byline": "Nicolas Perriault",
3+
"byline": "Nicolas Perriault",
44
"dir": null,
55
"lang": "en",
66
"excerpt": "Nicolas Perriault's homepage.",

test/test-pages/ars-1/expected-metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"title": "Just-released Minecraft exploit makes it easy to crash game servers",
3-
"byline": "Dan Goodin - Apr 16, 2015 8:02 pm UTC",
3+
"byline": "Dan Goodin",
44
"dir": null,
55
"lang": "en-us",
66
"excerpt": "Two-year-old bug exposes thousands of servers to crippling attack.",

test/test-pages/msn/expected-metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"title": "Nintendo's first iPhone game will launch in December for $10",
3-
"byline": "Alex Perry\n \n 1 day ago",
3+
"byline": "Alex Perry",
44
"dir": "ltr",
55
"lang": "en-US",
66
"excerpt": "Nintendo and Apple shocked the world earlier this year by announcing \"Super Mario Run,\" the legendary gaming company's first foray into mobile gaming. ",

test/test-pages/nytimes-3/expected-metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"title": "Manhole Fires and Burst Pipes: How Winter Wreaks Havoc on What’s Underneath N.Y.C.",
3-
"byline": "By Corey Kilgannon",
3+
"byline": "Corey Kilgannon",
44
"dir": null,
55
"lang": "en",
66
"excerpt": "New York’s aging below-street infrastructure is tough to maintain, and the corrosive rock salt and “freeze-thaw” cycles of winter make it even worse.",

test/test-pages/nytimes-4/expected-metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"title": "As Debt Rises, the Government Will Soon Spend More on Interest Than on the Military",
3-
"byline": "By Nelson D. Schwartz",
3+
"byline": "Nelson D. Schwartz",
44
"dir": null,
55
"lang": "en",
66
"excerpt": "Tax cuts, spending increases and higher interest rates could make it harder to respond to future recessions and deal with other needs.",

0 commit comments

Comments
 (0)