Skip to content

Commit 581e3be

Browse files
authored
Fix context counters (#51)
* Update the context counter when a context times out * Synchronise ContextCounters * Fix the bug where `Changing contextCounter from -4 to 0 due to synchronization` was logged because a context had fallen off. * Structure changes * Log errors and version++
1 parent 0d891f9 commit 581e3be

File tree

7 files changed

+99
-27
lines changed

7 files changed

+99
-27
lines changed

app.js

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,6 @@ const STEALTH_BROWSING = (process.env.STEALTH_BROWSING || "true").toLowerCase()
4444
const MAX_CONCURRENT_CONTEXTS = process.env.MAX_CONCURRENT_CONTEXTS === "Infinity" ? Infinity : parseInt(process.env.MAX_CONCURRENT_CONTEXTS);
4545
const CONTEXT_TIMEOUT = parseInt(process.env.CONTEXT_TIMEOUT) || 600000; // 10 minutes
4646

47-
timeoutContext.initTimeoutContext(CONTEXT_TIMEOUT);
48-
limitContext.initContextCounter(MAX_CONCURRENT_CONTEXTS);
49-
loggers.initLogger(LOG_LEVEL, LOG_FILE, LOGSTASH_HOST, LOGSTASH_PORT);
50-
5147
async function setupBrowser() {
5248
try {
5349
if (TOKEN_2CAPTCHA) { // If token is given then RecaptchaPlugin is activated
@@ -88,14 +84,20 @@ async function setupBrowser() {
8884
process.exit(1);
8985
}
9086

91-
createPuppeteerMetrics(app);
87+
createPuppeteerMetrics(app); // TODO: to check if we can move it to services initialization part
9288
}
9389

90+
// App initialization
9491
(async () => {
9592
await setupBrowser();
9693
app.set('lock', new AsyncLock());
9794
})();
9895

96+
// Services initialization
97+
timeoutContext.initTimeoutContext(CONTEXT_TIMEOUT);
98+
limitContext.initContextCounter(app, MAX_CONCURRENT_CONTEXTS);
99+
loggers.initLogger(LOG_LEVEL, LOG_FILE, LOGSTASH_HOST, LOGSTASH_PORT);
100+
99101
app.use(express.json());
100102
app.use(express.urlencoded({ extended: false }));
101103
app.use(middlewares.logHTTPMiddleware());

helpers/limit_context.js

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
const { getLogger } = require('../helpers/loggers');
2+
13
let contextCounter = 0;
24

35
function incContextCounter() {}
@@ -9,10 +11,19 @@ exports.decContextCounter = decContextCounter; // Empty function or decrementer
911
function canCreateContext() { return true; }
1012
exports.canCreateContext = canCreateContext; // Truish function or checker if the context can be created
1113

12-
exports.initContextCounter = function (maxContextCounter) {
14+
exports.initContextCounter = function (app, maxContextCounter) {
1315
if (!isNaN(maxContextCounter)) {
1416
exports.incContextCounter = () => { contextCounter++ };
1517
exports.decContextCounter = () => { contextCounter-- };
16-
exports.canCreateContext = () => { return contextCounter < maxContextCounter }
18+
exports.canCreateContext = () => { return contextCounter < maxContextCounter };
19+
20+
setInterval(() => { // Synchronize number of contexts every 1 minute
21+
const contextsNumber = app.get('browser').browserContexts().length - 1; // Minus permanent context
22+
23+
if (contextsNumber !== contextCounter) {
24+
getLogger().warn(`Changing contextCounter from ${contextCounter} to ${contextsNumber} due to synchronization\n`);
25+
contextCounter = contextsNumber;
26+
}
27+
}, 60000);
1728
}
1829
}

helpers/timeout_context.js

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
const {BrowserContext} = require('puppeteer');
2+
23
const loggers = require('./loggers');
4+
const limitContext = require("./limit_context");
35

46
/**
5-
* ContextId -> Timeout timer' IDs
7+
* ContextId -> Timeout timer's IDs
68
*
79
* @type {{string: number}}
810
*/
@@ -12,25 +14,36 @@ let contextTimeout;
1214
/**
1315
* Set timeout for context.
1416
*
15-
* @param {BrowserContext} context
16-
*/
17+
* @param {BrowserContext} context Browser context.
18+
**/
1719
function setContextTimeout(context) {
1820
const logger = loggers.getLogger();
1921

2022
contextTimeoutIds[context.id] = setTimeout(
2123
async () => {
22-
logger.warn(`Closing context ${context.id} due to timeout\n`);
23-
await context.close();
24-
delete contextTimeoutIds[context.id];
24+
try {
25+
await context.close();
26+
limitContext.decContextCounter();
27+
logger.warn(`Context ${context.id} is closed due to timeout\n`);
28+
} catch (e) {
29+
logger.warn(`Context ${context.id} has fallen off\n`);
30+
logger.error({
31+
message: e,
32+
contextId: context.id,
33+
});
34+
} finally {
35+
delete contextTimeoutIds[context.id];
36+
}
2537
},
26-
contextTimeout);
38+
contextTimeout,
39+
);
2740
}
2841
exports.setContextTimeout = setContextTimeout;
2942

3043
/**
3144
* The function clears context's timeout timer.
3245
*
33-
* @param {BrowserContext} context context to be cleared
46+
* @param {BrowserContext} context Context to be cleared
3447
*/
3548
function clearContextTimeout(context) {
3649
clearTimeout(contextTimeoutIds[context.id]);
@@ -41,7 +54,7 @@ exports.clearContextTimeout = clearContextTimeout;
4154
/**
4255
* Update timeout for context.
4356
*
44-
* @param {BrowserContext} context
57+
* @param {BrowserContext} context Context.
4558
*/
4659
exports.updateContextTimeout = function updateContextTimeout (context) {
4760
clearContextTimeout(context);
@@ -51,7 +64,7 @@ exports.updateContextTimeout = function updateContextTimeout (context) {
5164
/**
5265
* Init service that times out contexts after CONTEXT_TIMEOUT ms.
5366
*
54-
* @param {number} timeout
67+
* @param {number} timeout Context timeout for the service.
5568
*/
5669
exports.initTimeoutContext = function initTimeoutContext (timeout) {
5770
contextTimeout = timeout;

helpers/utils.js

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
const exceptions = require("./exceptions");
1+
const {Browser} = require('puppeteer');
22
const { proxyRequest } = require('puppeteer-proxy');
3-
const timeoutContext = require('./timeout_context');
4-
const limitContext = require('./limit_context');
53
const PuppeteerHar = require('puppeteer-har');
64

5+
const exceptions = require("./exceptions");
6+
const limitContext = require('./limit_context');
7+
const timeoutContext = require('./timeout_context');
8+
79
const PROXY_URL_KEY = 'puppeteer-service-proxy-url'
810

911
async function findContextInBrowser(browser, contextId) {
@@ -24,6 +26,12 @@ async function findPageInContext(context, pageId) {
2426
throw new exceptions.PageNotFoundError();
2527
}
2628

29+
/**
30+
* Close contexts in browser.
31+
*
32+
* @param {Browser} browser Browser with contexts to close.
33+
* @param {[string]} contextIds Context ids to close.
34+
**/
2735
exports.closeContexts = async function closeContexts(browser, contextIds) {
2836
// TODO shared locks on contexts and exclusive on pages?
2937
const closePromises = [];
@@ -71,6 +79,7 @@ async function wait(page, waitFor) {
7179

7280
/***
7381
* This function returns the `pageId` and `contextId` of the corresponding page.
82+
*
7483
* @param page
7584
* @returns Promise
7685
*/
@@ -94,10 +103,11 @@ exports.getContents = async function getContents(page, waitFor) {
94103

95104
async function newPage(context, request) {
96105
const page = await context.newPage();
106+
97107
if (request.body.harRecording){
98-
const harWriter = new PuppeteerHar(page)
99-
harWriter.start()
100-
page.harWriter = harWriter
108+
const harWriter = new PuppeteerHar(page);
109+
await harWriter.start();
110+
page.harWriter = harWriter;
101111
}
102112

103113
await page.setRequestInterception(true);
@@ -125,7 +135,7 @@ async function newContext(browser, options = {}) {
125135
const context = await browser.createIncognitoBrowserContext(options);
126136
limitContext.incContextCounter();
127137
timeoutContext.setContextTimeout(context);
128-
return context
138+
return context;
129139
} catch (err) {
130140
limitContext.decContextCounter();
131141
throw err;
@@ -141,6 +151,7 @@ function getProxy(request) {
141151
/***
142152
* This function returns a page from the browser context, or creates a new page (or even a new context) if pageId or contextId are
143153
* none. If no context or no page is found, it throws an error.
154+
*
144155
* @param browser
145156
* @param request
146157
* @returns {Promise<Page>}

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "scrapy-puppeteer-service",
3-
"version": "0.3.4",
3+
"version": "0.3.5",
44
"private": true,
55
"scripts": {
66
"start": "node ./bin/www"
@@ -25,7 +25,7 @@
2525
"puppeteer-extra-plugin-recaptcha": "^3.6.8",
2626
"puppeteer-extra-plugin-stealth": "^2.11.2",
2727
"puppeteer-proxy": "^2.1.2",
28-
"puppeteer-har": "1.1.2",
28+
"puppeteer-har": "^1.1.2",
2929
"winston": "^3.11.0",
3030
"winston-logstash": "^1.2.1"
3131
}

routes/health_check.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
const express = require('express');
2+
23
const router = express.Router();
34

45
/**

yarn.lock

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,16 @@ chownr@^1.1.1:
431431
resolved "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz"
432432
integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
433433

434+
chrome-har@^0.11.3:
435+
version "0.11.12"
436+
resolved "https://registry.yarnpkg.com/chrome-har/-/chrome-har-0.11.12.tgz#29a75a0d9ebb70c9c40d8fbd35c3db7d4f010e25"
437+
integrity sha512-Fi/YCoUHjQMQC0sPKCdiuGVbApeEwIUNvISrlwZgbuUcxfHJA6MjD4RsIH/YSOAo/Z3ENiF+xaEpsdqqdETIjg==
438+
dependencies:
439+
dayjs "1.8.31"
440+
debug "4.1.1"
441+
tough-cookie "4.0.0"
442+
uuid "8.0.0"
443+
434444
chromium-bidi@0.4.7:
435445
version "0.4.7"
436446
resolved "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.4.7.tgz"
@@ -594,6 +604,11 @@ cross-spawn@^6.0.5:
594604
shebang-command "^1.2.0"
595605
which "^1.2.9"
596606

607+
dayjs@1.8.31:
608+
version "1.8.31"
609+
resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.8.31.tgz#0cd1114c2539dd5ad9428be0c38df6d4bb40b9d3"
610+
integrity sha512-mPh1mslned+5PuIuiUfbw4CikHk6AEAf2Baxih+wP5fssv+wmlVhvgZ7mq+BhLt7Sr/Hc8leWDiwe6YnrpNt3g==
611+
597612
debug@2.6.9:
598613
version "2.6.9"
599614
resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
@@ -608,6 +623,13 @@ debug@4, debug@4.3.4, debug@^4.1.1, debug@~4.3.4:
608623
dependencies:
609624
ms "2.1.2"
610625

626+
debug@4.1.1:
627+
version "4.1.1"
628+
resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791"
629+
integrity sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==
630+
dependencies:
631+
ms "^2.1.1"
632+
611633
decompress-response@^6.0.0:
612634
version "6.0.0"
613635
resolved "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz"
@@ -1843,6 +1865,13 @@ puppeteer-extra@^3.3.6:
18431865
debug "^4.1.1"
18441866
deepmerge "^4.2.2"
18451867

1868+
puppeteer-har@^1.1.2:
1869+
version "1.1.2"
1870+
resolved "https://registry.yarnpkg.com/puppeteer-har/-/puppeteer-har-1.1.2.tgz#f78e832118ee083ab86bf3e6b73c6642d9e5325f"
1871+
integrity sha512-Z5zfoj8RkhUT9UbrrR8JjOHNnCr7sNINoeR346F40sLo/4zn+KX/sw/eoKNrtsISc1s/2YCZaqaSEVx6cZ8NQg==
1872+
dependencies:
1873+
chrome-har "^0.11.3"
1874+
18461875
puppeteer-proxy@^2.1.2:
18471876
version "2.1.2"
18481877
resolved "https://registry.npmjs.org/puppeteer-proxy/-/puppeteer-proxy-2.1.2.tgz"
@@ -2249,7 +2278,7 @@ toidentifier@1.0.1:
22492278
resolved "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz"
22502279
integrity sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==
22512280

2252-
tough-cookie@^4.0.0:
2281+
tough-cookie@4.0.0, tough-cookie@^4.0.0:
22532282
version "4.0.0"
22542283
resolved "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.0.0.tgz"
22552284
integrity sha512-tHdtEpQCMrc1YLrMaqXXcj6AxhYi/xgit6mZu1+EDWUn+qhUf8wMQoFIy9NXuq23zAwtcB0t/MjACGR18pcRbg==
@@ -2337,6 +2366,11 @@ utils-merge@1.0.1:
23372366
resolved "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz"
23382367
integrity sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=
23392368

2369+
uuid@8.0.0:
2370+
version "8.0.0"
2371+
resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.0.0.tgz#bc6ccf91b5ff0ac07bbcdbf1c7c4e150db4dbb6c"
2372+
integrity sha512-jOXGuXZAWdsTH7eZLtyXMqUb9EcWMGZNbL9YcGBJl4MH4nrxHmZJhEHvyLFrkxo+28uLb/NYRcStH48fnD0Vzw==
2373+
23402374
vali-date@^1.0.0:
23412375
version "1.0.0"
23422376
resolved "https://registry.npmjs.org/vali-date/-/vali-date-1.0.0.tgz"

0 commit comments

Comments
 (0)