Skip to content

Commit 615340c

Browse files
authored
Context limit middleware (#45)
* Base * separated middlewares * readme * Too many requests error * Now limit_context is a small service. * Typos * Typos * Delete useless variable * Version update
1 parent ca1e25c commit 615340c

File tree

10 files changed

+122
-71
lines changed

10 files changed

+122
-71
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ Here we list them all with their purpose.
227227
* `VIEWPORT_HEIGHT = 720` - height of the browser's window
228228
* `TOKEN_2CAPTCHA = undefined` - token of [2captcha service](https://2captcha.com)
229229
* `STEALTH_BROWSING = true` - should the service use the [stealth browsing](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth) mode
230+
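* `MAX_CONCURRENT_CONTEXTS = undefined` - maximum number of browser contexts that may be open at the same time. If it is unset (or not a number), no limit is applied; when the limit is reached, requests that need a new context fail with HTTP 429 (Too Many Requests)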
230231

231232
## Notes on memory usage
232233
You need to explicitly close the browser tab once you don't need it (e.g. at the end of the parse method).

app.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ const mhtmlRouter = require('./routes/mhtml');
2121
const harRouter = require('./routes/har');
2222
const closeContextRouter = require('./routes/close_context');
2323

24-
const middlewares = require('./helpers/middlewares')
24+
const middlewares = require('./helpers/middlewares');
25+
const limitContext = require('./helpers/limit_context');
2526
const loggers = require("./helpers/loggers");
2627

2728
const app = express();
@@ -36,12 +37,14 @@ const VIEWPORT_WIDTH = parseInt(process.env.VIEWPORT_WIDTH) || 1280;
3637
const VIEWPORT_HEIGHT = parseInt(process.env.VIEWPORT_HEIGHT) || 720;
3738
const TOKEN_2CAPTCHA = process.env.TOKEN_2CAPTCHA;
3839
const STEALTH_BROWSING = (process.env.STEALTH_BROWSING || "true").toLowerCase() === "true";
40+
const MAX_CONCURRENT_CONTEXTS = process.env.MAX_CONCURRENT_CONTEXTS === "Infinity" ? Infinity : parseInt(process.env.MAX_CONCURRENT_CONTEXTS);
3941

42+
limitContext.initContextCounter(MAX_CONCURRENT_CONTEXTS);
4043
loggers.initLogger(LOG_LEVEL, LOG_FILE, LOGSTASH_HOST, LOGSTASH_PORT);
4144

4245
async function setupBrowser() {
4346
try {
44-
if (TOKEN_2CAPTCHA) { // If token is given then RecapcthaPlugin is activated
47+
if (TOKEN_2CAPTCHA) { // If token is given then RecaptchaPlugin is activated
4548
puppeteer.use(
4649
RecaptchaPlugin({
4750
provider: {

helpers/exceptions.js

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,11 @@ exports.ContextNotFoundError = class ContextNotFoundError extends Error {
1313
this.name = "ContextNotFoundError";
1414
}
1515
}
16+
17+
exports.TooManyContextsError = class TooManyContextsError extends Error {
18+
constructor(message="Could not create new context due to restriction", ...args) {
19+
super(message, ...args);
20+
this.message = message;
21+
this.name = "TooManyContextsError";
22+
}
23+
}

helpers/limit_context.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
let contextCounter = 0;
2+
3+
function incContextCounter() {}
4+
exports.incContextCounter = incContextCounter; // No-op by default; initContextCounter replaces it with a real incrementer when a limit is set
5+
6+
function decContextCounter() {}
7+
exports.decContextCounter = decContextCounter; // No-op by default; initContextCounter replaces it with a real decrementer when a limit is set
8+
9+
function canCreateContext() { return true; }
10+
exports.canCreateContext = canCreateContext; // Always returns true by default; initContextCounter replaces it with a check against the limit
11+
12+
exports.initContextCounter = function (maxContextCounter) {
13+
if (!isNaN(maxContextCounter)) {
14+
exports.incContextCounter = () => { contextCounter++ };
15+
exports.decContextCounter = () => { contextCounter-- };
16+
exports.canCreateContext = () => { return contextCounter < maxContextCounter }
17+
}
18+
}

helpers/middlewares.js

Lines changed: 0 additions & 66 deletions
This file was deleted.

helpers/middlewares/index.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
exports.logHTTPMiddleware = require('./logging').logHTTPMiddleware;
2+
exports.logExceptionMiddleware = require('./logging').logExceptionMiddleware;
3+
exports.processExceptionMiddleware = require('./process_exception').processExceptionMiddleware;

helpers/middlewares/logging.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
const loggers = require("../loggers");
2+
const morgan = require("morgan");
3+
4+
/***
5+
* Returns the middleware for logging HTTP request-response.
6+
***/
7+
exports.logHTTPMiddleware = function logHTTPMiddleware() {
8+
const logger = loggers.getLogger();
9+
10+
return morgan(
11+
loggers.HTTPFormat,
12+
{
13+
stream: {
14+
write: (message) => logger.http(message),
15+
},
16+
}
17+
);
18+
}
19+
20+
/***
21+
* Middleware for logging exceptions.
22+
***/
23+
exports.logExceptionMiddleware = async function logExceptionMiddleware(err, req, res, next) {
24+
loggers.getLogger().error({
25+
message: err,
26+
contextId: req.query["contextId"],
27+
pageId: req.query["pageId"],
28+
});
29+
next();
30+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
const exceptions = require("../exceptions");
2+
3+
/***
4+
* Middleware for processing exceptions.
5+
***/
6+
exports.processExceptionMiddleware = async function processExceptionMiddleware(err, req, res, next) {
7+
if (res.headersSent) {
8+
return next(err);
9+
}
10+
11+
const contextId = err.contextId || req.query.contextId;
12+
const pageId = err.pageId || req.query.pageId;
13+
const errorMessage = err.message || 'Unknown error';
14+
15+
if (contextId) {
16+
res.header('scrapy-puppeteer-service-context-id', contextId);
17+
}
18+
19+
if (err instanceof exceptions.TooManyContextsError) {
20+
res.status(429); // Too Many Requests
21+
} else if (err.contextId) { // there was a context, but something went wrong
22+
res.status(500);
23+
} else { // No context. Possibly, our service was restarted
24+
if (err instanceof exceptions.PageNotFoundError || err instanceof exceptions.ContextNotFoundError) {
25+
res.status(422); // Unprocessable Entity
26+
} else {
27+
res.status(500);
28+
}
29+
}
30+
31+
res.send({
32+
contextId,
33+
pageId,
34+
error: errorMessage
35+
});
36+
37+
next(err);
38+
}

helpers/utils.js

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
const exceptions = require("./exceptions");
22
const { proxyRequest } = require('puppeteer-proxy');
3+
const limitContext = require('./limit_context');
34

45
const PROXY_URL_KEY = 'puppeteer-service-proxy-url'
56

@@ -26,6 +27,7 @@ exports.closeContexts = async function closeContexts(browser, contextIds) {
2627
const closePromises = [];
2728
for (const context of browser.browserContexts()) {
2829
if (contextIds.includes(context.id)) {
30+
limitContext.decContextCounter();
2931
closePromises.push(context.close());
3032
}
3133
}
@@ -106,6 +108,20 @@ async function newPage(context) {
106108
return page;
107109
}
108110

111+
async function newContext(browser, options = {}) {
112+
if (!limitContext.canCreateContext()) {
113+
throw new exceptions.TooManyContextsError();
114+
}
115+
116+
try {
117+
limitContext.incContextCounter();
118+
return await browser.createIncognitoBrowserContext(options);
119+
} catch (err) {
120+
limitContext.decContextCounter();
121+
throw err;
122+
}
123+
}
124+
109125
function getProxy(request) {
110126
if ('body' in request && 'proxy' in request.body) {
111127
return request.body.proxy;
@@ -127,12 +143,12 @@ exports.getBrowserPage = async function getBrowserPage(browser, request) {
127143
}
128144
const proxy = getProxy(request);
129145
if (!proxy) {
130-
const context = await browser.createIncognitoBrowserContext();
146+
const context = await newContext(browser);
131147
return newPage(context);
132148
}
133149
const { origin: proxyServer, username, password } = new URL(proxy);
134150

135-
const context = await browser.createIncognitoBrowserContext({ proxyServer });
151+
const context = await newContext(browser, { proxyServer });
136152
context[PROXY_URL_KEY] = proxy;
137153
const page = await newPage(context);
138154
if (username) {

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "scrapy-puppeteer-service",
3-
"version": "0.3.0",
3+
"version": "0.3.1",
44
"private": true,
55
"scripts": {
66
"start": "node ./bin/www"

0 commit comments

Comments
 (0)