-
Notifications
You must be signed in to change notification settings - Fork 64
Expand file tree
/
Copy pathproxy.py
More file actions
375 lines (307 loc) · 14.3 KB
/
Copy pathproxy.py
File metadata and controls
375 lines (307 loc) · 14.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
from datetime import datetime
import json
from typing import Dict, List
from playwright.sync_api import sync_playwright, Page, Browser, BrowserContext, Request, Response
from urllib.parse import urlparse
class WebProxy:
"""
A proxy class that monitors and captures web traffic during security testing.
Provides comprehensive request/response monitoring using both Playwright event listeners
and Chrome DevTools Protocol (CDP). Captures important network traffic like API calls,
form submissions, and XHR requests.
"""
def __init__(self, starting_url: str, logger):
"""
Initialize the web proxy.
Args:
starting_url: Base URL to monitor traffic for
logger: Logger instance for output
"""
self.requests: List[Dict] = []
self.responses: List[Dict] = []
self.request_response_pairs: List[Dict] = []
self.starting_url = starting_url
self.starting_hostname = urlparse(starting_url).netloc
self.logger = logger
self.cdp_client = None
self.is_capturing = True
self.request_map = {}
def create_proxy(self):
"""
Create a browser instance with network monitoring capabilities.
Returns:
Tuple of (browser, context, page, playwright) instances
"""
playwright = sync_playwright().start()
browser = playwright.chromium.launch(
headless=False,
args=['--disable-blink-features=AutomationControlled', '--disable-automation']
)
# Create context with needed settings
context = browser.new_context(
bypass_csp=True,
ignore_https_errors=True,
# Add user agent to avoid detection
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
# Create page
page = context.new_page()
# Setup monitoring
self.setup_monitoring(context, page)
return browser, context, page, playwright
def setup_monitoring(self, context: BrowserContext, page: Page):
"""
Set up all monitoring mechanisms.
Args:
context: Browser context to monitor
page: Page instance to monitor
"""
self._setup_event_listeners(context)
self._setup_cdp_monitoring(page)
def _setup_event_listeners(self, context: BrowserContext):
"""
Set up comprehensive event listeners for network traffic.
Args:
context: Browser context to attach listeners to
"""
def handle_request(request):
# Check if we should capture this request
if self._should_capture_request(request):
url = request.url
method = request.method
resource_type = request.resource_type
request_id = f"req_{datetime.now().timestamp()}"
self.logger.info(f"Captured {method} request to {url} [{resource_type}]", color='cyan')
# Store full request data
request_data = {
'url': url,
'method': method,
'headers': dict(request.headers), # Convert to dict to ensure serializability
'timestamp': datetime.now().isoformat(),
'resource_type': resource_type,
'request_id': request_id,
'post_data': request.post_data
}
self.requests.append(request_data)
self.request_map[url] = request_data
def handle_response(response):
url = response.url
status = response.status
# Check if we have a matching request
if url in self.request_map:
request_data = self.request_map[url]
self.logger.info(f"Captured response {status} from {url}", color='green')
# Store response data
try:
response_data = {
'url': url,
'status': status,
'status_text': response.status_text,
'headers': dict(response.headers), # Convert to dict
'timestamp': datetime.now().isoformat(),
'request_id': request_data.get('request_id')
}
# Try to get body for important responses
try:
if self._should_capture_body(response):
body = response.body()
response_data['body'] = body.decode('utf-8')
# Try to parse JSON
if 'application/json' in response.headers.get('content-type', ''):
try:
json_body = json.loads(response_data['body'])
response_data['json_body'] = json_body
except json.JSONDecodeError:
pass
except Exception as e:
response_data['body_error'] = str(e)
self.responses.append(response_data)
# Create request-response pair
self.request_response_pairs.append({
'request': request_data,
'response': response_data
})
except Exception as e:
self.logger.info(f"Error processing response: {str(e)}", color='red')
# Register event listeners on context
context.on('request', handle_request)
context.on('response', handle_response)
def _setup_cdp_monitoring(self, page: Page):
"""
Set up Chrome DevTools Protocol monitoring as a backup.
Args:
page: Page instance to monitor via CDP
"""
try:
# Create CDP session
self.cdp_client = page.context.new_cdp_session(page)
# Enable network monitoring
self.cdp_client.send('Network.enable')
# Track CDP requests separately
self.cdp_requests = {}
# Handle CDP events
def handle_cdp_request(params):
request_id = params.get('requestId', '')
request = params.get('request', {})
url = request.get('url', '')
method = request.get('method', '')
# Check hostname
request_hostname = urlparse(url).netloc
if request_hostname != self.starting_hostname:
return
# Only process if we haven't already seen this request through event listeners
if any(r['url'] == url for r in self.requests):
return
# Check if this is a request we care about
if method == 'POST' or '/api/' in url or '.json' in url:
self.logger.info(f"CDP captured {method} request to {url}", color='blue')
# Store request
request_data = {
'url': url,
'method': method,
'headers': request.get('headers', {}),
'timestamp': datetime.now().isoformat(),
'request_id': f"cdp_{request_id}",
'post_data': request.get('postData'),
'source': 'cdp'
}
self.requests.append(request_data)
self.cdp_requests[request_id] = request_data
def handle_cdp_response(params):
request_id = params.get('requestId', '')
if request_id not in self.cdp_requests:
return
request_data = self.cdp_requests[request_id]
response = params.get('response', {})
url = response.get('url', '')
self.logger.info(f"CDP captured response from {url}", color='light_blue')
# Store response
response_data = {
'url': url,
'status': response.get('status', 0),
'status_text': response.get('statusText', ''),
'headers': response.get('headers', {}),
'timestamp': datetime.now().isoformat(),
'request_id': request_data.get('request_id'),
'source': 'cdp'
}
# Try to get body
try:
if 'application/json' in response.get('headers', {}).get('content-type', ''):
body_response = self.cdp_client.send('Network.getResponseBody', {'requestId': request_id})
if body_response and 'body' in body_response:
response_data['body'] = body_response['body']
except Exception:
pass
self.responses.append(response_data)
# Create pair
self.request_response_pairs.append({
'request': request_data,
'response': response_data
})
# Clean up
del self.cdp_requests[request_id]
# Register CDP event handlers
self.cdp_client.on('Network.requestWillBeSent', handle_cdp_request)
self.cdp_client.on('Network.responseReceived', handle_cdp_response)
self.logger.info("CDP monitoring enabled", color='yellow')
except Exception as e:
self.logger.info(f"Failed to set up CDP monitoring: {str(e)}", color='red')
def _should_capture_request(self, request):
"""
Check if we should capture this request based on type and hostname.
Args:
request: Request to evaluate
Returns:
bool indicating if request should be captured
"""
# First check if the hostname matches our starting URL
request_hostname = urlparse(request.url).netloc
if request_hostname != self.starting_hostname:
return False
# Now check request type
url = request.url
method = request.method
resource_type = request.resource_type
# Determine if we should capture this request
is_xhr = resource_type == 'xhr'
is_fetch = resource_type == 'fetch'
is_websocket = resource_type == 'websocket'
is_post = method == 'POST'
is_api = '/api/' in url or url.endswith('.json')
is_form = 'form' in resource_type or 'multipart/form-data' in request.headers.get('content-type', '')
# Capture any request that might be important
return is_xhr or is_fetch or is_websocket or is_post or is_api or is_form
def get_network_data(self):
"""
Get all captured network data.
Returns:
Dict containing requests, responses and request-response pairs
"""
return {
'requests': self.requests,
'responses': self.responses,
'pairs': self.request_response_pairs
}
def save_network_data(self, filepath: str):
"""
Save captured network data to a JSON file.
Args:
filepath: Path to save JSON file to
"""
data = {
'requests': self.requests,
'responses': self.responses,
'pairs': self.request_response_pairs,
'timestamp': datetime.now().isoformat()
}
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, default=str)
def pretty_print_traffic(self):
"""
Pretty print captured traffic.
Returns:
Formatted string of traffic or None if no traffic captured
"""
if not self.request_response_pairs:
return None
output = []
output.append(f"Captured {len(self.request_response_pairs)} request-response pairs:")
for idx, pair in enumerate(self.request_response_pairs):
req = pair['request']
res = pair['response']
# Request details
output.append(f"\n=== Request {idx+1} ===")
output.append(f"Type: {req.get('resource_type', 'unknown')}")
output.append(f"Method: {req['method']}")
output.append(f"URL: {req['url']}")
if req.get('post_data'):
output.append(f"Parameters: {req['post_data']}")
# Response details
output.append(f"\n--- Response {idx+1} ---")
output.append(f"Status: {res['status']}")
# Response body
if 'json_body' in res:
try:
body_str = json.dumps(res['json_body'])[:300]
output.append(f"Type: JSON")
output.append(f"Body: {body_str}")
except:
if 'body' in res:
body_str = res['body'][:300]
output.append(f"Type: Raw")
output.append(f"Body: {body_str}")
elif 'body' in res:
body_str = res['body'][:300]
output.append(f"Type: Raw")
output.append(f"Body: {body_str}")
output.append("\n")
return "\n".join(output)
def clear(self):
"""Clear all captured network data."""
self.requests = []
self.responses = []
self.request_response_pairs = []
self.request_map = {}
if hasattr(self, 'cdp_requests'):
self.cdp_requests = {}