Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit b63536f

Browse files
committed
feat: remove not needed encryption of secrets
Instead, use a UUID generator as we do for PII, and reuse the same session store mechanism. Closes: #929
1 parent b23effd commit b63536f

File tree

10 files changed

+120
-329
lines changed

10 files changed

+120
-329
lines changed

src/codegate/pipeline/base.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,8 @@ def secure_cleanup(self):
3030
"""Securely cleanup sensitive data for this session"""
3131
if self.manager is None or self.session_id == "":
3232
return
33-
3433
self.manager.cleanup_session(self.session_id)
3534
self.session_id = ""
36-
37-
# Securely wipe the API key using the same method as secrets manager
38-
if self.api_key is not None:
39-
api_key_bytes = bytearray(self.api_key.encode())
40-
self.manager.crypto.wipe_bytearray(api_key_bytes)
41-
self.api_key = None
42-
4335
self.model = None
4436

4537

src/codegate/pipeline/pii/analyzer.py

Lines changed: 22 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7,41 +7,11 @@
77

88
from codegate.db.models import AlertSeverity
99
from codegate.pipeline.base import PipelineContext
10+
from codegate.session.session_store import SessionStore
1011

1112
logger = structlog.get_logger("codegate.pii.analyzer")
1213

1314

14-
class PiiSessionStore:
15-
"""
16-
A class to manage PII (Personally Identifiable Information) session storage.
17-
18-
Attributes:
19-
session_id (str): The unique identifier for the session. If not provided, a new UUID
20-
is generated. mappings (Dict[str, str]): A dictionary to store mappings between UUID
21-
placeholders and PII.
22-
23-
Methods:
24-
add_mapping(pii: str) -> str:
25-
Adds a PII string to the session store and returns a UUID placeholder for it.
26-
27-
get_pii(uuid_placeholder: str) -> str:
28-
Retrieves the PII string associated with the given UUID placeholder. If the placeholder
29-
is not found, returns the placeholder itself.
30-
"""
31-
32-
def __init__(self, session_id: str = None):
33-
self.session_id = session_id or str(uuid.uuid4())
34-
self.mappings: Dict[str, str] = {}
35-
36-
def add_mapping(self, pii: str) -> str:
37-
uuid_placeholder = f"<{str(uuid.uuid4())}>"
38-
self.mappings[uuid_placeholder] = pii
39-
return uuid_placeholder
40-
41-
def get_pii(self, uuid_placeholder: str) -> str:
42-
return self.mappings.get(uuid_placeholder, uuid_placeholder)
43-
44-
4515
class PiiAnalyzer:
4616
"""
4717
PiiAnalyzer class for analyzing and anonymizing text containing PII.
@@ -52,12 +22,12 @@ class PiiAnalyzer:
5222
Get or create the singleton instance of PiiAnalyzer.
5323
analyze:
5424
text (str): The text to analyze for PII.
55-
Tuple[str, List[Dict[str, Any]], PiiSessionStore]: The anonymized text, a list of
25+
Tuple[str, List[Dict[str, Any]]]: The anonymized text and a list of
5626
found PII details.
5727
entities (List[str]): The PII entities to analyze for.
5828
restore_pii:
5929
anonymized_text (str): The text with anonymized PII.
60-
session_store (PiiSessionStore): The PiiSessionStore used for anonymization.
30+
session_id (str): The ID of the session used for anonymization.
6131
str: The text with original PII restored.
6232
"""
6333

@@ -95,13 +65,13 @@ def __init__(self):
9565
# Create analyzer with custom NLP engine
9666
self.analyzer = AnalyzerEngine(nlp_engine=nlp_engine)
9767
self.anonymizer = AnonymizerEngine()
98-
self.session_store = PiiSessionStore()
68+
self.session_store = SessionStore()
9969

10070
PiiAnalyzer._instance = self
10171

10272
def analyze(
103-
self, text: str, context: Optional[PipelineContext] = None
104-
) -> Tuple[str, List[Dict[str, Any]], PiiSessionStore]:
73+
self, text: str, session_id: str, context: Optional[PipelineContext] = None
74+
) -> Tuple[str, List[Dict[str, Any]]]:
10575
# Prioritize credit card detection first
10676
entities = [
10777
"PHONE_NUMBER",
@@ -135,7 +105,7 @@ def analyze(
135105
anonymized_text = text
136106
for result in analyzer_results:
137107
pii_value = text[result.start : result.end]
138-
uuid_placeholder = self.session_store.add_mapping(pii_value)
108+
uuid_placeholder = self.session_store.add_mapping(session_id, pii_value)
139109
pii_info = {
140110
"type": result.entity_type,
141111
"value": pii_value,
@@ -155,7 +125,7 @@ def analyze(
155125
uuid=uuid_placeholder,
156126
# Don't log the actual PII value for security
157127
value_length=len(pii_value),
158-
session_id=self.session_store.session_id,
128+
session_id=session_id,
159129
)
160130

161131
# Log summary of all PII found in this analysis
@@ -176,30 +146,37 @@ def analyze(
176146
"PII analysis complete",
177147
total_pii_found=len(found_pii),
178148
pii_types=[p["type"] for p in found_pii],
179-
session_id=self.session_store.session_id,
149+
session_id=session_id,
180150
)
181151

182152
# Return the anonymized text and PII details
183-
return anonymized_text, found_pii, self.session_store
153+
return anonymized_text, found_pii
184154

185155
# If no PII found, return original text and an empty list
186-
return text, [], self.session_store
156+
return text, []
187157

188-
def restore_pii(self, anonymized_text: str, session_store: PiiSessionStore) -> str:
158+
def restore_pii(self, anonymized_text: str, session_id: str) -> str:
189159
"""
190160
Restore the original PII (Personally Identifiable Information) in the given anonymized text.
191161
192162
This method replaces placeholders in the anonymized text with their corresponding original
193-
PII values using the mappings stored in the provided PiiSessionStore.
163+
PII values using the mappings stored in the SessionStore for the given session.
194164
195165
Args:
196166
anonymized_text (str): The text containing placeholders for PII.
197-
session_store (PiiSessionStore): The session store containing mappings of placeholders
167+
session_id (str): Identifier of the session whose stored mappings link placeholders
198168
to original PII.
199169
200170
Returns:
201171
str: The text with the original PII restored.
202172
"""
203-
for uuid_placeholder, original_pii in session_store.mappings.items():
173+
session_data = self.session_store.get_by_session_id(session_id)
174+
if not session_data:
175+
logger.warning(
176+
"No active PII session found for given session ID. Unable to restore PII."
177+
)
178+
return anonymized_text
179+
180+
for uuid_placeholder, original_pii in session_data.items():
204181
anonymized_text = anonymized_text.replace(uuid_placeholder, original_pii)
205182
return anonymized_text

src/codegate/pipeline/pii/manager.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
import structlog
44

55
from codegate.pipeline.base import PipelineContext
6-
from codegate.pipeline.pii.analyzer import PiiAnalyzer, PiiSessionStore
6+
from codegate.pipeline.pii.analyzer import PiiAnalyzer
7+
from codegate.session.session_store import SessionStore
78

89
logger = structlog.get_logger("codegate")
910

@@ -16,22 +17,22 @@ class PiiManager:
1617
Attributes:
1718
analyzer (PiiAnalyzer): The singleton instance of PiiAnalyzer used for
1819
PII detection and restoration.
19-
session_store (PiiSessionStore): The session store for the current PII session.
20+
session_store (SessionStore): The session store for the current PII session.
2021
2122
Methods:
2223
__init__():
2324
Initializes the PiiManager with the singleton PiiAnalyzer instance and sets the
2425
session store.
2526
26-
analyze(text: str) -> Tuple[str, List[Dict[str, Any]]]:
27+
analyze(text: str, session_id: str) -> Tuple[str, List[Dict[str, Any]]]:
2728
Analyzes the given text for PII, anonymizes it, and logs the detected PII details.
2829
Args:
2930
text (str): The text to be analyzed for PII.
3031
Returns:
3132
Tuple[str, List[Dict[str, Any]]]: A tuple containing the anonymized text and
3233
a list of found PII details.
3334
34-
restore_pii(anonymized_text: str) -> str:
35+
restore_pii(anonymized_text: str, session_id: str) -> str:
3536
Restores the PII in the given anonymized text using the current session.
3637
Args:
3738
anonymized_text (str): The text with anonymized PII to be restored.
@@ -48,16 +49,16 @@ def __init__(self):
4849
self._session_store = self.analyzer.session_store
4950

5051
@property
51-
def session_store(self) -> PiiSessionStore:
52+
def session_store(self) -> SessionStore:
5253
"""Get the current session store."""
5354
# Always return the analyzer's current session store
5455
return self.analyzer.session_store
5556

5657
def analyze(
57-
self, text: str, context: Optional[PipelineContext] = None
58+
self, text: str, session_id: str, context: Optional[PipelineContext] = None
5859
) -> Tuple[str, List[Dict[str, Any]]]:
5960
# Call analyzer and get results
60-
anonymized_text, found_pii, _ = self.analyzer.analyze(text, context=context)
61+
anonymized_text, found_pii = self.analyzer.analyze(text, session_id, context=context)
6162

6263
# Log found PII details (without modifying the found_pii list)
6364
if found_pii:
@@ -72,13 +73,9 @@ def analyze(
7273
# Return the exact same objects we got from the analyzer
7374
return anonymized_text, found_pii
7475

75-
def restore_pii(self, anonymized_text: str) -> str:
76+
def restore_pii(self, anonymized_text: str, session_id: str) -> str:
7677
"""
7778
Restore PII in the given anonymized text using the current session.
7879
"""
79-
if self.session_store is None:
80-
logger.warning("No active PII session found. Unable to restore PII.")
81-
return anonymized_text
82-
8380
# Use the analyzer's restore_pii method with the given session ID
84-
return self.analyzer.restore_pii(anonymized_text, self.session_store)
81+
return self.analyzer.restore_pii(anonymized_text, session_id)

src/codegate/pipeline/pii/pii.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import re
22
from typing import Any, Dict, List, Optional
3+
import uuid
34

45
import structlog
56
from litellm import ChatCompletionRequest, ChatCompletionSystemMessage, ModelResponse
@@ -37,7 +38,7 @@ class CodegatePii(PipelineStep):
3738
Processes the chat completion request to detect and redact PII. Updates the request with
3839
anonymized text and stores PII details in the context metadata.
3940
40-
restore_pii(anonymized_text: str) -> str:
41+
restore_pii(anonymized_text: str, session_id: str) -> str:
4142
Restores the original PII from the anonymized text using the PiiManager.
4243
"""
4344

@@ -75,12 +76,15 @@ async def process(
7576
total_pii_found = 0
7677
all_pii_details: List[Dict[str, Any]] = []
7778
last_redacted_text = ""
79+
session_id = context.session_id if hasattr(context, "session_id") else str(uuid.uuid4())
7880

7981
for i, message in enumerate(new_request["messages"]):
8082
if "content" in message and message["content"]:
8183
# This is where we analyze and anonymize the text
8284
original_text = str(message["content"])
83-
anonymized_text, pii_details = self.pii_manager.analyze(original_text, context)
85+
anonymized_text, pii_details = self.pii_manager.analyze(
86+
original_text, session_id, context
87+
)
8488

8589
if pii_details:
8690
total_pii_found += len(pii_details)
@@ -99,6 +103,7 @@ async def process(
99103
context.metadata["redacted_pii_count"] = total_pii_found
100104
context.metadata["redacted_pii_details"] = all_pii_details
101105
context.metadata["redacted_text"] = last_redacted_text
106+
context.metadata["session_id"] = session_id
102107

103108
if total_pii_found > 0:
104109
context.metadata["pii_manager"] = self.pii_manager
@@ -113,8 +118,8 @@ async def process(
113118

114119
return PipelineResult(request=new_request, context=context)
115120

116-
def restore_pii(self, anonymized_text: str) -> str:
117-
return self.pii_manager.restore_pii(anonymized_text)
121+
def restore_pii(self, anonymized_text: str, session_id: str) -> str:
122+
return self.pii_manager.restore_pii(anonymized_text, session_id)
118123

119124

120125
class PiiUnRedactionStep(OutputPipelineStep):
@@ -151,7 +156,7 @@ def _is_complete_uuid(self, uuid_str: str) -> bool:
151156
"""Check if the string is a complete UUID"""
152157
return bool(self.complete_uuid_pattern.match(uuid_str))
153158

154-
async def process_chunk(
159+
async def process_chunk( # noqa: C901
155160
self,
156161
chunk: ModelResponse,
157162
context: OutputPipelineContext,
@@ -162,6 +167,10 @@ async def process_chunk(
162167
return [chunk]
163168

164169
content = chunk.choices[0].delta.content
170+
session_id = input_context.metadata.get("session_id", "")
171+
if not session_id:
172+
logger.error("Could not get any session id, cannot process pii")
173+
return [chunk]
165174

166175
# Add current chunk to buffer
167176
if context.prefix_buffer:
@@ -199,7 +208,7 @@ async def process_chunk(
199208
if pii_manager and pii_manager.session_store:
200209
# Restore original value from PII manager
201210
logger.debug("Attempting to restore PII from UUID marker")
202-
original = pii_manager.session_store.get_pii(uuid_marker)
211+
original = pii_manager.session_store.get_mapping(session_id, uuid_marker)
203212
logger.debug(f"Restored PII: {original}")
204213
result.append(original)
205214
else:

0 commit comments

Comments
 (0)