Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit ac945bd

Browse files
committed
fix tests
1 parent d20d799 commit ac945bd

File tree

12 files changed

+127
-261
lines changed

12 files changed

+127
-261
lines changed

src/codegate/pipeline/pii/analyzer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def __init__(self):
7070
PiiAnalyzer._instance = self
7171

7272
def analyze(
73-
self, text: str, session_id: str, context: Optional[PipelineContext] = None
73+
self, session_id: str, text: str, context: Optional[PipelineContext] = None
7474
) -> Tuple[str, List[Dict[str, Any]]]:
7575
# Prioritize credit card detection first
7676
entities = [
@@ -155,7 +155,7 @@ def analyze(
155155
# If no PII found, return the original text and an empty list
156156
return text, []
157157

158-
def restore_pii(self, anonymized_text: str, session_id: str) -> str:
158+
def restore_pii(self, session_id: str, anonymized_text: str) -> str:
159159
"""
160160
Restore the original PII (Personally Identifiable Information) in the given anonymized text.
161161
@@ -164,7 +164,7 @@ def restore_pii(self, anonymized_text: str, session_id: str) -> str:
164164
165165
Args:
166166
anonymized_text (str): The text containing placeholders for PII.
167-
session_store (SessionStore): The session store containing mappings of placeholders
167+
session_id (str): The id of the session whose store holds the mappings of placeholders
168168
to original PII.
169169
170170
Returns:

src/codegate/pipeline/pii/manager.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,19 @@ class PiiManager:
2424
Initializes the PiiManager with the singleton PiiAnalyzer instance and sets the
2525
session store.
2626
27-
analyze(text: str, session_id: str) -> Tuple[str, List[Dict[str, Any]]]:
27+
analyze(session_id: str, text: str) -> Tuple[str, List[Dict[str, Any]]]:
2828
Analyzes the given text for PII, anonymizes it, and logs the detected PII details.
2929
Args:
30+
session_id (str): The session id to store the PII.
3031
text (str): The text to be analyzed for PII.
3132
Returns:
3233
Tuple[str, List[Dict[str, Any]]]: A tuple containing the anonymized text and
3334
a list of found PII details.
3435
35-
restore_pii(anonymized_text: str, session_id: str ) -> str:
36+
restore_pii(session_id: str, anonymized_text: str) -> str:
3637
Restores the PII in the given anonymized text using the current session.
3738
Args:
39+
session_id (str): The session id for the PII to be restored.
3840
anonymized_text (str): The text with anonymized PII to be restored.
3941
Returns:
4042
str: The text with restored PII.
@@ -55,10 +57,10 @@ def session_store(self) -> SessionStore:
5557
return self.analyzer.session_store
5658

5759
def analyze(
58-
self, text: str, session_id: str, context: Optional[PipelineContext] = None
60+
self, session_id: str, text: str, context: Optional[PipelineContext] = None
5961
) -> Tuple[str, List[Dict[str, Any]]]:
6062
# Call analyzer and get results
61-
anonymized_text, found_pii = self.analyzer.analyze(text, session_id, context=context)
63+
anonymized_text, found_pii = self.analyzer.analyze(session_id, text, context=context)
6264

6365
# Log found PII details (without modifying the found_pii list)
6466
if found_pii:
@@ -73,9 +75,11 @@ def analyze(
7375
# Return the exact same objects we got from the analyzer
7476
return anonymized_text, found_pii
7577

76-
def restore_pii(self, anonymized_text: str, session_id: str) -> str:
78+
def restore_pii(self, session_id: str, anonymized_text: str) -> str:
7779
"""
7880
Restore PII in the given anonymized text using the current session.
7981
"""
82+
if not session_id:
83+
return anonymized_text
8084
# Use the analyzer's restore_pii method with the current session store
81-
return self.analyzer.restore_pii(anonymized_text, session_id)
85+
return self.analyzer.restore_pii(session_id, anonymized_text)

src/codegate/pipeline/pii/pii.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class CodegatePii(PipelineStep):
3838
Processes the chat completion request to detect and redact PII. Updates the request with
3939
anonymized text and stores PII details in the context metadata.
4040
41-
restore_pii(anonymized_text: str, session_id: str) -> str:
41+
restore_pii(session_id: str, anonymized_text: str) -> str:
4242
Restores the original PII from the anonymized text using the PiiManager.
4343
"""
4444

@@ -83,7 +83,7 @@ async def process(
8383
# This is where we analyze and anonymize the text
8484
original_text = str(message["content"])
8585
anonymized_text, pii_details = self.pii_manager.analyze(
86-
original_text, session_id, context
86+
session_id, original_text, context
8787
)
8888

8989
if pii_details:
@@ -118,8 +118,8 @@ async def process(
118118

119119
return PipelineResult(request=new_request, context=context)
120120

121-
def restore_pii(self, anonymized_text: str, session_id: str) -> str:
122-
return self.pii_manager.restore_pii(anonymized_text, session_id)
121+
def restore_pii(self, session_id: str, anonymized_text: str) -> str:
122+
return self.pii_manager.restore_pii(session_id, anonymized_text)
123123

124124

125125
class PiiUnRedactionStep(OutputPipelineStep):
@@ -141,12 +141,12 @@ class PiiUnRedactionStep(OutputPipelineStep):
141141
"""
142142

143143
def __init__(self):
144-
self.redacted_pattern = re.compile(r"<([0-9a-f-]{0,36})>")
144+
self.redacted_pattern = re.compile(r"#([0-9a-f-]{0,36})#")
145145
self.complete_uuid_pattern = re.compile(
146146
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
147147
) # noqa: E501
148-
self.marker_start = "<"
149-
self.marker_end = ">"
148+
self.marker_start = "#"
149+
self.marker_end = "#"
150150

151151
@property
152152
def name(self) -> str:
@@ -181,13 +181,13 @@ async def process_chunk( # noqa: C901
181181
current_pos = 0
182182
result = []
183183
while current_pos < len(content):
184-
start_idx = content.find("<", current_pos)
184+
start_idx = content.find("#", current_pos)
185185
if start_idx == -1:
186186
# No more markers; add the remaining content
187187
result.append(content[current_pos:])
188188
break
189189

190-
end_idx = content.find(">", start_idx)
190+
end_idx = content.find("#", start_idx + 1)
191191
if end_idx == -1:
192192
# Incomplete marker, buffer the rest
193193
context.prefix_buffer = content[current_pos:]
@@ -199,7 +199,7 @@ async def process_chunk( # noqa: C901
199199

200200
# Extract potential UUID if it's a valid format!
201201
uuid_marker = content[start_idx : end_idx + 1]
202-
uuid_value = uuid_marker[1:-1] # Remove < >
202+
uuid_value = uuid_marker[1:-1] # Remove # #
203203

204204
if self._is_complete_uuid(uuid_value):
205205
# Get the PII manager from context metadata

src/codegate/pipeline/secrets/manager.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import json
2-
from typing import Optional
2+
from typing import Dict, Optional
33

44
import structlog
55

@@ -19,6 +19,9 @@ def store_secret(self, session_id: str, value: str, service: str, secret_type: s
1919
Encrypts and stores a secret value.
2020
Returns the encrypted value.
2121
"""
22+
if not session_id:
23+
raise ValueError("Session id must be provided")
24+
2225
if not value:
2326
raise ValueError("Value must be provided")
2427
if not service:
@@ -35,6 +38,16 @@ def store_secret(self, session_id: str, value: str, service: str, secret_type: s
3538
)
3639
return uuid_placeholder
3740

41+
def get_by_session_id(self, session_id: str) -> Optional[Dict]:
42+
session_data = self.session_store.get_by_session_id(session_id)
43+
if not session_data:
44+
return None
45+
# Convert all string values to dictionary objects using json.loads
46+
return {
47+
key: json.loads(value) if isinstance(value, str) else value
48+
for key, value in session_data.items()
49+
}
50+
3851
def get_original_value(self, session_id: str, uuid_placeholder: str) -> Optional[str]:
3952
"""Retrieve original value for an encrypted value"""
4053
secret_entry_json = self.session_store.get_mapping(session_id, uuid_placeholder)
@@ -45,3 +58,6 @@ def get_original_value(self, session_id: str, uuid_placeholder: str) -> Optional
4558

4659
def cleanup_session(self, session_id):
4760
self.session_store.cleanup_session(session_id)
61+
62+
def cleanup(self):
63+
self.session_store.cleanup()

src/codegate/pipeline/secrets/secrets.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,14 +430,13 @@ async def process_chunk(
430430
if encrypted_value.startswith("$"):
431431
encrypted_value = encrypted_value[1:]
432432

433-
session_id = context.sensitive.session_id
433+
session_id = input_context.sensitive.session_id
434434
if not session_id:
435435
raise ValueError("Session ID not found in context")
436436

437437
original_value = input_context.sensitive.manager.get_original_value(
438438
session_id,
439439
encrypted_value,
440-
input_context.sensitive.session_id,
441440
)
442441

443442
if original_value is None:

src/codegate/session/session_store.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ def add_mapping(self, session_id: str, data: str) -> str:
1717
self.sessions[session_id][uuid_placeholder] = data
1818
return uuid_placeholder
1919

20+
def get_by_session_id(self, session_id: str) -> Optional[Dict]:
21+
return self.sessions.get(session_id, None)
22+
2023
def get_mapping(self, session_id: str, uuid_placeholder: str) -> Optional[str]:
2124
return self.sessions.get(session_id, {}).get(uuid_placeholder)
2225

tests/pipeline/pii/test_analyzer.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,17 @@ def test_singleton_pattern(self):
7070

7171
def test_analyze_no_pii(self, analyzer, mock_analyzer_engine):
7272
text = "Hello world"
73+
session_id = "session-id"
7374
mock_analyzer_engine.analyze.return_value = []
7475

75-
result_text, found_pii, session_store = analyzer.analyze(text)
76+
result_text, found_pii = analyzer.analyze(session_id, text)
7677

7778
assert result_text == text
7879
assert found_pii == []
79-
assert isinstance(session_store, SessionStore)
8080

8181
def test_analyze_with_pii(self, analyzer, mock_analyzer_engine):
8282
text = "My email is [email protected]"
83+
session_id = "session-id"
8384
email_pii = RecognizerResult(
8485
entity_type="EMAIL_ADDRESS",
8586
start=12,
@@ -88,7 +89,7 @@ def test_analyze_with_pii(self, analyzer, mock_analyzer_engine):
8889
)
8990
mock_analyzer_engine.analyze.return_value = [email_pii]
9091

91-
result_text, found_pii, session_store = analyzer.analyze(text)
92+
result_text, found_pii = analyzer.analyze(session_id, text)
9293

9394
assert len(found_pii) == 1
9495
pii_info = found_pii[0]
@@ -101,36 +102,32 @@ def test_analyze_with_pii(self, analyzer, mock_analyzer_engine):
101102
# Verify the placeholder was used to replace the PII
102103
placeholder = pii_info["uuid_placeholder"]
103104
assert result_text == f"My email is {placeholder}"
104-
# Verify the mapping was stored
105-
assert session_store.get_pii(placeholder) == "[email protected]"
106105

107106
def test_restore_pii(self, analyzer):
108-
session_store = SessionStore()
109107
original_text = "[email protected]"
110108
session_id = "session-id"
111-
placeholder = session_store.add_mapping(session_id, original_text)
112-
anonymized_text = f"My email is {placeholder}"
113109

114-
restored_text = analyzer.restore_pii(anonymized_text, session_id)
110+
placeholder = analyzer.session_store.add_mapping(session_id, original_text)
111+
anonymized_text = f"My email is {placeholder}"
112+
restored_text = analyzer.restore_pii(session_id, anonymized_text)
115113

116114
assert restored_text == f"My email is {original_text}"
117115

118116
def test_restore_pii_multiple(self, analyzer):
119-
session_store = SessionStore()
120117
121118
phone = "123-456-7890"
122119
session_id = "session-id"
123-
email_placeholder = session_store.add_mapping(session_id, email)
124-
phone_placeholder = session_store.add_mapping(session_id, phone)
120+
email_placeholder = analyzer.session_store.add_mapping(session_id, email)
121+
phone_placeholder = analyzer.session_store.add_mapping(session_id, phone)
125122
anonymized_text = f"Email: {email_placeholder}, Phone: {phone_placeholder}"
126123

127-
restored_text = analyzer.restore_pii(anonymized_text, session_id)
124+
restored_text = analyzer.restore_pii(session_id, anonymized_text)
128125

129126
assert restored_text == f"Email: {email}, Phone: {phone}"
130127

131128
def test_restore_pii_no_placeholders(self, analyzer):
132129
text = "No PII here"
133130
session_id = "session-id"
134-
restored_text = analyzer.restore_pii(text, session_id)
131+
restored_text = analyzer.restore_pii(session_id, text)
135132

136133
assert restored_text == text

tests/pipeline/pii/test_pi.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,10 @@ async def test_process_with_pii(self, pii_step):
9696
def test_restore_pii(self, pii_step):
9797
anonymized_text = "My email is <test-uuid>"
9898
original_text = "My email is [email protected]"
99+
session_id = "session-id"
99100
pii_step.pii_manager.restore_pii = MagicMock(return_value=original_text)
100101

101-
restored = pii_step.restore_pii(anonymized_text)
102+
restored = pii_step.restore_pii(session_id, anonymized_text)
102103

103104
assert restored == original_text
104105

@@ -148,7 +149,7 @@ async def test_process_chunk_with_uuid(self, unredaction_step):
148149
StreamingChoices(
149150
finish_reason=None,
150151
index=0,
151-
delta=Delta(content=f"Text with <{uuid}>"),
152+
delta=Delta(content=f"Text with #{uuid}#"),
152153
logprobs=None,
153154
)
154155
],
@@ -157,17 +158,16 @@ async def test_process_chunk_with_uuid(self, unredaction_step):
157158
object="chat.completion.chunk",
158159
)
159160
context = OutputPipelineContext()
160-
input_context = PipelineContext()
161+
input_context = PipelineContext(metadata={"session_id": "session-id"})
161162

162163
# Mock PII manager in input context
163164
mock_pii_manager = MagicMock()
164165
mock_session = MagicMock()
165-
mock_session.get_pii = MagicMock(return_value="[email protected]")
166+
mock_session.get_mapping = MagicMock(return_value="[email protected]")
166167
mock_pii_manager.session_store = mock_session
167168
input_context.metadata["pii_manager"] = mock_pii_manager
168169

169170
result = await unredaction_step.process_chunk(chunk, context, input_context)
170-
171171
assert result[0].choices[0].delta.content == "Text with [email protected]"
172172

173173

0 commit comments

Comments
 (0)