Skip to content
This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit 59a78ff

Browse files
committed
unify interface in sensitive data
1 parent ac945bd commit 59a78ff

File tree

17 files changed

+252
-651
lines changed

17 files changed

+252
-651
lines changed

src/codegate/cli.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from codegate.config import Config, ConfigurationError
1717
from codegate.db.connection import init_db_sync, init_session_if_not_exists
1818
from codegate.pipeline.factory import PipelineFactory
19-
from codegate.pipeline.secrets.manager import SecretsManager
19+
from codegate.pipeline.sensitive_data.manager import SensitiveDataManager
2020
from codegate.providers import crud as provendcrud
2121
from codegate.providers.copilot.provider import CopilotProvider
2222
from codegate.server import init_app
@@ -331,8 +331,8 @@ def serve( # noqa: C901
331331
click.echo("Existing Certificates are already present.")
332332

333333
# Initialize sensitive data manager and pipeline factory
334-
secrets_manager = SecretsManager()
335-
pipeline_factory = PipelineFactory(secrets_manager)
334+
sensitive_data_manager = SensitiveDataManager()
335+
pipeline_factory = PipelineFactory(sensitive_data_manager)
336336

337337
app = init_app(pipeline_factory)
338338

src/codegate/pipeline/base.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
from codegate.clients.clients import ClientType
1313
from codegate.db.models import Alert, AlertSeverity, Output, Prompt
1414
from codegate.extract_snippets.message_extractor import CodeSnippet
15-
from codegate.pipeline.secrets.manager import SecretsManager
15+
from codegate.pipeline.sensitive_data.manager import SensitiveDataManager
1616

1717
logger = structlog.get_logger("codegate")
1818

1919

2020
@dataclass
2121
class PipelineSensitiveData:
22-
manager: SecretsManager
22+
manager: SensitiveDataManager
2323
session_id: str
2424
api_key: Optional[str] = None
2525
model: Optional[str] = None
@@ -266,19 +266,19 @@ class InputPipelineInstance:
266266
def __init__(
267267
self,
268268
pipeline_steps: List[PipelineStep],
269-
secret_manager: SecretsManager,
269+
sensitive_data_manager: SensitiveDataManager,
270270
is_fim: bool,
271271
client: ClientType = ClientType.GENERIC,
272272
):
273273
self.pipeline_steps = pipeline_steps
274-
self.secret_manager = secret_manager
274+
self.sensitive_data_manager = sensitive_data_manager
275275
self.is_fim = is_fim
276276
self.context = PipelineContext(client=client)
277277

278278
# we create the sensitive context here so that it is not shared between individual requests
279279
# TODO: could we get away with just generating the session ID for an instance?
280280
self.context.sensitive = PipelineSensitiveData(
281-
manager=self.secret_manager,
281+
manager=self.sensitive_data_manager,
282282
session_id=str(uuid.uuid4()),
283283
)
284284
self.context.metadata["is_fim"] = is_fim
@@ -335,20 +335,20 @@ class SequentialPipelineProcessor:
335335
def __init__(
336336
self,
337337
pipeline_steps: List[PipelineStep],
338-
secret_manager: SecretsManager,
338+
sensitive_data_manager: SensitiveDataManager,
339339
client_type: ClientType,
340340
is_fim: bool,
341341
):
342342
self.pipeline_steps = pipeline_steps
343-
self.secret_manager = secret_manager
343+
self.sensitive_data_manager = sensitive_data_manager
344344
self.is_fim = is_fim
345345
self.instance = self._create_instance(client_type)
346346

347347
def _create_instance(self, client_type: ClientType) -> InputPipelineInstance:
348348
"""Create a new pipeline instance for processing a request"""
349349
return InputPipelineInstance(
350350
self.pipeline_steps,
351-
self.secret_manager,
351+
self.sensitive_data_manager,
352352
self.is_fim,
353353
client_type,
354354
)

src/codegate/pipeline/factory.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@
1212
PiiRedactionNotifier,
1313
PiiUnRedactionStep,
1414
)
15-
from codegate.pipeline.secrets.manager import SecretsManager
1615
from codegate.pipeline.secrets.secrets import (
1716
CodegateSecrets,
1817
SecretRedactionNotifier,
1918
SecretUnredactionStep,
2019
)
20+
from codegate.pipeline.sensitive_data.manager import SensitiveDataManager
2121
from codegate.pipeline.system_prompt.codegate import SystemPrompt
2222

2323

2424
class PipelineFactory:
25-
def __init__(self, secrets_manager: SecretsManager):
26-
self.secrets_manager = secrets_manager
25+
def __init__(self, sensitive_data_manager: SensitiveDataManager):
26+
self.sensitive_data_manager = sensitive_data_manager
2727

2828
def create_input_pipeline(self, client_type: ClientType) -> SequentialPipelineProcessor:
2929
input_steps: List[PipelineStep] = [
@@ -41,7 +41,7 @@ def create_input_pipeline(self, client_type: ClientType) -> SequentialPipelinePr
4141
]
4242
return SequentialPipelineProcessor(
4343
input_steps,
44-
self.secrets_manager,
44+
self.sensitive_data_manager,
4545
client_type,
4646
is_fim=False,
4747
)
@@ -53,7 +53,7 @@ def create_fim_pipeline(self, client_type: ClientType) -> SequentialPipelineProc
5353
]
5454
return SequentialPipelineProcessor(
5555
fim_steps,
56-
self.secrets_manager,
56+
self.sensitive_data_manager,
5757
client_type,
5858
is_fim=True,
5959
)

src/codegate/pipeline/pii/analyzer.py

Lines changed: 4 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
import uuid
2-
from typing import Any, Dict, List, Optional, Tuple
1+
from typing import Any, List, Optional
32

43
import structlog
54
from presidio_analyzer import AnalyzerEngine
65
from presidio_anonymizer import AnonymizerEngine
76

87
from codegate.db.models import AlertSeverity
98
from codegate.pipeline.base import PipelineContext
10-
from codegate.session.session_store import SessionStore
9+
from codegate.pipeline.sensitive_data.session_store import SessionStore
1110

1211
logger = structlog.get_logger("codegate.pii.analyzer")
1312

@@ -69,9 +68,7 @@ def __init__(self):
6968

7069
PiiAnalyzer._instance = self
7170

72-
def analyze(
73-
self, session_id: str, text: str, context: Optional[PipelineContext] = None
74-
) -> Tuple[str, List[Dict[str, Any]]]:
71+
def analyze(self, text: str, context: Optional[PipelineContext] = None) -> List:
7572
# Prioritize credit card detection first
7673
entities = [
7774
"PHONE_NUMBER",
@@ -95,65 +92,7 @@ def analyze(
9592
language="en",
9693
score_threshold=0.3, # Lower threshold to catch more potential matches
9794
)
98-
99-
# Track found PII
100-
found_pii = []
101-
102-
# Only anonymize if PII was found
103-
if analyzer_results:
104-
# Log each found PII instance and anonymize
105-
anonymized_text = text
106-
for result in analyzer_results:
107-
pii_value = text[result.start : result.end]
108-
uuid_placeholder = self.session_store.add_mapping(session_id, pii_value)
109-
pii_info = {
110-
"type": result.entity_type,
111-
"value": pii_value,
112-
"score": result.score,
113-
"start": result.start,
114-
"end": result.end,
115-
"uuid_placeholder": uuid_placeholder,
116-
}
117-
found_pii.append(pii_info)
118-
anonymized_text = anonymized_text.replace(pii_value, uuid_placeholder)
119-
120-
# Log each PII detection with its UUID mapping
121-
logger.info(
122-
"PII detected and mapped",
123-
pii_type=result.entity_type,
124-
score=f"{result.score:.2f}",
125-
uuid=uuid_placeholder,
126-
# Don't log the actual PII value for security
127-
value_length=len(pii_value),
128-
session_id=session_id,
129-
)
130-
131-
# Log summary of all PII found in this analysis
132-
if found_pii and context:
133-
# Create notification string for alert
134-
notify_string = (
135-
f"**PII Detected** 🔒\n"
136-
f"- Total PII Found: {len(found_pii)}\n"
137-
f"- Types Found: {', '.join(set(p['type'] for p in found_pii))}\n"
138-
)
139-
context.add_alert(
140-
self._name,
141-
trigger_string=notify_string,
142-
severity_category=AlertSeverity.CRITICAL,
143-
)
144-
145-
logger.info(
146-
"PII analysis complete",
147-
total_pii_found=len(found_pii),
148-
pii_types=[p["type"] for p in found_pii],
149-
session_id=session_id,
150-
)
151-
152-
# Return the anonymized text, PII details, and session store
153-
return anonymized_text, found_pii
154-
155-
# If no PII found, return original text, empty list, and session store
156-
return text, []
95+
return analyzer_results
15796

15897
def restore_pii(self, session_id: str, anonymized_text: str) -> str:
15998
"""

src/codegate/pipeline/pii/manager.py

Lines changed: 0 additions & 85 deletions
This file was deleted.

0 commit comments

Comments
 (0)