diff --git a/src/codegate/pipeline/comment/output.py b/src/codegate/pipeline/comment/output.py
index ce70138a..44a2f1af 100644
--- a/src/codegate/pipeline/comment/output.py
+++ b/src/codegate/pipeline/comment/output.py
@@ -12,7 +12,8 @@
 )
 from codegate.pipeline.base import PipelineContext
 from codegate.pipeline.output import OutputPipelineContext, OutputPipelineStep
-from codegate.pipeline.suspicious_commands.suspicious_commands import SuspiciousCommands
+
+# from codegate.pipeline.suspicious_commands.suspicious_commands import check_suspicious_code
 from codegate.storage import StorageEngine
 from codegate.utils.package_extractor import PackageExtractor
 
@@ -51,24 +52,11 @@ def _create_chunk(self, original_chunk: ModelResponse, content: str) -> ModelRes
     async def _snippet_comment(self, snippet: CodeSnippet, context: PipelineContext) -> str:
         """Create a comment for a snippet"""
         comment = ""
-        sc = SuspiciousCommands.get_instance()
-        class_, prob = await sc.classify_phrase(snippet.code)
-        if class_ == 1:
-            liklihood = "possibly"
-            language = "code"
-            if prob > 0.9:
-                liklihood = "likely"
-            if snippet.language is not None:
-                language = snippet.language
-            if language not in [
-                "python",
-                "javascript",
-                "typescript",
-                "go",
-                "rust",
-                "java",
-            ]:  # noqa: E501
-                comment = f"{comment}\n\n🛡️ CodeGate: The {language} supplied is {liklihood} unsafe. Please check carefully!\n\n"  # noqa: E501
+
+        # Remove this for now. We need to find a better place for it.
+        # warning, is_suspicious = await check_suspicious_code(snippet.code, snippet.language)
+        # if is_suspicious:
+        #     comment += warning
 
         snippet.libraries = PackageExtractor.extract_packages(snippet.code, snippet.language)
 
diff --git a/src/codegate/pipeline/suspicious_commands/suspicious_commands.py b/src/codegate/pipeline/suspicious_commands/suspicious_commands.py
index 1670e010..ca3c3e8e 100644
--- a/src/codegate/pipeline/suspicious_commands/suspicious_commands.py
+++ b/src/codegate/pipeline/suspicious_commands/suspicious_commands.py
@@ -110,3 +110,35 @@ async def classify_phrase(self, phrase, embeddings=None):
         prediction = np.argmax(ort_outs[0])
         probability = np.max(ort_outs[0])
         return prediction, probability
+
+
+async def check_suspicious_code(code, language=None):
+    """
+    Check if the given code is suspicious and return a comment if it is.
+
+    Args:
+        code (str): The code to check.
+        language (str, optional): The language of the code.
+
+    Returns:
+        tuple: A comment string and a boolean indicating if the code is suspicious.
+    """
+    sc = SuspiciousCommands.get_instance()
+    comment = ""
+    class_, prob = await sc.classify_phrase(code)
+    if class_ == 1:
+        likelihood = "possibly"
+        if prob > 0.9:
+            likelihood = "likely"
+        if language is None:
+            language = "code"
+        if language not in [
+            "python",
+            "javascript",
+            "typescript",
+            "go",
+            "rust",
+            "java",
+        ]:
+            comment = f"{comment}\n\n🛡️ CodeGate: The {language} supplied is {likelihood} unsafe. Please check carefully!\n\n"  # noqa: E501
+    return comment, bool(class_ == 1)  # plain bool; class_ comes from np.argmax