Skip to content

Commit 8703d1a

Browse files
authored
Fix: llm plugin OpenAI generates random plaintext (hallucinations) (#163)
* Add tests to adopt TDD. * Fix bug, LLM hallucinations
1 parent db80447 commit 8703d1a

File tree

2 files changed

+99
-4
lines changed

2 files changed

+99
-4
lines changed

plugins/llm-integration.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ import (
66
"fmt"
77
"github.com/go-resty/resty/v2"
88
"github.com/mariocandela/beelzebub/v3/tracer"
9-
109
log "github.com/sirupsen/logrus"
10+
"regexp"
1111
)
1212

1313
const (
14-
systemPromptVirtualizeLinuxTerminal = "You will act as an Ubuntu Linux terminal. The user will type commands, and you are to reply with what the terminal should show. Your responses must be contained within a single code block. Do not provide explanations or type commands unless explicitly instructed by the user. Your entire response/output is going to consist of a simple text with \n for new line, and you will NOT wrap it within string md markers"
14+
systemPromptVirtualizeLinuxTerminal = "You will act as an Ubuntu Linux terminal. The user will type commands, and you are to reply with what the terminal should show. Your responses must be contained within a single code block. Do not provide note. Do not provide explanations or type commands unless explicitly instructed by the user. Your entire response/output is going to consist of a simple text with \n for new line, and you will NOT wrap it within string md markers"
1515
systemPromptVirtualizeHTTPServer = "You will act as an unsecure HTTP Server with multiple vulnerability like aws and git credentials stored into root http directory. The user will send HTTP requests, and you are to reply with what the server should show. Do not provide explanations or type commands unless explicitly instructed by the user."
1616
LLMPluginName = "LLMHoneypot"
1717
openAIGPTEndpoint = "https://api.openai.com/v1/chat/completions"
@@ -185,7 +185,7 @@ func (llmHoneypot *LLMHoneypot) openAICaller(messages []Message) (string, error)
185185
return "", errors.New("no choices")
186186
}
187187

188-
return response.Result().(*Response).Choices[0].Message.Content, nil
188+
return removeQuotes(response.Result().(*Response).Choices[0].Message.Content), nil
189189
}
190190

191191
func (llmHoneypot *LLMHoneypot) ollamaCaller(messages []Message) (string, error) {
@@ -216,7 +216,7 @@ func (llmHoneypot *LLMHoneypot) ollamaCaller(messages []Message) (string, error)
216216
}
217217
log.Debug(response)
218218

219-
return response.Result().(*Response).Message.Content, nil
219+
return removeQuotes(response.Result().(*Response).Message.Content), nil
220220
}
221221

222222
func (llmHoneypot *LLMHoneypot) ExecuteModel(command string) (string, error) {
@@ -238,3 +238,8 @@ func (llmHoneypot *LLMHoneypot) ExecuteModel(command string) (string, error) {
238238
return "", errors.New("no model selected")
239239
}
240240
}
241+
242+
// codeFencePattern matches a Markdown code-fence marker: three backticks,
// any number of spaces, an optional lowercase language tag (e.g. "bash",
// "plaintext") and at most one trailing newline. It is compiled once at
// package scope so the pattern is not recompiled for every model response.
var codeFencePattern = regexp.MustCompile("``` *[a-z]*\\n?")

// removeQuotes returns content with every code-fence marker removed.
//
// Both the opening marker (including its language tag and the newline that
// follows it) and the closing marker are deleted; the text between them is
// returned unchanged. Content without any fence marker is returned as is.
func removeQuotes(content string) string {
	return codeFencePattern.ReplaceAllString(content, "")
}

plugins/llm-integration_test.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,93 @@ func TestFromString(t *testing.T) {
379379
model, err = FromStringToLLMModel("beelzebub-model")
380380
assert.Errorf(t, err, "model beelzebub-model not found")
381381
}
382+
383+
func TestBuildExecuteModelSSHWithoutPlaintextSection(t *testing.T) {
384+
client := resty.New()
385+
httpmock.ActivateNonDefault(client.GetClient())
386+
defer httpmock.DeactivateAndReset()
387+
388+
// Given
389+
httpmock.RegisterResponder("POST", ollamaEndpoint,
390+
func(req *http.Request) (*http.Response, error) {
391+
resp, err := httpmock.NewJsonResponse(200, &Response{
392+
Message: Message{
393+
Role: SYSTEM.String(),
394+
Content: "```plaintext\n```\n",
395+
},
396+
})
397+
if err != nil {
398+
return httpmock.NewStringResponse(500, ""), nil
399+
}
400+
return resp, nil
401+
},
402+
)
403+
404+
llmHoneypot := LLMHoneypot{
405+
Histories: make([]Message, 0),
406+
Protocol: tracer.SSH,
407+
Model: LLAMA3,
408+
}
409+
410+
openAIGPTVirtualTerminal := InitLLMHoneypot(llmHoneypot)
411+
openAIGPTVirtualTerminal.client = client
412+
413+
//When
414+
str, err := openAIGPTVirtualTerminal.ExecuteModel("ls")
415+
416+
//Then
417+
assert.Nil(t, err)
418+
assert.Equal(t, "", str)
419+
}
420+
421+
func TestBuildExecuteModelSSHWithoutQuotesSection(t *testing.T) {
422+
client := resty.New()
423+
httpmock.ActivateNonDefault(client.GetClient())
424+
defer httpmock.DeactivateAndReset()
425+
426+
// Given
427+
httpmock.RegisterResponder("POST", ollamaEndpoint,
428+
func(req *http.Request) (*http.Response, error) {
429+
resp, err := httpmock.NewJsonResponse(200, &Response{
430+
Message: Message{
431+
Role: SYSTEM.String(),
432+
Content: "```\n```\n",
433+
},
434+
})
435+
if err != nil {
436+
return httpmock.NewStringResponse(500, ""), nil
437+
}
438+
return resp, nil
439+
},
440+
)
441+
442+
llmHoneypot := LLMHoneypot{
443+
Histories: make([]Message, 0),
444+
Protocol: tracer.SSH,
445+
Model: LLAMA3,
446+
}
447+
448+
openAIGPTVirtualTerminal := InitLLMHoneypot(llmHoneypot)
449+
openAIGPTVirtualTerminal.client = client
450+
451+
//When
452+
str, err := openAIGPTVirtualTerminal.ExecuteModel("ls")
453+
454+
//Then
455+
assert.Nil(t, err)
456+
assert.Equal(t, "", str)
457+
}
458+
459+
func TestRemoveQuotes(t *testing.T) {
460+
plaintext := "```plaintext\n```"
461+
bash := "```bash\n```"
462+
onlyQuotes := "```\n```"
463+
complexText := "```plaintext\ntop - 10:30:48 up 1 day, 4:30, 2 users, load average: 0.15, 0.10, 0.08\nTasks: 198 total, 1 running, 197 sleeping, 0 stopped, 0 zombie\n```"
464+
complexText2 := "```\ntop - 15:06:59 up 10 days, 3:17, 1 user, load average: 0.10, 0.09, 0.08\nTasks: 285 total\n```"
465+
466+
assert.Equal(t, "", removeQuotes(plaintext))
467+
assert.Equal(t, "", removeQuotes(bash))
468+
assert.Equal(t, "", removeQuotes(onlyQuotes))
469+
assert.Equal(t, "top - 10:30:48 up 1 day, 4:30, 2 users, load average: 0.15, 0.10, 0.08\nTasks: 198 total, 1 running, 197 sleeping, 0 stopped, 0 zombie\n", removeQuotes(complexText))
470+
assert.Equal(t, "top - 15:06:59 up 10 days, 3:17, 1 user, load average: 0.10, 0.09, 0.08\nTasks: 285 total\n", removeQuotes(complexText2))
471+
}

0 commit comments

Comments
 (0)