Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions controllers/message_answer.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,6 @@ func generateMessageAnswer(id string, responseWriter http.ResponseWriter, host s
}

question = questionMessage.Text

question, err = refineQuestionTextViaParsingUrlContent(question, lang)
if err != nil {
responseErrorStream(message, err.Error())
return
}
}

if question == "" {
Expand Down
21 changes: 0 additions & 21 deletions controllers/message_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@ import (
"fmt"
"io"
"net/http"
"path/filepath"
"regexp"
"strings"

"github.com/beego/beego"
"github.com/the-open-agent/openagent/i18n"
"github.com/the-open-agent/openagent/object"
"github.com/the-open-agent/openagent/txt"
"github.com/the-open-agent/openagent/util"
)

Expand Down Expand Up @@ -128,25 +126,6 @@ func (c *ApiController) ResponseErrorStream(message *object.Message, errorText s
}
}

var (
	// hrefAttributePattern captures the value of a double-quoted href attribute.
	hrefAttributePattern = regexp.MustCompile(`href="([^"]+)"`)
	// anchorTagPattern matches a complete <a ... href=...>...</a> element whose
	// href is single- or double-quoted.
	anchorTagPattern = regexp.MustCompile(`<a\s+[^>]*href=["']([^"']+)["'][^>]*>.*?</a>`)
)

// refineQuestionTextViaParsingUrlContent replaces the anchor elements in
// question with the parsed text of the file referenced by the first
// double-quoted href attribute.
//
// When question contains no such href it is returned unchanged. The file
// extension is taken from the href itself and passed, together with lang, to
// txt.GetParsedTextFromUrl; any error from that call is returned with an
// empty string.
func refineQuestionTextViaParsingUrlContent(question string, lang string) (string, error) {
	urls := hrefAttributePattern.FindStringSubmatch(question)
	if len(urls) == 0 {
		return question, nil
	}

	href := urls[1]
	ext := filepath.Ext(href)
	content, err := txt.GetParsedTextFromUrl(href, ext, lang)
	if err != nil {
		return "", err
	}

	// The fetched content is arbitrary document text, so it must be inserted
	// literally: ReplaceAllString would interpret "$1", "$name", "$100", ... in
	// it as submatch references and silently rewrite or drop them.
	return anchorTagPattern.ReplaceAllLiteralString(question, content), nil
}

func ConvertMessageDataToJSON(data string) ([]byte, error) {
jsonData := map[string]string{"text": data}
jsonBytes, err := json.Marshal(jsonData)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ require (
github.com/volcengine/volcengine-go-sdk v1.2.32
github.com/xuri/excelize/v2 v2.10.1
golang.org/x/net v0.52.0
golang.org/x/sync v0.20.0
golang.org/x/text v0.35.0
google.golang.org/genai v1.10.0
modernc.org/sqlite v1.18.1
Expand Down Expand Up @@ -206,7 +207,6 @@ require (
golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect
golang.org/x/mod v0.34.0 // indirect
golang.org/x/oauth2 v0.27.0
golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.42.0
golang.org/x/term v0.41.0 // indirect
golang.org/x/tools v0.43.0 // indirect
Expand Down
177 changes: 177 additions & 0 deletions model/file_content.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
// Copyright 2026 The OpenAgent Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
"fmt"
"io"
"net"
"net/url"
"path"
"regexp"
"strings"
"sync"

"github.com/the-open-agent/openagent/txt"
"golang.org/x/sync/singleflight"
)

var (
// fileURLPattern matches an http:// or https:// URL and stops at the first
// whitespace, angle bracket, or single/double quote character.
fileURLPattern = regexp.MustCompile(`https?://[^\s<>"']+`)
// fileContentCache holds parsed file text; getFileContent stores and looks up
// entries by raw URL string. Nothing in this file removes entries.
fileContentCache sync.Map
// fileContentGroup is used by getFileContent, keyed by raw URL, to route the
// fetch of a URL through singleflight.
fileContentGroup singleflight.Group
)

// fileContentProvider decorates a ModelProvider: pricing and model listing are
// forwarded untouched, while QueryText first rewrites document URLs found in
// the question and the history before handing the query to the wrapped
// provider.
type fileContentProvider struct {
	// providerType is forwarded to the URL replacement helpers, which use it
	// to decide which URLs get expanded.
	providerType string
	// provider is the wrapped implementation every method delegates to.
	provider ModelProvider
}

// GetPricing reports the wrapped provider's pricing string as is.
func (fp *fileContentProvider) GetPricing() string {
	return fp.provider.GetPricing()
}

// ListModels reports the wrapped provider's model list as is.
func (fp *fileContentProvider) ListModels() ([]string, error) {
	return fp.provider.ListModels()
}

// QueryText runs replaceFileURLs over question and replaceFileURLsInMessages
// over history, then delegates to the wrapped provider with the rewritten
// values. All other arguments are passed through unchanged. If either rewrite
// fails, the wrapped provider is not called and the error is returned with a
// nil result.
func (fp *fileContentProvider) QueryText(
	question string,
	writer io.Writer,
	history []*RawMessage,
	prompt string,
	knowledgeMessages []*RawMessage,
	toolSession *ToolSession,
	lang string,
) (*ModelResult, error) {
	expandedQuestion, err := replaceFileURLs(question, fp.providerType, lang)
	if err != nil {
		return nil, err
	}

	expandedHistory, err := replaceFileURLsInMessages(history, fp.providerType, lang)
	if err != nil {
		return nil, err
	}

	return fp.provider.QueryText(expandedQuestion, writer, expandedHistory, prompt, knowledgeMessages, toolSession, lang)
}

// wrapFileContentProvider returns provider wrapped in a fileContentProvider
// that remembers providerType for the URL replacement helpers.
//
// A nil provider is returned as a nil ModelProvider rather than being wrapped:
// a wrapper around nil would be a non-nil interface value, so callers' nil
// checks would pass and the first method call would panic on the nil delegate.
func wrapFileContentProvider(providerType string, provider ModelProvider) ModelProvider {
	if provider == nil {
		return nil
	}

	return &fileContentProvider{
		providerType: providerType,
		provider:     provider,
	}
}

// replaceFileURLs scans text for http(s) URLs and replaces each supported
// document URL with "URL: <url>\nContent:\n<parsed content>".
//
// A URL is left untouched when:
//   - the last '<' before it comes after the last '>' (presumably the URL sits
//     inside an HTML tag, e.g. an attribute value);
//   - parseDocumentURL rejects it; or
//   - shouldReplaceFileURL declines it for providerType.
//
// Trailing sentence or markdown punctuation (".,;:!?)]}") that the URL pattern
// swallowed is not treated as part of the URL; it stays in the output right
// after the URL or its replacement. Without this, "see http://host/a.pdf." or
// "(http://host/a.pdf)" would yield an unsupported extension and never be
// expanded.
//
// The first error from getFileContent aborts the rewrite and is returned with
// an empty string.
func replaceFileURLs(text string, providerType string, lang string) (string, error) {
	const trailingPunctuation = ".,;:!?)]}"

	matches := fileURLPattern.FindAllStringIndex(text, -1)
	if len(matches) == 0 {
		return text, nil
	}

	var result strings.Builder
	last := 0
	for _, match := range matches {
		start := match[0]
		rawURL := strings.TrimRight(text[start:match[1]], trailingPunctuation)

		result.WriteString(text[last:start])
		// Resume after the trimmed URL so any stripped punctuation is copied
		// verbatim with the following text.
		last = start + len(rawURL)

		prefix := text[:start]
		if strings.LastIndex(prefix, "<") > strings.LastIndex(prefix, ">") {
			result.WriteString(rawURL)
			continue
		}

		parsedURL, ext, ok := parseDocumentURL(rawURL)
		if !ok || !shouldReplaceFileURL(providerType, parsedURL) {
			result.WriteString(rawURL)
			continue
		}

		content, err := getFileContent(rawURL, ext, lang)
		if err != nil {
			return "", err
		}
		fmt.Fprintf(&result, "URL: %s\nContent:\n%s", rawURL, content)
	}
	result.WriteString(text[last:])
	return result.String(), nil
}

// replaceFileURLsInMessages applies replaceFileURLs to the Text of every
// message and returns a new slice of the same length.
//
// The input is never mutated: a message whose text changed is shallow-copied
// with the new text, an unchanged message is reused by pointer, and a nil
// entry stays nil. The first replacement error is returned with a nil slice.
func replaceFileURLsInMessages(messages []*RawMessage, providerType string, lang string) ([]*RawMessage, error) {
	rewritten := make([]*RawMessage, len(messages))
	for idx := range messages {
		original := messages[idx]
		if original == nil {
			continue
		}

		expanded, err := replaceFileURLs(original.Text, providerType, lang)
		if err != nil {
			return nil, err
		}

		if expanded != original.Text {
			copied := *original
			copied.Text = expanded
			rewritten[idx] = &copied
		} else {
			rewritten[idx] = original
		}
	}
	return rewritten, nil
}

// parseDocumentURL parses rawURL and reports whether it points at a supported
// document.
//
// It succeeds only for an http or https URL with a non-empty hostname whose
// path extension, lower-cased, is one of txt.GetSupportedFileTypes(). On
// success it returns the parsed URL, the lower-cased extension and true;
// otherwise nil, "" and false.
func parseDocumentURL(rawURL string) (*url.URL, string, bool) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return nil, "", false
	}

	switch u.Scheme {
	case "http", "https":
		// accepted schemes
	default:
		return nil, "", false
	}

	if u.Hostname() == "" {
		return nil, "", false
	}

	// The extension comes from the URL path only, so query strings and
	// fragments do not affect it.
	ext := strings.ToLower(path.Ext(u.Path))
	for _, candidate := range txt.GetSupportedFileTypes() {
		if candidate == ext {
			return u, ext, true
		}
	}
	return nil, "", false
}

// shouldReplaceFileURL reports whether a document URL should be expanded into
// its content for the given provider type.
//
// For "OpenAI" and "Azure" only URLs accepted by isLocalURL are expanded;
// every other provider type gets all document URLs expanded.
// NOTE(review): presumably those two providers can read remote files on their
// own but cannot reach the local host; confirm.
func shouldReplaceFileURL(providerType string, parsedURL *url.URL) bool {
	switch providerType {
	case "OpenAI", "Azure":
		return isLocalURL(parsedURL)
	default:
		return true
	}
}

// isLocalURL reports whether parsedURL's host is the name "localhost"
// (compared after lower-casing) or an IP literal in a loopback range.
// Any other host name, and any non-loopback IP, yields false.
func isLocalURL(parsedURL *url.URL) bool {
	host := strings.ToLower(parsedURL.Hostname())
	if host == "localhost" {
		return true
	}

	// Hostname() has already stripped the port and any IPv6 brackets, so the
	// value can be handed to ParseIP directly.
	if addr := net.ParseIP(host); addr != nil {
		return addr.IsLoopback()
	}
	return false
}

// getFileContent returns the parsed text of the file at rawURL.
//
// A value already present in fileContentCache is returned directly. Otherwise
// the fetch runs through fileContentGroup keyed by rawURL; the fetch function
// re-checks the cache, calls txt.GetParsedTextFromUrl(rawURL, ext, lang) and
// stores a successful result in the cache. Errors are returned with an empty
// string and are not cached.
//
// NOTE(review): the cache key is rawURL only (lang is not part of it) and
// entries are never evicted or refreshed in this file; confirm that is
// acceptable for long-running processes and changing documents.
func getFileContent(rawURL string, ext string, lang string) (string, error) {
	if cached, hit := fileContentCache.Load(rawURL); hit {
		return cached.(string), nil
	}

	fetch := func() (any, error) {
		// Another call may have filled the cache between the check above and
		// this function running.
		if cached, hit := fileContentCache.Load(rawURL); hit {
			return cached.(string), nil
		}

		parsed, err := txt.GetParsedTextFromUrl(rawURL, ext, lang)
		if err != nil {
			return "", err
		}
		fileContentCache.Store(rawURL, parsed)
		return parsed, nil
	}

	result, err, _ := fileContentGroup.Do(rawURL, fetch)
	if err != nil {
		return "", err
	}
	return result.(string), nil
}
2 changes: 1 addition & 1 deletion model/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,5 +181,5 @@ func GetModelProvider(typ string, subType string, clientId string, clientSecret
if err != nil {
return nil, err
}
return p, nil
return wrapFileContentProvider(typ, p), nil
}
Loading