Skip to content

Commit b374eae

Browse files
Paperlz authored and hsluoyz committed
feat: add document URL content caching (#2359)
1 parent 2f734e0 commit b374eae

5 files changed

Lines changed: 182 additions & 29 deletions

File tree

controllers/message_answer.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -247,12 +247,6 @@ func generateMessageAnswer(id string, responseWriter http.ResponseWriter, host s
247247
}
248248

249249
question = questionMessage.Text
250-
251-
question, err = refineQuestionTextViaParsingUrlContent(question, lang)
252-
if err != nil {
253-
responseErrorStream(message, err.Error())
254-
return
255-
}
256250
}
257251

258252
if question == "" {

controllers/message_util.go

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@ import (
2020
"fmt"
2121
"io"
2222
"net/http"
23-
"path/filepath"
2423
"regexp"
2524
"strings"
2625

2726
"github.com/beego/beego"
2827
"github.com/the-open-agent/openagent/i18n"
2928
"github.com/the-open-agent/openagent/object"
30-
"github.com/the-open-agent/openagent/txt"
3129
"github.com/the-open-agent/openagent/util"
3230
)
3331

@@ -128,25 +126,6 @@ func (c *ApiController) ResponseErrorStream(message *object.Message, errorText s
128126
}
129127
}
130128

131-
func refineQuestionTextViaParsingUrlContent(question string, lang string) (string, error) {
132-
re := regexp.MustCompile(`href="([^"]+)"`)
133-
urls := re.FindStringSubmatch(question)
134-
if len(urls) == 0 {
135-
return question, nil
136-
}
137-
138-
href := urls[1]
139-
ext := filepath.Ext(href)
140-
content, err := txt.GetParsedTextFromUrl(href, ext, lang)
141-
if err != nil {
142-
return "", err
143-
}
144-
145-
aTag := regexp.MustCompile(`<a\s+[^>]*href=["']([^"']+)["'][^>]*>.*?</a>`)
146-
res := aTag.ReplaceAllString(question, content)
147-
return res, nil
148-
}
149-
150129
func ConvertMessageDataToJSON(data string) ([]byte, error) {
151130
jsonData := map[string]string{"text": data}
152131
jsonBytes, err := json.Marshal(jsonData)

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ require (
5555
github.com/volcengine/volcengine-go-sdk v1.2.32
5656
github.com/xuri/excelize/v2 v2.10.1
5757
golang.org/x/net v0.52.0
58+
golang.org/x/sync v0.20.0
5859
golang.org/x/text v0.35.0
5960
google.golang.org/genai v1.10.0
6061
modernc.org/sqlite v1.18.1
@@ -206,7 +207,6 @@ require (
206207
golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect
207208
golang.org/x/mod v0.34.0 // indirect
208209
golang.org/x/oauth2 v0.27.0
209-
golang.org/x/sync v0.20.0 // indirect
210210
golang.org/x/sys v0.42.0
211211
golang.org/x/term v0.41.0 // indirect
212212
golang.org/x/tools v0.43.0 // indirect

model/file_content.go

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
// Copyright 2026 The OpenAgent Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package model
16+
17+
import (
18+
"fmt"
19+
"io"
20+
"net"
21+
"net/url"
22+
"path"
23+
"regexp"
24+
"strings"
25+
"sync"
26+
27+
"github.com/the-open-agent/openagent/txt"
28+
"golang.org/x/sync/singleflight"
29+
)
30+
31+
var (
32+
fileURLPattern = regexp.MustCompile(`https?://[^\s<>"']+`)
33+
fileContentCache sync.Map
34+
fileContentGroup singleflight.Group
35+
)
36+
37+
type fileContentProvider struct {
38+
providerType string
39+
provider ModelProvider
40+
}
41+
42+
func (p *fileContentProvider) GetPricing() string {
43+
return p.provider.GetPricing()
44+
}
45+
46+
func (p *fileContentProvider) ListModels() ([]string, error) {
47+
return p.provider.ListModels()
48+
}
49+
50+
func (p *fileContentProvider) QueryText(question string, writer io.Writer, history []*RawMessage, prompt string, knowledgeMessages []*RawMessage, toolSession *ToolSession, lang string) (*ModelResult, error) {
51+
question, err := replaceFileURLs(question, p.providerType, lang)
52+
if err != nil {
53+
return nil, err
54+
}
55+
56+
history, err = replaceFileURLsInMessages(history, p.providerType, lang)
57+
if err != nil {
58+
return nil, err
59+
}
60+
61+
return p.provider.QueryText(question, writer, history, prompt, knowledgeMessages, toolSession, lang)
62+
}
63+
64+
func wrapFileContentProvider(providerType string, provider ModelProvider) ModelProvider {
65+
return &fileContentProvider{
66+
providerType: providerType,
67+
provider: provider,
68+
}
69+
}
70+
71+
func replaceFileURLs(text string, providerType string, lang string) (string, error) {
72+
matches := fileURLPattern.FindAllStringIndex(text, -1)
73+
var result strings.Builder
74+
last := 0
75+
for _, match := range matches {
76+
result.WriteString(text[last:match[0]])
77+
rawURL := text[match[0]:match[1]]
78+
if strings.LastIndex(text[:match[0]], "<") > strings.LastIndex(text[:match[0]], ">") {
79+
result.WriteString(rawURL)
80+
last = match[1]
81+
continue
82+
}
83+
parsedURL, ext, ok := parseDocumentURL(rawURL)
84+
if !ok || !shouldReplaceFileURL(providerType, parsedURL) {
85+
result.WriteString(rawURL)
86+
last = match[1]
87+
continue
88+
}
89+
90+
content, err := getFileContent(rawURL, ext, lang)
91+
if err != nil {
92+
return "", err
93+
}
94+
fmt.Fprintf(&result, "URL: %s\nContent:\n%s", rawURL, content)
95+
last = match[1]
96+
}
97+
result.WriteString(text[last:])
98+
return result.String(), nil
99+
}
100+
101+
func replaceFileURLsInMessages(messages []*RawMessage, providerType string, lang string) ([]*RawMessage, error) {
102+
result := make([]*RawMessage, len(messages))
103+
for i, message := range messages {
104+
if message == nil {
105+
continue
106+
}
107+
108+
text, err := replaceFileURLs(message.Text, providerType, lang)
109+
if err != nil {
110+
return nil, err
111+
}
112+
if text == message.Text {
113+
result[i] = message
114+
continue
115+
}
116+
117+
clone := *message
118+
clone.Text = text
119+
result[i] = &clone
120+
}
121+
return result, nil
122+
}
123+
124+
func parseDocumentURL(rawURL string) (*url.URL, string, bool) {
125+
parsedURL, err := url.Parse(rawURL)
126+
if err != nil || (parsedURL.Scheme != "http" && parsedURL.Scheme != "https") || parsedURL.Hostname() == "" {
127+
return nil, "", false
128+
}
129+
130+
ext := strings.ToLower(path.Ext(parsedURL.Path))
131+
for _, supportedExt := range txt.GetSupportedFileTypes() {
132+
if ext == supportedExt {
133+
return parsedURL, ext, true
134+
}
135+
}
136+
return nil, "", false
137+
}
138+
139+
func shouldReplaceFileURL(providerType string, parsedURL *url.URL) bool {
140+
if providerType != "OpenAI" && providerType != "Azure" {
141+
return true
142+
}
143+
return isLocalURL(parsedURL)
144+
}
145+
146+
// isLocalURL reports whether parsedURL points at this machine or a private
// network.
//
// It returns true when the host is "localhost" (case-insensitive, with or
// without the trailing dot of a fully qualified name) or an IP literal that is
// a loopback, private, or unspecified (0.0.0.0 / ::) address. Host names other
// than "localhost" are not resolved and are reported as non-local. A nil URL is
// reported as non-local.
func isLocalURL(parsedURL *url.URL) bool {
	if parsedURL == nil {
		return false
	}

	// "localhost." is the fully qualified spelling of "localhost".
	hostname := strings.TrimSuffix(strings.ToLower(parsedURL.Hostname()), ".")
	if hostname == "localhost" {
		return true
	}

	ip := net.ParseIP(hostname)
	if ip == nil {
		return false
	}
	// The unspecified address is included because it addresses the local host,
	// not anything reachable from outside.
	return ip.IsLoopback() || ip.IsPrivate() || ip.IsUnspecified()
}
158+
159+
func getFileContent(rawURL string, ext string, lang string) (string, error) {
160+
if content, ok := fileContentCache.Load(rawURL); ok {
161+
return content.(string), nil
162+
}
163+
164+
value, err, _ := fileContentGroup.Do(rawURL, func() (interface{}, error) {
165+
if content, ok := fileContentCache.Load(rawURL); ok {
166+
return content.(string), nil
167+
}
168+
169+
content, err := txt.GetParsedTextFromUrl(rawURL, ext, lang)
170+
if err != nil {
171+
return "", err
172+
}
173+
fileContentCache.Store(rawURL, content)
174+
return content, nil
175+
})
176+
if err != nil {
177+
return "", err
178+
}
179+
return value.(string), nil
180+
}

model/provider.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,5 +181,5 @@ func GetModelProvider(typ string, subType string, clientId string, clientSecret
181181
if err != nil {
182182
return nil, err
183183
}
184-
return p, nil
184+
return wrapFileContentProvider(typ, p), nil
185185
}

0 commit comments

Comments
 (0)