Skip to content

refine knowledge doc #640

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,7 @@ public interface IFileStorageService
/// <param name="fileId"></param>
/// <returns></returns>
bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvider, string? fileId = null);

bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider,string fileId, KnowledgeDocMetaData metaData);

KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId);

IEnumerable<KnowledgeFileModel> GetKnowledgeBaseFiles(string collectionName, string vectorStoreProvider);

string GetKnowledgeBaseFileUrl(string collectionName, string fileId);
FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId);
#endregion
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using BotSharp.Abstraction.Knowledges.Enums;

namespace BotSharp.Abstraction.Files.Models;

public class ExternalFileModel : FileDataModel
Expand All @@ -10,4 +12,10 @@ public class ExternalFileModel : FileDataModel
/// </summary>
[JsonPropertyName("file_data")]
public new string? FileData { get; set; }

/// <summary>
/// The file source, e.g., api, user upload, external web, etc.
/// </summary>
[JsonPropertyName("file_source")]
public string FileSource { get; set; } = KnowledgeDocSource.Api;
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,7 @@ public static class KnowledgePayloadName
public static string Request = "request";
public static string Response = "response";
public static string DataSource = "dataSource";
public static string FileId = "fileId";
public static string FileName = "fileName";
public static string FileSource = "fileSource";
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public interface IKnowledgeService
#region Document
Task<UploadKnowledgeResponse> UploadKnowledgeDocuments(string collectionName, IEnumerable<ExternalFileModel> files);
Task<bool> DeleteKnowledgeDocument(string collectionName, string fileId);
Task<IEnumerable<KnowledgeFileModel>> GetKnowledgeDocuments(string collectionName);
Task<PagedItems<KnowledgeFileModel>> GetPagedKnowledgeDocuments(string collectionName, KnowledgeFileFilter filter);
Task<FileBinaryDataModel?> GetKnowledgeDocumentBinaryData(string collectionName, string fileId);
#endregion

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
using BotSharp.Abstraction.VectorStorage.Models;

namespace BotSharp.Abstraction.Knowledges.Models;

public class KnowledgeDocMetaData
Expand All @@ -13,12 +11,23 @@ public class KnowledgeDocMetaData
[JsonPropertyName("file_name")]
public string FileName { get; set; }

[JsonPropertyName("file_source")]
public string FileSource { get; set; }

[JsonPropertyName("content_type")]
public string ContentType { get; set; }

[JsonPropertyName("vector_store_provider")]
public string VectorStoreProvider { get; set; }

[JsonPropertyName("vector_data_ids")]
public IEnumerable<string> VectorDataIds { get; set; } = new List<string>();

[JsonPropertyName("web_url")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
public string? WebUrl { get; set; }


[JsonPropertyName("create_date")]
public DateTime CreateDate { get; set; } = DateTime.UtcNow;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace BotSharp.Abstraction.Knowledges.Models;

/// <summary>
/// Filter used when listing knowledge files. Inherits its paging settings from
/// <see cref="Pagination"/> and adds an optional restriction by file id.
/// </summary>
public class KnowledgeFileFilter : Pagination
{
/// <summary>
/// Ids of the files to keep. The property is nullable; the file-based repository
/// lookup only filters by id when this is neither null nor empty.
/// </summary>
public IEnumerable<string>? FileIds { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,5 +111,8 @@ public interface IBotSharpRepository
bool AddKnowledgeCollectionConfigs(List<VectorCollectionConfig> configs, bool reset = false);
bool DeleteKnowledgeCollectionConfig(string collectionName);
IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter);

public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData);
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter);
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,96 +67,15 @@ public bool DeleteKnowledgeFile(string collectionName, string vectorStoreProvide
return true;
}

public bool SaveKnolwedgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId, KnowledgeDocMetaData metaData)
public string GetKnowledgeBaseFileUrl(string collectionName, string fileId)
{
if (string.IsNullOrWhiteSpace(collectionName)
|| string.IsNullOrWhiteSpace(vectorStoreProvider)
|| string.IsNullOrWhiteSpace(fileId))
|| string.IsNullOrWhiteSpace(fileId))
{
return false;
return string.Empty;
}

var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
var dir = Path.Combine(docDir, fileId);
if (!ExistDirectory(dir))
{
Directory.CreateDirectory(dir);
}

var metaFile = Path.Combine(dir, KNOWLEDGE_DOC_META_FILE);
var content = JsonSerializer.Serialize(metaData, _jsonOptions);
File.WriteAllText(metaFile, content);
return true;
}

public KnowledgeDocMetaData? GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, string fileId)
{
if (string.IsNullOrWhiteSpace(collectionName)
|| string.IsNullOrWhiteSpace(vectorStoreProvider)
|| string.IsNullOrWhiteSpace(fileId))
{
return null;
}

var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
var metaFile = Path.Combine(docDir, fileId, KNOWLEDGE_DOC_META_FILE);
if (!File.Exists(metaFile))
{
return null;
}

var content = File.ReadAllText(metaFile);
var metaData = JsonSerializer.Deserialize<KnowledgeDocMetaData>(content, _jsonOptions);
return metaData;
}

public IEnumerable<KnowledgeFileModel> GetKnowledgeBaseFiles(string collectionName, string vectorStoreProvider)
{
if (string.IsNullOrWhiteSpace(collectionName)
|| string.IsNullOrWhiteSpace(vectorStoreProvider))
{
return Enumerable.Empty<KnowledgeFileModel>();
}

var docDir = BuildKnowledgeCollectionDocumentDir(collectionName, vectorStoreProvider);
if (!ExistDirectory(docDir))
{
return Enumerable.Empty<KnowledgeFileModel>();
}

var files = new List<KnowledgeFileModel>();
foreach (var folder in Directory.GetDirectories(docDir))
{
try
{
var metaFile = Path.Combine(folder, KNOWLEDGE_DOC_META_FILE);
if (!File.Exists(metaFile)) continue;

var content = File.ReadAllText(metaFile);
var metaData = JsonSerializer.Deserialize<KnowledgeDocMetaData>(content, _jsonOptions);
if (metaData == null) continue;

var fileName = Path.GetFileNameWithoutExtension(metaData.FileName);
var fileExtension = Path.GetExtension(metaData.FileName);

files.Add(new KnowledgeFileModel
{
FileId = metaData.FileId,
FileName = metaData.FileName,
FileExtension = fileExtension.Substring(1),
ContentType = FileUtility.GetFileContentType(metaData.FileName),
FileUrl = BuildKnowledgeFileUrl(collectionName, metaData.FileId)
});
}
catch (Exception ex)
{
_logger.LogWarning($"Error when getting knowledgebase file. ({folder})" +
$"\r\n{ex.Message}\r\n{ex.InnerException}");
continue;
}
}

return files;
return $"/knowledge/document/{collectionName}/file/{fileId}";
}

public FileBinaryDataModel? GetKnowledgeBaseFileBinaryData(string collectionName, string vectorStoreProvider, string fileId)
Expand Down Expand Up @@ -193,10 +112,5 @@ private string BuildKnowledgeCollectionDocumentDir(string collectionName, string
{
return Path.Combine(_baseDir, KNOWLEDGE_FOLDER, KNOWLEDGE_DOC_FOLDER, vectorStoreProvider, collectionName);
}

private string BuildKnowledgeFileUrl(string collectionName, string fileId)
{
return $"/knowledge/document/{collectionName}/file/{fileId}";
}
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -243,5 +243,11 @@ public bool DeleteKnowledgeCollectionConfig(string collectionName) =>

public IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorCollectionConfigFilter filter) =>
throw new NotImplementedException();

/// <summary>
/// Placeholder for persisting knowledge document metadata; this implementation has no
/// storage logic yet.
/// </summary>
/// <param name="metaData">The metadata that would be saved.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData) =>
throw new NotImplementedException();

/// <summary>
/// Placeholder for querying knowledge document metadata; this implementation has no
/// lookup logic yet.
/// </summary>
/// <param name="collectionName">Name of the knowledge collection.</param>
/// <param name="vectorStoreProvider">Name of the vector store provider.</param>
/// <param name="filter">Paging and file-id filter.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter) =>
throw new NotImplementedException();
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ public PagedItems<Conversation> GetConversations(ConversationFilter filter)
}

if (!matched) continue;

records.Add(record);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace BotSharp.Core.Repository;

public partial class FileRepository
{
#region Configs
public bool AddKnowledgeCollectionConfigs(List<VectorCollectionConfig> configs, bool reset = false)
{
var vectorDir = BuildKnowledgeCollectionConfigDir();
Expand Down Expand Up @@ -46,7 +47,6 @@ public bool AddKnowledgeCollectionConfigs(List<VectorCollectionConfig> configs,
}

File.WriteAllText(configFile, JsonSerializer.Serialize(savedConfigs ?? new(), _options));

return true;
}

Expand Down Expand Up @@ -102,11 +102,88 @@ public IEnumerable<VectorCollectionConfig> GetKnowledgeCollectionConfigs(VectorC

return configs;
}
#endregion


#region Documents
/// <summary>
/// Serializes the metadata of a knowledge document to the meta file located in the
/// document's own folder:
/// {document dir for provider/collection}/{file id}/{KNOWLEDGE_DOC_META_FILE}.
/// </summary>
/// <param name="metaData">
/// Metadata to store. Its collection, vector store provider and file id must all be non-blank.
/// </param>
/// <returns>
/// <c>false</c> when <paramref name="metaData"/> is null or one of the required values is
/// blank; <c>true</c> once the meta file has been written.
/// </returns>
public bool SaveKnolwedgeBaseFileMeta(KnowledgeDocMetaData metaData)
{
    if (metaData == null)
    {
        return false;
    }

    var requiredValues = new[] { metaData.Collection, metaData.VectorStoreProvider, metaData.FileId };
    if (requiredValues.Any(string.IsNullOrWhiteSpace))
    {
        return false;
    }

    var collectionDir = BuildKnowledgeDocumentDir(
        metaData.Collection.CleanStr(),
        metaData.VectorStoreProvider.CleanStr());
    var fileDir = Path.Combine(collectionDir, metaData.FileId);

    // CreateDirectory does nothing when the directory already exists,
    // so no separate existence check is required.
    Directory.CreateDirectory(fileDir);

    var metaPath = Path.Combine(fileDir, KNOWLEDGE_DOC_META_FILE);
    File.WriteAllText(metaPath, JsonSerializer.Serialize(metaData, _options));
    return true;
}

/// <summary>
/// Reads the stored metadata of every document folder under the given collection /
/// vector store provider and returns one page of the matching entries.
/// </summary>
/// <param name="collectionName">Name of the knowledge collection.</param>
/// <param name="vectorStoreProvider">Name of the vector store provider.</param>
/// <param name="filter">
/// Paging settings and optional file-id restriction. May be null, in which case a
/// default-constructed <see cref="KnowledgeFileFilter"/> is used.
/// </param>
/// <returns>
/// An empty <see cref="PagedItems{T}"/> when a name is blank or the directory does not
/// exist; otherwise the requested page, with <c>Count</c> set to the total number of
/// matching entries before paging.
/// </returns>
public PagedItems<KnowledgeDocMetaData> GetKnowledgeBaseFileMeta(string collectionName, string vectorStoreProvider, KnowledgeFileFilter filter)
{
    if (string.IsNullOrWhiteSpace(collectionName)
        || string.IsNullOrWhiteSpace(vectorStoreProvider))
    {
        return new PagedItems<KnowledgeDocMetaData>();
    }

    // The loop below tolerates a null filter, so paging must tolerate it too;
    // previously filter.Offset / filter.Size were dereferenced unconditionally.
    filter ??= new KnowledgeFileFilter();

    // Normalize the names exactly as SaveKnolwedgeBaseFileMeta does, otherwise metadata
    // saved under a cleaned name would be looked up in a different directory.
    var dir = BuildKnowledgeDocumentDir(collectionName.CleanStr(), vectorStoreProvider.CleanStr());
    if (!Directory.Exists(dir))
    {
        return new PagedItems<KnowledgeDocMetaData>();
    }

    // Decide once whether an id restriction applies instead of re-checking per folder.
    var fileIds = filter.FileIds;
    var filterByIds = !fileIds.IsNullOrEmpty();

    var records = new List<KnowledgeDocMetaData>();
    foreach (var folder in Directory.GetDirectories(dir))
    {
        var metaFile = Path.Combine(folder, KNOWLEDGE_DOC_META_FILE);
        if (!File.Exists(metaFile))
        {
            continue;
        }

        var content = File.ReadAllText(metaFile);
        var metaData = JsonSerializer.Deserialize<KnowledgeDocMetaData>(content, _options);
        if (metaData == null)
        {
            continue;
        }

        if (filterByIds && !fileIds.Contains(metaData.FileId))
        {
            continue;
        }

        records.Add(metaData);
    }

    return new PagedItems<KnowledgeDocMetaData>
    {
        Items = records.Skip(filter.Offset).Take(filter.Size),
        Count = records.Count
    };
}
#endregion


#region Private methods
/// <summary>
/// Builds the directory path for knowledge collection configs:
/// {file repository root}/{KNOWLEDGE_FOLDER}/{VECTOR_FOLDER}.
/// </summary>
private string BuildKnowledgeCollectionConfigDir()
    => Path.Combine(_dbSettings.FileRepository, KNOWLEDGE_FOLDER, VECTOR_FOLDER);

/// <summary>
/// Builds the directory path for the documents of one collection:
/// {file repository root}/{KNOWLEDGE_FOLDER}/{KNOWLEDGE_DOC_FOLDER}/{provider}/{collection}.
/// </summary>
/// <param name="collectionName">Name of the knowledge collection (last path segment).</param>
/// <param name="vectorStoreProvider">Name of the vector store provider (parent segment).</param>
private string BuildKnowledgeDocumentDir(string collectionName, string vectorStoreProvider)
    => Path.Combine(
        _dbSettings.FileRepository,
        KNOWLEDGE_FOLDER,
        KNOWLEDGE_DOC_FOLDER,
        vectorStoreProvider,
        collectionName);
#endregion
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ public partial class FileRepository : IBotSharpRepository
private const string KNOWLEDGE_FOLDER = "knowledgebase";
private const string VECTOR_FOLDER = "vector";
private const string COLLECTION_CONFIG_FILE = "collection-config.json";
private const string KNOWLEDGE_DOC_FOLDER = "document";
private const string KNOWLEDGE_DOC_META_FILE = "meta.json";

public FileRepository(
IServiceProvider services,
Expand Down
1 change: 1 addition & 0 deletions src/Infrastructure/BotSharp.Core/Using.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
global using BotSharp.Abstraction.Files.Utilities;
global using BotSharp.Abstraction.Translation.Attributes;
global using BotSharp.Abstraction.Messaging.Enums;
global using BotSharp.Abstraction.Knowledges.Models;
global using BotSharp.Core.Repository;
global using BotSharp.Core.Routing;
global using BotSharp.Core.Agents.Services;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,20 @@ public async Task<bool> DeleteKnowledgeDocument([FromRoute] string collection, [
return response;
}

/// <summary>
/// Returns one page of the knowledge documents stored for a collection.
/// </summary>
/// <param name="collection">Collection name taken from the route.</param>
/// <param name="request">Paging settings and optional file-id restriction from the request body.</param>
/// <returns>The requested page of documents as view models, plus the total count reported by the service.</returns>
[HttpPost("/knowledge/document/{collection}/list")]
public async Task<PagedItems<KnowledgeFileViewModel>> GetPagedKnowledgeDocuments([FromRoute] string collection, [FromBody] GetKnowledgeDocsRequest request)
{
    var data = await _knowledgeService.GetPagedKnowledgeDocuments(collection, new KnowledgeFileFilter
    {
        Page = request.Page,
        Size = request.Size,
        // The request inherits FileIds from KnowledgeFileFilter; forward it so an
        // id restriction sent by the client is not silently dropped.
        FileIds = request.FileIds
    });

    return new PagedItems<KnowledgeFileViewModel>
    {
        Items = data.Items.Select(x => KnowledgeFileViewModel.From(x)),
        Count = data.Count
    };
}

[HttpGet("/knowledge/document/{collection}/file/{fileId}")]
Expand Down
1 change: 1 addition & 0 deletions src/Infrastructure/BotSharp.OpenAPI/Using.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
global using BotSharp.Abstraction.Files.Models;
global using BotSharp.Abstraction.Files;
global using BotSharp.Abstraction.VectorStorage.Enums;
global using BotSharp.Abstraction.Knowledges.Models;
global using BotSharp.OpenAPI.ViewModels.Conversations;
global using BotSharp.OpenAPI.ViewModels.Users;
global using BotSharp.OpenAPI.ViewModels.Agents;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
namespace BotSharp.OpenAPI.ViewModels.Knowledges;

/// <summary>
/// Request body for listing knowledge documents. It adds no members of its own;
/// everything comes from <see cref="KnowledgeFileFilter"/>.
/// </summary>
public class GetKnowledgeDocsRequest : KnowledgeFileFilter
{
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
using System.Text.Json.Serialization;

namespace BotSharp.OpenAPI.ViewModels.Knowledges;

/// <summary>
/// Request body carrying the files to upload as knowledge documents.
/// </summary>
public class VectorKnowledgeUploadRequest
{
/// <summary>
/// Files to upload, serialized as "files". Defaults to an empty list.
/// </summary>
[JsonPropertyName("files")]
public IEnumerable<ExternalFileModel> Files { get; set; } = new List<ExternalFileModel>();
}
Loading