Skip to content

Commit 8683243

Browse files
committed
Support for text files attachments.
1 parent 8817929 commit 8683243

27 files changed

+2569
-30
lines changed

lib/ruby_llm/attachments.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ def encoded
5353
Base64.strict_encode64(content)
5454
end
5555

56+
# Whether this attachment should be handled as text: true when `content` is a
# String, otherwise whatever RubyLLM::MimeTypes.text? reports for `mime_type`.
def text?
  return true if content.is_a?(String)

  RubyLLM::MimeTypes.text?(mime_type)
end
59+
5660
# MIME type for this attachment, obtained by handing @filename to
# RubyLLM::MimeTypes.detect_from_path.
def mime_type
5761
RubyLLM::MimeTypes.detect_from_path(@filename)
5862
end

lib/ruby_llm/attachments/text.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# frozen_string_literal: true
2+
3+
module RubyLLM
4+
module Attachments
5+
# Attachment type for text files. The class body is empty, so all behaviour
# comes from Base; the distinct class lets callers branch on
# `when Attachments::Text`.
6+
class Text < Base
7+
end
8+
end
9+
end

lib/ruby_llm/content.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ def add_pdf(source)
3030
self
3131
end
3232

33+
# Attaches a text file to this content.
# @param source the text file source, passed unchanged to Attachments::Text.new
# @return [self] so calls can be chained
def add_text(source)
  text_attachment = Attachments::Text.new(source)
  @attachments.push(text_attachment)
  self
end
37+
3338
def format
3439
if @text && @attachments.empty?
3540
@text
@@ -57,6 +62,7 @@ def process_attachments_hash(attachments)
5762
Array(attachments[:image]).each { |source| add_image(source) }
5863
Array(attachments[:audio]).each { |source| add_audio(source) }
5964
Array(attachments[:pdf]).each { |source| add_pdf(source) }
65+
Array(attachments[:text]).each { |source| add_text(source) }
6066
end
6167

6268
def process_attachments_array_or_string(attachments)
@@ -66,8 +72,10 @@ def process_attachments_array_or_string(attachments)
6672
add_image file
6773
elsif RubyLLM::MimeTypes.audio?(mime_type)
6874
add_audio file
75+
elsif RubyLLM::MimeTypes.pdf?(mime_type)
76+
add_pdf file
6977
else
70-
add_pdf file # Default to PDF for unknown types for now
78+
add_text file
7179
end
7280
end
7381
end

lib/ruby_llm/error.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class ConfigurationError < StandardError; end
2424
class InvalidRoleError < StandardError; end
2525
class ModelNotFoundError < StandardError; end
2626
class UnsupportedFunctionsError < StandardError; end
27+
# Raised by a provider's media formatter when it is given an attachment class it has no branch for.
class UnsupportedAttachmentError < StandardError; end
2728

2829
# Error classes for different HTTP status codes
2930
class BadRequestError < Error; end

lib/ruby_llm/mime_types.rb

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,26 +21,36 @@ def mime_type(ext, fallback = 'application/octet-stream')
2121
# @param identifier [String] File extension, path, or MIME type
2222
# @return [Boolean] true if content is an image
2323
def image?(identifier)
  # Resolve to a MIME type first, then test the top-level media type.
  mime_type_from_identifier(identifier).start_with?('image/')
end
2727

2828
# Detect if content is audio based on extension or MIME type
2929
# @param identifier [String] File extension, path, or MIME type
3030
# @return [Boolean] true if content is audio
3131
def audio?(identifier)
  # Resolve to a MIME type first, then test the top-level media type.
  mime_type_from_identifier(identifier).start_with?('audio/')
end
3535

3636
# Detect if content is a PDF based on extension or MIME type
3737
# @param identifier [String] File extension, path, or MIME type
3838
# @return [Boolean] true if content is a PDF
3939
def pdf?(identifier)
  # PDF is a single exact MIME type, so compare rather than prefix-match.
  mime_type_from_identifier(identifier) == 'application/pdf'
end
4343

44+
# Detect if content is text based on extension or MIME type
45+
# @param identifier [String] File extension, path, or MIME type
46+
# @return [Boolean] true if content is text
47+
def text?(identifier)
  type = mime_type_from_identifier(identifier)

  # Three ways to qualify, checked cheapest first: a text/* type, a structured
  # suffix such as +json, or an explicitly listed non-text/* type.
  return true if type.start_with?('text/')
  return true if type.end_with?(*TEXT_SUFFIXES)

  NON_TEXT_PREFIX_TEXT_MIME_TYPES.include?(type)
end
53+
4454
# Extract extension from filename or path
4555
# @param path [String] File path or name
4656
# @return [String] Extension with leading dot
@@ -62,6 +72,11 @@ def extension_for_mime_type(mime)
6272
MIME_TYPES.invert[mime]
6373
end
6474

75+
# Resolve an identifier to a MIME type string.
#
# An identifier containing '/' is assumed to already be a MIME type and is
# returned as-is; anything else is treated as a bare extension ('png', '.PNG')
# and looked up through `mime_type`.
#
# NOTE(review): a filesystem path such as 'docs/readme.md' also contains '/'
# and is therefore returned verbatim rather than resolved - confirm callers
# only pass extensions or MIME types.
#
# @param identifier [String, Symbol, #to_s] file extension or MIME type
# @return [String] the MIME type (whatever `mime_type` returns for unknown extensions)
def mime_type_from_identifier(identifier)
  # Coerce before the '/' test: calling #include? on the raw argument raised
  # NoMethodError for a Symbol such as :png, although the lookup below already
  # coerced with to_s.
  id = identifier.to_s
  return id if id.include?('/')

  mime_type(".#{id.downcase.delete('.')}")
end
79+
6580
# List of most common mime-types, selected various sources
6681
# according to their usefulness in a webserving scope for Ruby
6782
# users.
@@ -347,6 +362,7 @@ def extension_for_mime_type(mime)
347362
'.mc1' => 'application/vnd.medcalcdata',
348363
'.mcd' => 'application/vnd.mcd',
349364
'.mdb' => 'application/x-msaccess',
365+
'.md' => 'text/markdown',
350366
'.mdi' => 'image/vnd.ms-modi',
351367
'.mdoc' => 'text/troff',
352368
'.me' => 'text/troff',
@@ -709,5 +725,38 @@ def extension_for_mime_type(mime)
709725
'.zip' => 'application/zip',
710726
'.zmm' => 'application/vnd.handheld-entertainment+xml'
711727
}.freeze
728+
729+
# MIME types that `text?` accepts by exact match: textual formats whose type
# neither starts with 'text/' nor ends with one of TEXT_SUFFIXES.
NON_TEXT_PREFIX_TEXT_MIME_TYPES = [
730+
'application/json', # Base type, even if specific ones end with +json
731+
'application/xml', # Base type, even if specific ones end with +xml
732+
'application/javascript',
733+
'application/ecmascript',
734+
'application/rtf',
735+
'application/sql',
736+
'application/x-sh',
737+
'application/x-csh',
738+
'application/x-httpd-php',
739+
'application/sdp',
740+
'application/sparql-query',
741+
'application/graphql',
742+
'application/yang', # Data modeling language, often serialized as XML/JSON but the type itself is distinct
743+
'application/mbox', # Mailbox format
744+
'application/x-tex',
745+
'application/x-latex',
746+
'application/x-perl',
747+
'application/x-python',
748+
'application/x-tcl',
749+
'application/pgp-signature', # Often ASCII armored
750+
'application/pgp-keys', # Often ASCII armored
751+
'application/vnd.coffeescript',
752+
'application/vnd.dart',
753+
'application/vnd.oai.openapi', # Base for OpenAPI, often with +json or +yaml suffix
754+
'application/vnd.zul', # ZK User Interface Language (can be XML-like)
755+
'application/x-yaml', # Common non-standard for YAML
756+
'application/yaml', # Standard for YAML
757+
'application/toml' # TOML configuration files
758+
].freeze
759+
760+
# Structured-syntax suffixes; `text?` treats a MIME type ending in any of these as text.
TEXT_SUFFIXES = %w[+json +xml +html +yaml +csv +plain +javascript +svg].freeze
712761
end
713762
end

lib/ruby_llm/providers/anthropic/media.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ def format_content(content)
1919
parts << format_image(attachment)
2020
when Attachments::PDF
2121
parts << format_pdf(attachment)
22+
when Attachments::Text
23+
parts << format_text_file(attachment)
24+
else
25+
raise UnsupportedAttachmentError, attachment.class
2226
end
2327
end
2428

@@ -73,6 +77,13 @@ def format_pdf(pdf)
7377
}
7478
end
7579
end
80+
81+
# Builds the Anthropic content part for a text file attachment.
# @param text_file the attachment handed to Utils.format_text_file_for_llm
# @return [Hash] a `type: 'text'` part whose `text` is the formatted file
def format_text_file(text_file)
  formatted = Utils.format_text_file_for_llm(text_file)
  { type: 'text', text: formatted }
end
7687
end
7788
end
7889
end

lib/ruby_llm/providers/bedrock/media.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ module RubyLLM
44
module Providers
55
module Bedrock
66
# Media handling methods for the Bedrock API integration
7+
# NOTE: Bedrock does not support URL attachments
78
module Media
89
extend Anthropic::Media
910

@@ -21,8 +22,10 @@ def format_content(content)
2122
parts << format_image(attachment)
2223
when Attachments::PDF
2324
parts << format_pdf(attachment)
25+
when Attachments::Text
26+
parts << Anthropic::Media.format_text_file(attachment)
2427
else
25-
raise "Unsupported attachment type: #{attachment.class}"
28+
raise UnsupportedAttachmentError, attachment.class
2629
end
2730
end
2831

lib/ruby_llm/providers/gemini/media.rb

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,43 +14,29 @@ def format_content(content)
1414
parts << format_text(content.text) if content.text
1515

1616
content.attachments.each do |attachment|
17-
case attachment
18-
when Attachments::Image
19-
parts << format_image(attachment)
20-
when Attachments::PDF
21-
parts << format_pdf(attachment)
22-
when Attachments::Audio
23-
parts << format_audio(attachment)
24-
end
17+
parts << case attachment
18+
when Attachments::Text
19+
format_text_file(attachment)
20+
else
21+
format_attachment(attachment)
22+
end
2523
end
2624

2725
parts
2826
end
2927

30-
def format_image(image)
28+
# Builds a Gemini inline_data part from an attachment.
# @param attachment [#mime_type, #encoded] attachment to embed
# @return [Hash] `{ inline_data: { mime_type:, data: } }`
def format_attachment(attachment)
  inline = { mime_type: attachment.mime_type, data: attachment.encoded }
  { inline_data: inline }
end
3836

39-
def format_pdf(pdf)
37+
# Builds the Gemini text part for a text file attachment.
# @param text_file the attachment handed to Utils.format_text_file_for_llm
# @return [Hash] `{ text: <formatted file> }`
def format_text_file(text_file)
  formatted = Utils.format_text_file_for_llm(text_file)
  { text: formatted }
end
5642

lib/ruby_llm/providers/openai/media.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ def format_content(content)
2121
parts << format_pdf(attachment)
2222
when Attachments::Audio
2323
parts << format_audio(attachment)
24+
when Attachments::Text
25+
parts << format_text_file(attachment)
26+
else
27+
raise UnsupportedAttachmentError, attachment.class
2428
end
2529
end
2630

@@ -47,6 +51,13 @@ def format_pdf(pdf)
4751
}
4852
end
4953

54+
# Builds the OpenAI content part for a text file attachment.
# @param text_file the attachment handed to Utils.format_text_file_for_llm
# @return [Hash] a `type: 'text'` part whose `text` is the formatted file
def format_text_file(text_file)
  rendered = Utils.format_text_file_for_llm(text_file)
  { type: 'text', text: rendered }
end
60+
5061
def format_audio(audio)
5162
{
5263
type: 'input_audio',

lib/ruby_llm/utils.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,9 @@ def deep_symbolize_keys(value)
1818
value
1919
end
2020
end
21+
22+
# Wraps a text attachment in a <file name='...'>...</file> envelope for use in a prompt.
#
# Only the base name of `source` is used. Characters in the name that would
# end the single-quoted attribute or open a tag are escaped; output for names
# without such characters is unchanged.
#
# NOTE(review): `content` is interpolated as-is (presumably so the model sees
# the file text unmodified); a file containing "</file>" can therefore close
# the envelope early - confirm this is acceptable.
#
# @param text_file [#source, #content] the text attachment
# @return [String] the wrapped file
def format_text_file_for_llm(text_file)
  # to_s lets sources that are not String/Pathname (e.g. a URI object) through
  # File.basename, which otherwise raises TypeError.
  name = File.basename(text_file.source.to_s)
  escapes = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', "'" => '&#39;' }
  safe_name = name.gsub(/[&<>']/, escapes)

  "<file name='#{safe_name}'>#{text_file.content}</file>"
end
2125
end
2226
end

0 commit comments

Comments
 (0)