1
1
pub mod file_info;
2
2
pub mod utils;
3
-
4
3
use file_info:: FileInfo ;
4
+ use grep:: {
5
+ matcher:: { Match , Matcher } ,
6
+ regex:: RegexMatcherBuilder ,
7
+ searcher:: { sinks:: UTF8 , BinaryDetection , Searcher } ,
8
+ } ;
5
9
6
10
use std:: {
7
11
env,
@@ -29,10 +33,33 @@ use crate::{
29
33
tools:: EditOperation ,
30
34
} ;
31
35
36
/// Default upper bound on the length of a snippet cut from a matched line.
/// `extract_snippet` falls back to this value when its `max_length` argument is `None`;
/// it is applied as an offset into the line's string data.
const SNIPPET_MAX_LENGTH: usize = 200;
/// Default distance before the start of a match at which a snippet begins.
/// `extract_snippet` falls back to this value when its `backward_chars` argument is `None`.
const SNIPPET_BACKWARD_CHARS: usize = 30;
38
+
32
39
/// Service type whose methods implement the file-system operations of this module,
/// including file-name search and file-content search.
pub struct FileSystemService {
    /// Paths held by the service.
    /// NOTE(review): presumably the normalized allowed directories built in `try_new`
    /// and checked by `validate_path` — confirm against those implementations.
    allowed_path: Vec<PathBuf>,
}
35
42
43
/// Represents a single match found in a file's content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentMatchResult {
    /// The line number where the match occurred (1-based).
    pub line_number: u64,
    /// Byte offset of the start of the match within the original (untrimmed) line.
    pub start_pos: usize,
    /// The text of the line containing the match, as produced by `extract_snippet`.
    ///
    /// The line is trimmed, and long lines are cut down to a window that starts up to
    /// `SNIPPET_BACKWARD_CHARS` before the match and is at most `SNIPPET_MAX_LENGTH`
    /// long; `"..."` marks each side on which text was left out.
    pub line_text: String,
}

/// Represents all matches found in a specific file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileSearchResult {
    /// The path to the file where matches were found.
    pub file_path: PathBuf,
    /// All individual match results within the file.
    pub matches: Vec<ContentMatchResult>,
}
62
+
36
63
impl FileSystemService {
37
64
pub fn try_new ( allowed_directories : & [ String ] ) -> ServiceResult < Self > {
38
65
let normalized_dirs: Vec < PathBuf > = allowed_directories
@@ -376,19 +403,59 @@ impl FileSystemService {
376
403
Ok ( ( ) )
377
404
}
378
405
406
+ /// Searches for files in the directory tree starting at `root_path` that match the given `pattern`,
407
+ /// excluding paths that match any of the `exclude_patterns`.
408
+ ///
409
+ /// # Arguments
410
+ /// * `root_path` - The root directory to start the search from.
411
+ /// * `pattern` - A glob pattern to match file names (case-insensitive). If no wildcards are provided,
412
+ /// the pattern is wrapped in '*' for partial matching.
413
+ /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
414
+ ///
415
+ /// # Returns
416
+ /// A `ServiceResult` containing a vector of`walkdir::DirEntry` objects for matching files,
417
+ /// or a `ServiceError` if an error occurs.
379
418
pub fn search_files (
380
419
& self ,
381
- // root_path: impl Into<PathBuf>,
382
420
root_path : & Path ,
383
421
pattern : String ,
384
422
exclude_patterns : Vec < String > ,
385
423
) -> ServiceResult < Vec < walkdir:: DirEntry > > {
424
+ let result = self . search_files_iter ( root_path, pattern, exclude_patterns) ?;
425
+ Ok ( result. collect :: < Vec < walkdir:: DirEntry > > ( ) )
426
+ }
427
+
428
+ /// Returns an iterator over files in the directory tree starting at `root_path` that match
429
+ /// the given `pattern`, excluding paths that match any of the `exclude_patterns`.
430
+ ///
431
+ /// # Arguments
432
+ /// * `root_path` - The root directory to start the search from.
433
+ /// * `pattern` - A glob pattern to match file names. If no wildcards are provided, the pattern is wrapped in `**/*{pattern}*` for partial matching.
434
+ /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
435
+ ///
436
+ /// # Returns
437
+ /// A `ServiceResult` containing an iterator yielding `walkdir::DirEntry` objects for matching files,
438
+ /// or a `ServiceError` if an error occurs.
439
+ pub fn search_files_iter < ' a > (
440
+ & ' a self ,
441
+ // root_path: impl Into<PathBuf>,
442
+ root_path : & ' a Path ,
443
+ pattern : String ,
444
+ exclude_patterns : Vec < String > ,
445
+ ) -> ServiceResult < impl Iterator < Item = walkdir:: DirEntry > + ' a > {
386
446
let valid_path = self . validate_path ( root_path) ?;
387
447
448
+ let updated_pattern = if pattern. contains ( '*' ) {
449
+ pattern. to_lowercase ( )
450
+ } else {
451
+ format ! ( "**/*{}*" , & pattern. to_lowercase( ) )
452
+ } ;
453
+ let glob_pattern = Pattern :: new ( & updated_pattern) ;
454
+
388
455
let result = WalkDir :: new ( valid_path)
389
456
. follow_links ( true )
390
457
. into_iter ( )
391
- . filter_entry ( |dir_entry| {
458
+ . filter_entry ( move |dir_entry| {
392
459
let full_path = dir_entry. path ( ) ;
393
460
394
461
// Validate each path before processing
@@ -415,18 +482,9 @@ impl FileSystemService {
415
482
} ) ;
416
483
417
484
!should_exclude
418
- } ) ;
419
-
420
- let updated_pattern = if pattern. contains ( '*' ) {
421
- pattern. to_lowercase ( )
422
- } else {
423
- format ! ( "**/*{}*" , & pattern. to_lowercase( ) )
424
- } ;
425
- let glob_pattern = Pattern :: new ( & updated_pattern) ;
426
- let final_result = result
427
- . into_iter ( )
485
+ } )
428
486
. filter_map ( |v| v. ok ( ) )
429
- . filter ( |entry| {
487
+ . filter ( move |entry| {
430
488
if root_path == entry. path ( ) {
431
489
return false ;
432
490
}
@@ -437,11 +495,10 @@ impl FileSystemService {
437
495
glob. matches ( & entry. file_name ( ) . to_str ( ) . unwrap_or ( "" ) . to_lowercase ( ) )
438
496
} )
439
497
. unwrap_or ( false ) ;
440
-
441
498
is_match
442
- } )
443
- . collect :: < Vec < walkdir :: DirEntry > > ( ) ;
444
- Ok ( final_result )
499
+ } ) ;
500
+
501
+ Ok ( result )
445
502
}
446
503
447
504
pub fn create_unified_diff (
@@ -631,4 +688,140 @@ impl FileSystemService {
631
688
632
689
Ok ( formatted_diff)
633
690
}
691
+
692
+ pub fn escape_regex ( & self , text : & str ) -> String {
693
+ // Covers special characters in regex engines (RE2, PCRE, JS, Python)
694
+ const SPECIAL_CHARS : & [ char ] = & [
695
+ '.' , '^' , '$' , '*' , '+' , '?' , '(' , ')' , '[' , ']' , '{' , '}' , '\\' , '|' , '/' ,
696
+ ] ;
697
+
698
+ let mut escaped = String :: with_capacity ( text. len ( ) ) ;
699
+
700
+ for ch in text. chars ( ) {
701
+ if SPECIAL_CHARS . contains ( & ch) {
702
+ escaped. push ( '\\' ) ;
703
+ }
704
+ escaped. push ( ch) ;
705
+ }
706
+
707
+ escaped
708
+ }
709
+
710
+ // Searches the content of a file for occurrences of the given query string.
711
+ ///
712
+ /// This method searches the file specified by `file_path` for lines matching the `query`.
713
+ /// The search can be performed as a regular expression or as a literal string,
714
+ /// depending on the `is_regex` flag.
715
+ ///
716
+ /// If matched line is larger than 255 characters, a snippet will be extracted around the matched text.
717
+ ///
718
+ pub fn content_search (
719
+ & self ,
720
+ query : & str ,
721
+ file_path : impl AsRef < Path > ,
722
+ is_regex : Option < bool > ,
723
+ ) -> ServiceResult < Option < FileSearchResult > > {
724
+ let query = if is_regex. unwrap_or_default ( ) {
725
+ query. to_string ( )
726
+ } else {
727
+ self . escape_regex ( query)
728
+ } ;
729
+
730
+ let matcher = RegexMatcherBuilder :: new ( )
731
+ . case_insensitive ( true )
732
+ . build ( query. as_str ( ) ) ?;
733
+
734
+ let mut searcher = Searcher :: new ( ) ;
735
+ let mut result = FileSearchResult {
736
+ file_path : file_path. as_ref ( ) . to_path_buf ( ) ,
737
+ matches : vec ! [ ] ,
738
+ } ;
739
+
740
+ searcher. set_binary_detection ( BinaryDetection :: quit ( b'\x00' ) ) ;
741
+
742
+ searcher. search_path (
743
+ & matcher,
744
+ file_path,
745
+ UTF8 ( |line_number, line| {
746
+ let actual_match = matcher. find ( line. as_bytes ( ) ) ?. unwrap ( ) ;
747
+
748
+ result. matches . push ( ContentMatchResult {
749
+ line_number,
750
+ start_pos : actual_match. start ( ) ,
751
+ line_text : self . extract_snippet ( line, actual_match, None , None ) ,
752
+ } ) ;
753
+ Ok ( true )
754
+ } ) ,
755
+ ) ?;
756
+
757
+ if result. matches . is_empty ( ) {
758
+ return Ok ( None ) ;
759
+ }
760
+
761
+ Ok ( Some ( result) )
762
+ }
763
+
764
+ /// Extracts a snippet from a given line of text around a match.
765
+ ///
766
+ /// It extracts a substring starting a fixed number of characters (`SNIPPET_BACKWARD_CHARS`)
767
+ /// before the start position of the `match`, and extends up to `max_length` characters
768
+ /// If the snippet does not include the beginning or end of the original line, ellipses (`"..."`) are added
769
+ /// to indicate the truncation.
770
+ pub fn extract_snippet (
771
+ & self ,
772
+ line : & str ,
773
+ match_result : Match ,
774
+ max_length : Option < usize > ,
775
+ backward_chars : Option < usize > ,
776
+ ) -> String {
777
+ let max_length = max_length. unwrap_or ( SNIPPET_MAX_LENGTH ) ;
778
+ let backward_chars = backward_chars. unwrap_or ( SNIPPET_BACKWARD_CHARS ) ;
779
+
780
+ let start_pos = line. len ( ) - line. trim_start ( ) . len ( ) ;
781
+
782
+ let line = line. trim ( ) ;
783
+
784
+ // Start SNIPPET_BACKWARD_CHARS characters before match (or at 0)
785
+ let snippet_start = ( match_result. start ( ) - start_pos) . saturating_sub ( backward_chars) ;
786
+
787
+ // Get up to SNIPPET_MAX_LENGTH characters from snippet_start
788
+ let snippet_end = ( snippet_start + max_length) . min ( line. len ( ) ) ;
789
+
790
+ let snippet = & line[ snippet_start..snippet_end] ;
791
+
792
+ // Add ellipses if line was truncated
793
+ let mut result = String :: new ( ) ;
794
+ if snippet_start > 0 {
795
+ result. push_str ( "..." ) ;
796
+ }
797
+ result. push_str ( snippet) ;
798
+ if snippet_end < line. len ( ) {
799
+ result. push_str ( "..." ) ;
800
+ }
801
+ result
802
+ }
803
+
804
+ pub fn search_files_content (
805
+ & self ,
806
+ root_path : impl AsRef < Path > ,
807
+ pattern : & str ,
808
+ query : & str ,
809
+ is_regex : bool ,
810
+ exclude_patterns : Option < Vec < String > > ,
811
+ ) -> ServiceResult < Vec < FileSearchResult > > {
812
+ let files_iter = self . search_files_iter (
813
+ root_path. as_ref ( ) ,
814
+ pattern. to_string ( ) ,
815
+ exclude_patterns. to_owned ( ) . unwrap_or_default ( ) ,
816
+ ) ?;
817
+
818
+ let results: Vec < FileSearchResult > = files_iter
819
+ . filter_map ( |entry| {
820
+ self . content_search ( query, entry. path ( ) , Some ( is_regex) )
821
+ . ok ( )
822
+ . and_then ( |v| v)
823
+ } )
824
+ . collect ( ) ;
825
+ Ok ( results)
826
+ }
634
827
}
0 commit comments