11"""Loader that loads acreom vault from a directory."""
2- from typing import Iterator
32import re
43from pathlib import Path
5- from typing import List
4+ from typing import Iterator , List
5+
66from langchain .docstore .document import Document
77from langchain .document_loaders .base import BaseLoader
88
@@ -11,7 +11,7 @@ class AcreomLoader(BaseLoader):
1111 FRONT_MATTER_REGEX = re .compile (r"^---\n(.*?)\n---\n" , re .MULTILINE | re .DOTALL )
1212
1313 def __init__ (
14- self , path : str , encoding : str = "UTF-8" , collect_metadata : bool = True
14+ self , path : str , encoding : str = "UTF-8" , collect_metadata : bool = True
1515 ):
1616 """Initialize with path."""
1717 self .file_path = path
@@ -42,10 +42,11 @@ def _remove_front_matter(self, content: str) -> str:
4242 return self .FRONT_MATTER_REGEX .sub ("" , content )
4343
4444 def _process_acreom_content (self , content : str ) -> str :
45- # remove acreom specific elements from content that do not contribute to the context of current document
46- content = re .sub ('\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*' , "" , content ) # rm tasks
47- content = re .sub ('#' , "" , content ) # rm hashtags
48- content = re .sub ('\[\[.*?\]\]' , "" , content ) # rm doclinks
45+ # remove acreom specific elements from content that
46+ # do not contribute to the context of current document
47+ content = re .sub ("\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*" , "" , content ) # rm tasks
48+ content = re .sub ("#" , "" , content ) # rm hashtags
49+ content = re .sub ("\[\[.*?\]\]" , "" , content ) # rm doclinks
4950 return content
5051
5152 def lazy_load (self ) -> Iterator [Document ]:
0 commit comments