Skip to content

Commit ce660fe

Browse files
committed
Formatting
1 parent b62d918 commit ce660fe

File tree

2 files changed

+9
-8
lines changed

2 files changed

+9
-8
lines changed

langchain/document_loaders/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""All different types of document loaders."""
22

3+
from langchain.document_loaders.acreom import AcreomLoader
34
from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
45
from langchain.document_loaders.apify_dataset import ApifyDatasetLoader
56
from langchain.document_loaders.arxiv import ArxivLoader
@@ -123,7 +124,6 @@
123124
GoogleApiYoutubeLoader,
124125
YoutubeLoader,
125126
)
126-
from langchain.document_loaders.acreom import AcreomLoader
127127

128128
# Legacy: only for backwards compat. Use PyPDFLoader instead
129129
PagedPDFSplitter = PyPDFLoader

langchain/document_loaders/acreom.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Loader that loads acreom vault from a directory."""
2-
from typing import Iterator
32
import re
43
from pathlib import Path
5-
from typing import List
4+
from typing import Iterator, List
5+
66
from langchain.docstore.document import Document
77
from langchain.document_loaders.base import BaseLoader
88

@@ -11,7 +11,7 @@ class AcreomLoader(BaseLoader):
1111
FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)
1212

1313
def __init__(
14-
self, path: str, encoding: str = "UTF-8", collect_metadata: bool = True
14+
self, path: str, encoding: str = "UTF-8", collect_metadata: bool = True
1515
):
1616
"""Initialize with path."""
1717
self.file_path = path
@@ -42,10 +42,11 @@ def _remove_front_matter(self, content: str) -> str:
4242
return self.FRONT_MATTER_REGEX.sub("", content)
4343

4444
def _process_acreom_content(self, content: str) -> str:
45-
# remove acreom specific elements from content that do not contribute to the context of current document
46-
content = re.sub('\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*', "", content) # rm tasks
47-
content = re.sub('#', "", content) # rm hashtags
48-
content = re.sub('\[\[.*?\]\]', "", content) # rm doclinks
45+
# remove acreom specific elements from content that
46+
# do not contribute to the context of current document
47+
content = re.sub("\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*", "", content) # rm tasks
48+
content = re.sub("#", "", content) # rm hashtags
49+
content = re.sub("\[\[.*?\]\]", "", content) # rm doclinks
4950
return content
5051

5152
def lazy_load(self) -> Iterator[Document]:

0 commit comments

Comments
 (0)