Skip to content

Commit ce2bf87

Browse files
committed
Fix Dockerfile; fix txt file path in API test
1 parent 425b311 commit ce2bf87

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

tests/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ FROM dedocproject/dedoc_p3.9_base:version_2023_08_28
44
ENV PYTHONPATH "${PYTHONPATH}:/dedoc_root:/dedoc_root/tests:/dedoc_root/langchain"
55

66
COPY requirements.txt .
7-
RUN pip3 install --no-cache-dir -r requirements.txt
87
RUN pip3 install "langchain-community<1.0"
8+
RUN pip3 install --no-cache-dir -r requirements.txt
9+
RUN apt-get update && apt-get install -y --fix-missing --no-install-recommends fontforge
910

1011
RUN mkdir /dedoc_root
1112
COPY docs/source/_static/code_examples/langchain /dedoc_root/langchain

tests/api_tests/test_api_format_pdf_broken_encoding_reader.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ def _get_abs_path(self, file_name: str) -> str:
1212

1313
def test_text_extraction(self) -> None:
1414
file_name = "mongolo.pdf"
15-
orig_path = "../data/txt/mongolo.txt"
1615
result = self._send_request(file_name, dict(pdf_with_text_layer="bad_encoding_reader"))
1716
tree = result["content"]["structure"]
1817
text1 = self._get_by_tree_path(tree, "0.0")["text"]
@@ -22,6 +21,6 @@ def test_text_extraction(self) -> None:
2221
text5 = "\n".join(self._get_by_tree_path(tree, "0.1.2.0")["text"].split("\n")[:3])
2322

2423
fulltext = text1 + text2 + text3 + text4 + text5
25-
with open(orig_path, encoding="utf8", mode="r") as txt:
24+
with open(os.path.join(self.data_directory_path, "txt", "mongolo.txt"), encoding="utf8", mode="r") as txt:
2625
accuracy = Levenshtein.ratio(txt.read(), fulltext)
2726
self.assertTrue(accuracy > 0.7)

0 commit comments

Comments
 (0)