Cinnamon
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Dockerfile‎
Lines changed: 11 additions & 1 deletion b/‎Dockerfile‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 28 additions & 1 deletion b/‎README.md‎
Lines changed: 28 additions & 1 deletion
diff --git a/‎doc_env_reqs.txt‎
Lines changed: 1 addition & 1 deletion b/‎doc_env_reqs.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/about.md‎
Lines changed: 0 additions & 3 deletions b/‎docs/about.md‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎docs/development/index.md‎
Lines changed: 5 additions & 1 deletion b/‎docs/development/index.md‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎flowsettings.py‎
Lines changed: 18 additions & 24 deletions b/‎flowsettings.py‎
Lines changed: 18 additions & 24 deletions
diff --git a/‎libs/kotaemon/kotaemon/indices/ingests/files.py‎
Lines changed: 5 additions & 3 deletions b/‎libs/kotaemon/kotaemon/indices/ingests/files.py‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎libs/kotaemon/kotaemon/indices/qa/__init__.py‎
Lines changed: 0 additions & 2 deletions b/‎libs/kotaemon/kotaemon/indices/qa/__init__.py‎
Lines changed: 0 additions & 2 deletions
@@ -57,6 +57,7 @@ repos:
             "types-requests",
             "sqlmodel",
             "types-Markdown",
+            types-tzlocal,
           ]
         args: ["--check-untyped-defs", "--ignore-missing-imports"]
         exclude: "^templates/"
 
@@ -46,7 +46,7 @@ RUN --mount=type=ssh  \
 
 RUN --mount=type=ssh  \
     --mount=type=cache,target=/root/.cache/pip  \
-    if [ "$TARGETARCH" = "amd64" ]; then pip install graphrag future; fi
+    if [ "$TARGETARCH" = "amd64" ]; then pip install "graphrag<=0.3.6" future; fi
 
 # Clean up
 RUN apt-get autoremove \
@@ -81,6 +81,16 @@ RUN --mount=type=ssh  \
     pip install -e "libs/kotaemon[adv]" \
     && pip install unstructured[all-docs]
 
+# Install lightRAG
+ENV USE_LIGHTRAG=true
+RUN --mount=type=ssh  \
+    --mount=type=cache,target=/root/.cache/pip  \
+    pip install aioboto3 nano-vectordb ollama xxhash "lightrag-hku<=0.0.8"
+
+RUN --mount=type=ssh  \
+    --mount=type=cache,target=/root/.cache/pip  \
+    pip install "docling<=2.5.2"
+
 # Clean up
 RUN apt-get autoremove \
     && apt-get clean \
 
@@ -26,6 +26,8 @@ developers in mind.
 
 </div>
 
+<!-- start-intro -->
+
 ## Introduction
 
 This project serves as a functional RAG UI for both end users who want to do QA on their
@@ -187,12 +189,24 @@ documents and developers who want to build their own RAG pipeline.
 
 <details>
 
+<summary>Setup LIGHTRAG</summary>
+
+- Install LightRAG: `pip install git+https://github.com/HKUDS/LightRAG.git`
+- Installing `LightRAG` might introduce version conflicts; see [this issue](https://github.com/Cinnamon/kotaemon/issues/440)
+  - To quickly fix: `pip uninstall hnswlib chroma-hnswlib && pip install chroma-hnswlib`
+- Launch Kotaemon with `USE_LIGHTRAG=true` environment variable.
+- Set your default LLM & Embedding models in the Resources settings; they will be recognized automatically by LightRAG.
+
+</details>
+
+<details>
+
 <summary>Setup MS GRAPHRAG</summary>
 
 - **Non-Docker Installation**: If you are not using Docker, install GraphRAG with the following command:
 
   ```shell
-  pip install graphrag future
+  pip install "graphrag<=0.3.6" future
   ```
 
 - **Setting Up API KEY**: To use the GraphRAG retriever feature, ensure you set the `GRAPHRAG_API_KEY` environment variable. You can do this directly in your environment or by adding it to a `.env` file.
@@ -204,6 +218,17 @@ documents and developers who want to build their own RAG pipeline.
 
 See [Local model setup](docs/local_model.md).
 
+### Setup multimodal document parsing (OCR, table parsing, figure extraction)
+
+These options are available:
+
+- [Azure Document Intelligence (API)](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence)
+- [Adobe PDF Extract (API)](https://developer.adobe.com/document-services/docs/overview/pdf-extract-api/)
+- [Docling (local, open-source)](https://github.com/DS4SD/docling)
+  - To use Docling, first install the required dependencies: `pip install docling`
+
+Select the corresponding loader in `Settings -> Retrieval Settings -> File loader`
+
 ### Customize your application
 
 - By default, all application data is stored in the `./ktem_app_data` folder. You can back up or copy this folder to transfer your installation to a new machine.
@@ -332,6 +357,8 @@ This file provides another way to configure your models and credentials.
 
 > (more instruction WIP).
 
+<!-- end-intro -->
+
 ## Star History
 
 <a href="https://star-history.com/#Cinnamon/kotaemon&Date">
 
@@ -3,7 +3,7 @@ mkdocstrings[python]
 mkdocs-material
 mkdocs-gen-files
 mkdocs-literate-nav
-mkdocs-video
 mkdocs-git-revision-date-localized-plugin
 mkdocs-section-index
+mkdocs-include-markdown-plugin[cache]
 mdx_truly_sane_lists
@@ -9,6 +9,3 @@ developers in mind.
 [User Guide](https://cinnamon.github.io/kotaemon/) |
 [Developer Guide](https://cinnamon.github.io/kotaemon/development/) |
 [Feedback](https://github.com/Cinnamon/kotaemon/issues)
-
-[Dark Mode](?__theme=dark) |
-[Light Mode](?__theme=light)
@@ -1 +1,5 @@
---8<-- "README.md"
+{%
+    include-markdown "../../README.md"
+    start="<!-- start-intro -->"
+    end="<!-- end-intro -->"
+%}
@@ -255,7 +255,7 @@
     "ktem.reasoning.react.ReactAgentPipeline",
     "ktem.reasoning.rewoo.RewooAgentPipeline",
 ]
-KH_REASONINGS_USE_MULTIMODAL = False
+KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", default=False, cast=bool)
 KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format(
     config("AZURE_OPENAI_ENDPOINT", default=""),
     config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
@@ -287,41 +287,35 @@
 }
 
 USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool)
-GRAPHRAG_INDEX_TYPE = (
-    "ktem.index.file.graph.GraphRAGIndex"
-    if not USE_NANO_GRAPHRAG
-    else "ktem.index.file.graph.NanoGraphRAGIndex"
-)
+USE_LIGHTRAG = config("USE_LIGHTRAG", default=False, cast=bool)
+
+GRAPHRAG_INDEX_TYPES = ["ktem.index.file.graph.GraphRAGIndex"]
+
+if USE_NANO_GRAPHRAG:
+    GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.NanoGraphRAGIndex")
+elif USE_LIGHTRAG:
+    GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.LightRAGIndex")
+
 KH_INDEX_TYPES = [
     "ktem.index.file.FileIndex",
-    GRAPHRAG_INDEX_TYPE,
+    *GRAPHRAG_INDEX_TYPES,
 ]
 
-GRAPHRAG_INDEX = (
+GRAPHRAG_INDICES = [
     {
-        "name": "GraphRAG Collection",
-        "config": {
-            "supported_file_types": (
-                ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
-                ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
-            ),
-            "private": False,
-        },
-        "index_type": "ktem.index.file.graph.GraphRAGIndex",
-    }
-    if not USE_NANO_GRAPHRAG
-    else {
-        "name": "NanoGraphRAG Collection",
+        "name": graph_type.split(".")[-1].replace("Index", "")
+        + " Collection",  # get last name
         "config": {
             "supported_file_types": (
                 ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
                 ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
             ),
             "private": False,
         },
-        "index_type": "ktem.index.file.graph.NanoGraphRAGIndex",
+        "index_type": graph_type,
     }
-)
+    for graph_type in GRAPHRAG_INDEX_TYPES
+]
 
 KH_INDICES = [
     {
@@ -335,5 +329,5 @@
         },
         "index_type": "ktem.index.file.FileIndex",
     },
-    GRAPHRAG_INDEX,
+    *GRAPHRAG_INDICES,
 ]
@@ -13,6 +13,7 @@
     AdobeReader,
     AzureAIDocumentIntelligenceLoader,
     DirectoryReader,
+    DoclingReader,
     HtmlReader,
     MathpixPDFReader,
     MhtmlReader,
@@ -32,9 +33,10 @@
     credential=str(config("AZURE_DI_CREDENTIAL", default="")),
     cache_dir=getattr(flowsettings, "KH_MARKDOWN_OUTPUT_DIR", None),
 )
-adobe_reader.vlm_endpoint = azure_reader.vlm_endpoint = getattr(
-    flowsettings, "KH_VLM_ENDPOINT", ""
-)
+docling_reader = DoclingReader()
+adobe_reader.vlm_endpoint = (
+    azure_reader.vlm_endpoint
+) = docling_reader.vlm_endpoint = getattr(flowsettings, "KH_VLM_ENDPOINT", "")
 
 
 KH_DEFAULT_FILE_EXTRACTORS: dict[str, BaseReader] = {
 
@@ -1,7 +1,5 @@
 from .citation import CitationPipeline
-from .text_based import CitationQAPipeline
 
 __all__ = [
     "CitationPipeline",
-    "CitationQAPipeline",
 ]
Original file line number	Diff line number	Diff line change
`@@ -57,6 +57,7 @@ repos:`
`57`	`57`	`"types-requests",`
`58`	`58`	`"sqlmodel",`
`59`	`59`	`"types-Markdown",`
	`60`	`+ types-tzlocal,`
`60`	`61`	`]`
`61`	`62`	`args: ["--check-untyped-defs", "--ignore-missing-imports"]`
`62`	`63`	`exclude: "^templates/"`
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,5 @@`
`1`	`1`	`from .citation import CitationPipeline`
`2`		`-from .text_based import CitationQAPipeline`
`3`	`2`
`4`	`3`	`__all__ = [`
`5`	`4`	`"CitationPipeline",`
`6`		`- "CitationQAPipeline",`
`7`	`5`	`]`