5 changes: 4 additions & 1 deletion .coveragerc
```diff
@@ -5,4 +5,7 @@ exclude_lines =
 
     # Don't complain if tests don't hit defensive assertion code:
     raise NotImplementedError
-    logger.
+    logger.
+omit =
+    # Don't have a nice github action for neo4j now, so skip this file:
+    nano_graphrag/_storage/gdb_neo4j.py
```
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
```diff
@@ -42,6 +42,8 @@ jobs:
       run: |
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
     - name: Build and Test
+      env:
+        NANO_GRAPHRAG_TEST_IGNORE_NEO4J: true
       run: |
         python -m pytest -o log_cli=true -o log_cli_level="INFO" --cov=nano_graphrag --cov-report=xml -v ./
     - name: Check codecov file
```
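The new variable presumably lets Neo4j-dependent tests skip themselves while CI has no Neo4j service, matching the `omit` entry added to `.coveragerc` above. A minimal sketch of such a module-level guard, reading the variable the way the workflow sets it (the test module itself is an assumption, not the repository's actual code):

```python
import os

import pytest

# Hypothetical guard at the top of a Neo4j test module: skip everything
# when CI sets NANO_GRAPHRAG_TEST_IGNORE_NEO4J, since no Neo4j service
# is available in the GitHub Actions runner.
if os.environ.get("NANO_GRAPHRAG_TEST_IGNORE_NEO4J"):
    pytest.skip("Neo4j is not available in CI", allow_module_level=True)
```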
3 changes: 1 addition & 2 deletions .gitignore
```diff
@@ -1,8 +1,7 @@
 # Created by https://www.toptal.com/developers/gitignore/api/python
 # Edit at https://www.toptal.com/developers/gitignore?templates=python
 test_cache.json
-run_test.py
-run_test_zh.py
+run_test*.py
 nano_graphrag_cache*/
 *.txt
 examples/benchmarks/fixtures/
```
27 changes: 27 additions & 0 deletions docs/use_neo4j_for_graphrag.md
@@ -0,0 +1,27 @@

1. Install [Neo4j](https://neo4j.com/docs/operations-manual/current/installation/)
2. Install the Neo4j GDS (Graph Data Science) [plugin](https://neo4j.com/docs/graph-data-science/current/installation/neo4j-server/)
3. Start the Neo4j server
4. Get the `NEO4J_URL`, `NEO4J_USER` and `NEO4J_PASSWORD`
   - By default, `NEO4J_URL` is `neo4j://localhost:7687`, `NEO4J_USER` is `neo4j` and `NEO4J_PASSWORD` is `neo4j`

Pass your Neo4j connection settings to `GraphRAG`:

```python
import os

from nano_graphrag import GraphRAG
from nano_graphrag._storage import Neo4jStorage

neo4j_config = {
    "neo4j_url": os.environ.get("NEO4J_URL", "neo4j://localhost:7687"),
    "neo4j_auth": (
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo4j"),
    ),
}

GraphRAG(
    graph_storage_cls=Neo4jStorage,
    addon_params=neo4j_config,
)
```
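For context, a fuller end-to-end sketch of the same setup (assuming a local Neo4j server with GDS installed and an `OPENAI_API_KEY` in the environment; the input file, working directory, and query are illustrative):

```python
import os

from nano_graphrag import GraphRAG, QueryParam
from nano_graphrag._storage import Neo4jStorage

rag = GraphRAG(
    working_dir="./neo4j_graphrag_cache",  # illustrative cache directory
    graph_storage_cls=Neo4jStorage,
    addon_params={
        "neo4j_url": os.environ.get("NEO4J_URL", "neo4j://localhost:7687"),
        "neo4j_auth": (
            os.environ.get("NEO4J_USER", "neo4j"),
            os.environ.get("NEO4J_PASSWORD", "neo4j"),
        ),
    },
)

with open("./book.txt") as f:  # any plain-text corpus
    rag.insert(f.read())

# Local mode answers from the entities/relations around the best-matching chunks.
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
```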
1 change: 1 addition & 0 deletions examples/no_openai_key_at_all.py
```diff
@@ -34,6 +34,7 @@ async def ollama_model_if_cache(
 ) -> str:
     # remove kwargs that are not supported by ollama
     kwargs.pop("max_tokens", None)
+    kwargs.pop("response_format", None)
 
     ollama_client = ollama.AsyncClient()
     messages = []
```
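The two `kwargs.pop` calls drop OpenAI-only options (`max_tokens`, `response_format`) that `ollama` would reject. For orientation, a reconstructed sketch of the wrapper these hunks edit (the model name is a placeholder and the hashing-cache handling is omitted, so treat this as an approximation rather than the example's exact code):

```python
import ollama

MODEL = "qwen2"  # placeholder; the real examples configure their own model


async def ollama_model_if_cache(
    prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
    # Drop kwargs that ollama's chat API does not support.
    kwargs.pop("max_tokens", None)
    kwargs.pop("response_format", None)

    ollama_client = ollama.AsyncClient()
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})

    response = await ollama_client.chat(model=MODEL, messages=messages)
    return response["message"]["content"]
```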
1 change: 1 addition & 0 deletions examples/using_ollama_as_llm.py
```diff
@@ -18,6 +18,7 @@ async def ollama_model_if_cache(
 ) -> str:
     # remove kwargs that are not supported by ollama
     kwargs.pop("max_tokens", None)
+    kwargs.pop("response_format", None)
 
     ollama_client = ollama.AsyncClient()
     messages = []
```
17 changes: 10 additions & 7 deletions examples/using_ollama_as_llm_and_embedding.py
```diff
@@ -20,11 +20,13 @@
 EMBEDDING_MODEL_DIM = 768
 EMBEDDING_MODEL_MAX_TOKENS = 8192
 
+
 async def ollama_model_if_cache(
     prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
     # remove kwargs that are not supported by ollama
     kwargs.pop("max_tokens", None)
+    kwargs.pop("response_format", None)
 
     ollama_client = ollama.AsyncClient()
     messages = []
@@ -98,20 +100,21 @@ def insert():
     # rag = GraphRAG(working_dir=WORKING_DIR, enable_llm_cache=True)
     # rag.insert(FAKE_TEXT[half_len:])
 
+
 # We're using Ollama to generate embeddings for the BGE model
 @wrap_embedding_func_with_attrs(
-    embedding_dim= EMBEDDING_MODEL_DIM,
-    max_token_size= EMBEDDING_MODEL_MAX_TOKENS,
+    embedding_dim=EMBEDDING_MODEL_DIM,
+    max_token_size=EMBEDDING_MODEL_MAX_TOKENS,
 )
-
-async def ollama_embedding(texts :list[str]) -> np.ndarray:
+async def ollama_embedding(texts: list[str]) -> np.ndarray:
     embed_text = []
     for text in texts:
-        data = ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)
-        embed_text.append(data["embedding"])
+        data = ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)
+        embed_text.append(data["embedding"])
 
     return embed_text
 
+
 if __name__ == "__main__":
     insert()
     query()
```
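Elsewhere in this example the two callables are handed to `GraphRAG`; roughly like this (a sketch based on nano-graphrag's constructor fields, with `WORKING_DIR` assumed from the rest of the file):

```python
rag = GraphRAG(
    working_dir=WORKING_DIR,
    best_model_func=ollama_model_if_cache,
    cheap_model_func=ollama_model_if_cache,
    embedding_func=ollama_embedding,
)
```

Note that `ollama_embedding` returns a plain Python list even though its annotation promises `np.ndarray`; wrapping the result in `np.array(embed_text)` would make the return value match the annotation.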
29 changes: 21 additions & 8 deletions nano_graphrag/_llm.py
```diff
@@ -13,6 +13,23 @@
 from ._utils import compute_args_hash, wrap_embedding_func_with_attrs
 from .base import BaseKVStorage
 
+global_openai_async_client = None
+global_azure_openai_async_client = None
+
+
+def get_openai_async_client_instance():
+    global global_openai_async_client
+    if global_openai_async_client is None:
+        global_openai_async_client = AsyncOpenAI()
+    return global_openai_async_client
+
+
+def get_azure_openai_async_client_instance():
+    global global_azure_openai_async_client
+    if global_azure_openai_async_client is None:
+        global_azure_openai_async_client = AsyncAzureOpenAI()
+    return global_azure_openai_async_client
+
 
 @retry(
     stop=stop_after_attempt(5),
@@ -22,7 +39,7 @@
 async def openai_complete_if_cache(
     model, prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
-    openai_async_client = AsyncOpenAI()
+    openai_async_client = get_openai_async_client_instance()
     hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
     messages = []
     if system_prompt:
@@ -78,7 +95,7 @@ async def gpt_4o_mini_complete(
     retry=retry_if_exception_type((RateLimitError, APIConnectionError)),
 )
 async def openai_embedding(texts: list[str]) -> np.ndarray:
-    openai_async_client = AsyncOpenAI()
+    openai_async_client = get_openai_async_client_instance()
     response = await openai_async_client.embeddings.create(
         model="text-embedding-3-small", input=texts, encoding_format="float"
     )
@@ -93,7 +110,7 @@ async def openai_embedding(texts: list[str]) -> np.ndarray:
 async def azure_openai_complete_if_cache(
     deployment_name, prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
-    azure_openai_client = AsyncAzureOpenAI()
+    azure_openai_client = get_azure_openai_async_client_instance()
     hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
     messages = []
     if system_prompt:
@@ -154,11 +171,7 @@ async def azure_gpt_4o_mini_complete(
     retry=retry_if_exception_type((RateLimitError, APIConnectionError)),
 )
 async def azure_openai_embedding(texts: list[str]) -> np.ndarray:
-    azure_openai_client = AsyncAzureOpenAI(
-        api_key=os.environ.get("API_KEY_EMB"),
-        api_version=os.environ.get("API_VERSION_EMB"),
-        azure_endpoint=os.environ.get("AZURE_ENDPOINT_EMB"),
-    )
+    azure_openai_client = get_azure_openai_async_client_instance()
     response = await azure_openai_client.embeddings.create(
         model="text-embedding-3-small", input=texts, encoding_format="float"
    )
```
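With these singletons, every completion and embedding call reuses one client (and its HTTP connection pool) instead of constructing a fresh `AsyncOpenAI`/`AsyncAzureOpenAI` per request. It also means `azure_openai_embedding` now relies on the client's standard environment-variable configuration rather than the dedicated `*_EMB` variables it read before. A toy check of the pattern (illustrative only):

```python
client_a = get_openai_async_client_instance()
client_b = get_openai_async_client_instance()
assert client_a is client_b  # both names point at the same cached client
```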
5 changes: 5 additions & 0 deletions nano_graphrag/_storage/__init__.py
```diff
@@ -0,0 +1,5 @@
+from .gdb_networkx import NetworkXStorage
+from .gdb_neo4j import Neo4jStorage
+from .vdb_hnswlib import HNSWVectorStorage
+from .vdb_nanovectordb import NanoVectorDBStorage
+from .kv_json import JsonKVStorage
```
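With this `__init__.py`, all storage backends are importable from one place, so backends can be mixed and matched. A sketch (the `vector_db_storage_cls` field name follows nano-graphrag's constructor as I understand it; double-check against your version):

```python
from nano_graphrag import GraphRAG
from nano_graphrag._storage import HNSWVectorStorage, Neo4jStorage

rag = GraphRAG(
    graph_storage_cls=Neo4jStorage,           # graph store backed by Neo4j
    vector_db_storage_cls=HNSWVectorStorage,  # vectors stored via hnswlib
)
```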