neo4j-labs
diff --git a/‎README.md
Lines changed: 2 additions & 19 deletions b/‎README.md
Lines changed: 2 additions & 19 deletions
diff --git a/‎backend/example.env
Lines changed: 3 additions & 2 deletions b/‎backend/example.env
Lines changed: 3 additions & 2 deletions
diff --git a/‎backend/score.py
Lines changed: 28 additions & 23 deletions b/‎backend/score.py
Lines changed: 28 additions & 23 deletions
diff --git a/‎backend/src/entities/source_node.py
Lines changed: 6 additions & 0 deletions b/‎backend/src/entities/source_node.py
Lines changed: 6 additions & 0 deletions
diff --git a/‎backend/src/graphDB_dataAccess.py
Lines changed: 61 additions & 50 deletions b/‎backend/src/graphDB_dataAccess.py
Lines changed: 61 additions & 50 deletions
diff --git a/‎backend/src/graph_query.py
Lines changed: 3 additions & 1 deletion b/‎backend/src/graph_query.py
Lines changed: 3 additions & 1 deletion
@@ -31,32 +31,14 @@ If you are using Neo4j Desktop, you will not be able to use the docker-compose b
 ### Local deployment
 #### Running through docker-compose
 By default only OpenAI and Diffbot are enabled since Gemini requires extra GCP configurations.
-Accoroding to enviornment we are configuring the models which is indicated by VITE_LLM_MODELS_PROD variable we can configure model based on our need.
+The available models are configured per environment through the VITE_LLM_MODELS_PROD variable; set it to the models you need.
 EX:
 ```env
 VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash"
 ```
-
-if you only want OpenAI:
-```env
-VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o"
-```
-
-Backend ENV
-```env
 OPENAI_API_KEY="your-openai-key"
 ```
 
-if you only want Diffbot:
-```env
-VITE_LLM_MODELS_PROD="diffbot"
-```
-
-Backend ENV
-```env
-DIFFBOT_API_KEY="your-diffbot-key"
-```
-
 You can then run Docker Compose to build and start all components:
 ```bash
 docker-compose up --build
@@ -89,6 +71,7 @@ VITE_CHAT_MODES=""
 If however you want to specify the only vector mode or only graph mode you can do that by specifying the mode in the env:
 ```env
 VITE_CHAT_MODES="vector,graph"
+VITE_CHAT_MODES="vector,graph"
 ```
 
 #### Running Backend and Frontend separately (dev environment)
 
@@ -25,8 +25,9 @@ NEO4J_USER_AGENT=""
 ENABLE_USER_AGENT = ""
 LLM_MODEL_CONFIG_model_version=""
 ENTITY_EMBEDDING="" True or False
-DUPLICATE_SCORE_VALUE = ""
-DUPLICATE_TEXT_DISTANCE = ""
+DUPLICATE_SCORE_VALUE =0.97
+DUPLICATE_TEXT_DISTANCE =3
+DEFAULT_DIFFBOT_CHAT_MODEL="openai_gpt_4o"  # whichever model is specified here also needs its own LLM_MODEL_CONFIG_<model> entry in the format shown below
 #examples
 LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key"
 LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key"
 
@@ -18,24 +18,19 @@
 from src.communities import create_communities
 from src.neighbours import get_neighbour_nodes
 import json
-from typing import List, Mapping, Union
+from typing import List
 from starlette.middleware.sessions import SessionMiddleware
-import google_auth_oauthlib.flow
 from google.oauth2.credentials import Credentials
 import os
 from src.logger import CustomLogger
 from datetime import datetime, timezone
 import time
 import gc
-from Secweb import SecWeb
-from Secweb.StrictTransportSecurity import HSTS
-from Secweb.ContentSecurityPolicy import ContentSecurityPolicy
 from Secweb.XContentTypeOptions import XContentTypeOptions
 from Secweb.XFrameOptions import XFrame
 from fastapi.middleware.gzip import GZipMiddleware
 from src.ragas_eval import *
 from starlette.types import ASGIApp, Message, Receive, Scope, Send
-import gzip
 from langchain_neo4j import Neo4jGraph
 
 logger = CustomLogger()
@@ -493,11 +488,13 @@ async def connect(uri=Form(), userName=Form(), password=Form(), database=Form())
         start = time.time()
         graph = create_graph_database_connection(uri, userName, password, database)
         result = await asyncio.to_thread(connection_check_and_get_vector_dimensions, graph, database)
+        gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
         end = time.time()
         elapsed_time = end - start
         json_obj = {'api_name':'connect','db_url':uri, 'userName':userName, 'database':database,'status':result, 'count':1, 'logging_time': formatted_time(datetime.now(timezone.utc)), 'elapsed_api_time':f'{elapsed_time:.2f}'}
         logger.log_struct(json_obj, "INFO")
         result['elapsed_api_time'] = f'{elapsed_time:.2f}'
+        result['gcs_file_cache'] = gcs_file_cache
         return create_api_response('Success',data=result)
     except Exception as e:
         job_status = "Failed"
@@ -571,6 +568,8 @@ async def generate():
         uri = url
         if " " in url:
             uri= url.replace(" ","+")
+        graph = create_graph_database_connection(uri, userName, decoded_password, database)
+        graphDb_data_Access = graphDBdataAccess(graph)
         while True:
             try:
                 if await request.is_disconnected():
@@ -579,8 +578,6 @@ async def generate():
                 # get the current status of document node
 
                 else:
-                    graph = create_graph_database_connection(uri, userName, decoded_password, database)
-                    graphDb_data_Access = graphDBdataAccess(graph)
                     result = graphDb_data_Access.get_current_status_document_node(file_name)
                     if len(result) > 0:
                         status = json.dumps({'fileName':file_name, 
@@ -968,22 +965,30 @@ async def fetch_chunktext(
        gc.collect()
 
 
-@app.post("/backend_connection_configuation")
-async def backend_connection_configuation():
+@app.post("/backend_connection_configuration")
+async def backend_connection_configuration():
     try:
-        graph = Neo4jGraph()
-        logging.info(f'login connection status of object: {graph}')
-        if graph is not None:
-            graph_connection = True
-            isURI = os.getenv('NEO4J_URI')
-            isUsername= os.getenv('NEO4J_USERNAME')
-            isDatabase= os.getenv('NEO4J_DATABASE')
-            isPassword= os.getenv('NEO4J_PASSWORD')
-            encoded_password = encode_password(isPassword)
-            graphDb_data_Access = graphDBdataAccess(graph)
-            gds_status = graphDb_data_Access.check_gds_version()
-            write_access = graphDb_data_Access.check_account_access(database=isDatabase)
-            return create_api_response('Success',message=f"Backend connection successful",data={'graph_connection':graph_connection,'uri':isURI,'user_name':isUsername,'database':isDatabase,'password':encoded_password,'gds_status':gds_status,'write_access':write_access})
+        uri = os.getenv('NEO4J_URI')
+        username= os.getenv('NEO4J_USERNAME')
+        database= os.getenv('NEO4J_DATABASE')
+        password= os.getenv('NEO4J_PASSWORD')
+        gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
+        if all([uri, username, database, password]):
+            print(f'uri:{uri}, usrName:{username}, database :{database}, password: {password}')
+            graph = Neo4jGraph()
+            logging.info(f'login connection status of object: {graph}')
+            if graph is not None:
+                graph_connection = True        
+                encoded_password = encode_password(password)
+                graphDb_data_Access = graphDBdataAccess(graph)
+                result = graphDb_data_Access.connection_check_and_get_vector_dimensions(database)
+                result["graph_connection"] = graph_connection
+                result["uri"] = uri
+                result["user_name"] = username
+                result["database"] = database
+                result["password"] = encoded_password
+                result['gcs_file_cache'] = gcs_file_cache
+                return create_api_response('Success',message=f"Backend connection successful",data=result)
         else:
             graph_connection = False
             return create_api_response('Success',message=f"Backend connection is not successful",data=graph_connection)
 
@@ -11,6 +11,12 @@ class sourceNode:
     gcsBucketFolder:str=None
     gcsProjectId:str=None
     awsAccessKeyId:str=None
+    chunkNodeCount:int=None
+    chunkRelCount:int=None
+    entityNodeCount:int=None
+    entityEntityRelCount:int=None
+    communityNodeCount:int=None
+    communityRelCount:int=None
     node_count:int=None
     relationship_count:str=None
     model:str=None
 
@@ -46,14 +46,24 @@ def create_source_node(self, obj_source_node:sourceNode):
                             d.relationshipCount = $r_count, d.model= $model, d.gcsBucket=$gcs_bucket, 
                             d.gcsBucketFolder= $gcs_bucket_folder, d.language= $language,d.gcsProjectId= $gcs_project_id,
                             d.is_cancelled=False, d.total_chunks=0, d.processed_chunk=0,
-                            d.access_token=$access_token""",
+                            d.access_token=$access_token,
+                            d.chunkNodeCount=$chunkNodeCount,d.chunkRelCount=$chunkRelCount,
+                            d.entityNodeCount=$entityNodeCount,d.entityEntityRelCount=$entityEntityRelCount,
+                            d.communityNodeCount=$communityNodeCount,d.communityRelCount=$communityRelCount""",
                             {"fn":obj_source_node.file_name, "fs":obj_source_node.file_size, "ft":obj_source_node.file_type, "st":job_status, 
                             "url":obj_source_node.url,
                             "awsacc_key_id":obj_source_node.awsAccessKeyId, "f_source":obj_source_node.file_source, "c_at":obj_source_node.created_at,
                             "u_at":obj_source_node.created_at, "pt":0, "e_message":'', "n_count":0, "r_count":0, "model":obj_source_node.model,
                             "gcs_bucket": obj_source_node.gcsBucket, "gcs_bucket_folder": obj_source_node.gcsBucketFolder, 
                             "language":obj_source_node.language, "gcs_project_id":obj_source_node.gcsProjectId,
-                            "access_token":obj_source_node.access_token})
+                            "access_token":obj_source_node.access_token,
+                            "chunkNodeCount":obj_source_node.chunkNodeCount,
+                            "chunkRelCount":obj_source_node.chunkRelCount,
+                            "entityNodeCount":obj_source_node.entityNodeCount,
+                            "entityEntityRelCount":obj_source_node.entityEntityRelCount,
+                            "communityNodeCount":obj_source_node.communityNodeCount,
+                            "communityRelCount":obj_source_node.communityRelCount
+                            })
         except Exception as e:
             error_message = str(e)
             logging.info(f"error_message = {error_message}")
@@ -108,7 +118,7 @@ def update_source_node(self, obj_source_node:sourceNode):
             self.graph.query(query,param)
         except Exception as e:
             error_message = str(e)
-            self.update_exception_db(self.file_name,error_message)
+            self.update_exception_db(self,self.file_name,error_message)
             raise Exception(error_message)
 
     def get_source_list(self):
@@ -463,51 +473,52 @@ def update_node_relationship_count(self,document_name):
             param = {"document_name": document_name}
             result = self.execute_query(NODEREL_COUNT_QUERY_WITHOUT_COMMUNITY, param)
         response = {}
-        for record in result:
-            filename = record["filename"]
-            chunkNodeCount = record["chunkNodeCount"]
-            chunkRelCount = record["chunkRelCount"]
-            entityNodeCount = record["entityNodeCount"]
-            entityEntityRelCount = record["entityEntityRelCount"]
-            if (not document_name) and (community_flag):
-                communityNodeCount = record["communityNodeCount"]
-                communityRelCount = record["communityRelCount"]
-            else:
-                communityNodeCount = 0
-                communityRelCount = 0
-            nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
-            relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
-            update_query = """
-            MATCH (d:Document {fileName: $filename})
-            SET d.chunkNodeCount = $chunkNodeCount,
-                d.chunkRelCount = $chunkRelCount,
-                d.entityNodeCount = $entityNodeCount,
-                d.entityEntityRelCount = $entityEntityRelCount,
-                d.communityNodeCount = $communityNodeCount,
-                d.communityRelCount = $communityRelCount,
-                d.nodeCount = $nodeCount,
-                d.relationshipCount = $relationshipCount
-            """
-            self.execute_query(update_query,{
-                "filename": filename,
-                "chunkNodeCount": chunkNodeCount,
-                "chunkRelCount": chunkRelCount,
-                "entityNodeCount": entityNodeCount,
-                "entityEntityRelCount": entityEntityRelCount,
-                "communityNodeCount": communityNodeCount,
-                "communityRelCount": communityRelCount,
-                "nodeCount" : nodeCount,
-                "relationshipCount" : relationshipCount
-                })
-            
-            response[filename] = {"chunkNodeCount": chunkNodeCount,
-                "chunkRelCount": chunkRelCount,
-                "entityNodeCount": entityNodeCount,
-                "entityEntityRelCount": entityEntityRelCount,
-                "communityNodeCount": communityNodeCount,
-                "communityRelCount": communityRelCount,
-                "nodeCount" : nodeCount,
-                "relationshipCount" : relationshipCount
-                }
-            
+        if result:
+            for record in result:
+                filename = record.get("filename",None)
+                chunkNodeCount = int(record.get("chunkNodeCount",0))
+                chunkRelCount = int(record.get("chunkRelCount",0))
+                entityNodeCount = int(record.get("entityNodeCount",0))
+                entityEntityRelCount = int(record.get("entityEntityRelCount",0))
+                if (not document_name) and (community_flag):
+                    communityNodeCount = int(record.get("communityNodeCount",0))
+                    communityRelCount = int(record.get("communityRelCount",0))
+                else:
+                    communityNodeCount = 0
+                    communityRelCount = 0
+                nodeCount = int(chunkNodeCount) + int(entityNodeCount) + int(communityNodeCount)
+                relationshipCount = int(chunkRelCount) + int(entityEntityRelCount) + int(communityRelCount)
+                update_query = """
+                MATCH (d:Document {fileName: $filename})
+                SET d.chunkNodeCount = $chunkNodeCount,
+                    d.chunkRelCount = $chunkRelCount,
+                    d.entityNodeCount = $entityNodeCount,
+                    d.entityEntityRelCount = $entityEntityRelCount,
+                    d.communityNodeCount = $communityNodeCount,
+                    d.communityRelCount = $communityRelCount,
+                    d.nodeCount = $nodeCount,
+                    d.relationshipCount = $relationshipCount
+                """
+                self.execute_query(update_query,{
+                    "filename": filename,
+                    "chunkNodeCount": chunkNodeCount,
+                    "chunkRelCount": chunkRelCount,
+                    "entityNodeCount": entityNodeCount,
+                    "entityEntityRelCount": entityEntityRelCount,
+                    "communityNodeCount": communityNodeCount,
+                    "communityRelCount": communityRelCount,
+                    "nodeCount" : nodeCount,
+                    "relationshipCount" : relationshipCount
+                    })
+                
+                response[filename] = {"chunkNodeCount": chunkNodeCount,
+                    "chunkRelCount": chunkRelCount,
+                    "entityNodeCount": entityNodeCount,
+                    "entityEntityRelCount": entityEntityRelCount,
+                    "communityNodeCount": communityNodeCount,
+                    "communityRelCount": communityRelCount,
+                    "nodeCount" : nodeCount,
+                    "relationshipCount" : relationshipCount
+                    }
+          
         return response
@@ -226,6 +226,7 @@ def get_graph_results(uri, username, password,database,document_names):
 
 def get_chunktext_results(uri, username, password, database, document_name, page_no):
    """Retrieves chunk text, position, and page number from graph data with pagination."""
+   driver = None
    try:
        logging.info("Starting chunk text query process")
        offset = 10
@@ -254,4 +255,5 @@ def get_chunktext_results(uri, username, password, database, document_name, page
        logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}")
        raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e
    finally:
-       driver.close()
+       if driver:
+           driver.close()