diff --git a/README.md b/README.md index 79008efdf..fadf6eee9 100644 --- a/README.md +++ b/README.md @@ -45,13 +45,13 @@ DIFFBOT_API_KEY="your-diffbot-key" if you only want OpenAI: ```env -VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" +VITE_LLM_MODELS_PROD="diffbot,openai-gpt-3.5,openai-gpt-4o" OPENAI_API_KEY="your-openai-key" ``` if you only want Diffbot: ```env -VITE_LLM_MODELS="diffbot" +VITE_LLM_MODELS_PROD="diffbot" DIFFBOT_API_KEY="your-diffbot-key" ``` @@ -149,7 +149,6 @@ Allow unauthenticated request : Yes | VITE_BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API | | VITE_BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization | | VITE_REACT_APP_SOURCES | Mandatory | local,youtube,wiki,s3 | List of input sources that will be available | -| VITE_LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A | VITE_CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for Q&A | VITE_ENV | Mandatory | DEV or PROD | Environment variable for the app | | VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing | diff --git a/backend/score.py b/backend/score.py index f7b8c4082..03fb75de9 100644 --- a/backend/score.py +++ b/backend/score.py @@ -12,14 +12,14 @@ from langchain_google_vertexai import ChatVertexAI from src.api_response import create_api_response from src.graphDB_dataAccess import graphDBdataAccess -from src.graph_query import get_graph_results +from src.graph_query import get_graph_results,get_chunktext_results from src.chunkid_entities import get_entities_from_chunkids from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding from sse_starlette.sse import EventSourceResponse from src.communities import 
create_communities from src.neighbours import get_neighbour_nodes import json -from typing import List, Mapping +from typing import List, Mapping, Union from starlette.middleware.sessions import SessionMiddleware import google_auth_oauthlib.flow from google.oauth2.credentials import Credentials @@ -33,8 +33,10 @@ from Secweb.ContentSecurityPolicy import ContentSecurityPolicy from Secweb.XContentTypeOptions import XContentTypeOptions from Secweb.XFrameOptions import XFrame - +from fastapi.middleware.gzip import GZipMiddleware from src.ragas_eval import * +from starlette.types import ASGIApp, Message, Receive, Scope, Send +import gzip logger = CustomLogger() CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks") @@ -49,14 +51,42 @@ def healthy(): def sick(): return False - +class CustomGZipMiddleware: + def __init__( + self, + app: ASGIApp, + paths: List[str], + minimum_size: int = 1000, + compresslevel: int = 5 + ): + self.app = app + self.paths = paths + self.minimum_size = minimum_size + self.compresslevel = compresslevel + + async def __call__(self, scope: Scope, receive: Receive, send: Send): + if scope["type"] != "http": + return await self.app(scope, receive, send) + + path = scope["path"] + should_compress = any(path.startswith(gzip_path) for gzip_path in self.paths) + + if not should_compress: + return await self.app(scope, receive, send) + + gzip_middleware = GZipMiddleware( + app=self.app, + minimum_size=self.minimum_size, + compresslevel=self.compresslevel + ) + await gzip_middleware(scope, receive, send) app = FastAPI() # SecWeb(app=app, Option={'referrer': False, 'xframe': False}) -# app.add_middleware(HSTS, Option={'max-age': 4}) -# app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) -# app.add_middleware(XContentTypeOptions) -# app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) - 
+app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False) +app.add_middleware(XContentTypeOptions) +app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'}) +#app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5) +app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"]) app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -818,5 +848,57 @@ async def calculate_metric(question: str = Form(), finally: gc.collect() +@app.post("/fetch_chunktext") +async def fetch_chunktext( + uri: str = Form(), + database: str = Form(), + userName: str = Form(), + password: str = Form(), + document_name: str = Form(), + page_no: int = Form(1) +): + try: + payload_json_obj = { + 'api_name': 'fetch_chunktext', + 'db_url': uri, + 'userName': userName, + 'database': database, + 'document_name': document_name, + 'page_no': page_no, + 'logging_time': formatted_time(datetime.now(timezone.utc)) + } + logger.log_struct(payload_json_obj, "INFO") + start = time.time() + result = await asyncio.to_thread( + get_chunktext_results, + uri=uri, + username=userName, + password=password, + database=database, + document_name=document_name, + page_no=page_no + ) + end = time.time() + elapsed_time = end - start + json_obj = { + 'api_name': 'fetch_chunktext', + 'db_url': uri, + 'document_name': document_name, + 'page_no': page_no, + 'logging_time': formatted_time(datetime.now(timezone.utc)), + 'elapsed_api_time': f'{elapsed_time:.2f}' + } + logger.log_struct(json_obj, "INFO") + return create_api_response('Success', data=result, message=f"Total elapsed API time {elapsed_time:.2f}") + except Exception as e: + 
job_status = "Failed" + message = "Unable to get chunk text response" + error_message = str(e) + logging.exception(f'Exception in fetch_chunktext: {error_message}') + return create_api_response(job_status, message=message, error=error_message) + finally: + gc.collect() + + if __name__ == "__main__": uvicorn.run(app) diff --git a/backend/src/communities.py b/backend/src/communities.py index d1130150c..a38b39696 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -107,24 +107,38 @@ STORE_COMMUNITY_SUMMARIES = """ UNWIND $data AS row MERGE (c:__Community__ {id:row.community}) -SET c.summary = row.summary +SET c.summary = row.summary, + c.title = row.title """ + COMMUNITY_SYSTEM_TEMPLATE = "Given input triples, generate the information summary. No pre-amble." -COMMUNITY_TEMPLATE = """Based on the provided nodes and relationships that belong to the same graph community, -generate a natural language summary of the provided information: -{community_info} -Summary:""" +COMMUNITY_TEMPLATE = """ +Based on the provided nodes and relationships that belong to the same graph community, +generate following output in exact format +title: A concise title, no more than 4 words, +summary: A natural language summary of the information +{community_info} +Example output: +title: Example Title, +summary: This is an example summary that describes the key information of this community. +""" PARENT_COMMUNITY_SYSTEM_TEMPLATE = "Given an input list of community summaries, generate a summary of the information" PARENT_COMMUNITY_TEMPLATE = """Based on the provided list of community summaries that belong to the same graph community, -generate a natural language summary of the information.Include all the necessary information as possible +generate following output in exact format +title: A concise title, no more than 4 words, +summary: A natural language summary of the information. Include all the necessary information as much as possible. 
+ {community_info} -Summary:""" +Example output: +title: Example Title, +summary: This is an example summary that describes the key information of this community. +""" GET_COMMUNITY_DETAILS = """ @@ -277,8 +291,17 @@ def process_community_info(community, chain, is_parent=False): combined_text = " ".join(f"Summary {i+1}: {summary}" for i, summary in enumerate(community.get("texts", []))) else: combined_text = prepare_string(community) - summary = chain.invoke({'community_info': combined_text}) - return {"community": community['communityId'], "summary": summary} + summary_response = chain.invoke({'community_info': combined_text}) + lines = summary_response.splitlines() + title = "Untitled Community" + summary = "" + for line in lines: + if line.lower().startswith("title"): + title = line.split(":", 1)[-1].strip() + elif line.lower().startswith("summary"): + summary = line.split(":", 1)[-1].strip() + logging.info(f"Community Title : {title}") + return {"community": community['communityId'], "title":title, "summary": summary} except Exception as e: logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}") return None @@ -291,7 +314,7 @@ def create_community_summaries(gds, model): summaries = [] with ThreadPoolExecutor() as executor: futures = [executor.submit(process_community_info, community, community_chain) for community in community_info_list.to_dict(orient="records")] - + for future in as_completed(futures): result = future.result() if result: @@ -482,9 +505,3 @@ def create_communities(uri, username, password, database,model=COMMUNITY_CREATIO logging.warning("Failed to write communities. 
Constraint was not applied.") except Exception as e: logging.error(f"Failed to create communities: {e}") - - - - - - diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 58834eb92..82391d1e4 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -354,7 +354,7 @@ def get_duplicate_nodes_list(self): score_value = float(os.environ.get('DUPLICATE_SCORE_VALUE')) text_distance = int(os.environ.get('DUPLICATE_TEXT_DISTANCE')) query_duplicate_nodes = """ - MATCH (n:!Chunk&!Session&!Document&!`__Community__`) with n + MATCH (n:!Chunk&!Session&!Document&!`__Community__`&!`__Entity__`) with n WHERE n.embedding is not null and n.id is not null // and size(toString(n.id)) > 3 WITH n ORDER BY count {{ (n)--() }} DESC, size(toString(n.id)) DESC // updated WITH collect(n) as nodes diff --git a/backend/src/graph_query.py b/backend/src/graph_query.py index fb7333b48..86739ba6c 100644 --- a/backend/src/graph_query.py +++ b/backend/src/graph_query.py @@ -3,7 +3,7 @@ from neo4j import GraphDatabase import os import json -from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY +from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY # from neo4j.debug import watch # watch("neo4j") @@ -226,3 +226,34 @@ def get_graph_results(uri, username, password,database,document_names): driver.close() +def get_chunktext_results(uri, username, password, database, document_name, page_no): + """Retrieves chunk text, position, and page number from graph data with pagination.""" + try: + logging.info("Starting chunk text query process") + offset = 10 + skip = (page_no - 1) * offset + limit = offset + driver = GraphDatabase.driver(uri, auth=(username, password)) + with driver.session(database=database) as session: + total_chunks_result = session.run(COUNT_CHUNKS_QUERY, file_name=document_name) + total_chunks = total_chunks_result.single()["total_chunks"] + total_pages = (total_chunks + 
offset - 1) // offset # Calculate total pages + records = session.run(CHUNK_TEXT_QUERY, file_name=document_name, skip=skip, limit=limit) + pageitems = [ + { + "text": record["chunk_text"], + "position": record["chunk_position"], + "pagenumber": record["page_number"] + } + for record in records + ] + logging.info(f"Query process completed with {len(pageitems)} chunks retrieved") + return { + "pageitems": pageitems, + "total_pages": total_pages + } + except Exception as e: + logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}") + raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e + finally: + driver.close() \ No newline at end of file diff --git a/backend/src/neighbours.py b/backend/src/neighbours.py index 08022ecc6..431d5b4bd 100644 --- a/backend/src/neighbours.py +++ b/backend/src/neighbours.py @@ -20,7 +20,8 @@ labels: [coalesce(apoc.coll.removeAll(labels(node), ['__Entity__'])[0], "*")], element_id: elementId(node), properties: { - id: CASE WHEN node.id IS NOT NULL THEN node.id ELSE node.fileName END + id: CASE WHEN node.id IS NOT NULL THEN node.id ELSE node.fileName END, + title: CASE WHEN node.title IS NOT NULL THEN node.title ELSE " " END } } ] AS nodes, diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index b58fd3a67..084b5d1ba 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -161,6 +161,19 @@ ] AS entities """ +COUNT_CHUNKS_QUERY = """ +MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk) +RETURN count(c) AS total_chunks +""" + +CHUNK_TEXT_QUERY = """ +MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk) +RETURN c.text AS chunk_text, c.position AS chunk_position, c.page_number AS page_number +ORDER BY c.position +SKIP $skip +LIMIT $limit +""" + ## CHAT SETUP CHAT_MAX_TOKENS = 1000 CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5 @@ -717,4 +730,4 @@ value "2023-03-15"." "## 5. 
Strict Compliance\n" "Adhere to the rules strictly. Non-compliance will result in termination." - """ \ No newline at end of file + """ diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 548e9706f..ede8077f7 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -124,6 +124,27 @@ def test_graph_website(model_name): print("Fail: ", e) return weburl_result +def test_graph_website(model_name): + """Test graph creation from a Website page.""" + #graph, model, source_url, source_type + source_url = 'https://www.amazon.com/' + source_type = 'web-url' + create_source_node_graph_web_url(graph, model_name, source_url, source_type) + + weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '') + logging.info("WebUrl test done") + print(weburl_result) + + try: + assert weburl_result['status'] == 'Completed' + assert weburl_result['nodeCount'] > 0 + assert weburl_result['relationshipCount'] > 0 + print("Success") + except AssertionError as e: + print("Fail: ", e) + return weburl_result + + def test_graph_from_youtube_video(model_name): """Test graph creation from a YouTube video.""" source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA' diff --git a/docker-compose.yml b/docker-compose.yml index ea6d2c050..8a0fdc4b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,7 +53,6 @@ services: args: - VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-http://localhost:8000} - VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,wiki,s3} - - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-} - VITE_BLOOM_URL=${VITE_BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true} - VITE_TIME_PER_PAGE=${VITE_TIME_PER_PAGE-50} @@ -62,6 +61,7 @@ services: - VITE_ENV=${VITE_ENV-DEV} - VITE_CHAT_MODES=${VITE_CHAT_MODES-} - 
VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} + - VITE_LLM_MODELS=${VITE_LLM_MODELS-} - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} volumes: - ./frontend:/app diff --git a/example.env b/example.env index 6b542daf1..5d3a598c9 100644 --- a/example.env +++ b/example.env @@ -24,7 +24,6 @@ ENTITY_EMBEDDING=True VITE_BACKEND_API_URL="http://localhost:8000" VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true" VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web" -VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3" VITE_ENV="DEV" VITE_TIME_PER_PAGE=50 VITE_CHUNK_SIZE=5242880 diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 3053e1ba9..311294f4a 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -20,7 +20,6 @@ RUN yarn install COPY . ./ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \ VITE_REACT_APP_SOURCES=$VITE_REACT_APP_SOURCES \ - VITE_LLM_MODELS=$VITE_LLM_MODELS \ VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID \ VITE_BLOOM_URL=$VITE_BLOOM_URL \ VITE_CHUNK_SIZE=$VITE_CHUNK_SIZE \ diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index 8568e0b5d..b58ee2c4d 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -261,4 +261,4 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { ); }; -export default ChunkInfo; +export default ChunkInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index bc8e5e8d3..3c6899c5b 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -75,4 +75,4 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = ); }; -export default 
CommunitiesInfo; +export default CommunitiesInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/EntitiesInfo.tsx b/frontend/src/components/ChatBot/EntitiesInfo.tsx index 80e4fdafa..22eca8a57 100644 --- a/frontend/src/components/ChatBot/EntitiesInfo.tsx +++ b/frontend/src/components/ChatBot/EntitiesInfo.tsx @@ -146,4 +146,4 @@ const EntitiesInfo: FC = ({ loading, mode, graphonly_entities, in ); }; -export default EntitiesInfo; +export default EntitiesInfo; \ No newline at end of file diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index a934913d9..ddfe92a6e 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -106,7 +106,7 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { )} {!link?.startsWith('s3://') && - !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'youtube.com']) && ( + !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'www.youtube.com']) && (
diff --git a/frontend/src/components/ChatBot/chatInfo.ts b/frontend/src/components/ChatBot/chatInfo.ts index c7e990ae7..1a229dc70 100644 --- a/frontend/src/components/ChatBot/chatInfo.ts +++ b/frontend/src/components/ChatBot/chatInfo.ts @@ -37,4 +37,4 @@ export const handleGraphNodeClick = async ( setLoadingGraphView(false); } } -}; +}; \ No newline at end of file diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 28caddc1c..8e8516666 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -11,6 +11,7 @@ import { CustomFile, OptionType, UserCredentials, + chunkdata, connectionState, } from '../types'; import deleteAPI from '../services/DeleteFiles'; @@ -44,6 +45,8 @@ import retry from '../services/retry'; import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts'; import { useMessageContext } from '../context/UserMessages'; import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast'; +import { getChunkText } from '../services/getChunkText'; +import ChunkPopUp from './Popups/ChunkPopUp'; const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal')); const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog')); @@ -70,6 +73,7 @@ const Content: React.FC = ({ }); const [openGraphView, setOpenGraphView] = useState(false); const [inspectedName, setInspectedName] = useState(''); + const [documentName, setDocumentName] = useState(''); const { setUserCredentials, userCredentials, @@ -85,6 +89,12 @@ const Content: React.FC = ({ const [retryFile, setRetryFile] = useState(''); const [retryLoading, setRetryLoading] = useState(false); const [showRetryPopup, toggleRetryPopup] = useReducer((state) => !state, false); + const [showChunkPopup, toggleChunkPopup] = useReducer((state) => !state, false); + const [chunksLoading, toggleChunksLoading] = useReducer((state) => !state, 
false); + const [currentPage, setCurrentPage] = useState(0); + const [totalPageCount, setTotalPageCount] = useState(null); + const [textChunks, setTextChunks] = useState([]); + const [alertStateForRetry, setAlertStateForRetry] = useState({ showAlert: false, alertType: 'neutral', @@ -107,11 +117,12 @@ const Content: React.FC = ({ setProcessedCount, setchatModes, } = useFileContext(); - const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'>('tableView'); + const [viewPoint, setViewPoint] = useState<'tableView' | 'showGraphView' | 'chatInfoView'|'neighborView'>('tableView'); const [showDeletePopUp, setshowDeletePopUp] = useState(false); const [deleteLoading, setdeleteLoading] = useState(false); const [searchParams] = useSearchParams(); + const { updateStatusForLargeFiles } = useServerSideEvent( (inMinutes, time, fileName) => { showNormalToast(`${fileName} will take approx ${time} ${inMinutes ? 'Min' : 'Sec'}`); @@ -122,7 +133,12 @@ const Content: React.FC = ({ } ); const childRef = useRef(null); - + const incrementPage = () => { + setCurrentPage((prev) => prev + 1); + }; + const decrementPage = () => { + setCurrentPage((prev) => prev - 1); + }; useEffect(() => { if (!init && !searchParams.has('connectURL')) { let session = localStorage.getItem('neo4j.connection'); @@ -149,7 +165,13 @@ const Content: React.FC = ({ setOpenConnection((prev) => ({ ...prev, openPopUp: true })); } }, []); - + useEffect(() => { + if (currentPage >= 1) { + (async () => { + await getChunks(documentName, currentPage); + })(); + } + }, [currentPage, documentName]); useEffect(() => { setFilesData((prevfiles) => { return prevfiles.map((curfile) => { @@ -251,7 +273,15 @@ const Content: React.FC = ({ setModel(selectedOption?.value); } }; - + const getChunks = async (name: string, pageNo: number) => { + toggleChunksLoading(); + const response = await getChunkText(userCredentials as UserCredentials, name, pageNo); + setTextChunks(response.data.data.pageitems); + if 
(!totalPageCount) { + setTotalPageCount(response.data.data.total_pages); + } + toggleChunksLoading(); + }; const extractData = async (uid: string, isselectedRows = false, filesTobeProcess: CustomFile[]) => { if (!isselectedRows) { const fileItem = filesData.find((f) => f.id == uid); @@ -497,7 +527,7 @@ const Content: React.FC = ({ } }; - function processWaitingFilesOnRefresh() { + const processWaitingFilesOnRefresh = () => { let data = []; const processingFilesCount = filesData.filter((f) => f.status === 'Processing').length; @@ -517,7 +547,7 @@ const Content: React.FC = ({ .filter((f) => f.status === 'New' || f.status == 'Reprocess'); addFilesToQueue(selectedNewFiles as CustomFile[]); } - } + }; const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; @@ -771,6 +801,18 @@ const Content: React.FC = ({ view='contentView' > )} + {showChunkPopup && ( + toggleChunkPopup()} + showChunkPopup={showChunkPopup} + chunks={textChunks} + incrementPage={incrementPage} + decrementPage={decrementPage} + currentPage={currentPage} + totalPageCount={totalPageCount} + > + )} {showEnhancementDialog && ( = ({ setRetryFile(id); toggleRetryPopup(); }} + onChunkView={async (name) => { + setDocumentName(name); + if (name != documentName) { + toggleChunkPopup(); + if (totalPageCount) { + setTotalPageCount(null); + } + setCurrentPage(1); + // await getChunks(name, 1); + } + }} ref={childRef} handleGenerateGraph={processWaitingFilesOnRefresh} > @@ -940,4 +993,4 @@ const Content: React.FC = ({ ); }; -export default Content; +export default Content; \ No newline at end of file diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index ec367b100..42b82572a 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -38,8 +38,9 @@ import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate, import { useCredentials } from '../context/UserCredentials'; import { ArrowPathIconSolid, 
- ClipboardDocumentIconOutline, + ClipboardDocumentIconSolid, MagnifyingGlassCircleIconSolid, + DocumentTextIconSolid, } from '@neo4j-ndl/react/icons'; import CustomProgressBar from './UI/CustomProgressBar'; import subscribe from '../services/PollingAPI'; @@ -56,7 +57,7 @@ import { ThemeWrapperContext } from '../context/ThemeWrapper'; let onlyfortheFirstRender = true; const FileTable = forwardRef((props, ref) => { - const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props; + const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry, onChunkView } = props; const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } = useFileContext(); const { userCredentials, isReadOnlyUser } = useCredentials(); @@ -527,10 +528,25 @@ const FileTable = forwardRef((props, ref) => { handleCopy(copied); }} > - + + + { + onChunkView(info?.row?.original?.name as string); + }} + clean + placement='left' + label='chunktextaction' + text='View Chunks' + size='large' + disabled={info.getValue() === 'Uploading'} + > + ), + size: 300, + minSize: 180, header: () => Actions, footer: (info) => info.column.id, }), @@ -946,4 +962,4 @@ const FileTable = forwardRef((props, ref) => { ); }); -export default FileTable; +export default FileTable; \ No newline at end of file diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index e50f215d1..ac35a93ae 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -123,9 +123,7 @@ const GraphViewModal: React.FunctionComponent = ({ try { const result = await fetchData(); if (result && result.data.data.nodes.length > 0) { - const neoNodes = result.data.data.nodes - .map((f: Node) => f) - .filter((node: ExtendedNode) => node.labels.length === 1); + const neoNodes = result.data.data.nodes; const nodeIds = new Set(neoNodes.map((node: any) => 
node.element_id)); const neoRels = result.data.data.relationships .map((f: Relationship) => f) @@ -447,4 +445,4 @@ const GraphViewModal: React.FunctionComponent = ({ ); }; -export default GraphViewModal; +export default GraphViewModal; \ No newline at end of file diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx index 48e94c023..bf3082fc8 100644 --- a/frontend/src/components/Layout/PageLayout.tsx +++ b/frontend/src/components/Layout/PageLayout.tsx @@ -31,7 +31,6 @@ export default function PageLayoutNew({ const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false); const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false); const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false); - const toggleLeftDrawer = () => { if (largedesktops) { setIsLeftExpanded(!isLeftExpanded); diff --git a/frontend/src/components/Popups/ChunkPopUp/index.tsx b/frontend/src/components/Popups/ChunkPopUp/index.tsx new file mode 100644 index 000000000..7966ddd6e --- /dev/null +++ b/frontend/src/components/Popups/ChunkPopUp/index.tsx @@ -0,0 +1,72 @@ +import { Dialog, Typography, Flex, IconButton } from '@neo4j-ndl/react'; +import { ArrowLeftIconOutline, ArrowRightIconOutline } from '@neo4j-ndl/react/icons'; +import { chunkdata } from '../../../types'; +import Loader from '../../../utils/Loader'; +import { useMemo } from 'react'; + +const ChunkPopUp = ({ + showChunkPopup, + chunks, + onClose, + chunksLoading, + incrementPage, + decrementPage, + currentPage, + totalPageCount, +}: { + showChunkPopup: boolean; + chunks: chunkdata[]; + onClose: () => void; + chunksLoading: boolean; + incrementPage: () => void; + decrementPage: () => void; + currentPage: number | null; + totalPageCount: number | null; +}) => { + const sortedChunksData = useMemo(() => { + return chunks.sort((a, b) => a.position - b.position); + }, [chunks]); + return ( + + Text Chunks + + {chunksLoading ? ( + + ) : ( +
    + {sortedChunksData.map((c, idx) => ( +
  1. + + + Position : + {c.position} + + {c.pagenumber ? ( + + Page No :{' '} + {c.pagenumber} + + ) : null} + {c.text} + +
  2. + ))} +
+ )} +
+ {totalPageCount != null && totalPageCount > 1 && ( + + + + + + + + + + + )} +
+ ); +}; +export default ChunkPopUp; diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx index ad7e7b48e..76df732a4 100644 --- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx +++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx @@ -460,4 +460,4 @@ export default function ConnectionModal({ ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx index 7140bb6b2..330f5f657 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx @@ -352,4 +352,4 @@ export default function DeduplicationTab() { )} ); -} +} \ No newline at end of file diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx index bcc2597f1..6d4daae10 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/DeleteTabForOrphanNodes/index.tsx @@ -315,4 +315,4 @@ export default function DeletePopUpForOrphanNodes({ )} ); -} +} \ No newline at end of file diff --git a/frontend/src/components/QuickStarter.tsx b/frontend/src/components/QuickStarter.tsx index 1a4e169d2..4dc4b4a0a 100644 --- a/frontend/src/components/QuickStarter.tsx +++ b/frontend/src/components/QuickStarter.tsx @@ -19,14 +19,14 @@ const QuickStarter: React.FunctionComponent = () => { - -
- - + +
+ + diff --git a/frontend/src/services/getChunkText.ts b/frontend/src/services/getChunkText.ts new file mode 100644 index 000000000..f9825dc34 --- /dev/null +++ b/frontend/src/services/getChunkText.ts @@ -0,0 +1,19 @@ +import { UserCredentials, chunksData } from '../types'; +import api from '../API/Index'; + +export const getChunkText = async (userCredentials: UserCredentials, documentName: string, page_no: number) => { + const formData = new FormData(); + formData.append('uri', userCredentials?.uri ?? ''); + formData.append('database', userCredentials?.database ?? ''); + formData.append('userName', userCredentials?.userName ?? ''); + formData.append('password', userCredentials?.password ?? ''); + formData.append('document_name', documentName); + formData.append('page_no', page_no.toString()); + try { + const response = await api.post(`/fetch_chunktext`, formData); + return response; + } catch (error) { + console.log(error); + throw error; + } +}; diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 02dbc0a2b..6ebf26b45 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -154,6 +154,7 @@ export interface FileTableProps { connectionStatus: boolean; setConnectionStatus: Dispatch>; onInspect: (id: string) => void; + onChunkView: (name: string) => void; handleGenerateGraph: () => void; onRetry: (id: string) => void; } @@ -379,7 +380,13 @@ export interface commonserverresponse { error?: string; message?: string | orphanTotalNodes; file_name?: string; - data?: labelsAndTypes | labelsAndTypes[] | uploadData | orphanNodeProps[] | dupNodes[]; + data?: + | labelsAndTypes + | labelsAndTypes[] + | uploadData + | orphanNodeProps[] + | dupNodes[] + | { pageitems: chunkdata[]; total_pages: number }; } export interface dupNodeProps { id: string; @@ -397,6 +404,11 @@ export interface selectedDuplicateNodes { firstElementId: string; similarElementIds: string[]; } +export interface chunkdata { + text: string; + position: number; + pagenumber: null | number; 
+} export interface ScehmaFromText extends Partial<commonserverresponse> { data: labelsAndTypes; } @@ -407,6 +419,9 @@ export interface ServerData extends Partial<commonserverresponse> { export interface duplicateNodesData extends Partial<commonserverresponse> { data: dupNodes[]; } +export interface chunksData extends Partial<commonserverresponse> { + data: { pageitems: chunkdata[]; total_pages: number }; +} export interface OrphanNodeResponse extends Partial<commonserverresponse> { data: orphanNodeProps[]; } @@ -559,6 +574,10 @@ export interface MessagesContextProviderProps { children: ReactNode; } +export interface GraphContextProviderProps { + children: ReactNode; +} + export interface Chunk { id: string; position: number; @@ -715,6 +734,11 @@ export interface MessageContextType { setClearHistoryData: Dispatch>; } +export interface GraphContextType { + loadingGraph: boolean; + setLoadingGraph: Dispatch<SetStateAction<boolean>>; +} + export interface DatabaseStatusProps { isConnected: boolean; isGdsActive: boolean; diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 8d9289fd8..06d03d28b 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -12,26 +12,26 @@ export const llms = process.env?.VITE_LLM_MODELS?.trim() != '' ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) : [ - 'diffbot', - 'openai_gpt_3.5', - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'gemini_1.5_pro', - 'gemini_1.5_flash', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_3_5_sonnet', - 'fireworks_llama_v3p2_90b', - 'bedrock_claude_3_5_sonnet', - ]; + 'diffbot', + 'openai_gpt_3.5', + 'openai_gpt_4o', + 'openai_gpt_4o_mini', + 'gemini_1.5_pro', + 'gemini_1.5_flash', + 'azure_ai_gpt_35', + 'azure_ai_gpt_4o', + 'ollama_llama3', + 'groq_llama3_70b', + 'anthropic_claude_3_5_sonnet', + 'fireworks_llama_v3p2_90b', + 'bedrock_claude_3_5_sonnet', + ]; export const defaultLLM = llms?.includes('openai_gpt_4o') ? 'openai_gpt_4o' : llms?.includes('gemini_1.5_pro') - ? 'gemini_1.5_pro' - : 'diffbot'; + ?
'gemini_1.5_pro' + : 'diffbot'; export const supportedLLmsForRagas = [ 'openai_gpt_3.5', 'openai_gpt_4', @@ -76,40 +76,40 @@ export const chatModeReadableLables: Record = { export const chatModes = process.env?.VITE_CHAT_MODES?.trim() != '' ? process.env.VITE_CHAT_MODES?.split(',').map((mode) => ({ - mode: mode.trim(), - description: getDescriptionForChatMode(mode.trim()), - })) + mode: mode.trim(), + description: getDescriptionForChatMode(mode.trim()), + })) : [ - { - mode: chatModeLables.vector, - description: 'Performs semantic similarity search on text chunks using vector indexing.', - }, - { - mode: chatModeLables.graph, - description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', - }, - { - mode: chatModeLables['graph+vector'], - description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', - }, - { - mode: chatModeLables.fulltext, - description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', - }, - { - mode: chatModeLables['graph+vector+fulltext'], - description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', - }, - { - mode: chatModeLables['entity search+vector'], - description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', - }, - { - mode: chatModeLables['global search+vector+fulltext'], - description: - 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', - }, - ]; + { + mode: chatModeLables.vector, + description: 'Performs semantic similarity search on text chunks using vector indexing.', + }, + { + mode: chatModeLables.graph, + description: 'Translates text to Cypher queries for precise data retrieval from a graph database.', + }, + { + mode: chatModeLables['graph+vector'], + description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.', + }, + { + mode: 
chatModeLables.fulltext, + description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.', + }, + { + mode: chatModeLables['graph+vector+fulltext'], + description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.', + }, + { + mode: chatModeLables['entity search+vector'], + description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.', + }, + { + mode: chatModeLables['global search+vector+fulltext'], + description: + 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.', + }, + ]; export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024; export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50; @@ -291,6 +291,7 @@ export const graphLabels = { docChunk: 'Document & Chunk', community: 'Communities', noNodesRels: 'No Nodes and No relationships', + neighborView: 'neighborView' }; export const RESULT_STEP_SIZE = 25; diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 6945c17ca..f407f21e0 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -130,6 +130,9 @@ export const getNodeCaption = (node: any) => { if (node.properties.fileName) { return node.properties.fileName; } + if(node.labels[0] === '__Community__'){ + return node.properties.title; + } return node.properties.id; }; @@ -156,11 +159,11 @@ export function extractPdfFileName(url: string): string { export const processGraphData = (neoNodes: ExtendedNode[], neoRels: ExtendedRelationship[]) => { const schemeVal: Scheme = {}; let iterator = 0; - const labels: string[] = neoNodes.map((f: any) => f.labels); + const labels: string[] = neoNodes.flatMap((f: any) => f.labels); for (let index = 0; index < labels.length; index++) { const label = labels[index]; if (schemeVal[label] == undefined) { - schemeVal[label] = 
calcWordColor(label[0]); + schemeVal[label] = calcWordColor(label); iterator += 1; } }