Skip to content

Commit 92a6656

Browse files
authored
Merge pull request #2211 from HKUDS/add-preprocessed-status
Add PREPROCESSED (multimodal_processed) status for multimodal document processing
2 parents 64900b5 + a81c122 commit 92a6656

File tree

11 files changed

+114
-21
lines changed

11 files changed

+114
-21
lines changed

lightrag/api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__api_version__ = "0239"
1+
__api_version__ = "0240"

lightrag/api/routers/document_routes.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ class Config:
406406
"id": "doc_123456",
407407
"content_summary": "Research paper on machine learning",
408408
"content_length": 15240,
409-
"status": "PROCESSED",
409+
"status": "processed",
410410
"created_at": "2025-03-31T12:34:56",
411411
"updated_at": "2025-03-31T12:35:30",
412412
"track_id": "upload_20250729_170612_abc123",
@@ -439,7 +439,7 @@ class Config:
439439
"id": "doc_123",
440440
"content_summary": "Pending document",
441441
"content_length": 5000,
442-
"status": "PENDING",
442+
"status": "pending",
443443
"created_at": "2025-03-31T10:00:00",
444444
"updated_at": "2025-03-31T10:00:00",
445445
"track_id": "upload_20250331_100000_abc123",
@@ -449,12 +449,27 @@ class Config:
449449
"file_path": "pending_doc.pdf",
450450
}
451451
],
452+
"PREPROCESSED": [
453+
{
454+
"id": "doc_789",
455+
"content_summary": "Document pending final indexing",
456+
"content_length": 7200,
457+
"status": "multimodal_processed",
458+
"created_at": "2025-03-31T09:30:00",
459+
"updated_at": "2025-03-31T09:35:00",
460+
"track_id": "upload_20250331_093000_xyz789",
461+
"chunks_count": 10,
462+
"error": None,
463+
"metadata": None,
464+
"file_path": "preprocessed_doc.pdf",
465+
}
466+
],
452467
"PROCESSED": [
453468
{
454469
"id": "doc_456",
455470
"content_summary": "Processed document",
456471
"content_length": 8000,
457-
"status": "PROCESSED",
472+
"status": "processed",
458473
"created_at": "2025-03-31T09:00:00",
459474
"updated_at": "2025-03-31T09:05:00",
460475
"track_id": "insert_20250331_090000_def456",
@@ -626,6 +641,7 @@ class Config:
626641
"status_counts": {
627642
"PENDING": 10,
628643
"PROCESSING": 5,
644+
"PREPROCESSED": 5,
629645
"PROCESSED": 130,
630646
"FAILED": 5,
631647
},
@@ -648,6 +664,7 @@ class Config:
648664
"status_counts": {
649665
"PENDING": 10,
650666
"PROCESSING": 5,
667+
"PREPROCESSED": 5,
651668
"PROCESSED": 130,
652669
"FAILED": 5,
653670
}
@@ -2210,7 +2227,7 @@ async def documents() -> DocsStatusesResponse:
22102227
To prevent excessive resource consumption, a maximum of 1,000 records is returned.
22112228
22122229
This endpoint retrieves the current status of all documents, grouped by their
2213-
processing status (PENDING, PROCESSING, PROCESSED, FAILED). The results are
2230+
processing status (PENDING, PROCESSING, PREPROCESSED, PROCESSED, FAILED). The results are
22142231
limited to 1000 total documents with fair distribution across all statuses.
22152232
22162233
Returns:
@@ -2226,6 +2243,7 @@ async def documents() -> DocsStatusesResponse:
22262243
statuses = (
22272244
DocStatus.PENDING,
22282245
DocStatus.PROCESSING,
2246+
DocStatus.PREPROCESSED,
22292247
DocStatus.PROCESSED,
22302248
DocStatus.FAILED,
22312249
)

lightrag/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,7 @@ class DocStatus(str, Enum):
712712

713713
PENDING = "pending"
714714
PROCESSING = "processing"
715+
PREPROCESSED = "multimodal_processed"
715716
PROCESSED = "processed"
716717
FAILED = "failed"
717718

lightrag/lightrag.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2617,7 +2617,12 @@ async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult:
26172617
)
26182618

26192619
# Check document status and log an INFO-level notice when deleting a document that is not fully processed
2620-
doc_status = doc_status_data.get("status")
2620+
raw_status = doc_status_data.get("status")
2621+
try:
2622+
doc_status = DocStatus(raw_status)
2623+
except ValueError:
2624+
doc_status = raw_status
2625+
26212626
if doc_status != DocStatus.PROCESSED:
26222627
if doc_status == DocStatus.PENDING:
26232628
warning_msg = (
@@ -2627,12 +2632,23 @@ async def adelete_by_doc_id(self, doc_id: str) -> DeletionResult:
26272632
warning_msg = (
26282633
f"Deleting {doc_id} {file_path}(previous status: PROCESSING)"
26292634
)
2635+
elif doc_status == DocStatus.PREPROCESSED:
2636+
warning_msg = (
2637+
f"Deleting {doc_id} {file_path}(previous status: PREPROCESSED)"
2638+
)
26302639
elif doc_status == DocStatus.FAILED:
26312640
warning_msg = (
26322641
f"Deleting {doc_id} {file_path}(previous status: FAILED)"
26332642
)
26342643
else:
2635-
warning_msg = f"Deleting {doc_id} {file_path}(previous status: {doc_status.value})"
2644+
status_text = (
2645+
doc_status.value
2646+
if isinstance(doc_status, DocStatus)
2647+
else str(doc_status)
2648+
)
2649+
warning_msg = (
2650+
f"Deleting {doc_id} {file_path}(previous status: {status_text})"
2651+
)
26362652
logger.info(warning_msg)
26372653
# Update pipeline status for monitoring
26382654
async with pipeline_status_lock:

lightrag_webui/src/api/lightrag.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ export type DeleteDocResponse = {
167167
doc_id: string
168168
}
169169

170-
export type DocStatus = 'pending' | 'processing' | 'processed' | 'failed'
170+
export type DocStatus = 'pending' | 'processing' | 'multimodal_processed' | 'processed' | 'failed'
171171

172172
export type DocStatusResponse = {
173173
id: string

lightrag_webui/src/features/DocumentManager.tsx

Lines changed: 66 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,21 @@ import PipelineStatusDialog from '@/components/documents/PipelineStatusDialog'
3838

3939
type StatusFilter = DocStatus | 'all';
4040

41+
// Utility functions defined outside the component so they are not re-created on every render and do not need to be listed in hook dependency arrays
42+
const getCountValue = (counts: Record<string, number>, ...keys: string[]): number => {
43+
for (const key of keys) {
44+
const value = counts[key]
45+
if (typeof value === 'number') {
46+
return value
47+
}
48+
}
49+
return 0
50+
}
51+
52+
const hasActiveDocumentsStatus = (counts: Record<string, number>): boolean =>
53+
getCountValue(counts, 'PROCESSING', 'processing') > 0 ||
54+
getCountValue(counts, 'PENDING', 'pending') > 0 ||
55+
getCountValue(counts, 'PREPROCESSED', 'preprocessed', 'multimodal_processed') > 0
4156

4257
const getDisplayFileName = (doc: DocStatusResponse, maxLength: number = 20): string => {
4358
// Check if file_path exists and is a non-empty string
@@ -242,6 +257,7 @@ export default function DocumentManager() {
242257
const [pageByStatus, setPageByStatus] = useState<Record<StatusFilter, number>>({
243258
all: 1,
244259
processed: 1,
260+
multimodal_processed: 1,
245261
processing: 1,
246262
pending: 1,
247263
failed: 1,
@@ -308,6 +324,7 @@ export default function DocumentManager() {
308324
setPageByStatus({
309325
all: 1,
310326
processed: 1,
327+
'multimodal_processed': 1,
311328
processing: 1,
312329
pending: 1,
313330
failed: 1,
@@ -452,9 +469,19 @@ export default function DocumentManager() {
452469
return counts;
453470
}, [docs]);
454471

472+
const processedCount = getCountValue(statusCounts, 'PROCESSED', 'processed') || documentCounts.processed || 0;
473+
const preprocessedCount =
474+
getCountValue(statusCounts, 'PREPROCESSED', 'preprocessed', 'multimodal_processed') ||
475+
documentCounts.multimodal_processed ||
476+
0;
477+
const processingCount = getCountValue(statusCounts, 'PROCESSING', 'processing') || documentCounts.processing || 0;
478+
const pendingCount = getCountValue(statusCounts, 'PENDING', 'pending') || documentCounts.pending || 0;
479+
const failedCount = getCountValue(statusCounts, 'FAILED', 'failed') || documentCounts.failed || 0;
480+
455481
// Store previous status counts
456482
const prevStatusCounts = useRef({
457483
processed: 0,
484+
multimodal_processed: 0,
458485
processing: 0,
459486
pending: 0,
460487
failed: 0
@@ -545,6 +572,7 @@ export default function DocumentManager() {
545572
const legacyDocs: DocsStatusesResponse = {
546573
statuses: {
547574
processed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processed'),
575+
multimodal_processed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'multimodal_processed'),
548576
processing: response.documents.filter((doc: DocStatusResponse) => doc.status === 'processing'),
549577
pending: response.documents.filter((doc: DocStatusResponse) => doc.status === 'pending'),
550578
failed: response.documents.filter((doc: DocStatusResponse) => doc.status === 'failed')
@@ -827,7 +855,7 @@ export default function DocumentManager() {
827855
setTimeout(() => {
828856
if (isMountedRef.current && currentTab === 'documents' && health) {
829857
// Restore intelligent polling interval based on document status
830-
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
858+
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
831859
const normalInterval = hasActiveDocuments ? 5000 : 30000;
832860
startPollingInterval(normalInterval);
833861
}
@@ -863,7 +891,7 @@ export default function DocumentManager() {
863891
setTimeout(() => {
864892
if (isMountedRef.current && currentTab === 'documents' && health) {
865893
// Restore intelligent polling interval based on document status
866-
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
894+
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
867895
const normalInterval = hasActiveDocuments ? 5000 : 30000;
868896
startPollingInterval(normalInterval);
869897
}
@@ -887,6 +915,7 @@ export default function DocumentManager() {
887915
setPageByStatus({
888916
all: 1,
889917
processed: 1,
918+
multimodal_processed: 1,
890919
processing: 1,
891920
pending: 1,
892921
failed: 1,
@@ -927,6 +956,7 @@ export default function DocumentManager() {
927956
const legacyDocs: DocsStatusesResponse = {
928957
statuses: {
929958
processed: response.documents.filter(doc => doc.status === 'processed'),
959+
multimodal_processed: response.documents.filter(doc => doc.status === 'multimodal_processed'),
930960
processing: response.documents.filter(doc => doc.status === 'processing'),
931961
pending: response.documents.filter(doc => doc.status === 'pending'),
932962
failed: response.documents.filter(doc => doc.status === 'failed')
@@ -961,14 +991,21 @@ export default function DocumentManager() {
961991
handleIntelligentRefresh();
962992

963993
// Reset polling timer after intelligent refresh
964-
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
994+
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
965995
const pollingInterval = hasActiveDocuments ? 5000 : 30000;
966996
startPollingInterval(pollingInterval);
967997
}
968998
}
969999
// Update the previous state
9701000
prevPipelineBusyRef.current = pipelineBusy;
971-
}, [pipelineBusy, currentTab, health, handleIntelligentRefresh, statusCounts.processing, statusCounts.pending, startPollingInterval]);
1001+
}, [
1002+
pipelineBusy,
1003+
currentTab,
1004+
health,
1005+
handleIntelligentRefresh,
1006+
statusCounts,
1007+
startPollingInterval
1008+
]);
9721009

9731010
// Set up intelligent polling with dynamic interval based on document status
9741011
useEffect(() => {
@@ -978,7 +1015,7 @@ export default function DocumentManager() {
9781015
}
9791016

9801017
// Determine polling interval based on document status
981-
const hasActiveDocuments = (statusCounts.processing || 0) > 0 || (statusCounts.pending || 0) > 0;
1018+
const hasActiveDocuments = hasActiveDocumentsStatus(statusCounts);
9821019
const pollingInterval = hasActiveDocuments ? 5000 : 30000; // 5s if active, 30s if idle
9831020

9841021
startPollingInterval(pollingInterval);
@@ -995,6 +1032,7 @@ export default function DocumentManager() {
9951032
// Get new status counts
9961033
const newStatusCounts = {
9971034
processed: docs?.statuses?.processed?.length || 0,
1035+
multimodal_processed: docs?.statuses?.multimodal_processed?.length || 0,
9981036
processing: docs?.statuses?.processing?.length || 0,
9991037
pending: docs?.statuses?.pending?.length || 0,
10001038
failed: docs?.statuses?.failed?.length || 0
@@ -1224,47 +1262,59 @@ export default function DocumentManager() {
12241262
onClick={() => handleStatusFilterChange('processed')}
12251263
disabled={isRefreshing}
12261264
className={cn(
1227-
(statusCounts.PROCESSED || statusCounts.processed || documentCounts.processed) > 0 ? 'text-green-600' : 'text-gray-500',
1265+
processedCount > 0 ? 'text-green-600' : 'text-gray-500',
12281266
statusFilter === 'processed' && 'bg-green-100 dark:bg-green-900/30 font-medium border border-green-400 dark:border-green-600 shadow-sm'
12291267
)}
12301268
>
1231-
{t('documentPanel.documentManager.status.completed')} ({statusCounts.PROCESSED || statusCounts.processed || 0})
1269+
{t('documentPanel.documentManager.status.completed')} ({processedCount})
1270+
</Button>
1271+
<Button
1272+
size="sm"
1273+
variant={statusFilter === 'multimodal_processed' ? 'secondary' : 'outline'}
1274+
onClick={() => handleStatusFilterChange('multimodal_processed')}
1275+
disabled={isRefreshing}
1276+
className={cn(
1277+
preprocessedCount > 0 ? 'text-purple-600' : 'text-gray-500',
1278+
statusFilter === 'multimodal_processed' && 'bg-purple-100 dark:bg-purple-900/30 font-medium border border-purple-400 dark:border-purple-600 shadow-sm'
1279+
)}
1280+
>
1281+
{t('documentPanel.documentManager.status.preprocessed')} ({preprocessedCount})
12321282
</Button>
12331283
<Button
12341284
size="sm"
12351285
variant={statusFilter === 'processing' ? 'secondary' : 'outline'}
12361286
onClick={() => handleStatusFilterChange('processing')}
12371287
disabled={isRefreshing}
12381288
className={cn(
1239-
(statusCounts.PROCESSING || statusCounts.processing || documentCounts.processing) > 0 ? 'text-blue-600' : 'text-gray-500',
1289+
processingCount > 0 ? 'text-blue-600' : 'text-gray-500',
12401290
statusFilter === 'processing' && 'bg-blue-100 dark:bg-blue-900/30 font-medium border border-blue-400 dark:border-blue-600 shadow-sm'
12411291
)}
12421292
>
1243-
{t('documentPanel.documentManager.status.processing')} ({statusCounts.PROCESSING || statusCounts.processing || 0})
1293+
{t('documentPanel.documentManager.status.processing')} ({processingCount})
12441294
</Button>
12451295
<Button
12461296
size="sm"
12471297
variant={statusFilter === 'pending' ? 'secondary' : 'outline'}
12481298
onClick={() => handleStatusFilterChange('pending')}
12491299
disabled={isRefreshing}
12501300
className={cn(
1251-
(statusCounts.PENDING || statusCounts.pending || documentCounts.pending) > 0 ? 'text-yellow-600' : 'text-gray-500',
1301+
pendingCount > 0 ? 'text-yellow-600' : 'text-gray-500',
12521302
statusFilter === 'pending' && 'bg-yellow-100 dark:bg-yellow-900/30 font-medium border border-yellow-400 dark:border-yellow-600 shadow-sm'
12531303
)}
12541304
>
1255-
{t('documentPanel.documentManager.status.pending')} ({statusCounts.PENDING || statusCounts.pending || 0})
1305+
{t('documentPanel.documentManager.status.pending')} ({pendingCount})
12561306
</Button>
12571307
<Button
12581308
size="sm"
12591309
variant={statusFilter === 'failed' ? 'secondary' : 'outline'}
12601310
onClick={() => handleStatusFilterChange('failed')}
12611311
disabled={isRefreshing}
12621312
className={cn(
1263-
(statusCounts.FAILED || statusCounts.failed || documentCounts.failed) > 0 ? 'text-red-600' : 'text-gray-500',
1313+
failedCount > 0 ? 'text-red-600' : 'text-gray-500',
12641314
statusFilter === 'failed' && 'bg-red-100 dark:bg-red-900/30 font-medium border border-red-400 dark:border-red-600 shadow-sm'
12651315
)}
12661316
>
1267-
{t('documentPanel.documentManager.status.failed')} ({statusCounts.FAILED || statusCounts.failed || 0})
1317+
{t('documentPanel.documentManager.status.failed')} ({failedCount})
12681318
</Button>
12691319
</div>
12701320
<Button
@@ -1410,6 +1460,9 @@ export default function DocumentManager() {
14101460
{doc.status === 'processed' && (
14111461
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
14121462
)}
1463+
{doc.status === 'multimodal_processed' && (
1464+
<span className="text-purple-600">{t('documentPanel.documentManager.status.preprocessed')}</span>
1465+
)}
14131466
{doc.status === 'processing' && (
14141467
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
14151468
)}

lightrag_webui/src/locales/ar.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
"status": {
140140
"all": "الكل",
141141
"completed": "مكتمل",
142+
"preprocessed": "مُعالج مسبقًا",
142143
"processing": "قيد المعالجة",
143144
"pending": "معلق",
144145
"failed": "فشل"

lightrag_webui/src/locales/en.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
"status": {
140140
"all": "All",
141141
"completed": "Completed",
142+
"preprocessed": "Preprocessed",
142143
"processing": "Processing",
143144
"pending": "Pending",
144145
"failed": "Failed"

lightrag_webui/src/locales/fr.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
"status": {
140140
"all": "Tous",
141141
"completed": "Terminé",
142+
"preprocessed": "Prétraité",
142143
"processing": "En traitement",
143144
"pending": "En attente",
144145
"failed": "Échoué"

lightrag_webui/src/locales/zh.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
"status": {
140140
"all": "全部",
141141
"completed": "已完成",
142+
"preprocessed": "预处理",
142143
"processing": "处理中",
143144
"pending": "等待中",
144145
"failed": "失败"

0 commit comments

Comments
 (0)