Skip to content

Commit 7de4eb1

Browse files
committed
replace nltk basic functionality
1 parent 5e7a1f3 commit 7de4eb1

1 file changed

Lines changed: 1 addition & 40 deletions

File tree

core/text/curation.py

Lines changed: 1 addition & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,41 +1,2 @@
1-
import threading
2-
3-
from core.logging_config import get_logger
4-
5-
logger = get_logger(__name__)
6-
7-
_nltk_lock = threading.Lock()
8-
_nltk_initialized = False
9-
10-
11-
def _ensure_nltk() -> bool:
12-
global _nltk_initialized
13-
if _nltk_initialized:
14-
return True
15-
16-
with _nltk_lock:
17-
if _nltk_initialized:
18-
return True
19-
20-
try:
21-
import nltk
22-
nltk.download('punkt', quiet=True)
23-
nltk.download('punkt_tab', quiet=True)
24-
_nltk_initialized = True
25-
return True
26-
except Exception as e:
27-
logger.warning(f"Failed to initialize NLTK: {e}")
28-
return False
29-
30-
31 1
def curate_text(text: str) -> str:
32-
if not _ensure_nltk():
33-
return text
34-
35-
try:
36-
from nltk.tokenize import sent_tokenize
37-
sentences = sent_tokenize(text)
38-
return ' '.join(sentences)
39-
except Exception as e:
40-
logger.warning(f"Text curation failed: {e}")
41-
return text
2+
return " ".join(text.split())

0 commit comments

Comments
 (0)