File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1- import threading
2-
3- from core .logging_config import get_logger
4-
logger = get_logger(__name__)

# Serializes the one-time NLTK setup in _ensure_nltk() across threads.
_nltk_lock = threading.Lock()
# Set to True by _ensure_nltk() once its setup attempt finishes without raising.
_nltk_initialized = False
9-
10-
def _ensure_nltk() -> bool:
    """Request the NLTK tokenizer data once per process.

    Imports ``nltk`` lazily and asks it to download the ``punkt`` and
    ``punkt_tab`` resources. Once an attempt has finished without raising,
    later calls return immediately.

    Returns:
        True if setup has completed, now or on an earlier call. False if
        importing ``nltk`` or a download call raised; the failure is logged
        as a warning and the flag stays unset, so the next call tries again.
    """
    global _nltk_initialized

    # Cheap unlocked check so the common case never touches the lock.
    if _nltk_initialized:
        return True

    with _nltk_lock:
        # Another thread may have finished setup while this one was waiting.
        if _nltk_initialized:
            return True

        try:
            # Imported here so a missing nltk install is handled by the
            # except clause below instead of failing at module import.
            import nltk

            # NOTE(review): the return value of nltk.download() is ignored,
            # so only a raised exception counts as failure here -- confirm
            # that is the intended behaviour.
            for resource in ("punkt", "punkt_tab"):
                nltk.download(resource, quiet=True)
        except Exception as e:
            logger.warning(f"Failed to initialize NLTK: {e}")
            return False

        _nltk_initialized = True
        return True
29-
30-
def curate_text(text: str) -> str:
    """Return *text* with all whitespace runs collapsed to single spaces.

    Splits on any whitespace (spaces, tabs, newlines) and re-joins the
    pieces with one space, which also drops leading and trailing
    whitespace. An empty or whitespace-only input yields ``""``.

    Args:
        text: The string to normalize.

    Returns:
        The whitespace-normalized string.
    """
    # str.split() with no separator discards empty pieces, so consecutive
    # whitespace characters never produce doubled spaces in the result.
    return " ".join(text.split())
You can’t perform that action at this time.
0 commit comments