1919"""-------------------------------------entry class of Table Recognizer Module---------------------------------------"""
2020
2121
22- class TableRecognizer (object ):
22+ class TableRecognizer :
23+ """
24+ The class recognizes tables from document images. This class is internal to the system: it is called from document readers.
25+
26+ * The class recognizes tables with borders from the document image and returns the recognized tables as ``ScanTable`` objects
27+ (method :meth:`~dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_recognizer.TableRecognizer.recognize_tables_from_image`);
28+
29+
30+ * The class also analyzes recognized single-page tables and combines them into multi-page ones
31+ (method :meth:`~dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.table_recognizer.TableRecognizer.convert_to_multipages_tables`);
32+
33+ """
2334
2435 def __init__ (self , * , config : dict = None ) -> None :
2536 self .logger = config .get ("logger" , logging .getLogger ())
@@ -29,10 +40,20 @@ def __init__(self, *, config: dict = None) -> None:
2940 self .table_type = TableTypeAdditionalOptions ()
3041
def convert_to_multipages_tables(self, all_single_tables: List[ScanTable], lines_with_meta: List[LineWithMeta]) -> List[ScanTable]:
    """
    Combine single-page tables that belong together into multi-page tables.

    The tables recognized on all pages of the document are handed to
    ``self.multipage_tables_extractor``, whose result is returned unchanged.

    :param all_single_tables: tables recognized on the individual pages of the whole document
    :param lines_with_meta: document lines passed through to the multi-page extractor
    :return: the list of tables produced by the multi-page extractor
    """
    extractor = self.multipage_tables_extractor
    return extractor.extract_multipage_tables(
        single_tables=all_single_tables,
        lines_with_meta=lines_with_meta,
    )
3449
3550 def recognize_tables_from_image (self , image : np .ndarray , page_number : int , language : str , table_type : str = "" ) -> Tuple [np .ndarray , List [ScanTable ]]:
51+ """
52+ The function recognizes tables with borders from scanned document image.
53+ Here, the contour analysis method is used to determine the boundaries of table cells.
54+ Then, a set of heuristics is used to detect tables, and finally,
55+ the detected table cells are converted to a matrix form (merged cells are detected and separated).
56+ """
3657 self .logger .debug (f"Page { page_number } " )
3758 try :
3859 cleaned_image , scan_tables = self .__rec_tables_from_img (image , page_num = page_number , language = language , table_type = table_type )
0 commit comments