Skip to content

Commit 235f6da

Browse files
authored
Merge pull request #200 from Ma-Dan/xlsx
Read all sheets from excel file
2 parents 093eee5 + 2af06f2 commit 235f6da

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

qanything_kernel/core/local_file.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -84,11 +84,14 @@ def split_file_to_docs(self, ocr_engine: Callable, sentence_size=SENTENCE_SIZE,
8484
docs = loader.load_and_split(texts_splitter)
8585
elif self.file_path.lower().endswith(".xlsx"):
8686
# loader = UnstructuredExcelLoader(self.file_path, mode="elements")
87+
docs = []
8788
csv_file_path = self.file_path[:-5] + '.csv'
88-
xlsx = pd.read_excel(self.file_path, engine='openpyxl')
89-
xlsx.to_csv(csv_file_path, index=False)
90-
loader = CSVLoader(csv_file_path, csv_args={"delimiter": ",", "quotechar": '"'})
91-
docs = loader.load()
89+
xlsx = pd.read_excel(self.file_path, engine='openpyxl', sheet_name=None)
90+
for sheet in xlsx.keys():
91+
csv_file_path = self.file_path[:-5] + '_' + sheet + '.csv'
92+
xlsx[sheet].to_csv(csv_file_path, index=False)
93+
loader = CSVLoader(csv_file_path, csv_args={"delimiter": ",", "quotechar": '"'})
94+
docs += loader.load()
9295
elif self.file_path.lower().endswith(".pptx"):
9396
loader = UnstructuredPowerPointLoader(self.file_path, mode="elements")
9497
docs = loader.load()

0 commit comments

Comments (0)