-
Notifications
You must be signed in to change notification settings - Fork 47
Use tools-API in qualx predict #1838
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -16,7 +16,7 @@ | |||||||||||||||
|
|
||||||||||||||||
| from dataclasses import dataclass, field | ||||||||||||||||
| from functools import cached_property | ||||||||||||||||
| from typing import Optional | ||||||||||||||||
| from typing import Optional, List | ||||||||||||||||
|
|
||||||||||||||||
| import pandas as pd | ||||||||||||||||
|
|
||||||||||||||||
|
|
@@ -57,17 +57,37 @@ def uuid(self) -> str: | |||||||||||||||
| """ | ||||||||||||||||
| return self._app_id | ||||||||||||||||
|
|
||||||||||||||||
| def patch_into_df(self, df: pd.DataFrame) -> pd.DataFrame: | ||||||||||||||||
| def patch_into_df(self, | ||||||||||||||||
| df: pd.DataFrame, | ||||||||||||||||
| col_names: Optional[List[str]] = None) -> pd.DataFrame: | ||||||||||||||||
| """ | ||||||||||||||||
| Given a dataframe, this method will stitch the app_id and app-name to the dataframe. | ||||||||||||||||
| This can be useful in automatically adding the app-id/app-name to the data-frame | ||||||||||||||||
| :param df: the dataframe that we want to modify. | ||||||||||||||||
| :param col_names: optional list of column names that defines the app_id and app_name to the | ||||||||||||||||
| dataframe. It is assumed that the list comes in the order it is inserted in | ||||||||||||||||
| the column names. | ||||||||||||||||
| :return: the resulting dataframe from adding the columns. | ||||||||||||||||
| """ | ||||||||||||||||
| # TODO: We should consider add UUID as well, and use that for the joins instead. | ||||||||||||||||
| # append attempt_id to support multiple attempts | ||||||||||||||||
| col_values = [self.app_id] | ||||||||||||||||
| if col_names is None: | ||||||||||||||||
| # append attemptId to support multi-attempts | ||||||||||||||||
| col_names = ['appId'] | ||||||||||||||||
|
||||||||||||||||
| col_names = ['appId'] | |
| col_names = ['appId'] | |
| # Ensure col_values matches col_names in length | |
| if len(col_values) == 1 and len(col_names) > 1: | |
| col_values = col_values * len(col_names) | |
| elif len(col_values) != len(col_names): | |
| raise ValueError("Length of col_values must be 1 or match length of col_names") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -92,6 +92,11 @@ def rep_reader(self) -> 'ToolReportReaderT': | |
| raise ValueError(f'No reader found for table: {self._tbl}') | ||
| return reader | ||
|
|
||
@property
def tbl(self) -> str:
    """Read-only accessor for the table identifier."""
    return self._tbl
|
|
||
| @property | ||
| def is_per_app_tbl(self) -> bool: | ||
| if self._tbl is None: | ||
|
|
@@ -206,6 +211,135 @@ def _load_single_app(self) -> LoadDFResult: | |
| ) | ||
|
|
||
|
|
||
@dataclass
class CSVCombiner(object):
    """A class that combines multiple per-app CSV reports into a single report."""
    rep_builder: CSVReport
    # Optional callback invoked for each app whose CSV result failed to load.
    _failed_app_processor: Optional[Callable[[str, LoadDFResult], None]] = field(default=None, init=False)
    # Callback applied to each successfully loaded per-app dataframe before concatenation.
    _success_app_processor: Optional[Callable[[ToolResultHandlerT, str, pd.DataFrame, dict], pd.DataFrame]] = (
        field(default=None, init=False))
    # Extra arguments (e.g. 'col_names', 'use_cols') that tune the combination behavior.
    _combine_args: Optional[dict] = field(default=None, init=False)

    @property
    def result_handler(self) -> ToolResultHandlerT:
        """Get the result handler associated with this combiner."""
        return self.rep_builder.handler

    @staticmethod
    def default_success_app_processor(result_handler: ToolResultHandlerT,
                                      app_id: str,
                                      df: pd.DataFrame,
                                      combine_args: dict) -> pd.DataFrame:
        """
        Default processor for successful applications.
        Patches the app identification columns into the dataframe when 'col_names'
        is provided through the combine arguments; otherwise returns the dataframe unchanged.
        :param result_handler: handler used to look up the per-app entry.
        :param app_id: identifier of the application being processed.
        :param df: the per-app dataframe to (optionally) patch.
        :param combine_args: optional arguments; key 'col_names' selects the columns to stitch.
        :return: the processed dataframe.
        :raises ValueError: if no app entry exists for the given app_id.
        """
        col_names = None
        app_entry = result_handler.app_handlers.get(app_id)
        if not app_entry:
            raise ValueError(f'App entry not found for ID: {app_id}')
        if combine_args:
            # check if the col_names are provided to stitch the app_ids
            col_names = combine_args.get('col_names', None)
        if col_names:
            # patch the app_uuid if the columns are defined.
            return app_entry.patch_into_df(df, col_names=col_names)
        return df

    def _evaluate_args(self) -> None:
        """Evaluate the arguments to ensure they are set correctly."""
        if self._success_app_processor is None:
            # set the default processor for successful applications
            self._success_app_processor = self.default_success_app_processor
        # TODO: we should fail if the combiner is built for AppIds but columns are not defined.

    def _create_empty_df(self) -> pd.DataFrame:
        """
        Creates an empty DataFrame with the columns defined in the report builder.
        :return: an empty dataframe following the table schema (plus any 'use_cols' columns).
        """
        empty_df = self.result_handler.create_empty_df(self.rep_builder.tbl)
        if self._combine_args and 'use_cols' in self._combine_args:
            # make sure that we insert the columns to the empty dataframe
            injected_cols = pd.DataFrame(columns=self._combine_args['use_cols'])
            return pd.concat([injected_cols, empty_df], axis=1)
        return empty_df

    ################################
    # Setters/Getters for processors
    ################################

    def process_failed(self,
                       processor: Callable[[str, LoadDFResult], None]) -> 'CSVCombiner':
        """Set the processor for failed applications. Returns self for chaining."""
        self._failed_app_processor = processor
        return self

    def process_success(self,
                        cb_fn: Callable[[ToolResultHandlerT, str, pd.DataFrame, dict], pd.DataFrame]) -> 'CSVCombiner':
        """Set the processor for successful applications. Returns self for chaining."""
        self._success_app_processor = cb_fn
        return self

    def combine_args(self, args: dict) -> 'CSVCombiner':
        """Set the arguments for combining the reports. Returns self for chaining."""
        self._combine_args = args
        return self

    def on_apps(self) -> 'CSVCombiner':
        """Specify that the combiner injects the App UUID into the individual results before the concatenation."""
        self.process_success(self.default_success_app_processor)
        return self

    #########################
    # Public Interfaces
    #########################

    def build(self) -> LoadDFResult:
        """
        Build the combined CSV report.
        Loads the per-app results, runs the configured failure/success processors on each
        entry, and concatenates the surviving dataframes into a single result.
        :return: a LoadDFResult wrapping the combined dataframe, or the captured error.
        """
        # process the arguments to ensure they are set correctly
        self._evaluate_args()

        load_error = None
        final_df = None
        success = False
        try:
            per_app_res = self.rep_builder.load()
            # this is a dictionary and we should loop on it one by one to combine it
            combined_dfs = []
            for app_id, app_res in per_app_res.items():
                # we need to patch the app_id to the dataframe.
                # NOTE: guard against a None dataframe before checking emptiness; a failed
                # load may leave load_error unset while data is None, which would otherwise
                # raise an AttributeError on `.empty`.
                if app_res.load_error or app_res.data is None or app_res.data.empty:
                    # process entry with failed results or skip them if no handler is defined.
                    if self._failed_app_processor:
                        self._failed_app_processor(app_id, app_res)
                    else:
                        # default behavior is to skip the app
                        continue
                else:
                    # process entry with successful results
                    app_df = self._success_app_processor(self.result_handler,
                                                         app_id,
                                                         app_res.data,
                                                         self._combine_args)
                    # Q: Should we ignore or skip the empty dataframes?
                    combined_dfs.append(app_df)
            if combined_dfs:
                # only concatenate if we have any dataframes to combine
                final_df = pd.concat(combined_dfs, ignore_index=True)
            else:
                # create an empty DataFrame if no data was collected. uses the table schema.
                final_df = self._create_empty_df()
            success = True
        except Exception as e:  # pylint: disable=broad-except
            # handle any exceptions that occur during the combination phase
            load_error = e
        return LoadDFResult(
            f_path='combination of multiple path for table: ' + self.rep_builder.tbl,
            data=final_df,
            success=success,
            fallen_back=False,
            load_error=load_error)
|
|
||
|
|
||
| @dataclass | ||
| class JPropsReport(APIReport[JPropsResult]): | ||
| """A report that loads data in JSON properties format.""" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This feels weird that
`col_values` is a fixed/default list of one value, while `col_names` can be a user-provided list with multiple values. Also, I'm not sure how I would patch `app_name` per the docstring. And what would happen if I passed in `col_names=['one', 'two', 'three']`? Seems like I'd get three columns with the same (appId) value? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point!
I believe that the dev intending to stitch DFs using Apps (default combiner), has to use a column-name that can be mapped to the object. Otherwise, he has to use a custom combiner for example:
[`appId`, `app_id`, `App ID`, `App Id`, ...] -> all those variations can be reduced to the `app._app_id` field. Otherwise, there won't be a way to tell which col-name will get the value of `app._app_id`. [`app_name`, `appName`, `App Name`, ...] -> similarly, all can be mapped to the `app._app_name` field.