From cb2844618d63c403f111b9c5c2ffc107c8bebb7c Mon Sep 17 00:00:00 2001 From: casey-crawford-cfa Date: Thu, 8 May 2025 09:26:27 -0500 Subject: [PATCH 1/4] determine what datasources or workbooks are associated with a schedule --- tableauserverclient/models/__init__.py | 2 + tableauserverclient/models/extract_item.py | 86 +++++++++++++++++++ .../server/endpoint/schedules_endpoint.py | 18 +++- .../schedule_get_extract_refresh_tasks.xml | 15 ++++ test/test_schedule.py | 22 +++++ 5 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 tableauserverclient/models/extract_item.py create mode 100644 test/assets/schedule_get_extract_refresh_tasks.xml diff --git a/tableauserverclient/models/__init__.py b/tableauserverclient/models/__init__.py index e4131b720..a97a4a6c3 100644 --- a/tableauserverclient/models/__init__.py +++ b/tableauserverclient/models/__init__.py @@ -48,6 +48,7 @@ from tableauserverclient.models.virtual_connection_item import VirtualConnectionItem from tableauserverclient.models.webhook_item import WebhookItem from tableauserverclient.models.workbook_item import WorkbookItem +from tableauserverclient.models.extract_item import ExtractItem __all__ = [ "ColumnItem", @@ -103,4 +104,5 @@ "LinkedTaskItem", "LinkedTaskStepItem", "LinkedTaskFlowRunItem", + "ExtractItem", ] diff --git a/tableauserverclient/models/extract_item.py b/tableauserverclient/models/extract_item.py new file mode 100644 index 000000000..181349da1 --- /dev/null +++ b/tableauserverclient/models/extract_item.py @@ -0,0 +1,86 @@ +from typing import Optional, List +from defusedxml.ElementTree import fromstring +import xml.etree.ElementTree as ET + + +class ExtractItem: + """ + An extract refresh task item. + + Attributes + ---------- + id : str + The ID of the extract refresh task + priority : int + The priority of the task + type : str + The type of extract refresh (incremental or full) + workbook_id : str, optional + The ID of the workbook if this is a workbook extract + datasource_id : str, optional + The ID of the datasource if this is a datasource extract + """ + + def __init__( + self, + priority: int, + refresh_type: str, + workbook_id: Optional[str] = None, + datasource_id: Optional[str] = None + ): + self._id: Optional[str] = None + self._priority = priority + self._type = refresh_type + self._workbook_id = workbook_id + self._datasource_id = datasource_id + + @property + def id(self) -> Optional[str]: + return self._id + + @property + def priority(self) -> int: + return self._priority + + @property + def type(self) -> str: + return self._type + + @property + def workbook_id(self) -> Optional[str]: + return self._workbook_id + + @property + def datasource_id(self) -> Optional[str]: + return self._datasource_id + + @classmethod + def from_response(cls, resp: str, ns: dict) -> List["ExtractItem"]: + """Create ExtractItem objects from XML response.""" + parsed_response = fromstring(resp) + return cls.from_xml_element(parsed_response, ns) + + @classmethod + def from_xml_element(cls, parsed_response: ET.Element, ns: dict) -> List["ExtractItem"]: + """Create ExtractItem objects from XML element.""" + all_extract_items = [] + all_extract_xml = parsed_response.findall(".//t:extract", namespaces=ns) + + for extract_xml in all_extract_xml: + extract_id = extract_xml.get("id", None) + priority = int(extract_xml.get("priority", 0)) + refresh_type = extract_xml.get("type", None) + + # Check for workbook or datasource + workbook_elem = extract_xml.find(".//t:workbook", namespaces=ns) + datasource_elem = extract_xml.find(".//t:datasource", namespaces=ns) + + workbook_id = workbook_elem.get("id", None) if workbook_elem is not None else None + datasource_id = datasource_elem.get("id", None) if datasource_elem is not None else None + + extract_item = cls(priority, refresh_type, workbook_id, datasource_id) + extract_item._id = extract_id + + all_extract_items.append(extract_item) + + return all_extract_items \ No newline at end of file diff --git a/tableauserverclient/server/endpoint/schedules_endpoint.py b/tableauserverclient/server/endpoint/schedules_endpoint.py index eec4536f9..b2a8043f8 100644 --- a/tableauserverclient/server/endpoint/schedules_endpoint.py +++ b/tableauserverclient/server/endpoint/schedules_endpoint.py @@ -7,7 +7,7 @@ from .endpoint import Endpoint, api, parameter_added_in from .exceptions import MissingRequiredFieldError from tableauserverclient.server import RequestFactory -from tableauserverclient.models import PaginationItem, ScheduleItem, TaskItem +from tableauserverclient.models import PaginationItem, ScheduleItem, TaskItem, ExtractItem from tableauserverclient.helpers.logging import logger @@ -149,3 +149,19 @@ def _add_to( ) else: return OK + + @api(version="2.3") + def get_extract_refresh_tasks(self, schedule_id: str, req_options: Optional["RequestOptions"] = None) -> tuple[list["ExtractItem"], "PaginationItem"]: + """Get all extract refresh tasks for the specified schedule.""" + if not schedule_id: + error = "Schedule ID undefined" + raise ValueError(error) + + logger.info(f"Querying extract refresh tasks for schedule (ID: {schedule_id})") + url = f"{self.siteurl}/{schedule_id}/extracts" + server_response = self.get_request(url, req_options) + + pagination_item = PaginationItem.from_response(server_response.content, self.parent_srv.namespace) + extract_items = ExtractItem.from_response(server_response.content, self.parent_srv.namespace) + + return extract_items, pagination_item diff --git a/test/assets/schedule_get_extract_refresh_tasks.xml b/test/assets/schedule_get_extract_refresh_tasks.xml new file mode 100644 index 000000000..84ffbd5b1 --- /dev/null +++ b/test/assets/schedule_get_extract_refresh_tasks.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/test/test_schedule.py b/test/test_schedule.py index b072522a4..c08a34dfa 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -25,6 +25,7 @@ ADD_WORKBOOK_TO_SCHEDULE_WITH_WARNINGS = os.path.join(TEST_ASSET_DIR, "schedule_add_workbook_with_warnings.xml") ADD_DATASOURCE_TO_SCHEDULE = os.path.join(TEST_ASSET_DIR, "schedule_add_datasource.xml") ADD_FLOW_TO_SCHEDULE = os.path.join(TEST_ASSET_DIR, "schedule_add_flow.xml") +GET_EXTRACT_TASKS_XML = os.path.join(TEST_ASSET_DIR, "schedule_get_extract_refresh_tasks.xml") WORKBOOK_GET_BY_ID_XML = os.path.join(TEST_ASSET_DIR, "workbook_get_by_id.xml") DATASOURCE_GET_BY_ID_XML = os.path.join(TEST_ASSET_DIR, "datasource_get_by_id.xml") @@ -405,3 +406,24 @@ def test_add_flow(self) -> None: flow = self.server.flows.get_by_id("bar") result = self.server.schedules.add_to_schedule("foo", flow=flow) self.assertEqual(0, len(result), "Added properly") + + def test_get_extract_refresh_tasks(self) -> None: + self.server.version = "2.3" + + + with open(GET_EXTRACT_TASKS_XML, "rb") as f: + response_xml = f.read().decode("utf-8") + with requests_mock.mock() as m: + schedule_id = "c9cff7f9-309c-4361-99ff-d4ba8c9f5467" + # baseurl = f"{self.baseurl}/schedules/{schedule_id}/extracts" + baseurl = f"{self.server.baseurl}/sites/{self.server.site_id}/schedules/{schedule_id}/extracts" + # Fix the URL construction to match the endpoint pattern + # url = f"{self.baseurl}/{schedule_id}/extracts" + m.get(baseurl, text=response_xml) + + extracts = self.server.schedules.get_extract_refresh_tasks(schedule_id) + + self.assertIsNotNone(extracts) + self.assertIsInstance(extracts[0], list) + self.assertEqual(2, len(extracts[0])) + self.assertEqual("task1", extracts[0][0].id) From 0763983c8b869127383761665fb9fd1c75ba3d56 Mon Sep 17 00:00:00 2001 From: casey-crawford-cfa Date: Thu, 8 May 2025 09:28:16 -0500 Subject: [PATCH 2/4] black formatting --- samples/extracts.py | 1 - tableauserverclient/models/extract_item.py | 22 ++++++++----------- .../server/endpoint/groups_endpoint.py | 12 ++++++---- .../server/endpoint/schedules_endpoint.py | 10 +++++---- tableauserverclient/server/pager.py | 6 +++-- tableauserverclient/server/query.py | 6 +++-- tableauserverclient/server/request_factory.py | 6 ++--- test/request_factory/test_task_requests.py | 1 - test/test_schedule.py | 3 +-- 9 files changed, 35 insertions(+), 32 deletions(-) diff --git a/samples/extracts.py b/samples/extracts.py index 8e7a66aac..d9289452a 100644 --- a/samples/extracts.py +++ b/samples/extracts.py @@ -42,7 +42,6 @@ def main(): server.add_http_options({"verify": False}) server.use_server_version() with server.auth.sign_in(tableau_auth): - wb = None ds = None if args.workbook: diff --git a/tableauserverclient/models/extract_item.py b/tableauserverclient/models/extract_item.py index 181349da1..417378418 100644 --- a/tableauserverclient/models/extract_item.py +++ b/tableauserverclient/models/extract_item.py @@ -6,7 +6,7 @@ class ExtractItem: """ An extract refresh task item. - + Attributes ---------- id : str @@ -22,11 +22,7 @@ class ExtractItem: """ def __init__( - self, - priority: int, - refresh_type: str, - workbook_id: Optional[str] = None, - datasource_id: Optional[str] = None + self, priority: int, refresh_type: str, workbook_id: Optional[str] = None, datasource_id: Optional[str] = None ): self._id: Optional[str] = None self._priority = priority @@ -65,22 +61,22 @@ def from_xml_element(cls, parsed_response: ET.Element, ns: dict) -> List["Extrac """Create ExtractItem objects from XML element.""" all_extract_items = [] all_extract_xml = parsed_response.findall(".//t:extract", namespaces=ns) - + for extract_xml in all_extract_xml: extract_id = extract_xml.get("id", None) priority = int(extract_xml.get("priority", 0)) refresh_type = extract_xml.get("type", None) - + # Check for workbook or datasource workbook_elem = extract_xml.find(".//t:workbook", namespaces=ns) datasource_elem = extract_xml.find(".//t:datasource", namespaces=ns) - + workbook_id = workbook_elem.get("id", None) if workbook_elem is not None else None datasource_id = datasource_elem.get("id", None) if datasource_elem is not None else None - + extract_item = cls(priority, refresh_type, workbook_id, datasource_id) extract_item._id = extract_id - + all_extract_items.append(extract_item) - - return all_extract_items \ No newline at end of file + + return all_extract_items diff --git a/tableauserverclient/server/endpoint/groups_endpoint.py b/tableauserverclient/server/endpoint/groups_endpoint.py index 4e9af4076..55cbdf2e7 100644 --- a/tableauserverclient/server/endpoint/groups_endpoint.py +++ b/tableauserverclient/server/endpoint/groups_endpoint.py @@ -173,10 +173,12 @@ def delete(self, group_id: str) -> None: logger.info(f"Deleted single group (ID: {group_id})") @overload - def update(self, group_item: GroupItem, as_job: Literal[False]) -> GroupItem: ... + def update(self, group_item: GroupItem, as_job: Literal[False]) -> GroupItem: + ... @overload - def update(self, group_item: GroupItem, as_job: Literal[True]) -> JobItem: ... + def update(self, group_item: GroupItem, as_job: Literal[True]) -> JobItem: + ... @api(version="2.0") def update(self, group_item, as_job=False): @@ -258,10 +260,12 @@ def create(self, group_item: GroupItem) -> GroupItem: return GroupItem.from_response(server_response.content, self.parent_srv.namespace)[0] @overload - def create_AD_group(self, group_item: GroupItem, asJob: Literal[False]) -> GroupItem: ... + def create_AD_group(self, group_item: GroupItem, asJob: Literal[False]) -> GroupItem: + ... @overload - def create_AD_group(self, group_item: GroupItem, asJob: Literal[True]) -> JobItem: ... + def create_AD_group(self, group_item: GroupItem, asJob: Literal[True]) -> JobItem: + ... @api(version="2.0") def create_AD_group(self, group_item, asJob=False): diff --git a/tableauserverclient/server/endpoint/schedules_endpoint.py b/tableauserverclient/server/endpoint/schedules_endpoint.py index b2a8043f8..8c48f9a22 100644 --- a/tableauserverclient/server/endpoint/schedules_endpoint.py +++ b/tableauserverclient/server/endpoint/schedules_endpoint.py @@ -151,17 +151,19 @@ def _add_to( return OK @api(version="2.3") - def get_extract_refresh_tasks(self, schedule_id: str, req_options: Optional["RequestOptions"] = None) -> tuple[list["ExtractItem"], "PaginationItem"]: + def get_extract_refresh_tasks( + self, schedule_id: str, req_options: Optional["RequestOptions"] = None + ) -> tuple[list["ExtractItem"], "PaginationItem"]: """Get all extract refresh tasks for the specified schedule.""" if not schedule_id: error = "Schedule ID undefined" raise ValueError(error) - + logger.info(f"Querying extract refresh tasks for schedule (ID: {schedule_id})") url = f"{self.siteurl}/{schedule_id}/extracts" server_response = self.get_request(url, req_options) - + pagination_item = PaginationItem.from_response(server_response.content, self.parent_srv.namespace) extract_items = ExtractItem.from_response(server_response.content, self.parent_srv.namespace) - + return extract_items, pagination_item diff --git a/tableauserverclient/server/pager.py b/tableauserverclient/server/pager.py index 3c7e60f74..9d5d6fea4 100644 --- a/tableauserverclient/server/pager.py +++ b/tableauserverclient/server/pager.py @@ -12,12 +12,14 @@ @runtime_checkable class Endpoint(Protocol[T]): - def get(self, req_options: Optional[RequestOptions]) -> tuple[list[T], PaginationItem]: ... + def get(self, req_options: Optional[RequestOptions]) -> tuple[list[T], PaginationItem]: + ... @runtime_checkable class CallableEndpoint(Protocol[T]): - def __call__(self, __req_options: Optional[RequestOptions], **kwargs) -> tuple[list[T], PaginationItem]: ... + def __call__(self, __req_options: Optional[RequestOptions], **kwargs) -> tuple[list[T], PaginationItem]: + ... class Pager(Iterable[T]): diff --git a/tableauserverclient/server/query.py b/tableauserverclient/server/query.py index 801ad4a13..6da962177 100644 --- a/tableauserverclient/server/query.py +++ b/tableauserverclient/server/query.py @@ -97,10 +97,12 @@ def __iter__(self: Self) -> Iterator[T]: return @overload - def __getitem__(self: Self, k: Slice) -> list[T]: ... + def __getitem__(self: Self, k: Slice) -> list[T]: + ... @overload - def __getitem__(self: Self, k: int) -> T: ... + def __getitem__(self: Self, k: int) -> T: + ... def __getitem__(self, k): page = self.page_number diff --git a/tableauserverclient/server/request_factory.py b/tableauserverclient/server/request_factory.py index 575423612..72d7286f2 100644 --- a/tableauserverclient/server/request_factory.py +++ b/tableauserverclient/server/request_factory.py @@ -1008,9 +1008,9 @@ def update_req(self, workbook_item, parent_srv: Optional["Server"] = None): if data_freshness_policy_config.option == "FreshEvery": if data_freshness_policy_config.fresh_every_schedule is not None: fresh_every_element = ET.SubElement(data_freshness_policy_element, "freshEverySchedule") - fresh_every_element.attrib["frequency"] = ( - data_freshness_policy_config.fresh_every_schedule.frequency - ) + fresh_every_element.attrib[ + "frequency" + ] = data_freshness_policy_config.fresh_every_schedule.frequency fresh_every_element.attrib["value"] = str(data_freshness_policy_config.fresh_every_schedule.value) else: raise ValueError(f"data_freshness_policy_config.fresh_every_schedule must be populated.") diff --git a/test/request_factory/test_task_requests.py b/test/request_factory/test_task_requests.py index 0258b8a93..6287fa6ea 100644 --- a/test/request_factory/test_task_requests.py +++ b/test/request_factory/test_task_requests.py @@ -5,7 +5,6 @@ class TestTaskRequest(unittest.TestCase): - def setUp(self): self.task_request = TaskRequest() self.xml_request = ET.Element("tsRequest") diff --git a/test/test_schedule.py b/test/test_schedule.py index c08a34dfa..7bccc2ed4 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -409,7 +409,6 @@ def test_add_flow(self) -> None: def test_get_extract_refresh_tasks(self) -> None: self.server.version = "2.3" - with open(GET_EXTRACT_TASKS_XML, "rb") as f: response_xml = f.read().decode("utf-8") @@ -420,7 +419,7 @@ def test_get_extract_refresh_tasks(self) -> None: # Fix the URL construction to match the endpoint pattern # url = f"{self.baseurl}/{schedule_id}/extracts" m.get(baseurl, text=response_xml) - + extracts = self.server.schedules.get_extract_refresh_tasks(schedule_id) self.assertIsNotNone(extracts) From 8d9893a09ebba71c418e4754cb0586135ba48744 Mon Sep 17 00:00:00 2001 From: casey-crawford-cfa Date: Thu, 8 May 2025 09:38:44 -0500 Subject: [PATCH 3/4] mypy found an issue with str|none --- tableauserverclient/models/extract_item.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tableauserverclient/models/extract_item.py b/tableauserverclient/models/extract_item.py index 417378418..7562ffdde 100644 --- a/tableauserverclient/models/extract_item.py +++ b/tableauserverclient/models/extract_item.py @@ -65,7 +65,7 @@ def from_xml_element(cls, parsed_response: ET.Element, ns: dict) -> List["Extrac for extract_xml in all_extract_xml: extract_id = extract_xml.get("id", None) priority = int(extract_xml.get("priority", 0)) - refresh_type = extract_xml.get("type", None) + refresh_type = extract_xml.get("type", "") # Check for workbook or datasource workbook_elem = extract_xml.find(".//t:workbook", namespaces=ns) From aa46f453be2ede1741297ea32c9aae18036a7e80 Mon Sep 17 00:00:00 2001 From: casey-crawford-cfa Date: Wed, 14 May 2025 17:02:30 -0500 Subject: [PATCH 4/4] - Fixing test asset to have real value instead of api documentation value - Removing extra comments - Reformatting with proper version of black --- .../server/endpoint/groups_endpoint.py | 12 ++++-------- tableauserverclient/server/pager.py | 6 ++---- tableauserverclient/server/query.py | 6 ++---- tableauserverclient/server/request_factory.py | 6 +++--- test/assets/schedule_get_extract_refresh_tasks.xml | 4 ++-- test/test_schedule.py | 3 --- 6 files changed, 13 insertions(+), 24 deletions(-) diff --git a/tableauserverclient/server/endpoint/groups_endpoint.py b/tableauserverclient/server/endpoint/groups_endpoint.py index 55cbdf2e7..4e9af4076 100644 --- a/tableauserverclient/server/endpoint/groups_endpoint.py +++ b/tableauserverclient/server/endpoint/groups_endpoint.py @@ -173,12 +173,10 @@ def delete(self, group_id: str) -> None: logger.info(f"Deleted single group (ID: {group_id})") @overload - def update(self, group_item: GroupItem, as_job: Literal[False]) -> GroupItem: - ... + def update(self, group_item: GroupItem, as_job: Literal[False]) -> GroupItem: ... @overload - def update(self, group_item: GroupItem, as_job: Literal[True]) -> JobItem: - ... + def update(self, group_item: GroupItem, as_job: Literal[True]) -> JobItem: ... @api(version="2.0") def update(self, group_item, as_job=False): @@ -260,12 +258,10 @@ def create(self, group_item: GroupItem) -> GroupItem: return GroupItem.from_response(server_response.content, self.parent_srv.namespace)[0] @overload - def create_AD_group(self, group_item: GroupItem, asJob: Literal[False]) -> GroupItem: - ... + def create_AD_group(self, group_item: GroupItem, asJob: Literal[False]) -> GroupItem: ... @overload - def create_AD_group(self, group_item: GroupItem, asJob: Literal[True]) -> JobItem: - ... + def create_AD_group(self, group_item: GroupItem, asJob: Literal[True]) -> JobItem: ... @api(version="2.0") def create_AD_group(self, group_item, asJob=False): diff --git a/tableauserverclient/server/pager.py b/tableauserverclient/server/pager.py index 9d5d6fea4..3c7e60f74 100644 --- a/tableauserverclient/server/pager.py +++ b/tableauserverclient/server/pager.py @@ -12,14 +12,12 @@ @runtime_checkable class Endpoint(Protocol[T]): - def get(self, req_options: Optional[RequestOptions]) -> tuple[list[T], PaginationItem]: - ... + def get(self, req_options: Optional[RequestOptions]) -> tuple[list[T], PaginationItem]: ... @runtime_checkable class CallableEndpoint(Protocol[T]): - def __call__(self, __req_options: Optional[RequestOptions], **kwargs) -> tuple[list[T], PaginationItem]: - ... + def __call__(self, __req_options: Optional[RequestOptions], **kwargs) -> tuple[list[T], PaginationItem]: ... class Pager(Iterable[T]): diff --git a/tableauserverclient/server/query.py b/tableauserverclient/server/query.py index 6da962177..801ad4a13 100644 --- a/tableauserverclient/server/query.py +++ b/tableauserverclient/server/query.py @@ -97,12 +97,10 @@ def __iter__(self: Self) -> Iterator[T]: return @overload - def __getitem__(self: Self, k: Slice) -> list[T]: - ... + def __getitem__(self: Self, k: Slice) -> list[T]: ... @overload - def __getitem__(self: Self, k: int) -> T: - ... + def __getitem__(self: Self, k: int) -> T: ... def __getitem__(self, k): page = self.page_number diff --git a/tableauserverclient/server/request_factory.py b/tableauserverclient/server/request_factory.py index 72d7286f2..575423612 100644 --- a/tableauserverclient/server/request_factory.py +++ b/tableauserverclient/server/request_factory.py @@ -1008,9 +1008,9 @@ def update_req(self, workbook_item, parent_srv: Optional["Server"] = None): if data_freshness_policy_config.option == "FreshEvery": if data_freshness_policy_config.fresh_every_schedule is not None: fresh_every_element = ET.SubElement(data_freshness_policy_element, "freshEverySchedule") - fresh_every_element.attrib[ - "frequency" - ] = data_freshness_policy_config.fresh_every_schedule.frequency + fresh_every_element.attrib["frequency"] = ( + data_freshness_policy_config.fresh_every_schedule.frequency + ) fresh_every_element.attrib["value"] = str(data_freshness_policy_config.fresh_every_schedule.value) else: raise ValueError(f"data_freshness_policy_config.fresh_every_schedule must be populated.") diff --git a/test/assets/schedule_get_extract_refresh_tasks.xml b/test/assets/schedule_get_extract_refresh_tasks.xml index 84ffbd5b1..48906dde6 100644 --- a/test/assets/schedule_get_extract_refresh_tasks.xml +++ b/test/assets/schedule_get_extract_refresh_tasks.xml @@ -3,12 +3,12 @@ + type="IncrementalRefresh" > + type="IncrementalRefresh" > diff --git a/test/test_schedule.py b/test/test_schedule.py index 7bccc2ed4..4fcc85e18 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -414,10 +414,7 @@ def test_get_extract_refresh_tasks(self) -> None: response_xml = f.read().decode("utf-8") with requests_mock.mock() as m: schedule_id = "c9cff7f9-309c-4361-99ff-d4ba8c9f5467" - # baseurl = f"{self.baseurl}/schedules/{schedule_id}/extracts" baseurl = f"{self.server.baseurl}/sites/{self.server.site_id}/schedules/{schedule_id}/extracts" - # Fix the URL construction to match the endpoint pattern - # url = f"{self.baseurl}/{schedule_id}/extracts" m.get(baseurl, text=response_xml) extracts = self.server.schedules.get_extract_refresh_tasks(schedule_id)