|
| 1 | +import logging |
| 2 | +from typing import Any |
| 3 | + |
| 4 | +import boto3 |
| 5 | +import neo4j |
| 6 | + |
| 7 | +from cartography.client.core.tx import load |
| 8 | +from cartography.graph.job import GraphJob |
| 9 | +from cartography.intel.aws.ec2.util import get_botocore_config |
| 10 | +from cartography.models.aws.ec2.route_table_associations import RouteTableAssociationSchema |
| 11 | +from cartography.models.aws.ec2.route_tables import RouteTableSchema |
| 12 | +from cartography.models.aws.ec2.routes import RouteSchema |
| 13 | +from cartography.util import aws_handle_regions |
| 14 | +from cartography.util import timeit |
| 15 | + |
| 16 | +logger = logging.getLogger(__name__) |
| 17 | + |
| 18 | + |
| 19 | +def _get_route_id_and_target(route_table_id: str, route: dict[str, Any]) -> tuple[str, str | None]: |
| 20 | + """ |
| 21 | + Generate a unique identifier for an AWS EC2 route and return the target of the route |
| 22 | + regardless of its type. |
| 23 | +
|
| 24 | + Args: |
| 25 | + route_table_id: The ID of the route table this route belongs to |
| 26 | + route: The route data from AWS API |
| 27 | +
|
| 28 | + Returns: |
| 29 | + A tuple containing the unique identifier for the route and the target of the route |
| 30 | + """ |
| 31 | + route_target_keys = [ |
| 32 | + 'DestinationCidrBlock', |
| 33 | + 'DestinationIpv6CidrBlock', |
| 34 | + 'GatewayId', |
| 35 | + 'InstanceId', |
| 36 | + 'NatGatewayId', |
| 37 | + 'TransitGatewayId', |
| 38 | + 'LocalGatewayId', |
| 39 | + 'CarrierGatewayId', |
| 40 | + 'NetworkInterfaceId', |
| 41 | + 'VpcPeeringConnectionId', |
| 42 | + 'EgressOnlyInternetGatewayId', |
| 43 | + 'CoreNetworkArn', |
| 44 | + ] |
| 45 | + |
| 46 | + # Start with the route table ID |
| 47 | + parts = [route_table_id] |
| 48 | + target = None |
| 49 | + found_target = False |
| 50 | + |
| 51 | + for key in route_target_keys: |
| 52 | + # Each route is a "union"-like data structure, so only one of the keys will be present. |
| 53 | + if key in route: |
| 54 | + parts.append(route[key]) |
| 55 | + target = route[key] |
| 56 | + found_target = True |
| 57 | + break |
| 58 | + |
| 59 | + if not found_target: |
| 60 | + logger.warning( |
| 61 | + f"No target found for route in {route_table_id}. Please review the route and file an issue to " |
| 62 | + "https://github.com/cartography-cncf/cartography/issues sharing what the route table looks like " |
| 63 | + "so that we can update the available keys.", |
| 64 | + ) |
| 65 | + |
| 66 | + return '|'.join(parts), target |
| 67 | + |
| 68 | + |
@timeit
@aws_handle_regions
def get_route_tables(boto3_session: boto3.session.Session, region: str) -> list[dict[str, Any]]:
    """
    Fetch the route tables of a single region through the EC2 ``describe_route_tables`` paginator.

    Args:
        boto3_session: The boto3 session used to create the EC2 client
        region: The AWS region to query

    Returns:
        The ``RouteTables`` entries of every page, concatenated in page order
    """
    ec2 = boto3_session.client('ec2', region_name=region, config=get_botocore_config())
    pages = ec2.get_paginator('describe_route_tables').paginate()
    return [table for page in pages for table in page['RouteTables']]
| 78 | + |
| 79 | + |
| 80 | +def _transform_route_table_associations( |
| 81 | + route_table_id: str, |
| 82 | + associations: list[dict[str, Any]], |
| 83 | +) -> tuple[list[dict[str, Any]], bool]: |
| 84 | + """ |
| 85 | + Transform route table association data into a format suitable for cartography ingestion. |
| 86 | +
|
| 87 | + Args: |
| 88 | + route_table_id: The ID of the route table |
| 89 | + associations: List of association data from AWS API |
| 90 | +
|
| 91 | + Returns: |
| 92 | + 1. List of transformed association data |
| 93 | + 2. Boolean indicating if the association is the main association, meaning that the route table is the main |
| 94 | + route table for the VPC |
| 95 | + """ |
| 96 | + transformed = [] |
| 97 | + is_main = False |
| 98 | + for association in associations: |
| 99 | + if association.get('SubnetId'): |
| 100 | + target = association['SubnetId'] |
| 101 | + elif association.get('GatewayId'): |
| 102 | + target = association['GatewayId'] |
| 103 | + else: |
| 104 | + is_main = True |
| 105 | + target = 'main' |
| 106 | + |
| 107 | + transformed_association = { |
| 108 | + 'id': association['RouteTableAssociationId'], |
| 109 | + 'route_table_id': route_table_id, |
| 110 | + 'subnet_id': association.get('SubnetId'), |
| 111 | + 'gateway_id': association.get('GatewayId'), |
| 112 | + 'main': association.get('Main', False), |
| 113 | + 'association_state': association.get('AssociationState', {}).get('State'), |
| 114 | + 'association_state_message': association.get('AssociationState', {}).get('Message'), |
| 115 | + '_target': target, |
| 116 | + } |
| 117 | + transformed.append(transformed_association) |
| 118 | + return transformed, is_main |
| 119 | + |
| 120 | + |
def _transform_route_table_routes(route_table_id: str, routes: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Convert the routes of one route table into flat records for cartography ingestion.

    Args:
        route_table_id: The ID of the route table
        routes: List of route data from AWS API

    Returns:
        One record per route, in input order. Fields missing from a route are emitted as None.
    """
    # (record field, AWS route key) pairs, in the order the fields appear in each record.
    copied_fields = (
        ('destination_cidr_block', 'DestinationCidrBlock'),
        ('destination_ipv6_cidr_block', 'DestinationIpv6CidrBlock'),
        ('gateway_id', 'GatewayId'),
        ('instance_id', 'InstanceId'),
        ('instance_owner_id', 'InstanceOwnerId'),
        ('nat_gateway_id', 'NatGatewayId'),
        ('transit_gateway_id', 'TransitGatewayId'),
        ('local_gateway_id', 'LocalGatewayId'),
        ('carrier_gateway_id', 'CarrierGatewayId'),
        ('network_interface_id', 'NetworkInterfaceId'),
        ('vpc_peering_connection_id', 'VpcPeeringConnectionId'),
        ('state', 'State'),
        ('origin', 'Origin'),
        ('core_network_arn', 'CoreNetworkArn'),
        ('destination_prefix_list_id', 'DestinationPrefixListId'),
        ('egress_only_internet_gateway_id', 'EgressOnlyInternetGatewayId'),
    )

    def _to_record(route: dict[str, Any]) -> dict[str, Any]:
        route_id, target = _get_route_id_and_target(route_table_id, route)
        record: dict[str, Any] = {'id': route_id, 'route_table_id': route_table_id}
        record.update((field, route.get(aws_key)) for field, aws_key in copied_fields)
        record['_target'] = target
        return record

    return [_to_record(route) for route in routes]
| 159 | + |
| 160 | + |
def transform_route_table_data(
    route_tables: list[dict[str, Any]],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]:
    """
    Split raw route table data into route table, association and route records for cartography ingestion.

    Args:
        route_tables: List of route table data from AWS API

    Returns:
        Tuple of (transformed route table data, transformed association data, transformed route data)
    """
    tables_out: list[dict[str, Any]] = []
    associations_out: list[dict[str, Any]] = []
    routes_out: list[dict[str, Any]] = []

    for table in route_tables:
        table_id = table['RouteTableId']

        # Routes: a missing or empty list yields no route records for this table.
        raw_routes = table.get('Routes')
        table_routes = _transform_route_table_routes(table_id, raw_routes) if raw_routes else []
        routes_out.extend(table_routes)

        # Associations: the helper also reports whether this table is the main route table of its VPC.
        raw_associations = table.get('Associations')
        if raw_associations:
            table_associations, is_main = _transform_route_table_associations(table_id, raw_associations)
            associations_out.extend(table_associations)
        else:
            is_main = False

        tables_out.append(
            {
                'id': table_id,
                'route_table_id': table_id,
                'owner_id': table.get('OwnerId'),
                'vpc_id': table.get('VpcId'),
                'VpnGatewayIds': [vgw['GatewayId'] for vgw in table.get('PropagatingVgws', [])],
                'RouteTableAssociationIds': [
                    assoc['RouteTableAssociationId'] for assoc in table.get('Associations', [])
                ],
                'RouteIds': [record['id'] for record in table_routes],
                'tags': table.get('Tags', []),
                'main': is_main,
            },
        )

    return tables_out, associations_out, routes_out
| 207 | + |
| 208 | + |
@timeit
def load_route_tables(
    neo4j_session: neo4j.Session,
    data: list[dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Pass the transformed route table records to ``load`` using ``RouteTableSchema``.

    Args:
        neo4j_session: The Neo4j session handed to ``load``
        data: Transformed route table records
        region: Forwarded to ``load`` as the ``Region`` keyword argument
        current_aws_account_id: Forwarded to ``load`` as the ``AWS_ID`` keyword argument
        update_tag: Forwarded to ``load`` as the ``lastupdated`` keyword argument
    """
    node_schema = RouteTableSchema()
    load(neo4j_session, node_schema, data, Region=region, AWS_ID=current_aws_account_id, lastupdated=update_tag)
| 225 | + |
| 226 | + |
@timeit
def load_route_table_associations(
    neo4j_session: neo4j.Session,
    data: list[dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Pass the transformed association records to ``load`` using ``RouteTableAssociationSchema``.

    Args:
        neo4j_session: The Neo4j session handed to ``load``
        data: Transformed route table association records
        region: Forwarded to ``load`` as the ``Region`` keyword argument
        current_aws_account_id: Forwarded to ``load`` as the ``AWS_ID`` keyword argument
        update_tag: Forwarded to ``load`` as the ``lastupdated`` keyword argument
    """
    node_schema = RouteTableAssociationSchema()
    load(neo4j_session, node_schema, data, Region=region, AWS_ID=current_aws_account_id, lastupdated=update_tag)
| 243 | + |
| 244 | + |
@timeit
def load_routes(
    neo4j_session: neo4j.Session,
    data: list[dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    update_tag: int,
) -> None:
    """
    Pass the transformed route records to ``load`` using ``RouteSchema``.

    Args:
        neo4j_session: The Neo4j session handed to ``load``
        data: Transformed route records
        region: Forwarded to ``load`` as the ``Region`` keyword argument
        current_aws_account_id: Forwarded to ``load`` as the ``AWS_ID`` keyword argument
        update_tag: Forwarded to ``load`` as the ``lastupdated`` keyword argument
    """
    node_schema = RouteSchema()
    load(neo4j_session, node_schema, data, Region=region, AWS_ID=current_aws_account_id, lastupdated=update_tag)
| 261 | + |
| 262 | + |
@timeit
def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
    """
    Run the schema-derived cleanup job for route tables, routes and route table associations, in that order.

    Args:
        neo4j_session: The Neo4j session each cleanup job is run against
        common_job_parameters: Parameters passed to ``GraphJob.from_node_schema`` for every job
    """
    logger.debug("Running EC2 route tables cleanup")
    for schema_cls in (RouteTableSchema, RouteSchema, RouteTableAssociationSchema):
        job = GraphJob.from_node_schema(schema_cls(), common_job_parameters)
        job.run(neo4j_session)
| 269 | + |
| 270 | + |
@timeit
def sync_route_tables(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: list[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Fetch, transform and load EC2 route tables for each region, then run cleanup.

    Args:
        neo4j_session: The Neo4j session used for loading and cleanup
        boto3_session: The boto3 session used to query EC2
        regions: The AWS regions to sync
        current_aws_account_id: The ID of the AWS account being synced
        update_tag: The update tag passed to every load call
        common_job_parameters: Parameters passed to the cleanup jobs
    """
    for region in regions:
        logger.info("Syncing EC2 route tables for region '%s' in account '%s'.", region, current_aws_account_id)
        raw_tables = get_route_tables(boto3_session, region)
        tables, associations, routes = transform_route_table_data(raw_tables)

        # Load order is routes, then associations, then the route tables themselves.
        load_scope = (region, current_aws_account_id, update_tag)
        load_routes(neo4j_session, routes, *load_scope)
        load_route_table_associations(neo4j_session, associations, *load_scope)
        load_route_tables(neo4j_session, tables, *load_scope)

    # NOTE(review): cleanup is assumed to run once after all regions have been loaded - confirm.
    cleanup(neo4j_session, common_job_parameters)
0 commit comments