2 changes: 1 addition & 1 deletion mysql/CHANGELOG.md
@@ -6,7 +6,7 @@

***Added***:

* Migrate SQL Server to a new schema collector, which provides improved performance in the Agent and allows the backend to handle larger schema collections ([#21729](https://github.com/DataDog/integrations-core/pull/21729))
* Migrate Mysql to a new schema collector, which provides improved performance in the Agent and allows the backend to handle larger schema collections ([#21729](https://github.com/DataDog/integrations-core/pull/21729))
* Add DBM Agent health events to MySQL, including basic initialization checks, unhandled errors, and missed collections ([#21867](https://github.com/DataDog/integrations-core/pull/21867))
* Upgrade base version for Postgres, MySQL, and SQLServer ([#21906](https://github.com/DataDog/integrations-core/pull/21906))

8 changes: 1 addition & 7 deletions mysql/assets/configuration/spec.yaml
@@ -429,13 +429,7 @@ files:
Capped by `collect_schemas.collection_interval`
value:
type: number
display_default: 60
- name: max_tables
description: |
Set the maximum number of tables to collect. Defaults to 300.
value:
type: integer
display_default: 300
example: 60
- name: schemas_collection
deprecation:
Agent version: 7.69.0
1 change: 1 addition & 0 deletions mysql/changelog.d/22103.fixed
@@ -0,0 +1 @@
Revert "Migrate MySQL to new schema collector" to avoid breaking schema collection on older versions of Mysql/MariaDB
6 changes: 0 additions & 6 deletions mysql/datadog_checks/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion mysql/datadog_checks/mysql/config_models/instance.py
@@ -56,7 +56,6 @@ class CollectSchemas(BaseModel):
collection_interval: Optional[float] = None
enabled: Optional[bool] = None
max_execution_time: Optional[float] = None
max_tables: Optional[int] = None


class CollectSettings(BaseModel):
7 changes: 1 addition & 6 deletions mysql/datadog_checks/mysql/data/conf.yaml.example
@@ -417,12 +417,7 @@ instances:
## Set the maximum time for schema collection (in seconds). Defaults to 60 seconds.
## Capped by `collect_schemas.collection_interval`
#
# max_execution_time: <MAX_EXECUTION_TIME>

## @param max_tables - integer - optional - default: 300
## Set the maximum number of tables to collect. Defaults to 300.
#
# max_tables: <MAX_TABLES>
# max_execution_time: 60

## DEPRECATED: Use `collect_schemas` instead.
## Configure collection of schemas (databases).
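For orientation, here is a minimal sketch of the `collect_schemas` options as they stand after this change, written as the Python dict the check reads from the YAML instance block. It is illustrative only: `max_tables` no longer exists, `max_execution_time` defaults to 60 seconds and is capped by `collection_interval`, and the 600-second interval shown is just an example value, not a default taken from this PR.

```python
# Illustrative only: the `collect_schemas` options after this change, written
# as the dict the check reads from the YAML instance block. `max_tables` no
# longer exists; `max_execution_time` defaults to 60 seconds and is capped by
# `collection_interval` (600 here is just an example value).
collect_schemas = {
    "enabled": True,
    "collection_interval": 600,
    "max_execution_time": 60,
}
```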
448 changes: 448 additions & 0 deletions mysql/datadog_checks/mysql/databases_data.py

Large diffs are not rendered by default.

48 changes: 29 additions & 19 deletions mysql/datadog_checks/mysql/metadata.py
@@ -5,15 +5,15 @@
from contextlib import closing
from operator import attrgetter

import pymysql # type: ignore
import pymysql

from datadog_checks.mysql.cursor import CommenterDictCursor
from datadog_checks.mysql.schemas import MySqlSchemaCollector
from datadog_checks.mysql.databases_data import DEFAULT_DATABASES_DATA_COLLECTION_INTERVAL, DatabasesData

from .util import ManagedAuthConnectionMixin, connect_with_session_variables

try:
import datadog_agent # type: ignore
import datadog_agent
except ImportError:
from datadog_checks.base.stubs import datadog_agent

@@ -27,7 +27,7 @@

# default pg_settings collection interval in seconds
DEFAULT_SETTINGS_COLLECTION_INTERVAL = 600
DEFAULT_SCHEMAS_COLLECTION_INTERVAL = 600

MARIADB_TABLE_NAME = "information_schema.GLOBAL_VARIABLES"
MYSQL_TABLE_NAME = "performance_schema.global_variables"

@@ -48,23 +48,24 @@ class MySQLMetadata(ManagedAuthConnectionMixin, DBMAsyncJob):
"""

def __init__(self, check, config, connection_args_provider, uses_managed_auth=False):
self._databases_data_enabled = is_affirmative(config.schemas_config.get("enabled", False))
self._databases_data_collection_interval = config.schemas_config.get(
"collection_interval", DEFAULT_DATABASES_DATA_COLLECTION_INTERVAL
)
self._settings_enabled = is_affirmative(config.settings_config.get('enabled', True))
self._schemas_enabled = is_affirmative(config.schemas_config.get('enabled', False))

self._settings_collection_interval = float(
config.settings_config.get('collection_interval', DEFAULT_SETTINGS_COLLECTION_INTERVAL)
)
self._schemas_collection_interval = float(
config.schemas_config.get('collection_interval', DEFAULT_SCHEMAS_COLLECTION_INTERVAL)
)

if self._schemas_enabled and not self._settings_enabled:
self.collection_interval = self._schemas_collection_interval
elif not self._schemas_enabled and self._settings_enabled:
if self._databases_data_enabled and not self._settings_enabled:
self.collection_interval = self._databases_data_collection_interval
elif not self._databases_data_enabled and self._settings_enabled:
self.collection_interval = self._settings_collection_interval
else:
self.collection_interval = min(self._settings_collection_interval, self._schemas_collection_interval)
self.enabled = self._settings_enabled or self._schemas_enabled
self.collection_interval = min(self._databases_data_collection_interval, self._settings_collection_interval)

self.enabled = self._databases_data_enabled or self._settings_enabled

super(MySQLMetadata, self).__init__(
check,
@@ -84,9 +84,9 @@ def __init__(self, check, config, connection_args_provider, uses_managed_auth=False):
self._uses_managed_auth = uses_managed_auth
self._db_created_at = 0
self._db = None
self._schemas_collector = MySqlSchemaCollector(check)
self._databases_data = DatabasesData(self, check, config)
self._last_settings_collection_time = 0
self._last_schemas_collection_time = 0
self._last_databases_collection_time = 0

def get_db_connection(self):
"""
@@ -146,10 +146,19 @@ def run_job(self):
These may be unavailable until the error is resolved. The error - {}""".format(e)
)

elapsed_time_schemas = time.time() - self._last_schemas_collection_time
if self._schemas_enabled and elapsed_time_schemas >= self._schemas_collection_interval:
self._last_schemas_collection_time = time.time()
self._schemas_collector.collect_schemas()
elapsed_time_databases = time.time() - self._last_databases_collection_time
if self._databases_data_enabled and elapsed_time_databases >= self._databases_data_collection_interval:
self._last_databases_collection_time = time.time()
try:
self._databases_data.collect_databases_data(self._tags)
except Exception as e:
self._log.error(
"""An error occurred while collecting schema data.
These may be unavailable until the error is resolved. The error - {}""".format(e)
)

def shut_down(self):
self._databases_data.shut_down()

@tracked_method(agent_check_getter=attrgetter('_check'))
def report_mysql_metadata(self):
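To make the scheduling that this revert restores easier to follow, here is a simplified, self-contained sketch of the interval selection and per-collection gating shown in the diff above. It is not the real `MySQLMetadata`/`DBMAsyncJob` implementation, and the 600-second value used for the databases interval is an assumption (the actual constant is imported from the unrendered `databases_data.py`).

```python
import time

# Simplified sketch of the scheduling restored by this revert; it is not the
# real MySQLMetadata/DBMAsyncJob class. The 600-second databases interval is
# an assumed value (the real default is imported from databases_data.py).
DEFAULT_SETTINGS_COLLECTION_INTERVAL = 600
DEFAULT_DATABASES_DATA_COLLECTION_INTERVAL = 600


class MetadataSchedulingSketch:
    def __init__(self, settings_enabled, databases_enabled,
                 settings_interval=DEFAULT_SETTINGS_COLLECTION_INTERVAL,
                 databases_interval=DEFAULT_DATABASES_DATA_COLLECTION_INTERVAL):
        self.settings_enabled = settings_enabled
        self.databases_enabled = databases_enabled
        self.settings_interval = float(settings_interval)
        self.databases_interval = float(databases_interval)
        # The job wakes up at the interval of whichever collection is enabled,
        # or at the smaller of the two intervals when both are enabled.
        if databases_enabled and not settings_enabled:
            self.collection_interval = self.databases_interval
        elif settings_enabled and not databases_enabled:
            self.collection_interval = self.settings_interval
        else:
            self.collection_interval = min(self.databases_interval, self.settings_interval)
        self.enabled = settings_enabled or databases_enabled
        self._last_settings_collection_time = 0.0
        self._last_databases_collection_time = 0.0

    def run_job(self):
        # Each collection is gated on its own elapsed time, so sharing one
        # wake-up interval never makes either run more often than configured.
        now = time.time()
        if self.settings_enabled and now - self._last_settings_collection_time >= self.settings_interval:
            self._last_settings_collection_time = now
            print("collecting settings")
        if self.databases_enabled and now - self._last_databases_collection_time >= self.databases_interval:
            self._last_databases_collection_time = now
            print("collecting schema (databases) data")
```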
11 changes: 2 additions & 9 deletions mysql/datadog_checks/mysql/mysql.py
@@ -15,8 +15,7 @@
import pymysql
from cachetools import TTLCache

from datadog_checks.base import AgentCheck, is_affirmative
from datadog_checks.base.checks.db import DatabaseCheck
from datadog_checks.base import AgentCheck, DatabaseCheck, is_affirmative
from datadog_checks.base.utils.db import QueryExecutor, QueryManager
from datadog_checks.base.utils.db.health import HealthEvent, HealthStatus
from datadog_checks.base.utils.db.utils import (
@@ -87,7 +86,7 @@
)
from .statement_samples import MySQLStatementSamples
from .statements import MySQLStatementMetrics
from .util import connect_with_session_variables
from .util import DatabaseConfigurationError, connect_with_session_variables # noqa: F401
from .version_utils import parse_version

try:
@@ -224,12 +223,6 @@ def resolved_hostname(self):
def cloud_metadata(self):
return self._cloud_metadata

@property
def dbms_version(self):
if self.version is None:
return None
return self.version.version + '+' + self.version.build

@property
def database_identifier(self):
# type: () -> str
71 changes: 31 additions & 40 deletions mysql/datadog_checks/mysql/queries.py
@@ -89,26 +89,25 @@
SELECT plugin_status
FROM information_schema.plugins WHERE plugin_name='group_replication'"""

# Alisases add to homogenize fields across different database types like SQLServer, PostgreSQL
SQL_DATABASES = """
SELECT schema_name as `schema_name`,
SELECT schema_name as `name`,
default_character_set_name as `default_character_set_name`,
default_collation_name as `default_collation_name`
FROM information_schema.SCHEMATA
WHERE schema_name not in ('sys', 'mysql', 'performance_schema', 'information_schema')"""

SQL_TABLES = """\
SELECT table_name as `table_name`,
SELECT table_name as `name`,
engine as `engine`,
row_format as `row_format`,
create_time as `create_time`,
table_schema as `schema_name`
create_time as `create_time`
FROM information_schema.TABLES
WHERE TABLE_TYPE="BASE TABLE"
WHERE TABLE_SCHEMA = %s AND TABLE_TYPE="BASE TABLE"
"""

SQL_COLUMNS = """\
SELECT table_name as `table_name`,
table_schema as `schema_name`,
column_name as `name`,
column_type as `column_type`,
column_default as `default`,
@@ -117,52 +116,50 @@
column_key as `column_key`,
extra as `extra`
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_schema = %s AND table_name IN ({});
"""

SQL_INDEXES = """\
SELECT
table_name as `table_name`,
table_schema as `schema_name`,
index_name as `name`,
collation as `collation`,
cardinality as `cardinality`,
index_type as `index_type`,
seq_in_index as `seq_in_index`,
column_name as `column_name`,
sub_part as `sub_part`,
packed as `packed`,
nullable as `nullable`,
non_unique as `non_unique`,
NULL as `expression`,
json_arrayagg(json_object(
'name', column_name,
'collation', collation,
'nullable', nullable,
'sub_part', sub_part
)) as `columns`
NULL as `expression`
FROM INFORMATION_SCHEMA.STATISTICS
GROUP BY index_name, table_name, schema_name, cardinality, index_type, non_unique, expression
WHERE table_schema = %s AND table_name IN ({});
"""

SQL_INDEXES_8_0_13 = """\
SELECT
table_name as `table_name`,
table_schema as `schema_name`,
index_name as `name`,
collation as `collation`,
cardinality as `cardinality`,
index_type as `index_type`,
seq_in_index as `seq_in_index`,
column_name as `column_name`,
sub_part as `sub_part`,
packed as `packed`,
nullable as `nullable`,
non_unique as `non_unique`,
expression as `expression`,
json_arrayagg(json_object(
'name', column_name,
'collation', collation,
'nullable', nullable,
'sub_part', sub_part
)) as `columns`
expression as `expression`
FROM INFORMATION_SCHEMA.STATISTICS
GROUP BY index_name, table_name, schema_name, cardinality, index_type, non_unique, expression
WHERE table_schema = %s AND table_name IN ({});
"""

SQL_FOREIGN_KEYS = """\
SELECT
kcu.constraint_schema as constraint_schema,
kcu.constraint_name as name,
kcu.table_name as table_name,
kcu.table_schema as schema_name,
group_concat(kcu.column_name order by kcu.ordinal_position asc) as column_names,
kcu.referenced_table_schema as referenced_table_schema,
kcu.referenced_table_name as referenced_table_name,
@@ -176,12 +173,12 @@
ON kcu.CONSTRAINT_SCHEMA = rc.CONSTRAINT_SCHEMA
AND kcu.CONSTRAINT_NAME = rc.CONSTRAINT_NAME
WHERE
kcu.referenced_table_name is not null
kcu.table_schema = %s AND kcu.table_name in ({})
AND kcu.referenced_table_name is not null
GROUP BY
kcu.constraint_schema,
kcu.constraint_name,
kcu.table_name,
kcu.table_schema,
kcu.referenced_table_schema,
kcu.referenced_table_name,
rc.update_rule,
@@ -191,28 +188,22 @@
SQL_PARTITION = """\
SELECT
table_name as `table_name`,
table_schema as `schema_name`,
partition_name as `name`,
subpartition_name as `subpartition_name`,
partition_ordinal_position as `partition_ordinal_position`,
subpartition_ordinal_position as `subpartition_ordinal_position`,
partition_method as `partition_method`,
subpartition_method as `subpartition_method`,
partition_expression as `partition_expression`,
subpartition_expression as `subpartition_expression`,
partition_description as `partition_description`,
json_arrayagg(json_object(
'name', subpartition_name,
'subpartition_ordinal_position', subpartition_ordinal_position,
'subpartition_method', subpartition_method,
'subpartition_expression', subpartition_expression,
'table_rows', table_rows,
'data_length', data_length
)) as `subpartitions`
table_rows as `table_rows`,
data_length as `data_length`
FROM INFORMATION_SCHEMA.PARTITIONS
WHERE
partition_name IS NOT NULL
GROUP BY table_name, table_schema, partition_name, partition_ordinal_position,
partition_method, partition_expression, partition_description
table_schema = %s AND table_name in ({}) AND partition_name IS NOT NULL
"""


QUERY_DEADLOCKS = {
'name': 'information_schema.INNODB_METRICS.lock_deadlocks',
'query': """
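The restored queries above are parameterized per schema: `%s` binds the schema name, and `{}` stands in for an IN list of table names. How that list is built lives in the unrendered `databases_data.py`, so the following is only a sketch of the pattern; `fetch_columns` is a hypothetical helper and the pymysql cursor is supplied by the caller.

```python
# Minimal sketch, not the actual DatabasesData implementation: the restored
# queries bind the schema with %s and expect `{}` to be replaced by an IN list
# of %s placeholders, one per table name, with values passed to the driver.
SQL_COLUMNS_TEMPLATE = """
SELECT table_name as `table_name`,
       column_name as `name`,
       column_type as `column_type`
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_schema = %s AND table_name IN ({})
"""


def fetch_columns(cursor, schema, table_names):
    # Hypothetical helper; assumes a DictCursor-style pymysql cursor (the
    # integration uses CommenterDictCursor). An empty table list would make
    # the IN clause invalid, so bail out early.
    if not table_names:
        return []
    placeholders = ", ".join(["%s"] * len(table_names))
    query = SQL_COLUMNS_TEMPLATE.format(placeholders)
    cursor.execute(query, (schema, *table_names))
    return cursor.fetchall()
```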