diff --git a/.gitignore b/.gitignore index 4d65088..202485a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ .envrc .ruff_cache/ +.specstory/ +.venv/ +.github/ # Byte-compiled / optimized / DLL files __pycache__/ @@ -172,3 +175,5 @@ cython_debug/ # PyPI configuration file .pypirc +.specstory/* +.specstory/history/* \ No newline at end of file diff --git a/mcp_clickhouse/__init__.py b/mcp_clickhouse/__init__.py index 21931d0..d954fcc 100644 --- a/mcp_clickhouse/__init__.py +++ b/mcp_clickhouse/__init__.py @@ -3,11 +3,13 @@ list_databases, list_tables, run_select_query, + get_table_sample, ) __all__ = [ "list_databases", "list_tables", "run_select_query", + "get_table_sample", "create_clickhouse_client", ] diff --git a/mcp_clickhouse/main.py b/mcp_clickhouse/main.py index 3653ca1..fcfadc1 100644 --- a/mcp_clickhouse/main.py +++ b/mcp_clickhouse/main.py @@ -1,7 +1,16 @@ +""" +MCP ClickHouse - Model Context Protocol server for ClickHouse database integration. + +This module provides the entry point for running the MCP ClickHouse server, +which enables AI models to interact with ClickHouse databases through a set of +well-defined tools. +""" + from .mcp_server import mcp def main(): + """Run the MCP ClickHouse server.""" mcp.run() diff --git a/mcp_clickhouse/mcp_env.py b/mcp_clickhouse/mcp_env.py index 0c36afc..fbd4038 100644 --- a/mcp_clickhouse/mcp_env.py +++ b/mcp_clickhouse/mcp_env.py @@ -16,49 +16,44 @@ class ClickHouseConfig: This class handles all environment variable configuration with sensible defaults and type conversion. It provides typed methods for accessing each configuration value. - Required environment variables: - CLICKHOUSE_HOST: The hostname of the ClickHouse server - CLICKHOUSE_USER: The username for authentication - CLICKHOUSE_PASSWORD: The password for authentication - - Optional environment variables (with defaults): - CLICKHOUSE_PORT: The port number (default: 8443 if secure=True, 8123 if secure=False) - CLICKHOUSE_SECURE: Enable HTTPS (default: true) - CLICKHOUSE_VERIFY: Verify SSL certificates (default: true) - CLICKHOUSE_CONNECT_TIMEOUT: Connection timeout in seconds (default: 30) - CLICKHOUSE_SEND_RECEIVE_TIMEOUT: Send/receive timeout in seconds (default: 300) - CLICKHOUSE_DATABASE: Default database to use (default: None) + Default values (if environment variables are not set): + CLICKHOUSE_HOST: "localhost" + CLICKHOUSE_USER: "" + CLICKHOUSE_PASSWORD: "" + CLICKHOUSE_PORT: 8123 + CLICKHOUSE_SECURE: false + CLICKHOUSE_VERIFY: false + CLICKHOUSE_CONNECT_TIMEOUT: 5 + CLICKHOUSE_SEND_RECEIVE_TIMEOUT: 300 + CLICKHOUSE_DATABASE: None """ def __init__(self): """Initialize the configuration from environment variables.""" - self._validate_required_vars() + self._set_default_vars() @property def host(self) -> str: """Get the ClickHouse host.""" - return os.environ["CLICKHOUSE_HOST"] + return os.environ.get("CLICKHOUSE_HOST", "localhost") @property def port(self) -> int: """Get the ClickHouse port. - Defaults to 8443 if secure=True, 8123 if secure=False. - Can be overridden by CLICKHOUSE_PORT environment variable. + Defaults to 8123 if not specified. """ - if "CLICKHOUSE_PORT" in os.environ: - return int(os.environ["CLICKHOUSE_PORT"]) - return 8443 if self.secure else 8123 + return int(os.environ.get("CLICKHOUSE_PORT", "8123")) @property def username(self) -> str: """Get the ClickHouse username.""" - return os.environ["CLICKHOUSE_USER"] + return os.environ.get("CLICKHOUSE_USER", "") @property def password(self) -> str: """Get the ClickHouse password.""" - return os.environ["CLICKHOUSE_PASSWORD"] + return os.environ.get("CLICKHOUSE_PASSWORD", "") @property def database(self) -> Optional[str]: @@ -69,25 +64,25 @@ def database(self) -> Optional[str]: def secure(self) -> bool: """Get whether HTTPS is enabled. - Default: True + Default: False """ - return os.getenv("CLICKHOUSE_SECURE", "true").lower() == "true" + return os.getenv("CLICKHOUSE_SECURE", "false").lower() == "true" @property def verify(self) -> bool: """Get whether SSL certificate verification is enabled. - Default: True + Default: False """ - return os.getenv("CLICKHOUSE_VERIFY", "true").lower() == "true" + return os.getenv("CLICKHOUSE_VERIFY", "false").lower() == "true" @property def connect_timeout(self) -> int: """Get the connection timeout in seconds. - Default: 30 + Default: 5 """ - return int(os.getenv("CLICKHOUSE_CONNECT_TIMEOUT", "30")) + return int(os.getenv("CLICKHOUSE_CONNECT_TIMEOUT", "5")) @property def send_receive_timeout(self) -> int: @@ -120,22 +115,23 @@ def get_client_config(self) -> dict: return config - def _validate_required_vars(self) -> None: - """Validate that all required environment variables are set. + def _set_default_vars(self) -> None: + """Set default values for environment variables if they are not already set.""" + defaults = { + "CLICKHOUSE_HOST": "localhost", + "CLICKHOUSE_USER": "", + "CLICKHOUSE_PASSWORD": "", + "CLICKHOUSE_PORT": "8123", + "CLICKHOUSE_SECURE": "false", + "CLICKHOUSE_VERIFY": "false", + "CLICKHOUSE_CONNECT_TIMEOUT": "5", + "CLICKHOUSE_SEND_RECEIVE_TIMEOUT": "300", + } - Raises: - ValueError: If any required environment variable is missing. - """ - missing_vars = [] - for var in ["CLICKHOUSE_HOST", "CLICKHOUSE_USER", "CLICKHOUSE_PASSWORD"]: + for var, default_value in defaults.items(): if var not in os.environ: - missing_vars.append(var) - - if missing_vars: - raise ValueError( - f"Missing required environment variables: {', '.join(missing_vars)}" - ) + os.environ[var] = default_value # Global instance for easy access -config = ClickHouseConfig() \ No newline at end of file +config = ClickHouseConfig() diff --git a/mcp_clickhouse/mcp_server.py b/mcp_clickhouse/mcp_server.py index 95875fa..27292e0 100644 --- a/mcp_clickhouse/mcp_server.py +++ b/mcp_clickhouse/mcp_server.py @@ -28,16 +28,23 @@ mcp = FastMCP(MCP_SERVER_NAME, dependencies=deps) -@mcp.tool() +@mcp.tool( + description="Lists all available databases in the ClickHouse server. Use this tool to get a complete list of databases before exploring their tables. No parameters required." +) def list_databases(): logger.info("Listing all databases") client = create_clickhouse_client() result = client.command("SHOW DATABASES") - logger.info(f"Found {len(result) if isinstance(result, list) else 1} databases") + logger.info( + f"Found {len(result) if isinstance(result, list) else 1} databases") return result -@mcp.tool() +@mcp.tool( + description="Lists tables in a ClickHouse database with detailed schema information. " + "Provides complete table structure including columns, types, and creation statements. " + "Use the 'like' parameter to filter results with SQL LIKE pattern." +) def list_tables(database: str, like: str = None): logger.info(f"Listing tables in database '{database}'") client = create_clickhouse_client() @@ -49,7 +56,8 @@ def list_tables(database: str, like: str = None): # Get all table comments in one query table_comments_query = f"SELECT name, comment FROM system.tables WHERE database = {format_query_value(database)}" table_comments_result = client.query(table_comments_query) - table_comments = {row[0]: row[1] for row in table_comments_result.result_rows} + table_comments = {row[0]: row[1] + for row in table_comments_result.result_rows} # Get all column comments in one query column_comments_query = f"SELECT table, name, comment FROM system.columns WHERE database = {format_query_value(database)}" @@ -105,7 +113,12 @@ def get_table_info(table): return tables -@mcp.tool() +@mcp.tool( + description="Executes a SELECT query against the ClickHouse database. " + "Use for custom data retrieval with your own SQL. " + "Queries are executed in read-only mode for safety. " + "Format your query without specifying database names in SQL." +) def run_select_query(query: str): logger.info(f"Executing SELECT query: {query}") client = create_clickhouse_client() @@ -125,6 +138,74 @@ def run_select_query(query: str): return f"error running query: {err}" +@mcp.tool( + description="Retrieves a random sample of rows from a table using ORDER BY RAND(). " + "Perfect for data exploration and quick analysis. " + "Limit parameter capped at 10 rows. " + "Use the where parameter for filtering specific data patterns." +) +def get_table_sample(database: str, table: str, columns: str = "*", limit: int = 5, where: str = None): + """Retrieves a random sample of rows from a table with ORDER BY RAND() + + Args: + database: The database containing the table + table: The table to sample data from + columns: Comma-separated list of columns to retrieve (default: "*" for all columns) + limit: Maximum number of rows to return (default: 5, max: 10) + where: Optional WHERE clause to filter the data + + Returns: + List of dictionaries, each representing a random row from the table + + Raises: + ValueError: If limit is > 10 or < 1 + ConnectionError: If there's an issue connecting to ClickHouse + ClickHouseError: If there's an error executing the query + """ + # Validate limit + if limit > 10: + logger.warning( + f"Requested limit {limit} exceeds maximum of 10, using 10 instead") + limit = 10 + elif limit < 1: + logger.warning( + f"Requested limit {limit} is less than 1, using 1 instead") + limit = 1 + + logger.info(f"Sampling {limit} random rows from {database}.{table}") + client = create_clickhouse_client() + + try: + # Build the query + query = f"SELECT {columns} FROM {quote_identifier(database)}.{quote_identifier(table)}" + + # Add WHERE clause if provided + if where: + query += f" WHERE {where}" + + # Add random ordering and limit + query += f" ORDER BY rand() LIMIT {limit}" + + logger.info(f"Executing sampling query: {query}") + + # Execute query with readonly setting for safety + res = client.query(query, settings={"readonly": 1}) + column_names = res.column_names + rows = [] + + for row in res.result_rows: + row_dict = {} + for i, col_name in enumerate(column_names): + row_dict[col_name] = row[i] + rows.append(row_dict) + + logger.info(f"Sample query returned {len(rows)} rows") + return rows + except Exception as err: + logger.error(f"Error executing sample query: {err}") + return f"error running sample query: {err}" + + def create_clickhouse_client(): client_config = config.get_client_config() logger.info( @@ -139,7 +220,8 @@ def create_clickhouse_client(): client = clickhouse_connect.get_client(**client_config) # Test the connection version = client.server_version - logger.info(f"Successfully connected to ClickHouse server version {version}") + logger.info( + f"Successfully connected to ClickHouse server version {version}") return client except Exception as e: logger.error(f"Failed to connect to ClickHouse: {str(e)}") diff --git a/uv.lock b/uv.lock index 9992576..05ff244 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.13" [[package]] @@ -235,6 +236,7 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'" }, { name = "uvicorn", specifier = ">=0.34.0" }, ] +provides-extras = ["dev"] [[package]] name = "mdurl"