From da4db5c5acc063d55f859aa21dc3ae5616a14b75 Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:28:21 +0300 Subject: [PATCH 1/3] Feat: Add support for dot env variables --- docs/guides/configuration.md | 27 ++++++++- sqlmesh/core/config/loader.py | 4 ++ tests/core/test_config.py | 104 ++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index df8fd9e3f4..18bc7fec24 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -98,7 +98,32 @@ All software runs within a system environment that stores information as "enviro SQLMesh can access environment variables during configuration, which enables approaches like storing passwords/secrets outside the configuration file and changing configuration parameters dynamically based on which user is running SQLMesh. -You can use environment variables in two ways: specifying them in the configuration file or creating properly named variables to override configuration file values. +You can specify environment variables in the configuration file or by storing them in a `.env` file. + +### .env files + +SQLMesh automatically loads environment variables from a `.env` file in your project directory. This provides a convenient way to manage environment variables without having to set them in your shell. + +Create a `.env` file in your project root with key-value pairs: + +```bash +# .env file +SNOWFLAKE_PW=my_secret_password +S3_BUCKET=s3://my-data-bucket/warehouse +DATABASE_URL=postgresql://user:pass@localhost/db + +# Override specific SQLMesh configuration values +SQLMESH__DEFAULT_GATEWAY=production +SQLMESH__MODEL_DEFAULTS__DIALECT=snowflake +``` + +See the [overrides](#overrides) section for a detailed explanation of how these are defined. 
+ +The rest of the `.env` file variables can be used in your configuration files with `{{ env_var('VARIABLE_NAME') }}` syntax in YAML or accessed via `os.environ['VARIABLE_NAME']` in Python. + +**Important considerations:** +- Add `.env` to your `.gitignore` file to avoid committing sensitive information +- SQLMesh will only load the `.env` file if it exists in the project directory ### Configuration file diff --git a/sqlmesh/core/config/loader.py b/sqlmesh/core/config/loader.py index 10acf74fa8..543299a8ba 100644 --- a/sqlmesh/core/config/loader.py +++ b/sqlmesh/core/config/loader.py @@ -6,6 +6,7 @@ from pathlib import Path from pydantic import ValidationError +from dotenv import load_dotenv from sqlglot.helper import ensure_list from sqlmesh.core import constants as c @@ -35,6 +36,9 @@ def load_configs( for p in (glob.glob(str(path)) or [str(path)]) ] + for path in absolute_paths: + load_dotenv(dotenv_path=path / ".env", override=True) + if not isinstance(config, str): if type(config) != config_type: config = convert_config_type(config, config_type) diff --git a/tests/core/test_config.py b/tests/core/test_config.py index b3457345a8..5c3c6b5d4a 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -24,6 +24,7 @@ load_config_from_env, load_config_from_paths, load_config_from_python_module, + load_configs, ) from sqlmesh.core.context import Context from sqlmesh.core.engine_adapter.athena import AthenaEngineAdapter @@ -1132,3 +1133,106 @@ def test_environment_suffix_target_catalog(tmp_path: Path) -> None: Config, project_paths=[config_path], ) + + +def test_load_python_config_dot_env_vars(tmp_path_factory): + main_dir = tmp_path_factory.mktemp("python_config") + config_path = main_dir / "config.py" + with open(config_path, "w", encoding="utf-8") as fd: + fd.write( + """from sqlmesh.core.config import Config, DuckDBConnectionConfig, GatewayConfig, ModelDefaultsConfig +config = Config(gateways={"duckdb_gateway": 
GatewayConfig(connection=DuckDBConnectionConfig())}, model_defaults=ModelDefaultsConfig(dialect='')) + """ + ) + + # The environment variable value from the dot env file should be set + # SQLMESH__ variables override config fields directly if they follow the naming structure + dot_path = main_dir / ".env" + with open(dot_path, "w", encoding="utf-8") as fd: + fd.write( + """SQLMESH__GATEWAYS__DUCKDB_GATEWAY__STATE_CONNECTION__TYPE="bigquery" +SQLMESH__GATEWAYS__DUCKDB_GATEWAY__STATE_CONNECTION__CHECK_IMPORT="false" +SQLMESH__DEFAULT_GATEWAY="duckdb_gateway" + """ + ) + + # Use mock.patch.dict to isolate environment variables between the tests + with mock.patch.dict(os.environ, {}, clear=True): + configs = load_configs( + "config", + Config, + paths=[main_dir], + ) + + assert next(iter(configs.values())) == Config( + gateways={ + "duckdb_gateway": GatewayConfig( + connection=DuckDBConnectionConfig(), + state_connection=BigQueryConnectionConfig(check_import=False), + ), + }, + model_defaults=ModelDefaultsConfig(dialect=""), + default_gateway="duckdb_gateway", + ) + + +def test_load_yaml_config_dot_env_vars(tmp_path_factory): + main_dir = tmp_path_factory.mktemp("yaml_config") + config_path = main_dir / "config.yaml" + with open(config_path, "w", encoding="utf-8") as fd: + fd.write( + """gateways: + duckdb_gateway: + connection: + type: duckdb + catalogs: + local: local.db + cloud_sales: {{ env_var('S3_BUCKET') }} + extensions: + - name: httpfs + secrets: + - type: "s3" + key_id: {{ env_var('S3_KEY') }} + secret: {{ env_var('S3_SECRET') }} +model_defaults: + dialect: "" +""" + ) + + # This test checks both using SQLMESH__ prefixed environment variables with underscores + # and setting a regular environment variable for use with env_var(). 
+ dot_path = main_dir / ".env" + with open(dot_path, "w", encoding="utf-8") as fd: + fd.write( + """S3_BUCKET="s3://metrics_bucket/sales.db" +S3_KEY="S3_KEY_ID" +S3_SECRET="XXX_S3_SECRET_XXX" +SQLMESH__DEFAULT_GATEWAY="duckdb_gateway" +SQLMESH__MODEL_DEFAULTS__DIALECT="athena" +""" + ) + + # Use mock.patch.dict to isolate environment variables between the tests + with mock.patch.dict(os.environ, {}, clear=True): + configs = load_configs( + "config", + Config, + paths=[main_dir], + ) + + assert next(iter(configs.values())) == Config( + gateways={ + "duckdb_gateway": GatewayConfig( + connection=DuckDBConnectionConfig( + catalogs={ + "local": "local.db", + "cloud_sales": "s3://metrics_bucket/sales.db", + }, + extensions=[{"name": "httpfs"}], + secrets=[{"type": "s3", "key_id": "S3_KEY_ID", "secret": "XXX_S3_SECRET_XXX"}], + ), + ), + }, + default_gateway="duckdb_gateway", + model_defaults=ModelDefaultsConfig(dialect="athena"), + ) From fa847ec62ede291207bf44f93419df56aabcf9df Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Fri, 27 Jun 2025 19:56:45 +0300 Subject: [PATCH 2/3] add ability to provide custom path --- docs/guides/configuration.md | 12 +++++++- pyproject.toml | 1 + sqlmesh/cli/main.py | 9 +++++- sqlmesh/core/config/loader.py | 8 +++-- sqlmesh/magics.py | 9 +++++- tests/core/test_config.py | 57 +++++++++++++++++++++++++++++++++++ 6 files changed, 91 insertions(+), 5 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 18bc7fec24..97c3097a21 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -121,9 +121,19 @@ See the [overrides](#overrides) section for a detailed explanation of how these The rest of the `.env` file variables can be used in your configuration files with `{{ env_var('VARIABLE_NAME') }}` syntax in YAML or accessed via `os.environ['VARIABLE_NAME']` in Python. 
+#### Custom dot env file location and name + +By default, SQLMesh loads `.env` files from each project directory. Alternatively, you can export the `SQLMESH_DOTENV_PATH` environment variable to specify a custom path and persist it across commands: + +```bash +export SQLMESH_DOTENV_PATH=/path/to/custom/.custom_env +sqlmesh plan +``` + **Important considerations:** - Add `.env` to your `.gitignore` file to avoid committing sensitive information -- SQLMesh will only load the `.env` file if it exists in the project directory +- SQLMesh will only load the `.env` file if it exists in the project directory (unless a custom path is specified) +- When using a custom path, only that file is loaded; any `.env` file in the project directory is ignored. ### Configuration file diff --git a/pyproject.toml b/pyproject.toml index 993128874e..e6d14129d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,7 @@ dev = [ "PyAthena[Pandas]", "PyGithub>=2.6.0", "pyperf", + "python-dotenv", "pyspark~=3.5.0", "pytest", "pytest-asyncio", diff --git a/sqlmesh/cli/main.py b/sqlmesh/cli/main.py index f3c3afa46a..93d3051bcd 100644 --- a/sqlmesh/cli/main.py +++ b/sqlmesh/cli/main.py @@ -84,6 +84,12 @@ def _sqlmesh_version() -> str: type=str, help="The directory to write log files to.", ) +@click.option( + "--dotenv", + type=click.Path(exists=True, path_type=Path), + help="Path to a custom .env file to load environment variables.", + envvar="SQLMESH_DOTENV_PATH", +) @click.pass_context @error_handler def cli( @@ -95,6 +101,7 @@ def cli( debug: bool = False, log_to_stdout: bool = False, log_file_dir: t.Optional[str] = None, + dotenv: t.Optional[Path] = None, ) -> None: """SQLMesh command line tool.""" if "--help" in sys.argv: @@ -118,7 +125,7 @@ def cli( ) configure_console(ignore_warnings=ignore_warnings) - configs = load_configs(config, Context.CONFIG_TYPE, paths) + configs = load_configs(config, Context.CONFIG_TYPE, paths, dotenv_path=dotenv) log_limit = 
list(configs.values())[0].log_limit remove_excess_logs(log_file_dir, log_limit) diff --git a/sqlmesh/core/config/loader.py b/sqlmesh/core/config/loader.py index 543299a8ba..f1ef0ed5a7 100644 --- a/sqlmesh/core/config/loader.py +++ b/sqlmesh/core/config/loader.py @@ -26,6 +26,7 @@ def load_configs( config_type: t.Type[C], paths: t.Union[str | Path, t.Iterable[str | Path]], sqlmesh_path: t.Optional[Path] = None, + dotenv_path: t.Optional[Path] = None, ) -> t.Dict[Path, C]: sqlmesh_path = sqlmesh_path or c.SQLMESH_PATH config = config or "config" @@ -36,8 +37,11 @@ def load_configs( for p in (glob.glob(str(path)) or [str(path)]) ] - for path in absolute_paths: - load_dotenv(dotenv_path=path / ".env", override=True) + if dotenv_path: + load_dotenv(dotenv_path=dotenv_path, override=True) + else: + for path in absolute_paths: + load_dotenv(dotenv_path=path / ".env", override=True) if not isinstance(config, str): if type(config) != config_type: diff --git a/sqlmesh/magics.py b/sqlmesh/magics.py index 2d299df668..58f7135654 100644 --- a/sqlmesh/magics.py +++ b/sqlmesh/magics.py @@ -8,6 +8,7 @@ from argparse import Namespace, SUPPRESS from collections import defaultdict from copy import deepcopy +from pathlib import Path from hyperscript import h @@ -166,6 +167,9 @@ def _shell(self) -> t.Any: @argument("--ignore-warnings", action="store_true", help="Ignore warnings.") @argument("--debug", action="store_true", help="Enable debug mode.") @argument("--log-file-dir", type=str, help="The directory to write the log file to.") + @argument( + "--dotenv", type=str, help="Path to a custom .env file to load environment variables from." 
+ ) @line_magic def context(self, line: str) -> None: """Sets the context in the user namespace.""" @@ -181,7 +185,10 @@ def context(self, line: str) -> None: ) configure_console(ignore_warnings=args.ignore_warnings) - configs = load_configs(args.config, Context.CONFIG_TYPE, args.paths) + dotenv_path = Path(args.dotenv) if args.dotenv else None + configs = load_configs( + args.config, Context.CONFIG_TYPE, args.paths, dotenv_path=dotenv_path + ) log_limit = list(configs.values())[0].log_limit remove_excess_logs(log_file_dir, log_limit) diff --git a/tests/core/test_config.py b/tests/core/test_config.py index 5c3c6b5d4a..dd07c8395f 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -1236,3 +1236,60 @@ def test_load_yaml_config_dot_env_vars(tmp_path_factory): default_gateway="duckdb_gateway", model_defaults=ModelDefaultsConfig(dialect="athena"), ) + + +def test_load_yaml_config_custom_dotenv_path(tmp_path_factory): + main_dir = tmp_path_factory.mktemp("yaml_config_2") + config_path = main_dir / "config.yaml" + with open(config_path, "w", encoding="utf-8") as fd: + fd.write( + """gateways: + test_gateway: + connection: + type: duckdb + database: {{ env_var('DB_NAME') }} +""" + ) + + # Create a custom dot env file in a different location + custom_env_dir = tmp_path_factory.mktemp("custom_env") + custom_env_path = custom_env_dir / ".my_env" + with open(custom_env_path, "w", encoding="utf-8") as fd: + fd.write( + """DB_NAME="custom_database.db" +SQLMESH__DEFAULT_GATEWAY="test_gateway" +SQLMESH__MODEL_DEFAULTS__DIALECT="postgres" +""" + ) + + # Test that without custom dotenv path, env vars are not loaded + with mock.patch.dict(os.environ, {}, clear=True): + with pytest.raises( + ConfigError, match=r"Default model SQL dialect is a required configuratio*" + ): + load_configs( + "config", + Config, + paths=[main_dir], + ) + + # Test that with custom dotenv path, env vars are loaded correctly + with mock.patch.dict(os.environ, {}, clear=True): + configs 
= load_configs( + "config", + Config, + paths=[main_dir], + dotenv_path=custom_env_path, + ) + + assert next(iter(configs.values())) == Config( + gateways={ + "test_gateway": GatewayConfig( + connection=DuckDBConnectionConfig( + database="custom_database.db", + ), + ), + }, + default_gateway="test_gateway", + model_defaults=ModelDefaultsConfig(dialect="postgres"), + ) From 375e91078b73954b7cf4928f17225ad7a48c60ca Mon Sep 17 00:00:00 2001 From: Themis Valtinos <73662635+themisvaltinos@users.noreply.github.com> Date: Mon, 30 Jun 2025 11:48:52 +0300 Subject: [PATCH 3/3] update docs and clarify use --- docs/guides/configuration.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 97c3097a21..b1b06c59f6 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -123,11 +123,21 @@ The rest of the `.env` file variables can be used in your configuration files wi #### Custom dot env file location and name -By default, SQLMesh loads `.env` files from each project directory. Alternatively, you can export the `SQLMESH_DOTENV_PATH` environment variable to specify a custom path and persist it across commands: +By default, SQLMesh loads `.env` files from each project directory. However, you can specify a custom path using the `--dotenv` CLI flag directly when running a command: + +```bash +sqlmesh --dotenv /path/to/custom/.env plan +``` + +!!! note + The `--dotenv` flag is a global option and must be placed **before** the subcommand (e.g. `plan`, `run`), not after. + +Alternatively, you can export the `SQLMESH_DOTENV_PATH` environment variable once, to persist a custom path across all subsequent commands in your shell session: ```bash export SQLMESH_DOTENV_PATH=/path/to/custom/.custom_env sqlmesh plan +sqlmesh run ``` **Important considerations:**