Feature: Added grail budget tracker (#126)

christian-kreuzberger-dtx · web-flow · commit 051726cfc944 · 2025-09-15T14:33:44.000+02:00
* feat: Added Grail Budget Tracking and a reset_grail_budget tool

* fix: Allow setting Grail Budget to -1
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,15 +1,20 @@
 # @dynatrace-oss/dynatrace-mcp-server
 
-- Fixed an issue with stateless HTTP server only taking a single connection
-
 ## Unreleased Changes
 
+- Fixed an issue with stateless HTTP server only taking a single connection
+- Added Grail budget tracking with `DT_GRAIL_QUERY_BUDGET_GB` environment variable (default: 1000 GB, setting it to `-1` disables it), as well as warnings and exceeded alerts in `execute_dql` tool responses
+- Enforce Grail budget by throwing an exception when the budget has been exceeded, preventing further DQL query execution
+
 ## 0.6.0 (Release Candidate 1)
 
 - Added metadata output to `execute_dql` tool which includes scanned bytes information, enabling better cost tracking for Dynatrace Grail data access
 - Added next-steps guidance to `get_entity_details` tool to help users discover related metrics, problems, and logs for entities
 - Added telemetry via Dynatrace OpenKit to improve the product with anonymous usage statistics and error information, enhancing product development while respecting user privacy (can be disabled via `DT_MCP_DISABLE_TELEMETRY` environment variable)
 - Added `server.json` configuration and published the MCP server to the official MCP Registry, making it easier for users to discover and install the server
+- Added metadata output which includes Grail scanned bytes (for cost tracking) to `execute_dql`
+- Added next-steps for `get_entity_details` to find out about metrics, problems and logs
+- Added Telemetry via Dynatrace OpenKit to improve the product with anonymous usage statistics and error information (can be disabled via `DT_MCP_DISABLE_TELEMETRY` environment variable)
 
 ## 0.5.0
 
diff --git a/README.md b/README.md
@@ -60,6 +60,17 @@ depend on the volume (GB scanned).
 1. Review your current Dynatrace consumption model and pricing
 2. Understand the cost implications of the specific data you plan to query (logs, events, metrics) - see [Dynatrace Pricing and Rate Card](https://www.dynatrace.com/pricing/)
 3. Start with smaller timeframes (e.g., 12h-24h) and make use of [buckets](https://docs.dynatrace.com/docs/discover-dynatrace/platform/grail/data-model#built-in-grail-buckets) to reduce the cost impact
+4. Set an appropriate `DT_GRAIL_QUERY_BUDGET_GB` environment variable (default: 1000 GB) to control and monitor your Grail query consumption
+
+**Grail Budget Tracking:**
+
+The MCP server includes built-in budget tracking for Grail queries to help you monitor and control costs:
+
+- Set `DT_GRAIL_QUERY_BUDGET_GB` (default: 1000 GB) to define your session budget limit
+- The server tracks bytes scanned across all Grail queries in the current session
+- You'll receive warnings when approaching 80% of your budget
+- Budget exceeded alerts help prevent unexpected high consumption: once the budget is exceeded, further DQL queries are rejected with an error
+- Budget resets when you restart the MCP server session (or when the `reset_grail_budget` tool is called)
 
 **To understand costs that occurred:**
 
@@ -321,6 +332,7 @@ You can set up authentication via **Platform Tokens** (recommended) or **OAuth C
 - `DT_PLATFORM_TOKEN` (string, e.g., `dt0s16.SAMPLE.abcd1234`) - **Recommended**: Dynatrace Platform Token
 - `OAUTH_CLIENT_ID` (string, e.g., `dt0s02.SAMPLE`) - Alternative: Dynatrace OAuth Client ID (for advanced use cases)
 - `OAUTH_CLIENT_SECRET` (string, e.g., `dt0s02.SAMPLE.abcd1234`) - Alternative: Dynatrace OAuth Client Secret (for advanced use cases)
+- `DT_GRAIL_QUERY_BUDGET_GB` (number, default: 1000) - Budget limit in GB (base 1000) for Grail query bytes scanned per session; set it to `-1` to disable the budget. The MCP server tracks your Grail usage, warns when you approach this limit, and rejects further DQL queries once the limit has been exceeded.
 
 **Platform Tokens are recommended** for most use cases as they provide a simpler authentication flow. OAuth Clients should only be used when specific OAuth features are required.
 
diff --git a/integration-tests/davis-copilot-explain-dql.integration.test.ts b/integration-tests/davis-copilot-explain-dql.integration.test.ts
@@ -103,7 +103,7 @@ describe('DQL Explanation Integration Tests', () => {
     expect(response.status === 'SUCCESSFUL' || response.status === 'SUCCESSFUL_WITH_WARNINGS').toBeTruthy();
 
     expect(response.summary.toLowerCase()).toContain('group logs by');
-    expect(response.summary.toLowerCase()).toContain('count the number of logs');
+    expect(response.summary.toLowerCase()).toContain('calculate the total number of logs');
     // The explanation should be reasonably detailed
     expect(response.explanation.length).toBeGreaterThan(50);
   });
diff --git a/src/capabilities/execute-dql.test.ts b/src/capabilities/execute-dql.test.ts
@@ -0,0 +1,138 @@
+import { executeDql } from './execute-dql';
+import { HttpClient } from '@dynatrace-sdk/http-client';
+import { QueryExecutionClient, QueryStartResponse } from '@dynatrace-sdk/client-query';
+import { resetGrailBudgetTracker, getGrailBudgetTracker } from '../utils/grail-budget-tracker';
+
+// Mock the external dependencies
+jest.mock('@dynatrace-sdk/client-query');
+jest.mock('../utils/user-agent', () => ({
+  getUserAgent: () => 'test-user-agent',
+}));
+
+describe('executeDql Budget Check', () => {
+  let mockHttpClient: jest.Mocked<HttpClient>;
+  let mockQueryExecutionClient: jest.Mocked<QueryExecutionClient>;
+
+  beforeEach(() => {
+    // Reset budget tracker before each test
+    resetGrailBudgetTracker();
+
+    // Create mock HTTP client
+    mockHttpClient = {
+      // Add any necessary properties/methods for HttpClient mock
+    } as jest.Mocked<HttpClient>;
+
+    // Create mock QueryExecutionClient
+    mockQueryExecutionClient = {
+      queryExecute: jest.fn(),
+      queryPoll: jest.fn(),
+      queryCancel: jest.fn(),
+    } as unknown as jest.Mocked<QueryExecutionClient>;
+
+    // Mock the QueryExecutionClient constructor
+    (QueryExecutionClient as jest.MockedClass<typeof QueryExecutionClient>).mockImplementation(
+      () => mockQueryExecutionClient,
+    );
+  });
+
+  afterEach(() => {
+    jest.clearAllMocks();
+    resetGrailBudgetTracker();
+  });
+
+  it('should prevent execution when budget is exceeded', async () => {
+    const budgetLimitGB = 0.001; // Very small budget limit (1 MB)
+
+    // First, exhaust the budget by adding bytes to tracker
+    const tracker = getGrailBudgetTracker(budgetLimitGB);
+    tracker.addBytesScanned(2 * 1000 * 1000); // Add 2 MB, exceeding the 1 MB limit
+
+    const dqlStatement = 'fetch logs | limit 10';
+    const body = { query: dqlStatement };
+
+    // Execute DQL with budget limit and expect it to throw
+    await expect(executeDql(mockHttpClient, body, budgetLimitGB)).rejects.toThrow(/budget/);
+
+    // Verify that queryExecute was NOT called
+    expect(mockQueryExecutionClient.queryExecute).not.toHaveBeenCalled();
+  });
+
+  it('should allow execution when budget is not exceeded', async () => {
+    const budgetLimitGB = 1; // 1 GB budget limit
+    const dqlStatement = 'fetch logs | limit 10';
+    const body = { query: dqlStatement };
+
+    // Mock successful response
+    const mockResponse: QueryStartResponse = {
+      state: 'RUNNING',
+      result: {
+        records: [{ field1: 'value1' }],
+        types: [],
+        metadata: {
+          grail: {
+            scannedBytes: 1000,
+            scannedRecords: 1,
+            executionTimeMilliseconds: 100,
+            queryId: 'test-query-id',
+          },
+        },
+      },
+    };
+
+    mockQueryExecutionClient.queryExecute.mockResolvedValue(mockResponse);
+
+    // Execute DQL with budget limit
+    const result = await executeDql(mockHttpClient, body, budgetLimitGB);
+
+    // Verify that queryExecute WAS called
+    expect(mockQueryExecutionClient.queryExecute).toHaveBeenCalledWith({
+      body,
+      dtClientContext: 'test-user-agent',
+    });
+
+    // Verify the result is returned correctly
+    expect(result).toBeDefined();
+    expect(result?.records).toEqual([{ field1: 'value1' }]);
+    expect(result?.scannedBytes).toBe(1000);
+    expect(result?.budgetState?.isBudgetExceeded).toBe(false);
+  });
+
+  it('should allow execution when no budget limit is provided', async () => {
+    const dqlStatement = 'fetch logs | limit 10';
+    const body = { query: dqlStatement };
+
+    // Mock successful response
+    const mockResponse: QueryStartResponse = {
+      state: 'RUNNING',
+      result: {
+        records: [{ field1: 'value1' }],
+        types: [],
+        metadata: {
+          grail: {
+            scannedBytes: 1000000000, // 1 GB - would exceed small budgets
+            scannedRecords: 1000,
+            executionTimeMilliseconds: 100,
+            queryId: 'test-query-id',
+          },
+        },
+      },
+    };
+
+    mockQueryExecutionClient.queryExecute.mockResolvedValue(mockResponse);
+
+    // Execute DQL without budget limit
+    const result = await executeDql(mockHttpClient, body);
+
+    // Verify that queryExecute WAS called
+    expect(mockQueryExecutionClient.queryExecute).toHaveBeenCalledWith({
+      body,
+      dtClientContext: 'test-user-agent',
+    });
+
+    // Verify the result is returned correctly
+    expect(result).toBeDefined();
+    expect(result?.records).toEqual([{ field1: 'value1' }]);
+    expect(result?.scannedBytes).toBe(1000000000);
+    expect(result?.budgetState).toBeUndefined(); // No budget tracking
+  });
+});
diff --git a/src/capabilities/execute-dql.ts b/src/capabilities/execute-dql.ts
@@ -1,6 +1,7 @@
 import { HttpClient } from '@dynatrace-sdk/http-client';
 import { QueryExecutionClient, QueryAssistanceClient, QueryResult, ExecuteRequest } from '@dynatrace-sdk/client-query';
 import { getUserAgent } from '../utils/user-agent';
+import { getGrailBudgetTracker, GrailBudgetTracker, generateBudgetWarning } from '../utils/grail-budget-tracker';
 
 export const verifyDqlStatement = async (dtClient: HttpClient, dqlStatement: string) => {
   const queryAssistanceClient = new QueryAssistanceClient(dtClient);
@@ -23,23 +24,46 @@ export interface DqlExecutionResult {
   executionTimeMilliseconds?: number;
   queryId?: string;
   sampled?: boolean;
+  /** Budget tracking information */
+  budgetState?: GrailBudgetTracker;
+  /** Budget warning message if applicable */
+  budgetWarning?: string;
 }
 
 /**
  * Helper function to create a DQL execution result and log metadata information.
  * @param queryResult - The query result from Dynatrace API
  * @param logPrefix - Prefix for the log message (e.g., "DQL Execution Metadata" or "DQL Execution Metadata (Polled)")
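+ * @param budgetLimitGB - Optional budget limit in GB; when provided, the scanned bytes of this result are added to the Grail budget tracker and the resulting budget state (and warning, if any) is attached to the returned result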
  * @returns DqlExecutionResult with extracted metadata
  */
-const createResultAndLog = (queryResult: QueryResult, logPrefix: string): DqlExecutionResult => {
+const createResultAndLog = (
+  queryResult: QueryResult,
+  logPrefix: string,
+  budgetLimitGB?: number,
+): DqlExecutionResult => {
+  const scannedBytes = queryResult.metadata?.grail?.scannedBytes || 0;
+
+  // Track budget if limit is provided
+  let budgetState: GrailBudgetTracker | undefined;
+  let budgetWarning: string | undefined;
+
+  if (budgetLimitGB !== undefined) {
+    const tracker = getGrailBudgetTracker(budgetLimitGB);
+    budgetState = tracker.addBytesScanned(scannedBytes);
+    budgetWarning = generateBudgetWarning(budgetState, scannedBytes) || undefined;
+  }
+
   const result: DqlExecutionResult = {
     records: queryResult.records,
     metadata: queryResult.metadata,
-    scannedBytes: queryResult.metadata?.grail?.scannedBytes,
+    scannedBytes,
     scannedRecords: queryResult.metadata?.grail?.scannedRecords,
     executionTimeMilliseconds: queryResult.metadata?.grail?.executionTimeMilliseconds,
     queryId: queryResult.metadata?.grail?.queryId,
     sampled: queryResult.metadata?.grail?.sampled,
+    budgetState,
+    budgetWarning,
   };
 
   console.error(
@@ -55,12 +79,27 @@ const createResultAndLog = (queryResult: QueryResult, logPrefix: string): DqlExe
  * If the result is not immediately available, it will poll for the result until it is available.
  * @param dtClient
  * @param body - Contains the DQL statement to execute, and optional parameters like maxResultRecords and maxResultBytes
+ * @param budgetLimitGB - Optional budget limit in GB for tracking bytes scanned; if the tracked budget is already exceeded, the query is not executed and an Error is thrown
  * @returns the result with records, metadata and cost information, or undefined if the query failed or no result was returned.
  */
 export const executeDql = async (
   dtClient: HttpClient,
   body: ExecuteRequest,
+  budgetLimitGB?: number,
 ): Promise<DqlExecutionResult | undefined> => {
+  // Check budget before executing the query if budget limit is provided
+  if (budgetLimitGB !== undefined) {
+    const tracker = getGrailBudgetTracker(budgetLimitGB);
+    const currentState = tracker.getState();
+
+    if (currentState.isBudgetExceeded) {
+      console.error('DQL execution aborted: Grail budget has been exceeded');
+      const budgetWarning = generateBudgetWarning(currentState, 0);
+
+      throw new Error(budgetWarning || 'DQL execution aborted: Grail budget has been exceeded');
+    }
+  }
+
   // create a Dynatrace QueryExecutionClient
   const queryExecutionClient = new QueryExecutionClient(dtClient);
 
@@ -74,7 +113,7 @@ export const executeDql = async (
   // check if we already got a result back
   if (response.result) {
     // yes - return response result immediately
-    return createResultAndLog(response.result, 'execute_dql - Metadata:');
+    return createResultAndLog(response.result, 'execute_dql - Metadata:', budgetLimitGB);
   }
 
   // no result yet? we have wait and poll (this requires requestToken to be set)
@@ -92,7 +131,7 @@ export const executeDql = async (
       // check if we got a result from the polling endpoint
       if (pollResponse.result) {
         // yes - let's return the polled result
-        return createResultAndLog(pollResponse.result, 'execute_dql Metadata (polled):');
+        return createResultAndLog(pollResponse.result, 'execute_dql Metadata (polled):', budgetLimitGB);
       }
     } while (pollResponse.state === 'RUNNING' || pollResponse.state === 'NOT_STARTED');
 
diff --git a/src/getDynatraceEnv.test.ts b/src/getDynatraceEnv.test.ts
@@ -18,6 +18,7 @@ describe('getDynatraceEnv', () => {
       dtEnvironment: env.DT_ENVIRONMENT,
       dtPlatformToken: env.DT_PLATFORM_TOKEN,
       slackConnectionId: env.SLACK_CONNECTION_ID,
+      grailBudgetGB: 1000, // Default value
     });
   });
 
diff --git a/src/getDynatraceEnv.ts b/src/getDynatraceEnv.ts
@@ -5,6 +5,7 @@ export interface DynatraceEnv {
   dtPlatformToken?: string;
   dtEnvironment: string;
   slackConnectionId: string;
+  grailBudgetGB: number;
 }
 
 /**
@@ -17,6 +18,7 @@ export function getDynatraceEnv(env: NodeJS.ProcessEnv = process.env): Dynatrace
   const dtPlatformToken = env.DT_PLATFORM_TOKEN;
   const dtEnvironment = env.DT_ENVIRONMENT;
   const slackConnectionId = env.SLACK_CONNECTION_ID || 'fake-slack-connection-id';
+  const grailBudgetGB = parseFloat(env.DT_GRAIL_QUERY_BUDGET_GB || '1000'); // Default to 1000 GB
 
   if (!dtEnvironment) {
     throw new Error('Please set DT_ENVIRONMENT environment variable to your Dynatrace Platform Environment');
@@ -28,6 +30,11 @@ export function getDynatraceEnv(env: NodeJS.ProcessEnv = process.env): Dynatrace
     );
   }
 
+  // The budget must be a positive number; the only exception is -1, which stands for an unlimited budget
+  if (isNaN(grailBudgetGB) || (grailBudgetGB <= 0 && grailBudgetGB !== -1)) {
+    throw new Error('DT_GRAIL_QUERY_BUDGET_GB must be a positive number representing GB budget for Grail queries');
+  }
+
   if (!dtEnvironment.startsWith('https://')) {
     throw new Error(
       'Please set DT_ENVIRONMENT to a valid Dynatrace Environment URL (e.g., https://<environment-id>.apps.dynatrace.com)',
@@ -40,5 +47,5 @@ export function getDynatraceEnv(env: NodeJS.ProcessEnv = process.env): Dynatrace
     );
   }
 
-  return { oauthClientId, oauthClientSecret, dtPlatformToken, dtEnvironment, slackConnectionId };
+  return { oauthClientId, oauthClientSecret, dtPlatformToken, dtEnvironment, slackConnectionId, grailBudgetGB };
 }
diff --git a/src/index.ts b/src/index.ts
diff --git a/src/utils/grail-budget-tracker.test.ts b/src/utils/grail-budget-tracker.test.ts
diff --git a/src/utils/grail-budget-tracker.ts b/src/utils/grail-budget-tracker.ts