---
date: 2026-03-30
id: natural-language-log-exploration
title: Natural Language Log Exploration
description: Search, filter, and analyze logs in SigNoz by asking questions in plain English through your AI assistant.
doc_type: howto
---

import GetHelp from '@/components/shared/get-help.md'

A product manager posts in #incidents:

> Multiple sellers are complaining that products they updated hours ago still show old prices in search results. The catalog page shows the right data, but search is stale.

You have SigNoz collecting logs and the MCP server connected to your AI assistant. You know search is powered by an indexing pipeline, but you don't know the internals.

## Prerequisites

- Connect your AI assistant to SigNoz using the [MCP Server guide](https://signoz.io/docs/ai/signoz-mcp-server).
- Make sure your services are sending logs to SigNoz. See [Send Logs to SigNoz](https://signoz.io/docs/userguide/logs) if you haven't set this up.

## Step 1: Search for the Symptom

```
Show me recent error or warning logs from any service related to search indexing or index lag in the last 6 hours.
```

The results cluster in `search-indexer`:

```
Found 34 logs matching across 2 services:

1. 14:52:11 WARN search-indexer - "Index lag exceeds threshold: 4h12m behind head (threshold: 15m)"
2. 14:47:03 WARN search-indexer - "Index lag exceeds threshold: 4h07m behind head (threshold: 15m)"
3. 14:42:01 WARN search-indexer - "Index lag exceeds threshold: 4h02m behind head (threshold: 15m)"
4. 14:22:18 WARN search-indexer - "Consumer group rebalance completed, partition assignment unchanged"
5. 13:15:44 WARN search-indexer - "Batch processing rate: 12 events/sec (normal: ~340 events/sec)"
...
```

The search indexer is more than four hours behind, and its processing rate has dropped from ~340 events/sec to 12. That explains the stale results. But there are no errors, only slowness. Why is it crawling?
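
Those two numbers are enough for a quick sanity check. As a back-of-envelope sketch (assuming events arrive at roughly the normal ~340 events/sec, which the logs do not state directly), the index falls behind by almost a full second for every second of wall-clock time:

```python
# Back-of-envelope check on the lag numbers in the warnings above.
# Assumption (not from the logs): events arrive at roughly the normal
# processing rate of ~340 events/sec.

incoming_rate = 340    # events/sec arriving (assumed)
processing_rate = 12   # events/sec actually indexed (from the logs)

# Seconds of lag accrued per wall-clock second while degraded:
lag_growth = 1 - processing_rate / incoming_rate   # ~0.965

hours_degraded = 4
lag_minutes = lag_growth * hours_degraded * 60
print(f"~{lag_minutes:.0f} minutes of lag accrued over {hours_degraded}h")
# → ~232 minutes of lag accrued over 4h
```

That is consistent with the 4h+ lag the indexer reports after roughly four hours of degraded throughput.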

## Step 2: Understand Why Throughput Dropped

```
Show me logs from search-indexer in the last 6 hours that mention "skip", "drop", "malformed", "parse", or "invalid".
```

The volume is striking:

```
Found 9,847 logs matching:

1. 14:51:58 WARN "Skipping malformed event: missing required field 'sku_id' (event_source: catalog-pipeline)"
2. 14:51:57 WARN "Skipping malformed event: field 'price' is not numeric: 'USD29.99' (event_source: catalog-pipeline)"
3. 14:51:55 WARN "Parse retry exhausted for event, moving to dead letter queue (event_source: catalog-pipeline)"
...
```

Nearly 10,000 malformed events in 6 hours. The indexer is spending all its time retrying bad data and dead-lettering it, so valid events are stuck behind the flood. Every bad event comes from `catalog-pipeline`.

```
How many "Skipping malformed event" warnings has search-indexer logged per hour over the last 24 hours?
```

The hourly breakdown shows a clear inflection point:

```
Malformed event warnings per hour (search-indexer):

  00:00 - 09:59 UTC: 0-3/hour (baseline noise)
  10:00 - 10:59: 2
  11:00 - 11:59: 1,847 <-- spike
  12:00 - 12:59: 1,923
  13:00 - 13:59: 1,812
  14:00 - 14:59: 1,690 (ongoing)
```

The malformed events started at 11:00 UTC. Something changed in `catalog-pipeline` around that time.
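
The inflection-point reading is mechanical enough to sketch. A minimal version using the hourly counts reported above (the dict literal and the 100x threshold are illustrative choices, not SigNoz internals):

```python
# Hourly counts as reported by the aggregation above; the threshold is an
# illustrative choice, not SigNoz internals.
hourly_counts = {10: 2, 11: 1847, 12: 1923, 13: 1812, 14: 1690}
baseline_per_hour = 3  # "0-3/hour (baseline noise)"

# First hour whose count is two orders of magnitude above baseline:
spike_start = next(
    hour
    for hour, count in sorted(hourly_counts.items())
    if count > 100 * baseline_per_hour
)
print(f"Malformed-event spike began at {spike_start}:00 UTC")
# → Malformed-event spike began at 11:00 UTC
```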

## Step 3: Trace the Root Cause Upstream

```
Show me INFO and WARN logs from catalog-pipeline between 10:45 and 11:15 UTC today. I am looking for deployments, config changes, or schema changes.
```

The deployment logs tell the story:

```
Found 28 logs:

1. 10:52:03 INFO "Deployment started: catalog-pipeline v2.14.0 -> v2.15.0 (deployer: ci-bot)"
2. 10:52:18 INFO "Migration applied: product_event_schema_v3"
3. 10:52:19 INFO "Event format updated: sku_id field moved from root to nested product.identifiers.sku_id"
4. 10:52:19 INFO "Event format updated: price field changed from cents (int) to formatted string (e.g. 'USD29.99')"
5. 10:52:31 INFO "Deployment complete: catalog-pipeline v2.15.0 healthy"
6. 10:53:01 INFO "Backfill started: reprocessing 14,291 products with new schema"
7. 11:01:12 INFO "Backfill complete: 14,291 events published"
```

`catalog-pipeline` v2.15.0 changed the event schema in two breaking ways: it moved `sku_id` into a nested path and changed `price` from integer cents to a formatted string. The search indexer still expects the old schema. Every event from the new version fails validation. On top of that, the backfill re-published 14,291 products in the new format, flooding the indexer with unparseable data.
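
The two warnings from Step 2 line up exactly with these schema changes. A hypothetical sketch of the old-schema validation implied by the log messages makes the failure mode concrete (the field checks come from the logs; the function itself is not the real `search-indexer` code):

```python
# Hypothetical sketch of the old-schema validation implied by the warnings;
# field names come from the logs, the code itself is not search-indexer's.

def validate_event(event: dict) -> list[str]:
    """Return validation errors under the old (v2.14.0) event schema."""
    errors = []
    if "sku_id" not in event:           # old schema: sku_id at the root
        errors.append("missing required field 'sku_id'")
    price = event.get("price")
    if not isinstance(price, int):      # old schema: integer price in cents
        errors.append(f"field 'price' is not numeric: {price!r}")
    return errors

old_event = {"sku_id": "A-100", "price": 2999}          # v2.14.0 shape
new_event = {"product": {"identifiers": {"sku_id": "A-100"}},
             "price": "USD29.99"}                        # v2.15.0 shape

assert validate_event(old_event) == []
print(validate_event(new_event))
# → ["missing required field 'sku_id'", "field 'price' is not numeric: 'USD29.99'"]
```

Both breaking changes trip the old-schema checks, so every v2.15.0 event is skipped or dead-lettered.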

## Step 4: Scope the Impact

```
How many events has search-indexer moved to the dead letter queue in the last 6 hours? What is the current index lag?
```

The damage:

```
Dead letter queue (last 6 hours):
  - Events moved to DLQ: 9,214
  - Estimated unique products affected: ~6,800

Current index lag: 4h17m behind head
Indexer throughput: 12 events/sec (normal: 340 events/sec)
```

Roughly 6,800 products have stale search data, and the lag keeps growing because new events from v2.15.0 continue arriving in the broken format. The fix: either roll back `catalog-pipeline` to v2.14.0, or deploy a hotfix to `search-indexer` that handles both schema versions. The 9,214 dead-lettered events will need to be replayed after the fix.
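
If the hotfix route is chosen, its core is a normalizer that accepts both schema versions and emits one canonical shape. This is a hedged sketch based only on the fields seen in the logs; `normalize_event` and its price-string format are assumptions, not real indexer or SigNoz code:

```python
# Hypothetical dual-schema normalizer for the search-indexer hotfix.
# Fields are taken from the deployment logs; everything else is assumed.
import re

def normalize_event(event: dict) -> dict:
    # sku_id: at the root (v2.14.0) or nested under product.identifiers (v2.15.0)
    sku_id = event.get("sku_id") or (
        event.get("product", {}).get("identifiers", {}).get("sku_id")
    )
    if sku_id is None:
        raise ValueError("event has no sku_id in either schema")

    # price: integer cents (v2.14.0) or a formatted string like 'USD29.99' (v2.15.0)
    price = event["price"]
    if isinstance(price, str):
        match = re.fullmatch(r"[A-Z]{3}(\d+)\.(\d{2})", price)
        if match is None:
            raise ValueError(f"unparseable price: {price!r}")
        price = int(match.group(1)) * 100 + int(match.group(2))

    return {"sku_id": sku_id, "price": price}

# Both versions normalize to the same canonical event:
assert normalize_event({"sku_id": "A-100", "price": 2999}) == \
       normalize_event({"product": {"identifiers": {"sku_id": "A-100"}},
                        "price": "USD29.99"})
```

Rolling back to v2.14.0 is the faster mitigation; a normalizer like this lets v2.15.0 events keep flowing while the dead-lettered backlog is replayed.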

## Tips for Your Own Investigations

1. **Start with what you know.** The Slack message, the error alert, the customer complaint. Search for that first.
2. **Follow the thread.** When results mention another service, a timeout, or an error code, ask about that next.
3. **Scope before you dig.** Once you know what is failing, check how many errors there are, when they started, and whether they are increasing.
4. **Find the boundary.** Zoom in on the moment errors started. The logs right before the first error often reveal the trigger.

<Admonition type="tip">
If a field like `service.name` is not available, ask the assistant to discover fields: _"What resource attributes are available for logs?"_ Field availability depends on how your services are instrumented.
</Admonition>

<details>
<ToggleHeading>
## Under the Hood
</ToggleHeading>

During this investigation, the MCP server called these tools:

| Step | MCP Tool | What It Did |
|------|----------|-------------|
| 1 | `signoz_search_logs` | Searched across all services for warning/error logs matching search indexing keywords |
| 2 | `signoz_search_logs` | Found malformed event warnings in the indexer, revealing an upstream data quality issue |
| 2 | `signoz_aggregate_logs` | Computed malformed event counts per hour to pinpoint when the problem started |
| 3 | `signoz_search_logs` | Found deployment and schema migration logs in catalog-pipeline around the start time |
| 4 | `signoz_aggregate_logs` | Counted dead-lettered events to measure the blast radius |

</details>

## Next Steps

- [Latency Spike Explainer](https://signoz.io/docs/ai/use-cases/latency-spike-explainer) - Ask "why is this slow?" and trace the bottleneck.
- [Reconstruct a Bug from a Trace ID](https://signoz.io/docs/ai/use-cases/reconstruct-bug-from-trace-id) - Debug a support ticket with a trace ID.

<GetHelp />