Skip to content

Commit 452d45e

Browse files
lpcox and Copilot committed
feat: add proxy mode for GitHub API DIFC filtering
Add an `awmg proxy` subcommand that runs as an HTTP forward proxy, intercepting gh CLI requests (via GH_HOST redirect) and applying the same DIFC filtering pipeline as the MCP gateway. Architecture: - Reuses the existing WASM guard, DIFC evaluator, agent registry, and capabilities — ~80% code reuse from the gateway - Only Phase 3 (backend call) differs: forwards HTTP to GitHub API instead of calling an MCP backend server - Stub BackendCaller since the guard gets full API responses in LabelResponse New files: - internal/proxy/proxy.go — core proxy server with guard init - internal/proxy/handler.go — HTTP handler with 6-phase DIFC pipeline - internal/proxy/router.go — REST URL pattern → guard tool name mapping - internal/proxy/graphql.go — GraphQL query → guard tool name mapping - internal/proxy/proxy_test.go — 40+ test cases for routing/GraphQL - internal/cmd/proxy.go — `awmg proxy` Cobra subcommand Usage: awmg proxy \ --guard-wasm guards/github-guard/github_guard.wasm \ --policy '{"allow-only":{"repos":["org/repo"]}}' \ --github-token "$GITHUB_TOKEN" \ --listen localhost:8080 GH_HOST=localhost:8080 gh issue list -R org/repo Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent af7f45a commit 452d45e

6 files changed

Lines changed: 1462 additions & 0 deletions

File tree

internal/cmd/proxy.go

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
package cmd
2+
3+
import (
	"context"
	"errors"
	"fmt"
	"log"
	"net"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"

	"github.com/github/gh-aw-mcpg/internal/logger"
	"github.com/github/gh-aw-mcpg/internal/proxy"
	"github.com/spf13/cobra"
)
16+
17+
// Flag values for the `proxy` subcommand. Each one is bound to a command-line
// flag in newProxyCmd and read by runProxy.
var (
	// proxyGuardWasm is the value of --guard-wasm (path to the guard WASM module).
	proxyGuardWasm string
	// proxyPolicy is the value of --policy (guard policy JSON).
	proxyPolicy string
	// proxyToken is the value of --github-token.
	proxyToken string
	// proxyListen is the value of --listen (address the proxy binds to).
	proxyListen string
	// proxyLogDir is the value of --log-dir.
	proxyLogDir string
	// proxyDIFCMode is the value of --guards-mode (strict, filter, or propagate).
	proxyDIFCMode string
	// proxyAPIURL is the value of --github-api-url (upstream GitHub API).
	proxyAPIURL string
)
27+
28+
func init() {
29+
rootCmd.AddCommand(newProxyCmd())
30+
}
31+
32+
func newProxyCmd() *cobra.Command {
33+
cmd := &cobra.Command{
34+
Use: "proxy",
35+
Short: "Run as a GitHub API filtering proxy",
36+
Long: `Run the gateway in proxy mode — an HTTP forward proxy that intercepts
37+
gh CLI requests and applies DIFC filtering using the same guard WASM module.
38+
39+
Usage with the gh CLI:
40+
41+
# Start the proxy
42+
awmg proxy \
43+
--guard-wasm guards/github-guard/github_guard.wasm \
44+
--policy '{"allow-only":{"repos":["org/repo"],"min-integrity":"approved"}}' \
45+
--github-token "$GITHUB_TOKEN" \
46+
--listen localhost:8080
47+
48+
# Point gh at the proxy
49+
GH_HOST=localhost:8080 GH_TOKEN="$GITHUB_TOKEN" gh issue list -R org/repo`,
50+
SilenceUsage: true,
51+
RunE: runProxy,
52+
}
53+
54+
cmd.Flags().StringVar(&proxyGuardWasm, "guard-wasm", "", "Path to the guard WASM module (required)")
55+
cmd.Flags().StringVar(&proxyPolicy, "policy", getDefaultGuardPolicyJSON(), "Guard policy JSON")
56+
cmd.Flags().StringVar(&proxyToken, "github-token", os.Getenv("GITHUB_TOKEN"), "GitHub API token")
57+
cmd.Flags().StringVarP(&proxyListen, "listen", "l", "127.0.0.1:8080", "HTTP proxy listen address")
58+
cmd.Flags().StringVar(&proxyLogDir, "log-dir", getDefaultLogDir(), "Log file directory")
59+
cmd.Flags().StringVar(&proxyDIFCMode, "guards-mode", "filter", "DIFC enforcement mode: strict, filter, propagate")
60+
cmd.Flags().StringVar(&proxyAPIURL, "github-api-url", proxy.DefaultGitHubAPIBase, "Upstream GitHub API URL")
61+
62+
cmd.MarkFlagRequired("guard-wasm")
63+
64+
return cmd
65+
}
66+
67+
func runProxy(cmd *cobra.Command, args []string) error {
68+
ctx, cancel := signal.NotifyContext(cmd.Context(), os.Interrupt, syscall.SIGTERM)
69+
defer cancel()
70+
71+
// Initialize loggers
72+
if err := logger.InitFileLogger(proxyLogDir, "proxy.log"); err != nil {
73+
log.Printf("Warning: Failed to initialize file logger: %v", err)
74+
}
75+
if err := logger.InitJSONLLogger(proxyLogDir, "proxy-rpc.jsonl"); err != nil {
76+
log.Printf("Warning: Failed to initialize JSONL logger: %v", err)
77+
}
78+
79+
logger.LogInfo("startup", "MCPG Proxy starting: listen=%s, guard=%s, mode=%s", proxyListen, proxyGuardWasm, proxyDIFCMode)
80+
81+
// Resolve GitHub token
82+
token := proxyToken
83+
if token == "" {
84+
token = os.Getenv("GH_TOKEN")
85+
}
86+
if token == "" {
87+
token = os.Getenv("GITHUB_PERSONAL_ACCESS_TOKEN")
88+
}
89+
90+
// Create the proxy server
91+
proxySrv, err := proxy.New(ctx, proxy.Config{
92+
WasmPath: proxyGuardWasm,
93+
Policy: proxyPolicy,
94+
GitHubToken: token,
95+
GitHubAPIURL: proxyAPIURL,
96+
DIFCMode: proxyDIFCMode,
97+
})
98+
if err != nil {
99+
return fmt.Errorf("failed to create proxy server: %w", err)
100+
}
101+
102+
// Create and start the HTTP server
103+
httpServer := &http.Server{
104+
Addr: proxyListen,
105+
Handler: proxySrv.Handler(),
106+
}
107+
108+
// Start HTTP server in background
109+
go func() {
110+
listener, err := net.Listen("tcp", proxyListen)
111+
if err != nil {
112+
log.Printf("Failed to listen on %s: %v", proxyListen, err)
113+
cancel()
114+
return
115+
}
116+
117+
actualAddr := listener.Addr().String()
118+
log.Printf("MCPG Proxy listening on %s", actualAddr)
119+
logger.LogInfo("startup", "Proxy listening on %s", actualAddr)
120+
121+
// Print connection info
122+
fmt.Fprintf(os.Stderr, "\nMCPG GitHub API Proxy\n")
123+
fmt.Fprintf(os.Stderr, " Listening: %s\n", actualAddr)
124+
fmt.Fprintf(os.Stderr, " Mode: %s\n", proxyDIFCMode)
125+
fmt.Fprintf(os.Stderr, " Guard: %s\n", proxyGuardWasm)
126+
fmt.Fprintf(os.Stderr, "\nConnect with:\n")
127+
fmt.Fprintf(os.Stderr, " GH_HOST=%s GH_TOKEN=<token> gh ...\n\n", actualAddr)
128+
129+
if err := httpServer.Serve(listener); err != nil && err != http.ErrServerClosed {
130+
log.Printf("HTTP server error: %v", err)
131+
cancel()
132+
}
133+
}()
134+
135+
// Wait for shutdown signal
136+
<-ctx.Done()
137+
log.Println("Shutting down proxy...")
138+
logger.LogInfo("shutdown", "Proxy shutting down")
139+
140+
return httpServer.Close()
141+
}

internal/proxy/graphql.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
package proxy
2+
3+
import (
4+
"encoding/json"
5+
"regexp"
6+
"strings"
7+
8+
"github.com/github/gh-aw-mcpg/internal/logger"
9+
)
10+
11+
// logGraphQL is the logger for this file, created under the name "proxy:graphql".
var logGraphQL = logger.New("proxy:graphql")
12+
13+
// GraphQLRequest is the decoded JSON body of a GraphQL request.
type GraphQLRequest struct {
	// Query is the GraphQL document text (JSON key "query").
	Query string `json:"query"`
	// Variables holds the request's variable values keyed by variable name
	// (JSON key "variables"; omitted from output when empty).
	Variables map[string]interface{} `json:"variables,omitempty"`
}
18+
19+
// GraphQLRouteMatch is the outcome of mapping a GraphQL query onto a guard
// tool name.
type GraphQLRouteMatch struct {
	// ToolName is the guard tool name selected for the query.
	ToolName string
	// Owner is the repository owner extracted from the request, if any.
	Owner string
	// Repo is the repository name extracted from the request, if any.
	Repo string
	// Args carries the tool arguments; MatchGraphQL sets "owner" and "repo"
	// entries when those values are non-empty.
	Args map[string]interface{}
}
26+
27+
// graphqlPattern is one rule associating a GraphQL request shape with a guard
// tool name.
type graphqlPattern struct {
	// namePattern is intended to match the GraphQL operation name
	// (case-insensitive). MatchGraphQL currently skips rules that set it.
	namePattern *regexp.Regexp
	// queryPattern is matched against the text of the query.
	queryPattern *regexp.Regexp
	// toolName is the guard tool name reported when the rule matches.
	toolName string
}
35+
36+
// graphqlPatterns is the ordered list of GraphQL operation → tool name mappings.
37+
var graphqlPatterns = []graphqlPattern{
38+
// Issue operations (singular before plural — more specific first)
39+
{queryPattern: regexp.MustCompile(`(?i)repository\s*\([^)]*\)\s*\{[^}]*\bissue\s*\(`), toolName: "issue_read"},
40+
{queryPattern: regexp.MustCompile(`(?i)repository\s*\([^)]*\)\s*\{[^}]*\bissues\s*[\({]`), toolName: "list_issues"},
41+
42+
// PR operations (singular before plural)
43+
{queryPattern: regexp.MustCompile(`(?i)repository\s*\([^)]*\)\s*\{[^}]*\bpullRequest\s*\(`), toolName: "pull_request_read"},
44+
{queryPattern: regexp.MustCompile(`(?i)repository\s*\([^)]*\)\s*\{[^}]*\bpullRequests\s*[\({]`), toolName: "list_pull_requests"},
45+
46+
// Search operations
47+
{queryPattern: regexp.MustCompile(`(?i)\bsearch\s*\(`), toolName: "search_issues"},
48+
49+
// Project operations
50+
{queryPattern: regexp.MustCompile(`(?i)projectV2`), toolName: "list_projects"},
51+
52+
// Repository info
53+
{queryPattern: regexp.MustCompile(`(?i)\brepository\s*\(`), toolName: "get_file_contents"},
54+
55+
// User/viewer
56+
{queryPattern: regexp.MustCompile(`(?i)\bviewer\s*\{`), toolName: "get_me"},
57+
}
58+
59+
// Patterns for recovering a repository owner and name from GraphQL request text.
var (
	// varOwnerPattern captures the value of a JSON-style "owner": "<value>" pair.
	varOwnerPattern = regexp.MustCompile(`(?i)"owner"\s*:\s*"([^"]+)"`)

	// varRepoPattern captures the value of a JSON-style "name": "<value>" or
	// "repo": "<value>" pair.
	varRepoPattern = regexp.MustCompile(`(?i)"(?:name|repo)"\s*:\s*"([^"]+)"`)

	// queryRepoPattern matches repository(owner: "X", name: "Y") as well as
	// repository(owner: $owner, name: $name). Only quoted literals are
	// captured (group 1 = owner, group 2 = name); a $variable reference
	// matches but leaves its group empty.
	queryRepoPattern = regexp.MustCompile(`(?i)repository\s*\(\s*owner\s*:\s*(?:"([^"]+)"|\$\w+)\s*,?\s*name\s*:\s*(?:"([^"]+)"|\$\w+)`)
)
66+
67+
// MatchGraphQL matches a GraphQL request body to a guard tool name.
68+
func MatchGraphQL(body []byte) *GraphQLRouteMatch {
69+
var gql GraphQLRequest
70+
if err := json.Unmarshal(body, &gql); err != nil {
71+
logGraphQL.Printf("failed to parse GraphQL request: %v", err)
72+
return nil
73+
}
74+
75+
if gql.Query == "" {
76+
logGraphQL.Printf("empty GraphQL query")
77+
return nil
78+
}
79+
80+
// Match the query against known patterns
81+
var toolName string
82+
for _, p := range graphqlPatterns {
83+
if p.namePattern != nil {
84+
// Not currently used but available for operation name matching
85+
continue
86+
}
87+
if p.queryPattern != nil && p.queryPattern.MatchString(gql.Query) {
88+
toolName = p.toolName
89+
break
90+
}
91+
}
92+
93+
if toolName == "" {
94+
logGraphQL.Printf("no GraphQL pattern match for query: %.100s", gql.Query)
95+
return nil
96+
}
97+
98+
// Extract owner/repo from variables
99+
owner, repo := extractOwnerRepo(gql.Variables, gql.Query)
100+
101+
args := map[string]interface{}{}
102+
if owner != "" {
103+
args["owner"] = owner
104+
}
105+
if repo != "" {
106+
args["repo"] = repo
107+
}
108+
109+
logGraphQL.Printf("matched GraphQL → tool=%s owner=%s repo=%s", toolName, owner, repo)
110+
return &GraphQLRouteMatch{
111+
ToolName: toolName,
112+
Owner: owner,
113+
Repo: repo,
114+
Args: args,
115+
}
116+
}
117+
118+
// extractOwnerRepo extracts owner and repo from GraphQL variables and query text.
119+
func extractOwnerRepo(variables map[string]interface{}, query string) (string, string) {
120+
var owner, repo string
121+
122+
// Try variables first
123+
if variables != nil {
124+
if v, ok := variables["owner"].(string); ok {
125+
owner = v
126+
}
127+
if v, ok := variables["name"].(string); ok {
128+
repo = v
129+
}
130+
if v, ok := variables["repo"].(string); ok && repo == "" {
131+
repo = v
132+
}
133+
}
134+
135+
// Fall back to parsing the query string
136+
if owner == "" || repo == "" {
137+
if m := queryRepoPattern.FindStringSubmatch(query); m != nil {
138+
if m[1] != "" && owner == "" {
139+
owner = m[1]
140+
}
141+
if m[2] != "" && repo == "" {
142+
repo = m[2]
143+
}
144+
}
145+
}
146+
147+
// Try parsing raw variable JSON embedded in query (some gh commands inline variables)
148+
if owner == "" {
149+
if m := varOwnerPattern.FindStringSubmatch(query); m != nil {
150+
owner = m[1]
151+
}
152+
}
153+
if repo == "" {
154+
if m := varRepoPattern.FindStringSubmatch(query); m != nil {
155+
repo = m[1]
156+
}
157+
}
158+
159+
return owner, repo
160+
}
161+
162+
// IsGraphQLPath reports whether path addresses the GraphQL endpoint, i.e. it
// equals "/graphql" or "/api/v3/graphql" once a single trailing slash has been
// removed.
func IsGraphQLPath(path string) bool {
	switch strings.TrimSuffix(path, "/") {
	case "/graphql", "/api/v3/graphql":
		return true
	default:
		return false
	}
}

0 commit comments

Comments
 (0)