micasa-dev · cpcloud · Mar 14, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/internal/app/coldefs.go b/internal/app/coldefs.go
@@ -305,6 +305,7 @@ var documentColumnDefs = []columnDef{
 	{"Entity", columnSpec{Title: "Entity", Min: 10, Max: 24, Flex: true, Kind: cellEntity}},
 	{"Type", columnSpec{Title: "Type", Min: 8, Max: 16}},
 	{"Size", columnSpec{Title: "Size", Min: 6, Max: 10, Align: alignRight, Kind: cellReadonly}},
+	{"Model", columnSpec{Title: "Model", Min: 8, Max: 20, Kind: cellReadonly}},
 	{"Notes", columnSpec{Title: "Notes", Min: 12, Max: 40, Flex: true, Kind: cellNotes}},
 	{"Updated", columnSpec{Title: "Updated", Min: 10, Max: 12, Kind: cellReadonly}},
 }

diff --git a/internal/app/columns_generated.go b/internal/app/columns_generated.go
diff --git a/internal/app/docopen.go b/internal/app/docopen.go
@@ -6,6 +6,7 @@ package app
 import (
 	"errors"
 	"fmt"
+	"os"
 	"os/exec"
 	"runtime"
 
@@ -46,6 +47,34 @@ func (m *Model) openSelectedDocument() tea.Cmd {
 	return openFileCmd(cachePath)
 }
 
+// extractSelectedDocument loads the selected document and opens the extraction
+// overlay. Only operates on document tabs; returns nil on other tabs.
+func (m *Model) extractSelectedDocument() tea.Cmd {
+	if !m.effectiveTab().isDocumentTab() {
+		return nil
+	}
+
+	meta, ok := m.selectedRowMeta()
+	if !ok || meta.Deleted {
+		return nil
+	}
+
+	doc, err := m.store.GetDocument(meta.ID)
+	if err != nil {
+		m.setStatusError(fmt.Sprintf("load document: %s", err))
+		return nil
+	}
+
+	cmd := m.startExtractionOverlay(
+		doc.ID, doc.FileName, doc.Data, doc.MIMEType, doc.ExtractedText, doc.ExtractData,
+	)
+	if cmd == nil {
+		m.setStatusError("no extraction tools or LLM configured")
+		return nil
+	}
+	return cmd
+}
+
 // openFileCmd returns a tea.Cmd that opens the given path with the OS viewer.
 // The command runs to completion so exit-status errors (e.g. no handler for
 // the MIME type) are captured and returned as an openFileResultMsg.
@@ -88,23 +117,48 @@ func openFileCmd(path string) tea.Cmd {
 }
 
 // wrapOpenerError adds an actionable hint when the OS file-opener command
-// is missing or cannot be found.
+// is missing or fails (e.g. headless/remote server with no display).
 func wrapOpenerError(err error, openerName string) error {
-	if !errors.Is(err, exec.ErrNotFound) {
-		return err
+	if errors.Is(err, exec.ErrNotFound) {
+		switch openerName {
+		case "xdg-open":
+			return fmt.Errorf(
+				"%s not found -- install xdg-utils (e.g. apt install xdg-utils)",
+				openerName,
+			)
+		case "open":
+			return fmt.Errorf(
+				"%s not found -- expected on macOS; is this a headless environment?",
+				openerName,
+			)
+		default:
+			return fmt.Errorf("%s not found -- no file opener available", openerName)
+		}
 	}
-	switch openerName {
-	case "xdg-open":
-		return fmt.Errorf(
-			"%s not found -- install xdg-utils (e.g. apt install xdg-utils)",
-			openerName,
-		)
-	case "open":
+
+	// The opener command exists but exited non-zero. On headless/remote
+	// machines (no display server) this is the typical failure mode.
+	// Detect it and surface an actionable message.
+	var exitErr *exec.ExitError
+	if errors.As(err, &exitErr) {
+		if openerName == "xdg-open" && !hasDisplay() {
+			return fmt.Errorf(
+				"%s failed -- no display server (DISPLAY/WAYLAND_DISPLAY not set); "+
+					"running on a remote or headless machine?",
+				openerName,
+			)
+		}
 		return fmt.Errorf(
-			"%s not found -- expected on macOS; is this a headless environment?",
-			openerName,
+			"%s failed (exit code %d) -- is a graphical environment available?",
+			openerName, exitErr.ExitCode(),
 		)
-	default:
-		return fmt.Errorf("%s not found -- no file opener available", openerName)
 	}
+
+	return err
+}
+
+// hasDisplay reports whether a graphical display appears to be available
+// by checking the standard X11 and Wayland environment variables.
+func hasDisplay() bool {
+	return os.Getenv("DISPLAY") != "" || os.Getenv("WAYLAND_DISPLAY") != ""
 }
diff --git a/internal/app/docopen_test.go b/internal/app/docopen_test.go
@@ -4,14 +4,42 @@
 package app
 
 import (
+	"context"
 	"errors"
+	"os"
 	"os/exec"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
+// TestHelperProcess is a helper process that exits with code 1.
+// It is invoked by fakeExitError via the GO_WANT_HELPER_PROCESS env var.
+func TestHelperProcess(_ *testing.T) {
+	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
+		return
+	}
+	os.Exit(1)
+}
+
+// fakeExitError returns an *exec.ExitError with a non-zero exit code
+// by re-executing the test binary with a helper that exits non-zero.
+// Works on all platforms (Linux, macOS, Windows).
+func fakeExitError(t *testing.T) *exec.ExitError {
+	t.Helper()
+	cmd := exec.CommandContext( //nolint:gosec // test helper re-execs itself
+		context.Background(),
+		os.Args[0],
+		"-test.run=^TestHelperProcess$",
+	)
+	cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
+	err := cmd.Run()
+	var exitErr *exec.ExitError
+	require.ErrorAs(t, err, &exitErr)
+	return exitErr
+}
+
 func TestWrapOpenerError_NotFound(t *testing.T) {
 	t.Parallel()
 	tests := []struct {
@@ -48,11 +76,41 @@ func TestWrapOpenerError_NotFound(t *testing.T) {
 	}
 }
 
-func TestWrapOpenerError_OtherError(t *testing.T) {
+func TestWrapOpenerError_PlainError_PassesThrough(t *testing.T) {
 	t.Parallel()
-	other := errors.New("exit status 1")
+	other := errors.New("something weird")
 	got := wrapOpenerError(other, "xdg-open")
-	assert.Equal(t, other, got, "non-ErrNotFound errors should pass through unchanged")
+	assert.Equal(t, other, got, "non-ExitError non-ErrNotFound should pass through unchanged")
+}
+
+func TestWrapOpenerError_ExitError_GenericMessage(t *testing.T) {
+	t.Parallel()
+	exitErr := fakeExitError(t)
+	got := wrapOpenerError(exitErr, "open")
+	require.ErrorContains(t, got, "open failed")
+	require.ErrorContains(t, got, "graphical environment")
+}
+
+func TestWrapOpenerError_XdgOpen_NoDisplay(t *testing.T) {
+	t.Parallel()
+	if hasDisplay() {
+		t.Skip("test requires no display server")
+	}
+	exitErr := fakeExitError(t)
+	got := wrapOpenerError(exitErr, "xdg-open")
+	require.ErrorContains(t, got, "no display server")
+	require.ErrorContains(t, got, "remote or headless")
+}
+
+func TestWrapOpenerError_XdgOpen_WithDisplay(t *testing.T) {
+	t.Parallel()
+	if !hasDisplay() {
+		t.Skip("test requires a display server")
+	}
+	exitErr := fakeExitError(t)
+	got := wrapOpenerError(exitErr, "xdg-open")
+	require.ErrorContains(t, got, "xdg-open failed")
+	require.ErrorContains(t, got, "graphical environment")
 }
 
 func TestIsDocumentTab(t *testing.T) {

diff --git a/internal/app/extraction.go b/internal/app/extraction.go
@@ -258,10 +258,18 @@ func (m *Model) startExtractionOverlay(
 	fileData []byte,
 	mime string,
 	extractedText string,
+	extractData []byte,
 ) tea.Cmd {
 	needsExtract := extract.NeedsOCR(m.ex.extractors, mime)
 	needsLLM := m.extractionLLMClient() != nil
 
+	// Skip OCR when the document already has extracted text from a
+	// previous run -- feed existing text directly to the LLM.
+	hasExistingText := strings.TrimSpace(extractedText) != ""
+	if hasExistingText {
+		needsExtract = false
+	}
+
 	if !needsExtract && !needsLLM {
 		return nil
 	}
@@ -274,12 +282,14 @@ func (m *Model) startExtractionOverlay(
 		context.Background(),
 	)
 
-	// Text extraction only applies to PDFs and text files; skip for images.
-	hasText := !extract.IsImageMIME(mime)
+	// Text extraction only applies to PDFs and text files -- unless
+	// we already have text from a previous extraction (e.g. prior OCR
+	// on an image), in which case we show the cached text.
+	hasText := !extract.IsImageMIME(mime) || hasExistingText
 
 	// Build initial text source from already-extracted text.
 	var sources []extract.TextSource
-	if hasText && strings.TrimSpace(extractedText) != "" {
+	if hasText && hasExistingText {
 		var tool, desc string
 		switch {
 		case mime == extract.MIMEApplicationPDF:
@@ -288,13 +298,17 @@ func (m *Model) startExtractionOverlay(
 		case strings.HasPrefix(mime, "text/"):
 			tool = "plaintext"
 			desc = "Plain text content."
+		case extract.IsImageMIME(mime):
+			tool = "tesseract"
+			desc = "Text from previous OCR extraction."
 		default:
 			tool = mime
 		}
 		sources = append(sources, extract.TextSource{
 			Tool: tool,
 			Desc: desc,
 			Text: extractedText,
+			Data: extractData,
 		})
 	}
 
@@ -325,6 +339,8 @@ func (m *Model) startExtractionOverlay(
 			textTool = "pdf"
 		case strings.HasPrefix(mime, "text/"):
 			textTool = "plaintext"
+		case extract.IsImageMIME(mime):
+			textTool = "ocr"
 		default:
 			textTool = mime
 		}
@@ -869,6 +885,12 @@ func (m *Model) acceptDeferredExtraction() error {
 	if len(ex.pendingData) > 0 {
 		doc.ExtractData = ex.pendingData
 	}
+	doc.ExtractionModel = m.extractionModelUsed(ex)
+	ops, err := marshalOps(ex.operations)
+	if err != nil {
+		return fmt.Errorf("marshal extraction ops: %w", err)
+	}
+	doc.ExtractionOps = ops
 
 	if err := m.store.CreateDocument(doc); err != nil {
 		return fmt.Errorf("create document: %w", err)
@@ -893,11 +915,16 @@ func (m *Model) acceptDeferredExtraction() error {
 func (m *Model) acceptExistingExtraction() error {
 	ex := m.ex.extraction
 
-	// Persist async extraction results.
-	if ex.pendingText != "" || len(ex.pendingData) > 0 {
+	// Persist async extraction results and the model that produced them.
+	if ex.pendingText != "" || len(ex.pendingData) > 0 || ex.hasLLM {
 		if m.store != nil {
+			model := m.extractionModelUsed(ex)
+			ops, err := marshalOps(ex.operations)
+			if err != nil {
+				return fmt.Errorf("marshal extraction ops: %w", err)
+			}
 			if err := m.store.UpdateDocumentExtraction(
-				ex.DocID, ex.pendingText, ex.pendingData,
+				ex.DocID, ex.pendingText, ex.pendingData, model, ops,
 			); err != nil {
 				return fmt.Errorf("save extraction: %w", err)
 			}
@@ -1949,6 +1976,30 @@ func stepName(si extractionStep) string {
 	return "?"
 }
 
+// marshalOps serialises extraction operations to JSON for persistence.
+// A nil slice (LLM didn't run / failed) returns nil so callers skip
+// the update. A non-nil but empty slice (LLM ran, zero ops) returns
+// "[]" so stale data is cleared.
+func marshalOps(ops []extract.Operation) ([]byte, error) {
+	if ops == nil {
+		return nil, nil
+	}
+	b, err := json.Marshal(ops)
+	if err != nil {
+		return nil, fmt.Errorf("marshal ops: %w", err)
+	}
+	return b, nil
+}
+
+// extractionModelUsed returns the model name if the LLM step completed
+// successfully, or empty string if LLM was skipped or failed.
+func (m *Model) extractionModelUsed(ex *extractionLogState) string {
+	if ex.hasLLM && ex.Steps[stepLLM].Status == stepDone {
+		return m.extractionModelLabel()
+	}
+	return ""
+}
+
 // extractionModelLabel returns the model name used for extraction.
 func (m *Model) extractionModelLabel() string {
 	if m.ex.extractionModel != "" {
@@ -1999,9 +2050,9 @@ func previewColumns(tableName string, cur locale.Currency) []previewColDef {
 	case tableDocuments:
 		s := documentColumnSpecs()
 		return []previewColDef{
-			{data.ColTitle, s[1], fmtAnyText},
-			{data.ColMIMEType, s[3], fmtAnyText},
-			{data.ColNotes, s[5], fmtAnyText},
+			{data.ColTitle, s[documentColTitle], fmtAnyText},
+			{data.ColMIMEType, s[documentColType], fmtAnyText},
+			{data.ColNotes, s[documentColNotes], fmtAnyText},
 		}
 	case data.TableQuotes:
 		s := quoteColumnSpecs()