Skip to content

Commit bb80dd8

Browse files
committed
bounding boxes
1 parent f8b667c commit bb80dd8

File tree

9 files changed

+159
-113
lines changed

9 files changed

+159
-113
lines changed

web/src/lib/actions/zoom-image.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { photoZoomState } from '$lib/stores/zoom-image.store';
22
import { useZoomImageWheel } from '@zoom-image/svelte';
33
import { get } from 'svelte/store';
44

5-
export const zoomImageAction = (node: HTMLElement) => {
5+
export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => {
66
const { createZoomImage, zoomImageState, setZoomImageState } = useZoomImageWheel();
77

88
createZoomImage(node, {
@@ -14,9 +14,32 @@ export const zoomImageAction = (node: HTMLElement) => {
1414
setZoomImageState(state);
1515
}
1616

17+
// Guard handlers: while `options.disabled` is set, stop wheel/pointerdown events from propagating any further
18+
const wheelHandler = (event: WheelEvent) => {
19+
if (options?.disabled) {
20+
event.stopImmediatePropagation();
21+
}
22+
};
23+
24+
const pointerDownHandler = (event: PointerEvent) => {
25+
if (options?.disabled) {
26+
event.stopImmediatePropagation();
27+
}
28+
};
29+
30+
// Add handlers at capture phase with higher priority
31+
node.addEventListener('wheel', wheelHandler, { capture: true });
32+
node.addEventListener('pointerdown', pointerDownHandler, { capture: true });
33+
1734
const unsubscribes = [photoZoomState.subscribe(setZoomImageState), zoomImageState.subscribe(photoZoomState.set)];
35+
1836
return {
37+
update(newOptions?: { disabled?: boolean }) {
38+
options = newOptions;
39+
},
1940
destroy() {
41+
node.removeEventListener('wheel', wheelHandler, { capture: true });
42+
node.removeEventListener('pointerdown', pointerDownHandler, { capture: true });
2043
for (const unsubscribe of unsubscribes) {
2144
unsubscribe();
2245
}

web/src/lib/components/asset-viewer/asset-viewer.svelte

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
1212
import { closeEditorCofirm } from '$lib/stores/asset-editor.store';
1313
import { assetViewingStore } from '$lib/stores/asset-viewing.store';
14+
import { ocrStore } from '$lib/stores/ocr.svelte';
1415
import { alwaysLoadOriginalVideo, isShowDetail } from '$lib/stores/preferences.store';
1516
import { SlideshowNavigation, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store';
1617
import { user } from '$lib/stores/user.store';
@@ -382,9 +383,13 @@
382383
handlePromiseError(activityManager.init(album.id, asset.id));
383384
}
384385
});
386+
387+
let currentAssetId = $derived(asset.id);
385388
$effect(() => {
386-
if (asset.id) {
387-
handlePromiseError(handleGetAllAlbums());
389+
if (currentAssetId) {
390+
untrack(() => handlePromiseError(handleGetAllAlbums()));
391+
ocrStore.clear();
392+
handlePromiseError(ocrStore.getAssetOcr(currentAssetId));
388393
}
389394
});
390395
</script>
@@ -523,6 +528,7 @@
523528
{playOriginalVideo}
524529
/>
525530
{/if}
531+
526532
{#if $slideshowState === SlideshowState.None && isShared && ((album && album.isActivityEnabled) || activityManager.commentCount > 0) && !activityManager.isLoading}
527533
<div class="absolute bottom-0 end-0 mb-20 me-8">
528534
<ActivityStatus
@@ -535,9 +541,10 @@
535541
/>
536542
</div>
537543
{/if}
538-
{#if $slideshowState === SlideshowState.None && asset.type === AssetTypeEnum.Image && !isShowEditor}
544+
545+
{#if $slideshowState === SlideshowState.None && asset.type === AssetTypeEnum.Image && !isShowEditor && ocrStore.hasOcrData}
539546
<div class="absolute bottom-0 end-0 mb-6 me-6">
540-
<OcrButton assetId={asset.id} />
547+
<OcrButton />
541548
</div>
542549
{/if}
543550
{/key}

web/src/lib/components/asset-viewer/detail-panel.svelte

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@
503503
{/if}
504504

505505
{#if albums.length > 0}
506-
<section class="px-6 pt-6 dark:text-immich-dark-fg">
506+
<section class="px-6 py-6 dark:text-immich-dark-fg">
507507
<p class="uppercase pb-4 text-sm">{$t('appears_in')}</p>
508508
{#each albums as album (album.id)}
509509
<a href={resolve(`${AppRoute.ALBUMS}/${album.id}`)}>
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<script lang="ts">
// Computes the geometry used by this component's markup to draw a single OCR box:
// the axis-aligned extents of its four points, an SVG path through those points,
// a rotation angle, and the centre of the extents.
2+
import type { OcrBox } from '$lib/utils/ocr-utils';
3+
4+
interface Props {
5+
// The box to render; this script reads its `points` (indices 0..3, each with `x` and `y`).
ocrBox: OcrBox;
6+
}
7+
8+
let { ocrBox }: Props = $props();
9+
10+
const points = $derived(ocrBox.points);
// Axis-aligned extents: smallest / largest x and y over the four points.
11+
const minX = $derived(Math.min(points[0].x, points[1].x, points[2].x, points[3].x));
12+
const maxX = $derived(Math.max(points[0].x, points[1].x, points[2].x, points[3].x));
13+
const minY = $derived(Math.min(points[0].y, points[1].y, points[2].y, points[3].y));
14+
const maxY = $derived(Math.max(points[0].y, points[1].y, points[2].y, points[3].y));
15+
const width = $derived(maxX - minX);
16+
const height = $derived(maxY - minY);
// SVG path data: move to point 0, line through points 1, 2 and 3 in index order, then close the shape.
17+
const pathData = $derived(
18+
`M ${points[0].x} ${points[0].y} L ${points[1].x} ${points[1].y} L ${points[2].x} ${points[2].y} L ${points[3].x} ${points[3].y} Z`,
19+
);
20+
21+
// Calculate rotation angle from the bottom edge (points[3] to points[2])
// Math.atan2 returns radians; the factor 180 / Math.PI converts the result to degrees.
// NOTE(review): assumes points[3] -> points[2] really is the bottom edge of the box - confirm the point order produced by ocr-utils.
22+
const rotation = $derived(Math.atan2(points[2].y - points[3].y, points[2].x - points[3].x) * (180 / Math.PI));
23+
24+
// Calculate center point for rotation
// This is the midpoint of the axis-aligned extents, not the average of the four points.
25+
const centerX = $derived((minX + maxX) / 2);
26+
const centerY = $derived((minY + maxY) / 2);
27+
</script>
28+
29+
<div class="absolute group" style="left: 0; top: 0;">
30+
<!-- Hover region covering the bounding box -->
31+
<div
32+
class="absolute cursor-pointer"
33+
style="left: {minX}px; top: {minY}px; width: {width}px; height: {height}px;"
34+
></div>
35+
<!-- SVG path for the actual shape -->
36+
<svg class="absolute" style="left: 0; top: 0; overflow: visible;">
37+
<path
38+
d={pathData}
39+
fill="rgba(59, 130, 246, 0.1)"
40+
stroke="rgb(59, 130, 246)"
41+
stroke-width="2"
42+
class="transition-all group-hover:fill-[rgba(59,130,246,0.3)] group-hover:stroke-[rgb(37,99,235)] group-hover:stroke-3"
43+
/>
44+
</svg>
45+
<!-- Tooltip overlay directly on the bounding box -->
46+
<p
47+
class="absolute hidden group-hover:flex items-center justify-center bg-black/75 text-white text-sm px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word z-10"
48+
style="left: {minX}px; top: {minY}px; width: {width}px; height: {height}px; user-select: text; -webkit-user-select: text; -moz-user-select: text; -ms-user-select: text; transform: rotate({rotation}deg); transform-origin: {centerX -
49+
minX}px {centerY - minY}px;"
50+
>
51+
{ocrBox.text}
52+
</p>
53+
</div>
Lines changed: 6 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,18 @@
11
<script lang="ts">
2-
import { ocrDataArray, showOcrOverlay } from '$lib/stores/ocr.store';
3-
import { getAssetOcr } from '@immich/sdk';
2+
import { ocrStore } from '$lib/stores/ocr.svelte';
43
import { IconButton } from '@immich/ui';
54
import { mdiTextRecognition } from '@mdi/js';
6-
import { untrack } from 'svelte';
7-
8-
interface Props {
9-
assetId: string;
10-
}
11-
12-
let { assetId }: Props = $props();
13-
14-
let isLoading = $state(false);
15-
let hasOcrData = $state<boolean | null>(null);
16-
let currentAssetId = $state<string>('');
17-
18-
const checkOcrData = async (id: string) => {
19-
try {
20-
const ocrResults = await getAssetOcr({ id });
21-
console.log('OCR results for asset', id, ocrResults);
22-
23-
untrack(() => {
24-
$ocrDataArray = ocrResults;
25-
hasOcrData = ocrResults.length > 0;
26-
});
27-
} catch (error) {
28-
console.error('Failed to check OCR data:', error);
29-
untrack(() => {
30-
hasOcrData = false;
31-
});
32-
}
33-
};
34-
35-
const toggleOcrOverlay = () => {
36-
$showOcrOverlay = !$showOcrOverlay;
37-
};
38-
39-
$effect(() => {
40-
// Only react to assetId changes
41-
if (assetId && assetId !== currentAssetId) {
42-
currentAssetId = assetId;
43-
44-
// Reset state in untrack to avoid triggering the effect again
45-
untrack(() => {
46-
hasOcrData = null;
47-
$showOcrOverlay = false;
48-
$ocrDataArray = [];
49-
});
50-
51-
// Check for OCR data
52-
void checkOcrData(assetId);
53-
}
54-
});
555
</script>
566

57-
{#if hasOcrData === true}
7+
<div class="dark">
588
<IconButton
59-
title={$showOcrOverlay ? 'Hide text recognition' : 'Show text recognition'}
9+
title={ocrStore.showOverlay ? 'Hide text recognition' : 'Show text recognition'}
6010
icon={mdiTextRecognition}
61-
disabled={isLoading}
62-
class={$showOcrOverlay ? 'bg-immich-primary text-white' : ''}
11+
class={ocrStore.showOverlay ? 'bg-immich-primary text-white' : ''}
6312
color="secondary"
6413
variant="ghost"
6514
shape="round"
6615
aria-label="Text recognition"
67-
onclick={toggleOcrOverlay}
16+
onclick={() => ocrStore.toggleOcrBoundingBox()}
6817
/>
69-
{/if}
18+
</div>

web/src/lib/components/asset-viewer/photo-viewer.svelte

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,23 @@
22
import { shortcuts } from '$lib/actions/shortcut';
33
import { zoomImageAction } from '$lib/actions/zoom-image';
44
import FaceEditor from '$lib/components/asset-viewer/face-editor/face-editor.svelte';
5+
import OcrBoundingBox from '$lib/components/asset-viewer/ocr-bounding-box.svelte';
56
import BrokenAsset from '$lib/components/assets/broken-asset.svelte';
67
import { assetViewerFadeDuration } from '$lib/constants';
78
import { castManager } from '$lib/managers/cast-manager.svelte';
89
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
910
import { photoViewerImgElement } from '$lib/stores/assets-store.svelte';
1011
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
12+
import { ocrStore } from '$lib/stores/ocr.svelte';
1113
import { boundingBoxesArray } from '$lib/stores/people.store';
12-
import { ocrDataArray, showOcrOverlay } from '$lib/stores/ocr.store';
1314
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
1415
import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store';
1516
import { photoZoomState } from '$lib/stores/zoom-image.store';
1617
import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils';
1718
import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils';
1819
import { handleError } from '$lib/utils/handle-error';
19-
import { getBoundingBox } from '$lib/utils/people-utils';
2020
import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils';
21+
import { getBoundingBox } from '$lib/utils/people-utils';
2122
import { cancelImageUrl } from '$lib/utils/sw-messaging';
2223
import { getAltText } from '$lib/utils/thumbnail-util';
2324
import { toTimelineAsset } from '$lib/utils/timeline-util';
@@ -74,11 +75,13 @@
7475
});
7576
7677
let ocrBoxes = $derived(
77-
$showOcrOverlay && $photoViewerImgElement
78-
? getOcrBoundingBoxes($ocrDataArray, $photoZoomState, $photoViewerImgElement)
79-
: []
78+
ocrStore.showOverlay && $photoViewerImgElement
79+
? getOcrBoundingBoxes(ocrStore.data, $photoZoomState, $photoViewerImgElement)
80+
: [],
8081
);
8182
83+
let isOcrActive = $derived(ocrStore.showOverlay);
84+
8285
const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => {
8386
for (const preloadAsset of preloadAssets || []) {
8487
if (preloadAsset.isImage) {
@@ -138,9 +141,15 @@
138141
if ($photoZoomState.currentZoom > 1) {
139142
return;
140143
}
144+
145+
if (ocrStore.showOverlay) {
146+
return;
147+
}
148+
141149
if (onNextAsset && event.detail.direction === 'left') {
142150
onNextAsset();
143151
}
152+
144153
if (onPreviousAsset && event.detail.direction === 'right') {
145154
onPreviousAsset();
146155
}
@@ -240,7 +249,7 @@
240249
</div>
241250
{:else if !imageError}
242251
<div
243-
use:zoomImageAction
252+
use:zoomImageAction={{ disabled: isOcrActive }}
244253
{...useSwipe(onSwipe)}
245254
class="h-full w-full"
246255
transition:fade={{ duration: haveFadeTransition ? assetViewerFadeDuration : 0 }}
@@ -269,30 +278,10 @@
269278
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
270279
></div>
271280
{/each}
272-
<!-- OCR bounding boxes -->
273-
{#if $showOcrOverlay && ocrBoxes.length > 0}
274-
<svg class="absolute top-0 left-0 w-full h-full pointer-events-none" style="overflow: visible;">
275-
{#each ocrBoxes as ocrBox (ocrBox.id)}
276-
{@const points = ocrBox.points}
277-
{@const pathData = `M ${points[0].x} ${points[0].y} L ${points[1].x} ${points[1].y} L ${points[2].x} ${points[2].y} L ${points[3].x} ${points[3].y} Z`}
278-
<path
279-
d={pathData}
280-
fill="rgba(59, 130, 246, 0.2)"
281-
stroke="rgb(59, 130, 246)"
282-
stroke-width="2"
283-
/>
284-
{/each}
285-
</svg>
281+
282+
{#if ocrStore.showOverlay}
286283
{#each ocrBoxes as ocrBox (ocrBox.id)}
287-
{@const points = ocrBox.points}
288-
{@const centerX = (points[0].x + points[1].x + points[2].x + points[3].x) / 4}
289-
{@const centerY = (points[0].y + points[1].y + points[2].y + points[3].y) / 4}
290-
<div
291-
class="absolute pointer-events-none text-white text-sm font-semibold px-2 py-1 rounded shadow-lg"
292-
style="left: {centerX}px; top: {centerY}px; transform: translate(-50%, -50%); background-color: rgba(59, 130, 246, 0.9); max-width: 200px; word-break: break-word;"
293-
>
294-
{ocrBox.text}
295-
</div>
284+
<OcrBoundingBox {ocrBox} />
296285
{/each}
297286
{/if}
298287
</div>

web/src/lib/stores/ocr.store.ts

Lines changed: 0 additions & 20 deletions
This file was deleted.

web/src/lib/stores/ocr.svelte.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import { getAssetOcr } from '@immich/sdk';
2+
3+
/**
 * Element type of `OcrStore.data`, which is assigned directly from the result of
 * the SDK's `getAssetOcr({ id })` call.
 *
 * NOTE(review): the eight `x1..y4` numbers presumably describe the four corners of a
 * detected text region, and `boxScore` / `textScore` presumably are detection and
 * recognition confidences - confirm the order, units and ranges against the API.
 */
export interface OcrBoundingBox {
4+
id: string;
5+
assetId: string;
6+
x1: number;
7+
y1: number;
8+
x2: number;
9+
y2: number;
10+
x3: number;
11+
y3: number;
12+
x4: number;
13+
y4: number;
14+
boxScore: number;
15+
textScore: number;
16+
/** Text content of this entry. */
text: string;
17+
}
18+
19+
/**
 * Reactive holder for the OCR results of one asset and for the overlay toggle.
 *
 * `getAssetOcr` loads the entries for an asset id, `clear` resets everything, and
 * `toggleOcrBoundingBox` flips the overlay flag.
 *
 * Fetches are guarded by a generation counter: a response is applied only if no
 * newer `getAssetOcr` or `clear` call happened while it was in flight. Without
 * this, a slow response for a previously viewed asset could overwrite the data
 * of the asset that was requested later.
 */
class OcrStore {
  /** Entries from the most recent fetch that was applied; empty after `clear()`. */
  data = $state<OcrBoundingBox[]>([]);
  /** Whether the overlay is currently toggled on. */
  showOverlay = $state(false);
  /** True when the most recently applied fetch returned at least one entry. */
  hasOcrData = $state(false);

  /**
   * Incremented by every fetch and every clear so that an in-flight fetch can
   * tell it has been superseded. Deliberately a plain (non-reactive) field.
   */
  private generation = 0;

  /**
   * Fetches the OCR entries for `id` and stores them, unless the call was
   * superseded by a later `getAssetOcr` or `clear` while awaiting the response.
   *
   * A rejection from the underlying SDK call propagates to the caller unchanged.
   *
   * @param id - id passed through to the SDK's `getAssetOcr`.
   */
  async getAssetOcr(id: string) {
    const generation = ++this.generation;
    const results = await getAssetOcr({ id });

    // A newer request or a clear() happened while waiting: drop this stale response.
    if (generation !== this.generation) {
      return;
    }

    this.data = results;
    this.hasOcrData = results.length > 0;
  }

  /** Resets data, overlay flag and availability flag, and invalidates any in-flight fetch. */
  clear() {
    this.generation++;
    this.data = [];
    this.showOverlay = false;
    this.hasOcrData = false;
  }

  /** Flips the overlay flag. */
  toggleOcrBoundingBox() {
    this.showOverlay = !this.showOverlay;
  }
}
45+
export const ocrStore = new OcrStore();

0 commit comments

Comments
 (0)