diff --git a/CLAUDE.md b/CLAUDE.md index 38451d61..6a1a0456 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -5,6 +5,7 @@ - Keep glossary definitions concise and focused on concepts rather than implementation details - Organize new terms into the appropriate existing sections - When modifying code, add or improve JSDoc comments where possible to enhance documentation +- Chunk retrieval architecture diagrams are in `docs/diagrams/src/chunk-retrieval-cascade.puml` and `docs/diagrams/src/chunk-component-architecture.puml` - update these when making changes to chunk sources, caching behavior, or retrieval flow ## Releases diff --git a/docs/diagrams/src/chunk-component-architecture.puml b/docs/diagrams/src/chunk-component-architecture.puml new file mode 100644 index 00000000..fd63978c --- /dev/null +++ b/docs/diagrams/src/chunk-component-architecture.puml @@ -0,0 +1,35 @@ +@startuml +skinparam dpi 400 +skinparam backgroundColor white +skinparam componentStyle rectangle + +title Chunk Retrieval Component Architecture + +component "Browser" as B +component "AR.IO Peer" as P + +package "AR.IO Node" { + component "Express Handler" as H + component "TX Offset Index" as TOI + component "Composite Source" as CS + database "Local Cache" as LC +} + +cloud "External Sources" { + component "AR.IO Network" as ARIO + component "Arweave Network" as AR + component "S3" as S3 +} + +B --> H: HTTP Request +P --> H: HTTP Request +H --> TOI: Lookup offset +H --> CS: Get chunk +CS --> LC: Check cache +CS ..> ARIO: Fetch +CS ..> AR: Fetch +CS ..> S3: Fetch + +note right of CS: Configurable parallelism\n(e.g., 1-3 concurrent) + +@enduml \ No newline at end of file diff --git a/docs/diagrams/src/chunk-retrieval-cascade.puml b/docs/diagrams/src/chunk-retrieval-cascade.puml new file mode 100644 index 00000000..cae24d69 --- /dev/null +++ b/docs/diagrams/src/chunk-retrieval-cascade.puml @@ -0,0 +1,29 @@ +@startuml +skinparam dpi 300 +skinparam backgroundColor white + +title Chunk Retrieval Cascade + +left 
to right direction + +rectangle "Client Request" as CR + +rectangle "AR.IO Node" { + rectangle "1. Local Cache" as LC #90EE90 + rectangle "2. AR.IO Peers" as AP #87CEEB + rectangle "3. Arweave Network" as AN #FFB6C1 + rectangle "4. Legacy S3" as S3 #FFFFE0 +} + +CR --> LC: First +LC --> AP: If miss +AP --> AN: If fail +AN --> S3: Last resort +S3 ..> AN: Rebroadcast + +note bottom of LC: Fastest\n(in-memory & disk) +note bottom of AP: Fast\n(frequently used) +note bottom of AN: Slower\n(complete set) +note bottom of S3: Backup\n(cloud storage) + +@enduml \ No newline at end of file diff --git a/docs/diagrams/src/contiguous-data-retrieval-cascade.puml b/docs/diagrams/src/contiguous-data-retrieval-cascade.puml new file mode 100644 index 00000000..9c47af97 --- /dev/null +++ b/docs/diagrams/src/contiguous-data-retrieval-cascade.puml @@ -0,0 +1,31 @@ +@startuml +skinparam dpi 200 +skinparam backgroundColor white + +title Contiguous Data Retrieval Cascade + +left to right direction + +rectangle "Data Request" as DR + +rectangle "AR.IO Node" { + rectangle "1. Local Cache" as LC #90EE90 + rectangle "2. Trusted Gateways" as TG #87CEEB + rectangle "3. Chunk Reconstruction" as CR #FFB6C1 + rectangle "4. TX Data" as TD #DDA0DD + rectangle "5. 
AR.IO Network" as AN #FFFFE0 +} + +DR --> LC: First +LC --> TG: If miss +TG --> CR: If fail +CR --> TD: If fail +TD --> AN: If fail + +note bottom of LC: Fastest\n(verified data) +note bottom of TG: Fast\n(external gateways) +note bottom of CR: Reliable\n(from chunks) +note bottom of TD: Direct\n(Arweave nodes) +note bottom of AN: Fallback\n(AR.IO peers) + +@enduml \ No newline at end of file diff --git a/docs/diagrams/src/release-47-bundle-header-parsing.puml b/docs/diagrams/src/release-47-bundle-header-parsing.puml new file mode 100644 index 00000000..7e642958 --- /dev/null +++ b/docs/diagrams/src/release-47-bundle-header-parsing.puml @@ -0,0 +1,39 @@ +@startuml +skinparam dpi 200 +skinparam backgroundColor white + +title Release 47 Bundle Header Parsing + +actor Client +participant "AR.IO Node" as Node +database "Local Cache" as Cache +participant "Root ID Source" as RootID +participant "Bundle Parser" as Parser +participant "Chunk Sources" as Chunks +participant "Root Transaction" as Root + +Client -> Node: GET /tx/{id}/data +Node -> Cache: Check local cache +Cache --> Node: Cache miss + +Node -> RootID: Get root transaction ID +RootID --> Node: Root transaction ID + +note right of RootID: Only stores root ID,\nnot pre-calculated offsets + +Node -> Root: Get root transaction header +Root --> Node: Bundle header data + +Node -> Parser: Parse ANS-104 bundle headers +Parser -> Parser: Calculate offset for data item +Parser --> Node: Calculated offset + +Node -> Chunks: Fetch chunks for range\n(using calculated offset + range) +Chunks --> Node: Chunk data + +Node -> Node: Assemble chunks into\ncontiguous data + +Node -> Cache: Store assembled data +Node -> Client: Stream response + +@enduml \ No newline at end of file diff --git a/docs/diagrams/src/release-47-contiguous-data-retrieval.puml b/docs/diagrams/src/release-47-contiguous-data-retrieval.puml new file mode 100644 index 00000000..65c73739 --- /dev/null +++ 
b/docs/diagrams/src/release-47-contiguous-data-retrieval.puml @@ -0,0 +1,34 @@ +@startuml +skinparam dpi 200 +skinparam backgroundColor white + +title Release 47 Contiguous Data Retrieval + +actor Client +participant "AR.IO Node" as Node +database "Local Cache" as Cache +participant "Offset Source" as Offset +participant "Chunk Sources" as Chunks +participant "Root Transaction" as Root + +Client -> Node: GET /tx/{id}/data +Node -> Cache: Check local cache +Cache --> Node: Cache miss + +Node -> Offset: Get root parent offset +Offset --> Node: Root transaction ID + offset + +note right of Offset: Could be local index\nor found in network\n(AR.IO peers, gateways) + +Node -> Root: Get transaction metadata +Root --> Node: TX size, data root + +Node -> Chunks: Fetch chunks for range\n(using offset + requested range) +Chunks --> Node: Chunk data + +Node -> Node: Assemble chunks into\ncontiguous data + +Node -> Cache: Store assembled data +Node -> Client: Stream response + +@enduml \ No newline at end of file