JuliaDynamics · bergio13 · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025 · Jul 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -17,4 +17,6 @@ test/adata.arrow
 test/mdata.arrow
 *.csv
 *.arrow
-tutorial.md
+tutorial.md
+log
+examples/rl/log
diff --git a/Project.toml b/Project.toml
@@ -30,25 +30,31 @@ StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a"
 
 [weakdeps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
+Crux = "e51cc422-768a-4345-bb8e-2246287ae729"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2"
 Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 OSMMakie = "76b6901f-8821-46bb-9129-841bc9cfe677"
+POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d"
 
 [extensions]
 AgentsArrow = "Arrow"
 AgentsGraphVisualizations = ["Makie", "GraphMakie"]
 AgentsOSMVisualizations = ["Makie", "OSMMakie"]
 AgentsVisualizations = "Makie"
+AgentsRL = ["Crux", "POMDPs", "Flux"]
 
 [compat]
 Arrow = "2"
 CSV = "0.9.7, 0.10"
 CommonSolve = "0.2.4"
+Crux = "0.1"
 DataFrames = "0.21, 0.22, 1"
 DataStructures = "0.18"
 Distributed = "1"
 Distributions = "0.25"
 Downloads = "1"
+Flux = "0.14"
 GraphMakie = "0.5, 0.6"
 Graphs = "1.4"
 JLD2 = "0.4, 0.5"
@@ -57,8 +63,9 @@ LightOSM = "0.2, 0.3"
 LightSumTypes = "5"
 LinearAlgebra = "1"
 MacroTools = "0.5"
-Makie = "0.20, 0.21, 0.22, 0.24"
+Makie = "0.20, 0.21, 0.22, 0.23"
 OSMMakie = "0.0, 0.1"
+POMDPs = "0.9, 1"
 PrecompileTools = "1"
 ProgressMeter = "1.5"
 Random = "1"

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -6,6 +6,7 @@ BlackBoxOptim = "a134a8b2-14d6-55f6-9291-3336d3ab0209"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CellListMap = "69e1c6dd-3888-40e6-b3c8-31ac5f578864"
 ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
+Crux = "e51cc422-768a-4345-bb8e-2246287ae729"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DelaunayTriangulation = "927a84f5-c5f4-47a5-9785-b46e178433df"
 DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def"
@@ -15,6 +16,7 @@ DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
 DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
 LightSumTypes = "f56206fc-af4c-5561-a72a-43fe2ca5a923"
 FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a"
 GraphMakie = "1ecd5474-83a3-4783-bb4f-06765db800d2"
 GraphRecipes = "bd48cda9-67a9-57be-86fa-5b3c104eda73"
@@ -28,6 +30,7 @@ MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca"
 OSMMakie = "76b6901f-8821-46bb-9129-841bc9cfe677"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
 SimpleWeightedGraphs = "47aef6b3-ad0c-573a-a1e2-d07658019622"

diff --git a/docs/make.jl b/docs/make.jl
@@ -15,6 +15,7 @@ pages = [
         "examples/predator_prey.md",
         "examples/rabbit_fox_hawk.md",
         "examples/event_rock_paper_scissors.md",
+        "examples/rl_boltzmann.md",
         "examples.md"
     ],
     "api.md",

diff --git a/docs/src/api.md b/docs/src/api.md
@@ -8,6 +8,7 @@ In this page we list the remaining API functions, which constitute the bulk of A
 - [`AgentBasedModel`](@ref)
 - [`StandardABM`](@ref)
 - [`EventQueueABM`](@ref)
+- [`ReinforcementLearningABM`](@ref)
 
 ```@docs
 AgentBasedModel
@@ -28,6 +29,18 @@ AgentEvent
 add_event!
 ```
 
+### Reinforcement learning models
+
+```@docs
+ReinforcementLearningABM
+set_rl_config!
+create_policy_network
+create_value_network
+train_model!
+get_trained_policies
+copy_trained_policies!
+```
+
 ## Agent types
 
 ```@docs
@@ -94,6 +107,7 @@ OpenStreetMapSpace
 ```
 
 ## Adding agents
+
 ```@docs
 add_agent!
 add_agent_own_pos!
@@ -102,6 +116,7 @@ random_position
 ```
 
 ## Moving agents
+
 ```@docs
 move_agent!
 walk!
@@ -110,6 +125,7 @@ get_direction
 ```
 
 ### Movement with paths
+
 For [`OpenStreetMapSpace`](@ref), and [`GridSpace`](@ref)/[`ContinuousSpace`](@ref) using [`Pathfinding`](@ref), a special
 movement method is available.
 
@@ -121,19 +137,22 @@ is_stationary
 ```
 
 ## Removing agents
+
 ```@docs
 remove_agent!
 remove_all!
 sample!
 ```
 
 ## Space utility functions
+
 ```@docs
 normalize_position
 spacesize
 ```
 
 ## [`DiscreteSpace` exclusives](@id DiscreteSpace_exclusives)
+
 ```@docs
 positions
 npositions
@@ -154,6 +173,7 @@ isempty(::Int, ::ABM)
 ```
 
 ## `GraphSpace` exclusives
+
 ```@docs
 add_edge!
 rem_edge!
@@ -162,6 +182,7 @@ rem_vertex!
 ```
 
 ## [`ContinuousSpace` exclusives](@id ContinuosSpace_exclusives)
+
 ```@docs
 nearest_neighbor
 get_spatial_property
@@ -173,6 +194,7 @@ manhattan_distance
 ```
 
 ## `OpenStreetMapSpace` exclusives
+
 ```@docs
 OSM
 OSM.lonlat
@@ -189,6 +211,7 @@ OSM.download_osm_network
 ```
 
 ## Nearby Agents
+
 ```@docs
 nearby_ids
 nearby_agents
@@ -204,6 +227,7 @@ Most iteration in Agents.jl is **dynamic** and **lazy**, when possible, for perf
 
 **Dynamic** means that when iterating over the result of e.g. the [`ids_in_position`](@ref) function, the iterator will be affected by actions that would alter its contents.
 Specifically, imagine the scenario
+
 ```@example docs
 using Agents
 # We don't need to make a new agent type here,
@@ -217,16 +241,20 @@ for id in ids_in_position((1, 1, 1, 1), model)
 end
 collect(allids(model))
 ```
+
 You will notice that only 1 agent was removed. This is simply because the final state of the iteration of `ids_in_position` was reached unnaturally, because the length of its output was reduced by 1 _during_ iteration.
 To avoid problems like these, you need to `collect` the iterator to have a non dynamic version.
 
 **Lazy** means that when possible the outputs of the iteration are not collected and instead are generated on the fly.
 A good example to illustrate this is [`nearby_ids`](@ref), where doing something like
+
 ```julia
 a = random_agent(model)
 sort!(nearby_ids(random_agent(model), model))
 ```
+
 leads to an error, since you cannot `sort!` the returned iterator. This can be easily solved by adding a `collect` in between:
+
 ```@example docs
 a = random_agent(model)
 sort!(collect(nearby_agents(a, model)))
@@ -247,13 +275,13 @@ index_mapped_groups
 ```
 
 ## Data collection and analysis
+
 ```@docs
 run!
 ensemblerun!
 paramscan
 ```
 
-
 ### Manual data collection
 
 The central simulation function is [`run!`](@ref).
@@ -268,6 +296,7 @@ dataname
 ```
 
 For example, the core loop of `run!` is just
+
 ```julia
 df_agent = init_agent_dataframe(model, adata)
 df_model = init_model_dataframe(model, mdata)
@@ -286,6 +315,7 @@ while until(t, t0, n, model)
 end
 return df_agent, df_model
 ```
+
 (here `until` and `should_we_collect` are internal functions)
 
 ## [Schedulers](@id Schedulers)
@@ -310,15 +340,19 @@ Schedulers.ByKind
 ```
 
 ### [Advanced scheduling](@id advanced_scheduling)
+
 You can use [Function-like objects](https://docs.julialang.org/en/v1/manual/methods/#Function-like-objects) to make your scheduler capable of handling arbitrary events.
 For example, imagine that after the `n`-th step of your simulation you want to fundamentally change the order of agents. To achieve this you can define
+
 ```julia
 mutable struct MyScheduler
     n::Int # step number
     w::Float64
 end
 ```
+
 and then define a calling method for it like so
+
 ```julia
 function (ms::MyScheduler)(model::ABM)
     ms.n += 1 # increment internal counter by 1 each time it's called
@@ -333,17 +367,20 @@ function (ms::MyScheduler)(model::ABM)
     end
 end
 ```
+
 and pass it to e.g. `step!` by initializing it
+
 ```julia
 ms = MyScheduler(100, 0.5)
 step!(model, agentstep, modelstep, 100; scheduler = ms)
 ```
 
-
 ### How to use `Distributed`
+
 To use the `parallel=true` option of [`ensemblerun!`](@ref) you need to load `Agents` and define your fundamental types on all processes. See the [Performance Tips](@ref) page for parallelization.
 
 ## Path-finding
+
 ```@docs
 Pathfinding
 Pathfinding.AStar
@@ -353,6 +390,7 @@ Pathfinding.random_walkable
 ```
 
 ### Pathfinding Metrics
+
 ```@docs
 Pathfinding.DirectDistance
 Pathfinding.MaxDistance
@@ -363,8 +401,10 @@ Building a custom metric is straightforward, if the provided ones do not suit yo
 See the [Developer Docs](@ref) for details.
 
 ## Save, Load, Checkpoints
+
 There may be scenarios where interacting with data in the form of files is necessary. The following
 functions provide an interface to save/load data to/from files.
+
 ```@docs
 AgentsIO.save_checkpoint
 AgentsIO.load_checkpoint
@@ -373,6 +413,7 @@ AgentsIO.dump_to_csv
 ```
 
 It is also possible to write data to file at predefined intervals while running your model, instead of storing it in memory:
+
 ```@docs
 offline_run!
 ```

diff --git a/docs/src/assets/boltzmann_rl_final_state.png b/docs/src/assets/boltzmann_rl_final_state.png
diff --git a/docs/src/assets/boltzmann_rl_initial_state.png b/docs/src/assets/boltzmann_rl_initial_state.png
diff --git a/docs/src/assets/rl_boltzmann.mp4 b/docs/src/assets/rl_boltzmann.mp4
diff --git a/docs/src/assets/rl_boltzmann_learning_curve.png b/docs/src/assets/rl_boltzmann_learning_curve.png