### A Pluto.jl notebook ###
# v0.19.36

#> [frontmatter]
#> title = "TP1 - Reinforcement learning"
#> date = "2022-12-14"
#> tags = ["RL"]

using Markdown
using InteractiveUtils

# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
macro bind(def, element)
    quote
        local iv = try Base.loaded_modules[Base.PkgId(Base.UUID("6e696c72-6542-2067-7265-42206c756150"), "AbstractPlutoDingetjes")].Bonds.initial_value catch; b -> missing; end
        local el = $(esc(element))
        global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : iv(el)
        el
    end
end

# ╔═╡ 02b1e10c-653e-4660-90b5-2eae7f19f1f7
# ╠═╡ show_logs = false
# https://github.com/fonsp/Pluto.jl/wiki/%F0%9F%8E%81-Package-management#advanced-set-up-an-environment-with-pkgactivate
begin
    using Pkg
    Pkg.activate()
end

# ╔═╡ 26fdd17e-f03a-4835-93be-85303fe526d8
begin
    using Plots # for plotting figures
    using PlutoUI # for Pluto UI objects
    using LinearAlgebra # for identity matrices
    using SparseArrays # for sparse matrices
    using LaTeXStrings # for LaTeX strings (used in plots)
    import Random
    TableOfContents(depth=4)
end

# ╔═╡ 56ac3473-24f4-42d7-84e1-cfce6a70d8d5
html"""
<style>
    pluto-output, img {
        display: block;
        margin: auto;
    }
    .sliders {
        position: fixed;
        left: 2rem;
        top: 40%;
        z-index: 1000;
    }
</style>
"""

# ╔═╡ ccf4d63e-7ace-11ed-2123-d9dbb62bd308
html"""
<center>
    <strong style="font-size: 2rem;">
        TP1 - Reinforcement learning <br/>
        Laurent Fainsin <br/>
        2021 - 2022
    </strong>
</center>
"""

# ╔═╡ 9f2879c1-c22b-4067-ad20-4e4c56cc8d00
begin
    REWARD_MOVE_slider = @bind REWARD_MOVE Slider(-0.1:0.01:0, default=-0.04, show_value=true)
    REWARD_GOAL_slider = @bind REWARD_GOAL Slider(0:1:10, default=1, show_value=true)
    REWARD_DEATH_slider = @bind REWARD_DEATH Slider(-10:1:0, default=-2, show_value=true)
    DISCOUNT_FACTOR_slider = @bind DISCOUNT_FACTOR Slider(0.9:0.01:0.99, default=0.9, show_value=true)

    div = html"""<div class="sliders">"""
    div_end = html"""</div>"""

    md"""
$(div)

Hyperparameters:

REWARD\_MOVE: $(REWARD_MOVE_slider)

REWARD\_GOAL: $(REWARD_GOAL_slider)

REWARD\_DEATH: $(REWARD_DEATH_slider)

DISCOUNT\_FACTOR: $(DISCOUNT_FACTOR_slider)

$(div_end)
"""
end

# ╔═╡ 0a30a68a-068e-41fb-92c4-000869ba7dff
RANDOM_SEED = 420

# ╔═╡ 07b57746-fba0-49aa-ba17-6dcb0bbe44e5
MAX_ITERATIONS = 350

# ╔═╡ 92d6874b-651c-4551-840e-ad5d1e934aeb
# Movement noise: the intended direction is followed with probability 0.7 and each
# of the other three directions with probability 0.1 (rows: intended direction,
# columns: realized direction; each row sums to 1).
MOVEMENTS = [
    0.7 0.1 0.1 0.1
    0.1 0.7 0.1 0.1
    0.1 0.1 0.7 0.1
    0.1 0.1 0.1 0.7
]

# ╔═╡ fe44d7f2-155e-42f2-83c3-dd18aadb3810
md"""
We define our environment as a 3x4 grid:
"""

# ╔═╡ 28b769a6-dd3c-43ab-bae0-646d8ebc35d6
begin
    ARROW_SYMBOLS = ["⬅️", "⬆️", "⬇️", "➡️"]
    DEATH_SYMBOL = "☠️"
    SUCCESS_SYMBOL = "🏆"
    WALL_SYMBOL = "🧱"
    EMPTY_SYMBOL = "🟫"

    [
        EMPTY_SYMBOL EMPTY_SYMBOL EMPTY_SYMBOL SUCCESS_SYMBOL
        EMPTY_SYMBOL WALL_SYMBOL EMPTY_SYMBOL DEATH_SYMBOL
        EMPTY_SYMBOL EMPTY_SYMBOL EMPTY_SYMBOL EMPTY_SYMBOL
    ]
end

# ╔═╡ 3881603c-619b-4976-ac4c-2c7e7f3a6ec7
md"""
We can define our rewards as:
"""

# ╔═╡ fb797a9b-6a0a-4a77-a9b6-6804f98639bb
begin
    REWARDS = [
        REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_GOAL,
        REWARD_MOVE, REWARD_MOVE, REWARD_DEATH,
        REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_MOVE,
    ]

    local REWARDS_display = copy(REWARDS)
    insert!(REWARDS_display, 6, 0)
    REWARDS_display = permutedims(reshape(REWARDS_display, 4, 3))
    REWARDS_display = sparse(REWARDS_display)
end

# ╔═╡ 1e3abda8-6645-48ba-874d-28e1011fc3e3
md"""
# Performance Prediction
"""

# ╔═╡ beb410a8-03e2-4f18-8ccd-941cc926ee12
md"""
## Question 1

> Assume the random policy, that is, the policy that takes every possible action with probability 1/4. Compute its value function by solving \
> $V = (I - \gamma P)^{-1} R$. \
> Since there are 11 possible states in the problem, the vectors ``R`` and ``V`` have length 11, and the matrix ``P`` has dimension 11x11. There are two absorbing states, i.e., they are visited once, and their respective reward (+1 or -1) is only accrued once. To model this, you can simply put all 0’s in all the elements of the respective two lines.
"""

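# A minimal, self-contained sketch of this direct solve (an illustrative aside, not
# one of the assignment's cells): a toy 2-state chain where state 2 is absorbing, so
# its row of the transition matrix is all zeros and its reward is accrued only once.
# The names and numbers below are assumptions chosen for illustration.
let
    γ_toy = 0.9
    P_toy = [0.0 1.0;    # from state 1 we always move to state 2
             0.0 0.0]    # state 2 is absorbing: a row of zeros
    R_toy = [-0.04, 1.0] # a move penalty, then the terminal reward
    V_toy = (I - γ_toy * P_toy) \ R_toy
    V_toy # == [-0.04 + 0.9 * 1.0, 1.0]
end
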
# ╔═╡ 133f291f-6f21-4441-86f7-ba190a7d6b1f
md"""
We define a random policy (by hand):
"""

# ╔═╡ e14f9977-d2fd-4d05-84d6-614008dc0c4a
[
    ARROW_SYMBOLS[2] ARROW_SYMBOLS[1] ARROW_SYMBOLS[1] SUCCESS_SYMBOL
    ARROW_SYMBOLS[1] WALL_SYMBOL ARROW_SYMBOLS[1] DEATH_SYMBOL
    ARROW_SYMBOLS[1] ARROW_SYMBOLS[3] ARROW_SYMBOLS[2] ARROW_SYMBOLS[1]
]

# ╔═╡ 486c93ab-9cb9-4df4-b702-bbe12a961647
md"""
From our movement probabilities we can then build ``P``:
"""

# ╔═╡ ab2d705d-fc00-43b2-bb6d-2a3d4ba9dab1
begin
    P = [
        [
            0.8, 0.1, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.7, 0.2, 0.1, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.7, 0.1, 0.1,
            0.0, 0.1, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.1, 0.0, 0.0, 0.0,
            0.8, 0.0, 0.0,
            0.1, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.1, 0.0,
            0.0, 0.7, 0.1,
            0.0, 0.0, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.8, 0.1, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.1, 0.8, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.7, 0.0,
            0.0, 0.1, 0.1, 0.1,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.1,
            0.0, 0.0, 0.7, 0.2,
        ],
    ]

    P = sparse(reduce(hcat, P)')
end

# ╔═╡ b7ae89c9-3c1b-4f5c-af5b-164d95ccca41
md"""
We can then compute ``V``:
"""

# ╔═╡ 03c17428-5ab9-42e7-bf79-92eb846f11cb
begin
    V = Matrix(I(length(REWARDS)) - DISCOUNT_FACTOR * P) \ REWARDS

    local V_display = copy(V)
    insert!(V_display, 6, 0)
    V_display = permutedims(reshape(V_display, 4, 3))
    V_display = sparse(V_display)
end

# ╔═╡ c65d0dbc-ecd7-4320-9b3a-a1b9c0545f9a
md"""
### Bonus
We check that ``V`` satisfies the Bellman equation by computing one Bellman iteration:

$V_{\text{next}} = R + \gamma P V$

and observing that ``V`` is a fixed point:

$V_{\text{next}} = V$

We therefore compute ``V_\text{next}``:
"""

# ╔═╡ ad547684-bcbe-44f4-9fc1-f327d2db4584
begin
    V_next = REWARDS + Matrix(DISCOUNT_FACTOR * P) * V

    local V_display = copy(V_next)
    insert!(V_display, 6, 0)
    V_display = permutedims(reshape(V_display, 4, 3))
    V_display = sparse(V_display)
end

# ╔═╡ d3703ab8-912c-417d-acd9-29590ec1134b
if isapprox(V_next, V)
    Markdown.MD(Markdown.Admonition("correct", "V is a fixed point", [md"The Bellman equation is satisfied"]));
else
    Markdown.MD(Markdown.Admonition("danger", "V is not a fixed point", [md"The Bellman equation is not satisfied"]));
end

# ╔═╡ 1319b304-5126-4825-8076-e113e4dd3635
md"""
## Question 2

> Evaluate now the policy using Iterative Policy Evaluation (lecture 2, 2nd part, slides 11/35), and verify that the algorithm converges to the result obtained in 1. To stop iterating, you can take as a criterion that the difference between two iterations must be smaller than some small ``\delta``. Due to the contraction principle, the initial vector can be arbitrary.
"""

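# A standalone sketch of Iterative Policy Evaluation with the explicit ``δ`` stopping
# criterion mentioned in the question (the notebook cell below uses `isapprox`
# instead). It reuses the toy 2-state chain from the earlier sketch; the names and
# numbers are illustrative assumptions.
let
    γ_toy, δ = 0.9, 1e-10
    P_toy = [0.0 1.0; 0.0 0.0]  # state 2 absorbing (row of zeros)
    R_toy = [-0.04, 1.0]
    V_k = rand(2)               # arbitrary initial vector (contraction principle)
    while true
        V_next_toy = R_toy + γ_toy * P_toy * V_k
        if maximum(abs.(V_next_toy - V_k)) < δ   # sup-norm difference below δ
            V_k = V_next_toy
            break
        end
        V_k = V_next_toy
    end
    V_k # converges to the direct solve of Question 1: [0.86, 1.0]
end
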
# ╔═╡ 3ea3f177-c576-4b9e-a54b-c427e29a8491
md"""
We initialize ``V_\text{random} \in [0, 1]^{11}`` at random.

We want to check that ``V_\text{random}`` converges to ``V`` under iterative evaluation of the policy ``P``.
"""

# ╔═╡ e94fe8a6-274b-4121-b1fc-063d3710c2f7
begin
    Random.seed!(RANDOM_SEED)
    V_random = rand(length(REWARDS))
    local diffs = Vector{Float64}()
    for _ in 1:MAX_ITERATIONS
        local V_old = V_random
        global V_random = REWARDS + Matrix(DISCOUNT_FACTOR * P) * V_random
        append!(diffs, norm(V_random - V_old))
        if isapprox(V_random, V_old)
            break
        end
    end

    plot(
        diffs,
        labels = "",
        xlabel = L"n",
        ylabel = L"|| V_{n+1} - V_n ||",
        yticks=[10.0^-x for x in 0:10],
        linewidth=2,
        yaxis=:log,
        title="Iterative Policy Evaluation convergence",
    )
end

# ╔═╡ 80090d5f-d56c-4844-a04f-444ed49e5f34
if isapprox(V_random, V, rtol=1e-5)
    Markdown.MD(Markdown.Admonition("correct", "Iterative policy evaluation is verified", [md"``V_\text{random}`` converges to ``V``"]));
else
    Markdown.MD(Markdown.Admonition("danger", "Iterative policy evaluation is not verified", [md"``V_\text{random}`` does not converge to ``V``"]));
end

# ╔═╡ 98362798-aae4-4540-9e98-cc7371802552
md"""
## Question 3

> To verify that the Bellman operator is a contraction, take two initial vectors, and calculate the max of their differences. Then, apply the iterative policy evaluation to these 2 vectors as done in the previous item, and plot the maximum of their differences as you keep iterating. Observe what happens with the difference as you iterate, and explain it.
"""

# ╔═╡ 30874daf-7b0e-4335-9a50-d19389cf1620
md"""
We initialize ``V_{r1}, V_{r2} \in [0, 1]^{11}`` at random.

We want to check that ``V_{r1}`` converges to ``V_{r2}`` (and also to ``V``) under iterative evaluation of the policy ``P``. Since the Bellman operator ``T : V \mapsto R + \gamma P V`` is a ``\gamma``-contraction for the sup norm, ``\| T V_{r1} - T V_{r2} \|_\infty \leq \gamma \| V_{r1} - V_{r2} \|_\infty``, so the gap between the two iterates shrinks at least geometrically.
"""

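# A small numerical check of the contraction bound itself (an illustrative aside,
# not required by the assignment): one application of T shrinks the sup-norm
# distance between two arbitrary vectors by at least the factor γ.
let
    V1, V2 = rand(length(REWARDS)), rand(length(REWARDS))
    TV1 = REWARDS + DISCOUNT_FACTOR * P * V1
    TV2 = REWARDS + DISCOUNT_FACTOR * P * V2
    maximum(abs.(TV1 - TV2)) <= DISCOUNT_FACTOR * maximum(abs.(V1 - V2)) # expected: true
end
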
# ╔═╡ c005a3f8-765c-4a50-90ef-73a5a72eee01
begin
    Random.seed!(RANDOM_SEED)
    V_random1 = rand(length(REWARDS))
    V_random2 = rand(length(REWARDS))
    local diffs = Vector{Float64}()
    for _ in 1:MAX_ITERATIONS
        global V_random1 = REWARDS + Matrix(DISCOUNT_FACTOR * P) * V_random1
        global V_random2 = REWARDS + Matrix(DISCOUNT_FACTOR * P) * V_random2
        append!(diffs, norm(V_random1 - V_random2))
        if isapprox(V_random1, V_random2)
            break
        end
    end

    plot(
        diffs,
        labels = "",
        xlabel = L"n",
        ylabel = L"|| V_{r1} - V_{r2} ||",
        yticks=[10.0^-x for x in 0:10],
        linewidth=2,
        yaxis=:log,
        title="Bellman's operator contraction",
    )
end

# ╔═╡ 1b43e9e5-d7d2-4b5e-a2b2-3a8b8eda6d62
if isapprox(V_random1, V_random2, rtol=0.01)
    Markdown.MD(Markdown.Admonition("correct", "The Bellman operator is indeed a contraction", [md"``V_{r1}`` converges to ``V_{r2}``"]));
else
    Markdown.MD(Markdown.Admonition("danger", "The contraction of the Bellman operator is not verified", [md"``V_{r1}`` does not converge to ``V_{r2}``"]));
end

# ╔═╡ add0221b-e352-4559-a722-c45a64f573f9
md"""
# Optimization
"""

# ╔═╡ 84e07dce-bf6d-4ac1-bfa4-65414fe1d787
md"""
## Question 1

> Write down the Bellman equation that characterizes the optimal policy.
"""

# ╔═╡ df13fa05-14de-409b-a0b1-5bba5eff432e
md"""
Bellman Optimality Equation for ``V_\star`` and ``\pi_\star``:

$V_\star(s) = \max_{a \in A} \left( r(s,a) + \gamma \sum_{s' \in S} p(s' | s, a) V_\star(s') \right)$

$\pi_\star(s) \in \mathop{\mathrm{argmax}}_{a \in A} \left( r(s,a) + \gamma \sum_{s' \in S} p(s' | s, a) V_\star(s') \right)$
"""

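# A compact, self-contained sketch of how these equations are used numerically:
# value iteration on a toy 2-state, 2-action MDP, followed by greedy policy
# extraction. All names and numbers here are illustrative assumptions, unrelated
# to the 3x4 grid of this notebook.
let
    γ_toy = 0.9
    # one transition matrix and one reward vector per action; state 2 is absorbing
    P_a = ([0.9 0.1; 0.0 0.0], [0.2 0.8; 0.0 0.0])
    R_a = ([0.0, 1.0], [-0.1, 1.0])
    Vstar = zeros(2)
    for _ in 1:1000   # a fixed number of sweeps, for brevity
        # Bellman optimality update: max over actions of r(s,a) + γ Σ p(s'|s,a) V(s')
        Vstar = maximum.(eachrow(hcat((R_a[a] + γ_toy * P_a[a] * Vstar for a in 1:2)...)))
    end
    # the optimal action in each state is the argmax of the same expression
    πstar = argmax.(eachrow(hcat((R_a[a] + γ_toy * P_a[a] * Vstar for a in 1:2)...)))
    (Vstar, πstar)
end
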
# ╔═╡ ac490e4a-ce20-4288-a04f-c224df5ade1a
md"""
## Question 2

> Solve numerically the optimal value function by Value Iteration Algorithm (lecture 2, 2nd part, slides 15/35). Verify that the solution you obtain satisfies the Bellman equation.
"""

# ╔═╡ 33890f22-d3f6-4bcf-870d-756f7ff250a9
md"""
``P_g``, the policy that always moves left:
"""

# ╔═╡ cf9fb8a8-6c93-4c43-9f01-5f198f0cf4aa
begin
    P_g = [
        [
            0.8, 0.1, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.7, 0.2, 0.1, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.7, 0.1, 0.1,
            0.0, 0.1, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.1, 0.0, 0.0, 0.0,
            0.8, 0.0, 0.0,
            0.1, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.1, 0.0,
            0.0, 0.7, 0.1,
            0.0, 0.0, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.8, 0.1, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.7, 0.2, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.1, 0.0,
            0.0, 0.7, 0.1, 0.1,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.1,
            0.0, 0.0, 0.7, 0.2,
        ],
    ]

    P_g = sparse(reduce(hcat, P_g)')
end

# ╔═╡ dc87b85f-c87c-4302-9124-194bd799f1fd
md"""
``P_h``, the policy that always moves up:
"""

# ╔═╡ b2595dec-aa5b-462b-b0f8-3555c1231b2f
begin
    P_h = [
        [
            0.8, 0.1, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.1, 0.8, 0.1, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.1, 0.7, 0.1,
            0.0, 0.1, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.7, 0.0, 0.0, 0.0,
            0.2, 0.0, 0.0,
            0.1, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.7, 0.0,
            0.0, 0.1, 0.1,
            0.0, 0.0, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.7, 0.0, 0.0,
            0.2, 0.1, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.1, 0.8, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.7, 0.0,
            0.0, 0.1, 0.1, 0.1,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.7,
            0.0, 0.0, 0.1, 0.2,
        ],
    ]

    P_h = sparse(reduce(hcat, P_h)')
end

# ╔═╡ 70edf811-adb0-4ae8-941a-b298d85a6e0e
md"""
``P_b``, the policy that always moves down:
"""

# ╔═╡ 875673f1-08c9-4713-bbc2-85b0a7a0cb0a
begin
    P_b = [
        [
            0.2, 0.1, 0.0, 0.0,
            0.7, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.1, 0.8, 0.1, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.1, 0.1, 0.1,
            0.0, 0.7, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.1, 0.0, 0.0, 0.0,
            0.2, 0.0, 0.0,
            0.7, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.1, 0.0,
            0.0, 0.1, 0.1,
            0.0, 0.0, 0.7, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.8, 0.1, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.1, 0.8, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.1, 0.0,
            0.0, 0.1, 0.7, 0.1,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.1,
            0.0, 0.0, 0.1, 0.8,
        ],
    ]

    P_b = sparse(reduce(hcat, P_b)')
end

# ╔═╡ 2deaac7c-ad14-43b0-9cd5-9f0ec12d324c
md"""
``P_d``, the policy that always moves right:
"""

# ╔═╡ b5c93b6f-933c-41b4-8399-44cc0fa07fab
begin
    P_d = [
        [
            0.2, 0.7, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.1, 0.2, 0.7, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.1, 0.1, 0.7,
            0.0, 0.1, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.1, 0.0, 0.0, 0.0,
            0.8, 0.0, 0.0,
            0.1, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.1, 0.0,
            0.0, 0.1, 0.7,
            0.0, 0.0, 0.1, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.1, 0.0, 0.0,
            0.2, 0.7, 0.0, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0,
            0.1, 0.2, 0.7, 0.0,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.1, 0.0,
            0.0, 0.1, 0.1, 0.7,
        ],
        [
            0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.1,
            0.0, 0.0, 0.1, 0.8,
        ],
    ]

    P_d = sparse(reduce(hcat, P_d)')
end

# ╔═╡ 8015bdbb-82dd-48da-905d-a25e5c864298
md"""
To find the optimal policy, we can proceed as follows:

Initialize ``V_\star`` (at random). \
While we have not converged (and `MAX_ITERATIONS` has not been reached):
- For each direction (left, up, down, right), compute the one-step value vector of the policy associated with that direction.
- Take as the new `V_optimal` the row-wise maximum of these four value vectors.
- Check convergence by comparing with the previous iterate.

Applying this algorithm yields ``V_\star``:
"""

# ╔═╡ 3d7d0b11-5b99-4b1f-ab06-3366678eece8
begin
    Random.seed!(RANDOM_SEED)
    V_optimal = rand(length(REWARDS))
    pi = zeros(length(REWARDS))

    for _ in 1:MAX_ITERATIONS
        local V_g = REWARDS + Matrix(DISCOUNT_FACTOR * P_g) * V_optimal
        local V_h = REWARDS + Matrix(DISCOUNT_FACTOR * P_h) * V_optimal
        local V_b = REWARDS + Matrix(DISCOUNT_FACTOR * P_b) * V_optimal
        local V_d = REWARDS + Matrix(DISCOUNT_FACTOR * P_d) * V_optimal

        local V_new = maximum.(eachrow([V_g V_h V_b V_d]))

        if isapprox(V_new, V_optimal)
            pi = argmax.(eachrow([V_g V_h V_b V_d]))
            break
        else
            V_optimal = V_new
        end
    end

    local V_display = copy(V_optimal)
    insert!(V_display, 6, 0)
    V_display = permutedims(reshape(V_display, 4, 3))
    V_display = sparse(V_display)
end

# ╔═╡ 664bb753-ccce-4c7a-8b11-76261a3b80d2
md"""
## Question 3

> Explain how you can infer the optimal action in every state from the optimal value function ``V_\star(s)``. Represent in a 2D matrix the optimal policy.
"""

# ╔═╡ df01ea55-b289-4c13-8a6b-780ce068e44c
md"""
The optimal policy is obtained by selecting, in each state, the most favorable direction, i.e. the action that attains the maximum in the Bellman optimality equation (the row-wise argmax over the four directional value vectors):
"""

# ╔═╡ d7ff1cb5-d2b4-4597-bcef-0f74f2e7e0db
begin
    pi_symbols = [ARROW_SYMBOLS[i] for i in pi]
    insert!(pi_symbols, 6, WALL_SYMBOL)
    pi_symbols[4] = SUCCESS_SYMBOL
    pi_symbols[8] = DEATH_SYMBOL
    permutedims(reshape(pi_symbols, 4, 3))
end

# ╔═╡ 40b7e793-d869-4b68-83a1-6bd7d20a3941
md"""
## Question 4

> Compare the performances obtained with the random policy and the optimal one; how can you conclude that the optimal policy performs better?
"""

# ╔═╡ dce3978b-1334-426e-80cc-9cfe63989909
md"""
From ``\pi^\star`` we can also recover ``P^\star``, the transition matrix of our optimal policy:
"""

# ╔═╡ 7aae25dc-38cf-40d5-a7da-44d13d397194
begin
    P_star = sparse(zeros(11, 11))
    for i in 1:11
        if pi[i] == 1
            P_star[i, :] = P_g[i, :]
        elseif pi[i] == 2
            P_star[i, :] = P_h[i, :]
        elseif pi[i] == 3
            P_star[i, :] = P_b[i, :]
        else
            P_star[i, :] = P_d[i, :]
        end
    end
    P_star
end

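# An equivalent, more compact way to build the same matrix (a sketch of an
# alternative design, not a change to the notebook's logic; `P_directions` and
# `P_star_alt` are names introduced here for illustration only):
let
    P_directions = (P_g, P_h, P_b, P_d)
    P_star_alt = reduce(vcat, [P_directions[pi[i]][i:i, :] for i in 1:11])
    P_star_alt == P_star # expected: true
end
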
# ╔═╡ b075f5fc-85ac-45a0-8e27-605d3dac0e97
begin
    Random.seed!(RANDOM_SEED)
    V_Prandom = rand(length(REWARDS))
    V_Poptimal = rand(length(REWARDS))

    ratio = Vector{Float64}()
    convergence_random = Vector{Float64}()
    convergence_optimal = Vector{Float64}()

    for _ in 1:MAX_ITERATIONS
        V_Prandom = REWARDS + Matrix(DISCOUNT_FACTOR * P) * V_Prandom
        V_Poptimal = REWARDS + Matrix(DISCOUNT_FACTOR * P_star) * V_Poptimal

        append!(convergence_optimal, norm(V_Poptimal-V_optimal))
        append!(convergence_random, norm(V_Prandom-V))
        append!(ratio, norm(V_Poptimal./V_Prandom))
    end
end

# ╔═╡ 1fe62967-a9ea-4f6a-817e-666a900c8f92
plot(
    [convergence_optimal, convergence_random],
    labels = ["Optimal" "Random"],
    xlabel = L"n",
    ylabel = L"|| V^\star - \ \ V^r ||",
    yticks=[10.0^-x for x in 0:20],
    linewidth=2,
    yaxis=:log,
    title="Optimal vs Random: Convergence",
)

# ╔═╡ f31ce9b6-8399-4263-bad7-20c859116fa9
begin
    plot(
        ratio,
        labels = "",
        xlabel = L"n",
        ylabel = L"|| V^\star / \ \ V^r ||",
        linewidth=2,
        title="Optimal vs Random: Ratio",
        ylims=[0, Inf]
    )
end

# ╔═╡ 05373383-0c51-49f2-8a62-b06a6225d659
md"""
## Question 5

> **Policy Iteration I**: We are now going to calculate the optimal policy using Policy Iteration (lecture 2, 2nd part, slides 23/35 and 24/35). You can start with the random policy for which you calculated its performance in the **Performance Prediction** section. Carry out a one-step improvement (or greedy step) on the random policy. Represent in a 2D matrix the policy you obtain. How can we verify that it is a better policy than the random one?
"""

# ╔═╡ 81572e40-4cde-4a13-84aa-5c5d6a9dbde3
md"""
1. Initialization: choose a policy ``\pi_0``
Here we reuse the random policy ``P^{\pi_0}`` from [question 1 of part 1](#beb410a8-03e2-4f18-8ccd-941cc926ee12):
"""

# ╔═╡ 4b264154-944d-498b-a998-a4b07f77918e
begin
    P_pi_0 = P
    P_pi_0
end

# ╔═╡ a68a3d33-f4df-456e-af13-9b39e14dbc13
md"""
2. Policy Evaluation: Compute iteratively ``V_{\pi_k} = (I - \gamma P^{\pi_k})^{-1} R^{\pi_k}``
(in this question we only compute ``V_{\pi_0}``)
"""

# ╔═╡ c3a6ab2c-7a3e-458f-a108-e6e81aa3def1
begin
    V_pi_0 = Matrix(I(length(REWARDS)) - DISCOUNT_FACTOR * P_pi_0) \ REWARDS

    local V_display = copy(V_pi_0)
    insert!(V_display, 6, 0)
    V_display = permutedims(reshape(V_display, 4, 3))
    V_display = sparse(V_display)
end

# ╔═╡ ea457cd9-0db5-433f-9d57-1e875a160990
md"""
3. Policy improvement: Compute ``\pi_{k+1} = \text{greedy}(V_{\pi_k})``

(so here we only compute ``\pi_1``)
"""

# ╔═╡ 3d62d11d-383c-4060-b697-be0c0155ce95
begin
    local V_g = REWARDS + Matrix(DISCOUNT_FACTOR * P_g) * V_pi_0
    local V_h = REWARDS + Matrix(DISCOUNT_FACTOR * P_h) * V_pi_0
    local V_b = REWARDS + Matrix(DISCOUNT_FACTOR * P_b) * V_pi_0
    local V_d = REWARDS + Matrix(DISCOUNT_FACTOR * P_d) * V_pi_0

    local pi_1 = argmax.(eachrow([V_g V_h V_b V_d]))

    P_pi_1 = sparse(zeros(11, 11))
    for i in 1:11
        if pi_1[i] == 1
            P_pi_1[i, :] = P_g[i, :]
        elseif pi_1[i] == 2
            P_pi_1[i, :] = P_h[i, :]
        elseif pi_1[i] == 3
            P_pi_1[i, :] = P_b[i, :]
        else
            P_pi_1[i, :] = P_d[i, :]
        end
    end
    P_pi_1
end

# ╔═╡ 245f3394-d5e3-4d2c-96a6-ce5ea0bc7d84
md"""
Stop if ``V_{\pi_{k+1}} = V_{\pi_k}``, else repeat

(Here we stop at ``k = 1``, as the question asks.)
"""

# ╔═╡ 4f597447-f321-4a8f-adf0-3fd655ab203c
begin
    diff_star_pi_0 = sum(abs.(P_star - P_pi_0))
    diff_star_pi_1 = sum(abs.(P_star - P_pi_1))

    md"""
We can check that ``\pi_1`` is better than ``\pi_0`` by comparing the entry-wise distances of their transition matrices to the optimal one:

``||\pi_\star - \pi_1||_1 = `` $(diff_star_pi_1)

``||\pi_\star - \pi_0||_1 = `` $(diff_star_pi_0)
"""
end

# ╔═╡ d599c370-6cb5-4bc3-a333-d41e207c39dc
if diff_star_pi_1 <= diff_star_pi_0
    Markdown.MD(Markdown.Admonition("correct", "We get a better policy after one iteration", [md"``||\pi_\star - \pi_1||_1 \leq ||\pi_\star - \pi_0||_1``"]));
else
    Markdown.MD(Markdown.Admonition("danger", "We do not get a better policy after one iteration", [md"``||\pi_\star - \pi_1||_1 \nleq ||\pi_\star - \pi_0||_1``"]));
end

# ╔═╡ 4e8e49b2-60ea-4dc6-906b-d459c7983b34
md"""
## Question 6

> **Policy Iteration II**: Continue iterating the Prediction and the greedy steps until convergence to the optimal policy.
"""

# ╔═╡ 362a3786-f85d-44b9-b369-ecbf4e5194e9
begin
    P_pi_k = P_pi_0
    local diffs = Vector{Float64}()

    for k in 1:MAX_ITERATIONS
        V_pi_k = Matrix(I(length(REWARDS)) - DISCOUNT_FACTOR * P_pi_k) \ REWARDS

        local V_g = REWARDS + Matrix(DISCOUNT_FACTOR * P_g) * V_pi_k
        local V_h = REWARDS + Matrix(DISCOUNT_FACTOR * P_h) * V_pi_k
        local V_b = REWARDS + Matrix(DISCOUNT_FACTOR * P_b) * V_pi_k
        local V_d = REWARDS + Matrix(DISCOUNT_FACTOR * P_d) * V_pi_k

        local pi_k = argmax.(eachrow([V_g V_h V_b V_d]))

        global P_pi_k = sparse(zeros(11, 11))
        for i in 1:11
            if pi_k[i] == 1
                P_pi_k[i, :] = P_g[i, :]
            elseif pi_k[i] == 2
                P_pi_k[i, :] = P_h[i, :]
            elseif pi_k[i] == 3
                P_pi_k[i, :] = P_b[i, :]
            else
                P_pi_k[i, :] = P_d[i, :]
            end
        end

        append!(diffs, sum(abs.(P_star - P_pi_k)))

        if isapprox(P_star, P_pi_k)
            break
        end
    end

    local p = plot(
        diffs,
        labels = "",
        xlabel = L"k",
        ylabel = L"||\pi_\star - \pi_k||_1",
        linewidth=2,
        title="Policy Iteration convergence",
    )
    xticks!(round(Int,xlims(p)[1]):round(Int,xlims(p)[2]))
end

# ╔═╡ a1eaf48e-f92f-4554-942e-f6303ebaa084
md"""
## Question 7

> Investigate the structure of the optimal policy for different values of ``\gamma``, and explain the results. You might use Value Iteration or Policy Iteration.
"""

# ╔═╡ 8d5b2cc2-2e21-47df-b821-189de5d357a3
begin
    local gammas = 0.9:0.001:0.99
    local iterations = zeros(length(gammas))

    for (i, gamma) in enumerate(gammas)

        global P_pi_k2 = P_pi_0
        k = 0

        while true
            V_pi_k = Matrix(I(length(REWARDS)) - gamma * P_pi_k2) \ REWARDS

            local V_g = REWARDS + Matrix(gamma * P_g) * V_pi_k
            local V_h = REWARDS + Matrix(gamma * P_h) * V_pi_k
            local V_b = REWARDS + Matrix(gamma * P_b) * V_pi_k
            local V_d = REWARDS + Matrix(gamma * P_d) * V_pi_k

            local pi_k = argmax.(eachrow([V_g V_h V_b V_d]))

            P_pi_k2 = sparse(zeros(11, 11))
            for i in 1:11
                if pi_k[i] == 1
                    P_pi_k2[i, :] = P_g[i, :]
                elseif pi_k[i] == 2
                    P_pi_k2[i, :] = P_h[i, :]
                elseif pi_k[i] == 3
                    P_pi_k2[i, :] = P_b[i, :]
                else
                    P_pi_k2[i, :] = P_d[i, :]
                end
            end

            k += 1

            if isapprox(P_star, P_pi_k2) || k >= MAX_ITERATIONS
                break
            end
        end

        iterations[i] = k
    end

    local p = plot(
        gammas,
        iterations,
        labels = "",
        xlabel = L"\gamma",
        ylabel = L"k",
        linetype=:steppre,
        linewidth=2,
        title=md"Policy Iteration convergence according to ``\gamma``",
    )
    yticks!(round.(Int, yticks(p)[1][1]))
end

# ╔═╡ 0c6fd7ed-5180-41bd-9958-29cc9f3ce73b
md"""
We observe that the policy generally converges in fewer than 5 iterations. For some hyperparameter combinations, however, the loop does not converge: the optimal policy for that value of ``\gamma`` differs from the reference ``P_\star`` (which was computed with the slider value of `DISCOUNT_FACTOR`), so the stopping test is never satisfied and the loop runs until `MAX_ITERATIONS`.
"""

# ╔═╡ Cell order:
# ╟─02b1e10c-653e-4660-90b5-2eae7f19f1f7
# ╟─26fdd17e-f03a-4835-93be-85303fe526d8
# ╟─56ac3473-24f4-42d7-84e1-cfce6a70d8d5
# ╟─ccf4d63e-7ace-11ed-2123-d9dbb62bd308
# ╟─9f2879c1-c22b-4067-ad20-4e4c56cc8d00
# ╟─0a30a68a-068e-41fb-92c4-000869ba7dff
# ╟─07b57746-fba0-49aa-ba17-6dcb0bbe44e5
# ╟─92d6874b-651c-4551-840e-ad5d1e934aeb
# ╟─fe44d7f2-155e-42f2-83c3-dd18aadb3810
# ╟─28b769a6-dd3c-43ab-bae0-646d8ebc35d6
# ╟─3881603c-619b-4976-ac4c-2c7e7f3a6ec7
# ╟─fb797a9b-6a0a-4a77-a9b6-6804f98639bb
# ╟─1e3abda8-6645-48ba-874d-28e1011fc3e3
# ╟─beb410a8-03e2-4f18-8ccd-941cc926ee12
# ╟─133f291f-6f21-4441-86f7-ba190a7d6b1f
# ╟─e14f9977-d2fd-4d05-84d6-614008dc0c4a
# ╟─486c93ab-9cb9-4df4-b702-bbe12a961647
# ╟─ab2d705d-fc00-43b2-bb6d-2a3d4ba9dab1
# ╟─b7ae89c9-3c1b-4f5c-af5b-164d95ccca41
# ╟─03c17428-5ab9-42e7-bf79-92eb846f11cb
# ╟─c65d0dbc-ecd7-4320-9b3a-a1b9c0545f9a
# ╟─ad547684-bcbe-44f4-9fc1-f327d2db4584
# ╟─d3703ab8-912c-417d-acd9-29590ec1134b
# ╟─1319b304-5126-4825-8076-e113e4dd3635
# ╟─3ea3f177-c576-4b9e-a54b-c427e29a8491
# ╟─e94fe8a6-274b-4121-b1fc-063d3710c2f7
# ╟─80090d5f-d56c-4844-a04f-444ed49e5f34
# ╟─98362798-aae4-4540-9e98-cc7371802552
# ╟─30874daf-7b0e-4335-9a50-d19389cf1620
# ╟─c005a3f8-765c-4a50-90ef-73a5a72eee01
# ╟─1b43e9e5-d7d2-4b5e-a2b2-3a8b8eda6d62
# ╟─add0221b-e352-4559-a722-c45a64f573f9
# ╟─84e07dce-bf6d-4ac1-bfa4-65414fe1d787
# ╟─df13fa05-14de-409b-a0b1-5bba5eff432e
# ╟─ac490e4a-ce20-4288-a04f-c224df5ade1a
# ╟─33890f22-d3f6-4bcf-870d-756f7ff250a9
# ╟─cf9fb8a8-6c93-4c43-9f01-5f198f0cf4aa
# ╟─dc87b85f-c87c-4302-9124-194bd799f1fd
# ╟─b2595dec-aa5b-462b-b0f8-3555c1231b2f
# ╟─70edf811-adb0-4ae8-941a-b298d85a6e0e
# ╟─875673f1-08c9-4713-bbc2-85b0a7a0cb0a
# ╟─2deaac7c-ad14-43b0-9cd5-9f0ec12d324c
# ╟─b5c93b6f-933c-41b4-8399-44cc0fa07fab
# ╟─8015bdbb-82dd-48da-905d-a25e5c864298
# ╟─3d7d0b11-5b99-4b1f-ab06-3366678eece8
# ╟─664bb753-ccce-4c7a-8b11-76261a3b80d2
# ╟─df01ea55-b289-4c13-8a6b-780ce068e44c
# ╟─d7ff1cb5-d2b4-4597-bcef-0f74f2e7e0db
# ╟─40b7e793-d869-4b68-83a1-6bd7d20a3941
# ╟─dce3978b-1334-426e-80cc-9cfe63989909
# ╟─7aae25dc-38cf-40d5-a7da-44d13d397194
# ╟─b075f5fc-85ac-45a0-8e27-605d3dac0e97
# ╟─1fe62967-a9ea-4f6a-817e-666a900c8f92
# ╟─f31ce9b6-8399-4263-bad7-20c859116fa9
# ╟─05373383-0c51-49f2-8a62-b06a6225d659
# ╟─81572e40-4cde-4a13-84aa-5c5d6a9dbde3
# ╟─4b264154-944d-498b-a998-a4b07f77918e
# ╟─a68a3d33-f4df-456e-af13-9b39e14dbc13
# ╟─c3a6ab2c-7a3e-458f-a108-e6e81aa3def1
# ╟─ea457cd9-0db5-433f-9d57-1e875a160990
# ╟─3d62d11d-383c-4060-b697-be0c0155ce95
# ╟─245f3394-d5e3-4d2c-96a6-ce5ea0bc7d84
# ╟─4f597447-f321-4a8f-adf0-3fd655ab203c
# ╟─d599c370-6cb5-4bc3-a333-d41e207c39dc
# ╟─4e8e49b2-60ea-4dc6-906b-d459c7983b34
# ╟─362a3786-f85d-44b9-b369-ecbf4e5194e9
# ╟─a1eaf48e-f92f-4554-942e-f6303ebaa084
# ╟─8d5b2cc2-2e21-47df-b821-189de5d357a3
# ╟─0c6fd7ed-5180-41bd-9958-29cc9f3ce73b