init
This commit is contained in:
commit
468c9a9e1f
BIN
TP1/TP1.pdf
Normal file
BIN
TP1/TP1.pdf
Normal file
Binary file not shown.
421
TP1/main.py
Normal file
421
TP1/main.py
Normal file
|
@ -0,0 +1,421 @@
|
|||
import time
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from rich import print
|
||||
from rich.console import Console
|
||||
|
||||
console = Console()
|
||||
|
||||
# Reward collected in each state: a small cost everywhere except the two
# special cells (goal and death).
REWARD_MOVE = -0.04
REWARD_GOAL = 1
REWARD_DEATH = -1

# Discount applied to the next-state value in every Bellman backup.
DISCOUNT_FACTOR = 0.9

# Glyph used to display each action index (0..3) when printing a policy.
ARROWS = ["←", "↑", "↓", "→"]

# NOTE(review): not referenced anywhere in the visible script. Presumably
# row `a` gives the probability of actually moving in each direction when
# action `a` is chosen (0.7 intended, 0.1 for each other direction), in the
# same order as ARROWS -- confirm before relying on it.
MOVEMENTS = np.array([
    [0.7, 0.1, 0.1, 0.1],
    [0.1, 0.7, 0.1, 0.1],
    [0.1, 0.1, 0.7, 0.1],
    [0.1, 0.1, 0.1, 0.7],
])

# Reward vector over the 11 states, written as three grid rows of 4, 3 and
# 4 cells. Index 3 is the goal and index 6 is the death cell.
REWARDS = np.array([
    REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_GOAL,
    REWARD_MOVE, REWARD_MOVE, REWARD_DEATH,
    REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_MOVE,
])
|
||||
|
||||
# Part 1: Performance Prediction
|
||||
console.rule("[bold white]Part 1: Performance Prediction")
|
||||
|
||||
# Exo 1: Random policy
|
||||
console.rule("[bold yellow]Exo 1: Random policy")
|
||||
|
||||
# State-to-state transition matrix of the policy evaluated in Part 1
# (the script labels it "random policy"). P[s, t] is the probability of
# landing in state t when leaving state s.
#
# Each row is written as three groups of 4, 3 and 4 columns, mirroring the
# grid rows used for the reward vector. Rows 3 and 6 are all zero: no
# transition leaves those two states (the goal and death cells).
P = np.array([
    [0.8, 0.1, 0.0, 0.0,  0.1, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 0
    [0.7, 0.2, 0.1, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 1
    [0.0, 0.7, 0.1, 0.1,  0.0, 0.1, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 2
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 3
    [0.1, 0.0, 0.0, 0.0,  0.8, 0.0, 0.0,  0.1, 0.0, 0.0, 0.0],  # state 4
    [0.0, 0.0, 0.1, 0.0,  0.0, 0.7, 0.1,  0.0, 0.0, 0.1, 0.0],  # state 5
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 6
    [0.0, 0.0, 0.0, 0.0,  0.1, 0.0, 0.0,  0.8, 0.1, 0.0, 0.0],  # state 7
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.1, 0.8, 0.1, 0.0],  # state 8
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.7, 0.0,  0.0, 0.1, 0.1, 0.1],  # state 9
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.1,  0.0, 0.0, 0.7, 0.2],  # state 10
])
|
||||
|
||||
# Closed-form policy evaluation: the value function satisfies
# V = R + gamma * P @ V, i.e. (I - gamma * P) @ V = R.
# Solving the linear system directly avoids forming an explicit inverse,
# which is both cheaper and numerically more accurate.
V = np.linalg.solve(np.eye(P.shape[0]) - DISCOUNT_FACTOR * P, REWARDS)
print(V)

# Sanity check: applying one Bellman backup to V must give V back.
V_new = REWARDS + DISCOUNT_FACTOR * P @ V

if np.allclose(V, V_new):
    print("[green]V is a fixed point !")
else:
    print("[red]You suck !")
|
||||
|
||||
# Exo 2: Iterative Policy Evaluation
|
||||
console.rule("[bold yellow]Exo 2: Iterative Policy Evaluation")
|
||||
|
||||
# Stop iterating once the largest per-state change drops below this value.
DELTA = 1e-6

# Start from an arbitrary (uniform random) value estimate.
V_random = np.random.rand(11)

# Iterative policy evaluation: repeatedly apply the Bellman backup
# V <- R + gamma * P @ V until two successive iterates agree to DELTA
# in the max norm.
while True:
    V_new = REWARDS + DISCOUNT_FACTOR * P @ V_random

    diff = np.max(np.abs(V_new - V_random))
    V_random = V_new

    if diff < DELTA:
        break

print(V_random)
|
||||
|
||||
# Exo 3: Bellman operator contraction
|
||||
console.rule("[bold yellow]Exo 3: Bellman operator contraction")
|
||||
|
||||
# Two independent random starting points for the same Bellman operator.
V1 = np.random.rand(11)
V2 = np.random.rand(11)
# Max-norm distance between the two iterates after each backup.
diffs = []

# Apply the same backup to both sequences and record how fast they get
# closer; stop once they agree to within DELTA.
while True:
    V1 = REWARDS + DISCOUNT_FACTOR * P @ V1
    V2 = REWARDS + DISCOUNT_FACTOR * P @ V2

    diff = np.max(np.abs(V1 - V2))
    diffs.append(diff)

    if diff < DELTA:
        break

print(V1)
print(V2)

# Log scale on the y axis: a geometric decrease shows up as a straight line.
plt.plot(diffs)
plt.yscale("log")
plt.title("Convergence of Bellman operator")
plt.xlabel("Iteration")
plt.ylabel("Difference")
# NOTE(review): the figure is built but never displayed or saved because
# the call below is disabled -- presumably to keep the run non-interactive.
# plt.show()
|
||||
|
||||
# Part 2: Optimization
|
||||
console.rule("[bold white]Part 2: Optimization")
|
||||
|
||||
# Exo 1: Bellman equation
|
||||
console.rule("[bold yellow]Exo 1: Bellmann equation")
|
||||
# See the lecture notes, part 2, slide 15
|
||||
|
||||
# Exo 2: Value Iteration Algorithm
|
||||
console.rule("[bold yellow]Exo 2: Value Iteration Algorithm")
|
||||
|
||||
# Transition matrix for action index 0 (displayed as "←" in the policy
# printout). P_g[s, t] is the probability of landing in state t when the
# action is taken in state s.
#
# Each row is written as three groups of 4, 3 and 4 columns, mirroring the
# grid rows. Rows 3 and 6 are all zero: no transition leaves those states.
P_g = np.array([
    [0.8, 0.1, 0.0, 0.0,  0.1, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 0
    [0.7, 0.2, 0.1, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 1
    [0.0, 0.7, 0.1, 0.1,  0.0, 0.1, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 2
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 3
    [0.1, 0.0, 0.0, 0.0,  0.8, 0.0, 0.0,  0.1, 0.0, 0.0, 0.0],  # state 4
    [0.0, 0.0, 0.1, 0.0,  0.0, 0.7, 0.1,  0.0, 0.0, 0.1, 0.0],  # state 5
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 6
    [0.0, 0.0, 0.0, 0.0,  0.1, 0.0, 0.0,  0.8, 0.1, 0.0, 0.0],  # state 7
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.7, 0.2, 0.1, 0.0],  # state 8
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.1, 0.0,  0.0, 0.7, 0.1, 0.1],  # state 9
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.1,  0.0, 0.0, 0.7, 0.2],  # state 10
])
|
||||
|
||||
# Transition matrix for action index 1 (displayed as "↑" in the policy
# printout). P_h[s, t] is the probability of landing in state t when the
# action is taken in state s.
#
# Each row is written as three groups of 4, 3 and 4 columns, mirroring the
# grid rows. Rows 3 and 6 are all zero: no transition leaves those states.
P_h = np.array([
    [0.8, 0.1, 0.0, 0.0,  0.1, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 0
    [0.1, 0.8, 0.1, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 1
    [0.0, 0.1, 0.7, 0.1,  0.0, 0.1, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 2
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 3
    [0.7, 0.0, 0.0, 0.0,  0.2, 0.0, 0.0,  0.1, 0.0, 0.0, 0.0],  # state 4
    [0.0, 0.0, 0.7, 0.0,  0.0, 0.1, 0.1,  0.0, 0.0, 0.1, 0.0],  # state 5
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 6
    [0.0, 0.0, 0.0, 0.0,  0.7, 0.0, 0.0,  0.2, 0.1, 0.0, 0.0],  # state 7
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.1, 0.8, 0.1, 0.0],  # state 8
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.7, 0.0,  0.0, 0.1, 0.1, 0.1],  # state 9
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.7,  0.0, 0.0, 0.1, 0.2],  # state 10
])
|
||||
|
||||
# Transition matrix for action index 2 (displayed as "↓" in the policy
# printout). P_b[s, t] is the probability of landing in state t when the
# action is taken in state s.
#
# Each row is written as three groups of 4, 3 and 4 columns, mirroring the
# grid rows. Rows 3 and 6 are all zero: no transition leaves those states.
P_b = np.array([
    [0.2, 0.1, 0.0, 0.0,  0.7, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 0
    [0.1, 0.8, 0.1, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 1
    [0.0, 0.1, 0.1, 0.1,  0.0, 0.7, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 2
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 3
    [0.1, 0.0, 0.0, 0.0,  0.2, 0.0, 0.0,  0.7, 0.0, 0.0, 0.0],  # state 4
    [0.0, 0.0, 0.1, 0.0,  0.0, 0.1, 0.1,  0.0, 0.0, 0.7, 0.0],  # state 5
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 6
    [0.0, 0.0, 0.0, 0.0,  0.1, 0.0, 0.0,  0.8, 0.1, 0.0, 0.0],  # state 7
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.1, 0.8, 0.1, 0.0],  # state 8
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.1, 0.0,  0.0, 0.1, 0.7, 0.1],  # state 9
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.1,  0.0, 0.0, 0.1, 0.8],  # state 10
])
|
||||
|
||||
# Transition matrix for action index 3 (displayed as "→" in the policy
# printout). P_d[s, t] is the probability of landing in state t when the
# action is taken in state s.
#
# Each row is written as three groups of 4, 3 and 4 columns, mirroring the
# grid rows. Rows 3 and 6 are all zero: no transition leaves those states.
P_d = np.array([
    [0.2, 0.7, 0.0, 0.0,  0.1, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 0
    [0.1, 0.2, 0.7, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 1
    [0.0, 0.1, 0.1, 0.7,  0.0, 0.1, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 2
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 3
    [0.1, 0.0, 0.0, 0.0,  0.8, 0.0, 0.0,  0.1, 0.0, 0.0, 0.0],  # state 4
    [0.0, 0.0, 0.1, 0.0,  0.0, 0.1, 0.7,  0.0, 0.0, 0.1, 0.0],  # state 5
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.0, 0.0, 0.0, 0.0],  # state 6
    [0.0, 0.0, 0.0, 0.0,  0.1, 0.0, 0.0,  0.2, 0.7, 0.0, 0.0],  # state 7
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.0,  0.1, 0.2, 0.7, 0.0],  # state 8
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.1, 0.0,  0.0, 0.1, 0.1, 0.7],  # state 9
    [0.0, 0.0, 0.0, 0.0,  0.0, 0.0, 0.1,  0.0, 0.0, 0.1, 0.8],  # state 10
])
|
||||
|
||||
# Value iteration: start from an arbitrary value estimate and repeatedly
# replace it by the best one-step look-ahead over the four actions.
V_optimal = np.random.rand(11)
# Placeholder; overwritten with the greedy action index per state once the
# iteration has converged. Integer dtype so it can index ARROWS.
pi = np.zeros(11, dtype=int)

while True:
    # One Bellman backup per action, using that action's transition matrix.
    V_g = REWARDS + DISCOUNT_FACTOR * P_g @ V_optimal
    V_h = REWARDS + DISCOUNT_FACTOR * P_h @ V_optimal
    V_b = REWARDS + DISCOUNT_FACTOR * P_b @ V_optimal
    V_d = REWARDS + DISCOUNT_FACTOR * P_d @ V_optimal

    # Shape (4, 11): one row per action, in the same order as ARROWS.
    q_values = np.stack([V_g, V_h, V_b, V_d])
    V_new = q_values.max(axis=0)

    if np.allclose(V_new, V_optimal, atol=1e-6):
        # Converged: the greedy policy picks the maximising action per state.
        pi = q_values.argmax(axis=0)
        break
    else:
        V_optimal = V_new

print(V_optimal)

# Render the policy on the 3x4 grid. The 11 states skip one grid cell, so a
# filler glyph is inserted at grid position 5 before reshaping; the goal
# (grid cell 3) and death (grid cell 7) cells get their own symbols instead
# of an arrow.
pi_pretty = [ARROWS[i] for i in pi]
pi_pretty.insert(5, "■")
pi_pretty[3] = "✓"
pi_pretty[7] = "☠"
pi_pretty = np.array(pi_pretty).reshape(3, 4)
print(pi_pretty)
|
||||
|
||||
# Exo 4: Performance comparison
|
||||
console.rule("[bold yellow]Exo 4: Performance comparison")
|
||||
|
||||
# Per-state absolute gap between the value-iteration result and the value
# obtained earlier by iterative evaluation of the fixed policy.
perf = np.abs(V_optimal - V_random)
print(perf)
|
2044
TP1/notebook.jl
Normal file
2044
TP1/notebook.jl
Normal file
File diff suppressed because it is too large
Load diff
1
TP2/.envrc
Normal file
1
TP2/.envrc
Normal file
|
@ -0,0 +1 @@
|
|||
use nix
|
7
TP2/.vscode/extensions.json
vendored
Normal file
7
TP2/.vscode/extensions.json
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"editorconfig.editorconfig",
|
||||
"njpwerner.autodocstring",
|
||||
"ms-python.python"
|
||||
]
|
||||
}
|
28
TP2/.vscode/settings.json
vendored
Normal file
28
TP2/.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,28 @@
|
|||
{
|
||||
"python.defaultInterpreterPath": ".venv/bin/python",
|
||||
"python.analysis.typeCheckingMode": "basic",
|
||||
"python.formatting.provider": "black",
|
||||
"editor.formatOnSave": true,
|
||||
"python.linting.enabled": true,
|
||||
"python.linting.lintOnSave": true,
|
||||
"python.linting.flake8Enabled": true,
|
||||
"python.linting.mypyEnabled": true,
|
||||
"python.linting.banditEnabled": true,
|
||||
"python.languageServer": "Pylance",
|
||||
"[python]": {
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": true
|
||||
}
|
||||
},
|
||||
"files.exclude": {
|
||||
"**/.git": true,
|
||||
"**/.svn": true,
|
||||
"**/.hg": true,
|
||||
"**/CVS": true,
|
||||
"**/.DS_Store": true,
|
||||
"**/Thumbs.db": true,
|
||||
"**/__pycache__": true,
|
||||
"**/.mypy_cache": true,
|
||||
},
|
||||
"nixEnvSelector.nixFile": "${workspaceRoot}/shell.nix",
|
||||
}
|
673
TP2/TP2.ipynb
Normal file
673
TP2/TP2.ipynb
Normal file
File diff suppressed because one or more lines are too long
12
TP2/shell.nix
Normal file
12
TP2/shell.nix
Normal file
|
@ -0,0 +1,12 @@
|
|||
# Development shell for TP2: Poetry plus a Python interpreter that already
# has the notebook dependencies on its path.
{ pkgs ? import <nixpkgs> { } }:

pkgs.mkShell {
  buildInputs = with pkgs; [
    poetry
    # Build the interpreter together with its libraries so both always come
    # from the same Python version (previously `python3` was mixed with
    # packages pinned to `python310Packages`).
    (python3.withPackages (ps: with ps; [
      numpy
      matplotlib
      ipykernel
      pip
    ]))
  ];
}
|
1229
notebook_exos.jl
Normal file
1229
notebook_exos.jl
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue