This commit is contained in:
Laureηt 2023-06-23 20:10:32 +02:00
commit 468c9a9e1f
Signed by: Laurent
SSH key fingerprint: SHA256:kZEpW8cMJ54PDeCvOhzreNr4FSh6R13CMGH/POoO8DI
9 changed files with 4415 additions and 0 deletions

BIN
TP1/TP1.pdf Normal file

Binary file not shown.

421
TP1/main.py Normal file

@@ -0,0 +1,421 @@
import time
import matplotlib.pyplot as plt
import numpy as np
from rich import print
from rich.console import Console
console = Console()
REWARD_MOVE = -0.04
REWARD_GOAL = 1
REWARD_DEATH = -1
DISCOUNT_FACTOR = 0.9
ARROWS = ["←", "↑", "↓", "→"]  # assumed glyph order: left, up, down, right (matches P_g, P_h, P_b, P_d below)
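# Per-action movement model: an action moves the agent in the intended
# direction with probability 0.7, and in each of the other three
# directions with probability 0.1 (each row sums to 1).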
MOVEMENTS = np.array([
[0.7, 0.1, 0.1, 0.1],
[0.1, 0.7, 0.1, 0.1],
[0.1, 0.1, 0.7, 0.1],
[0.1, 0.1, 0.1, 0.7]
])
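# The environment is a 3x4 grid world: a wall at cell (1, 1), a +1 goal at
# (0, 3) and a -1 trap at (1, 3). The 11 reachable cells are indexed
# row-major, skipping the wall: 0-3 (top row), 4-6 (middle row), 7-10
# (bottom row). REWARDS follows this indexing: index 3 is the goal,
# index 6 is the trap, and every other state costs REWARD_MOVE per step.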
REWARDS = np.array([
REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_GOAL,
REWARD_MOVE, REWARD_MOVE, REWARD_DEATH,
REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_MOVE,
])
# Part 1: Performance Prediction
console.rule("[bold white]Part 1: Performance Prediction")
# Exo 1: Random policy
console.rule("[bold yellow]Exo 1: Random policy")
P = np.array([
[
0.8, 0.1, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.7, 0.2, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.7, 0.1, 0.1,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.8, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.7, 0.1,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.8, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.8, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.7, 0.0,
0.0, 0.1, 0.1, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.7, 0.2,
],
])
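# For a fixed policy the Bellman equation V = R + γ P V is linear in V,
# so the value function has the closed-form solution V = (I - γP)^{-1} R.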
V = np.linalg.inv(np.eye(11) - DISCOUNT_FACTOR * P) @ REWARDS
print(V)
V_new = REWARDS + DISCOUNT_FACTOR * P @ V
if np.allclose(V, V_new):
    print("[green]V is a fixed point!")
else:
    print("[red]You suck!")
# Exo 2: Iterative Policy Evaluation
console.rule("[bold yellow]Exo 2: Iterative Policy Evaluation")
DELTA = 1e-6
V_random = np.random.rand(11)
while True:
    V_new = REWARDS + DISCOUNT_FACTOR * P @ V_random
    diff = np.max(np.abs(V_new - V_random))
    V_random = V_new
    if diff < DELTA:
        break
print(V_random)
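# Sanity check: the iterative estimate should agree with the closed-form
# solution from Exo 1 up to the stopping tolerance DELTA.
print("max gap vs closed-form V:", np.max(np.abs(V_random - V)))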
# Exo 3: Bellman operator contraction
console.rule("[bold yellow]Exo 3: Bellman operator contraction")
V1 = np.random.rand(11)
V2 = np.random.rand(11)
diffs = []
while True:
    V1 = REWARDS + DISCOUNT_FACTOR * P @ V1
    V2 = REWARDS + DISCOUNT_FACTOR * P @ V2
    diff = np.max(np.abs(V1 - V2))
    diffs.append(diff)
    if diff < DELTA:
        break
print(V1)
print(V2)
plt.plot(diffs)
plt.yscale("log")
plt.title("Convergence of Bellman operator")
plt.xlabel("Iteration")
plt.ylabel("Difference")
# plt.show()
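# The Bellman operator is a γ-contraction in the sup norm, so the gap
# between the two trajectories shrinks by at least a factor γ per step,
# which is why the log-scale plot above is (roughly) a straight line.
ratios = np.array(diffs[1:]) / np.array(diffs[:-1])
print("mean contraction ratio:", ratios.mean(), "| gamma:", DISCOUNT_FACTOR)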
# Part 2: Optimization
console.rule("[bold white]Part 2: Optimization")
# Exo 1: Bellman equation
console.rule("[bold yellow]Exo 1: Bellman equation")
# See the lecture notes, part 2, slide 15.
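# Bellman optimality equation (the fixed point computed below):
#   V*(s) = max_a [ R(s) + γ * Σ_{s'} P_a(s, s') * V*(s') ]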
# Exo 2: Value Iteration Algorithm
console.rule("[bold yellow]Exo 2: Value Iteration Algorithm")
P_g = np.array([
[
0.8, 0.1, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.7, 0.2, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.7, 0.1, 0.1,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.8, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.7, 0.1,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.8, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.7, 0.2, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.1, 0.0,
0.0, 0.7, 0.1, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.7, 0.2,
],
])
P_h = np.array([
[
0.8, 0.1, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.8, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.1, 0.7, 0.1,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.7, 0.0, 0.0, 0.0,
0.2, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.7, 0.0,
0.0, 0.1, 0.1,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.7, 0.0, 0.0,
0.2, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.8, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.7, 0.0,
0.0, 0.1, 0.1, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.7,
0.0, 0.0, 0.1, 0.2,
],
])
P_b = np.array([
[
0.2, 0.1, 0.0, 0.0,
0.7, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.8, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.1, 0.1, 0.1,
0.0, 0.7, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.2, 0.0, 0.0,
0.7, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.1, 0.1,
0.0, 0.0, 0.7, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.8, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.8, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.1, 0.0,
0.0, 0.1, 0.7, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.1, 0.8,
],
])
P_d = np.array([
[
0.2, 0.7, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.2, 0.7, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.1, 0.1, 0.7,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.8, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.1, 0.7,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.2, 0.7, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.2, 0.7, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.1, 0.0,
0.0, 0.1, 0.1, 0.7,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.1, 0.8,
],
])
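# Value iteration: repeatedly apply the Bellman optimality operator,
# V <- max_a (R + γ P_a V), until V stops changing; the optimal policy is
# then the argmax over actions at each state.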
V_optimal = np.random.rand(11)
pi = np.zeros(11, dtype=int)
while True:
    V_g = REWARDS + DISCOUNT_FACTOR * P_g @ V_optimal
    V_h = REWARDS + DISCOUNT_FACTOR * P_h @ V_optimal
    V_b = REWARDS + DISCOUNT_FACTOR * P_b @ V_optimal
    V_d = REWARDS + DISCOUNT_FACTOR * P_d @ V_optimal
    V_new = np.max([V_g, V_h, V_b, V_d], axis=0)
    if np.allclose(V_new, V_optimal, atol=1e-6):
        pi = np.argmax([V_g, V_h, V_b, V_d], axis=0)
        break
    else:
        V_optimal = V_new
print(V_optimal)
pi_pretty = [ARROWS[i] for i in pi]
# Stand-in glyphs mark the special cells (positions follow the 3x4 grid):
pi_pretty.insert(5, "█")  # wall at (1, 1)
pi_pretty[3] = "+"        # +1 goal at (0, 3)
pi_pretty[7] = "-"        # -1 trap at (1, 3)
pi_pretty = np.array(pi_pretty).reshape(3, 4)
print(pi_pretty)
# Exo 4: Performance comparison
console.rule("[bold yellow]Exo 4: Performance comparison")
perf = np.abs(V_optimal - V_random)
print(perf)

2044
TP1/notebook.jl Normal file

File diff suppressed because it is too large

1
TP2/.envrc Normal file

@@ -0,0 +1 @@
use nix

7
TP2/.vscode/extensions.json vendored Normal file

@@ -0,0 +1,7 @@
{
  "recommendations": [
    "editorconfig.editorconfig",
    "njpwerner.autodocstring",
    "ms-python.python"
  ]
}

28
TP2/.vscode/settings.json vendored Normal file

@@ -0,0 +1,28 @@
{
  "python.defaultInterpreterPath": ".venv/bin/python",
  "python.analysis.typeCheckingMode": "basic",
  "python.formatting.provider": "black",
  "editor.formatOnSave": true,
  "python.linting.enabled": true,
  "python.linting.lintOnSave": true,
  "python.linting.flake8Enabled": true,
  "python.linting.mypyEnabled": true,
  "python.linting.banditEnabled": true,
  "python.languageServer": "Pylance",
  "[python]": {
    "editor.codeActionsOnSave": {
      "source.organizeImports": true
    }
  },
  "files.exclude": {
    "**/.git": true,
    "**/.svn": true,
    "**/.hg": true,
    "**/CVS": true,
    "**/.DS_Store": true,
    "**/Thumbs.db": true,
    "**/__pycache__": true,
    "**/.mypy_cache": true,
  },
  "nixEnvSelector.nixFile": "${workspaceRoot}/shell.nix",
}

673
TP2/TP2.ipynb Normal file

File diff suppressed because one or more lines are too long

12
TP2/shell.nix Normal file

@@ -0,0 +1,12 @@
{ pkgs ? import <nixpkgs> { } }:

pkgs.mkShell {
  buildInputs = with pkgs; [
    poetry
    python3
    python310Packages.numpy
    python310Packages.matplotlib
    python310Packages.ipykernel
    python310Packages.pip
  ];
}

1229
notebook_exos.jl Normal file

File diff suppressed because it is too large