This commit is contained in:
Laureηt 2023-06-23 20:10:32 +02:00
commit 468c9a9e1f
Signed by: Laurent
SSH key fingerprint: SHA256:kZEpW8cMJ54PDeCvOhzreNr4FSh6R13CMGH/POoO8DI
9 changed files with 4415 additions and 0 deletions

BIN
TP1/TP1.pdf Normal file

Binary file not shown.

421
TP1/main.py Normal file

@@ -0,0 +1,421 @@
import time
import matplotlib.pyplot as plt
import numpy as np
from rich import print
from rich.console import Console
console = Console()
REWARD_MOVE = -0.04
REWARD_GOAL = 1
REWARD_DEATH = -1
DISCOUNT_FACTOR = 0.9
ARROWS = ["←", "↑", "↓", "→"]  # assumed glyph order: left, up, down, right (matches P_g, P_h, P_b, P_d below)
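# Per-action movement model: an action moves the agent in the intended
# direction with probability 0.7, and in each of the other three
# directions with probability 0.1 (each row sums to 1).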
MOVEMENTS = np.array([
[0.7, 0.1, 0.1, 0.1],
[0.1, 0.7, 0.1, 0.1],
[0.1, 0.1, 0.7, 0.1],
[0.1, 0.1, 0.1, 0.7]
])
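# The environment is a 3x4 grid world: a wall at cell (1, 1), a +1 goal at
# (0, 3) and a -1 trap at (1, 3). The 11 reachable cells are indexed
# row-major, skipping the wall: 0-3 (top row), 4-6 (middle row), 7-10
# (bottom row). REWARDS follows this indexing: index 3 is the goal,
# index 6 is the trap, and every other state costs REWARD_MOVE per step.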
REWARDS = np.array([
REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_GOAL,
REWARD_MOVE, REWARD_MOVE, REWARD_DEATH,
REWARD_MOVE, REWARD_MOVE, REWARD_MOVE, REWARD_MOVE,
])
# Part 1: Performance Prediction
console.rule("[bold white]Part 1: Performance Prediction")
# Exo 1: Random policy
console.rule("[bold yellow]Exo 1: Random policy")
P = np.array([
[
0.8, 0.1, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.7, 0.2, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.7, 0.1, 0.1,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.8, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.7, 0.1,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.8, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.8, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.7, 0.0,
0.0, 0.1, 0.1, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.7, 0.2,
],
])
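# For a fixed policy the Bellman equation V = R + γ P V is linear in V,
# so the value function has the closed-form solution V = (I - γP)^{-1} R.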
V = np.linalg.inv(np.eye(11) - DISCOUNT_FACTOR * P) @ REWARDS
print(V)
V_new = REWARDS + DISCOUNT_FACTOR * P @ V
if np.allclose(V, V_new):
    print("[green]V is a fixed point!")
else:
    print("[red]You suck!")
# Exo 2: Iterative Policy Evaluation
console.rule("[bold yellow]Exo 2: Iterative Policy Evaluation")
DELTA = 1e-6
V_random = np.random.rand(11)
while True:
    V_new = REWARDS + DISCOUNT_FACTOR * P @ V_random
    diff = np.max(np.abs(V_new - V_random))
    V_random = V_new
    if diff < DELTA:
        break
print(V_random)
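# Sanity check: the iterative estimate should agree with the closed-form
# solution from Exo 1 up to the stopping tolerance DELTA.
print("max gap vs closed-form V:", np.max(np.abs(V_random - V)))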
# Exo 3: Bellman operator contraction
console.rule("[bold yellow]Exo 3: Bellman operator contraction")
V1 = np.random.rand(11)
V2 = np.random.rand(11)
diffs = []
while True:
    V1 = REWARDS + DISCOUNT_FACTOR * P @ V1
    V2 = REWARDS + DISCOUNT_FACTOR * P @ V2
    diff = np.max(np.abs(V1 - V2))
    diffs.append(diff)
    if diff < DELTA:
        break
print(V1)
print(V2)
plt.plot(diffs)
plt.yscale("log")
plt.title("Convergence of Bellman operator")
plt.xlabel("Iteration")
plt.ylabel("Difference")
# plt.show()
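# The Bellman operator is a γ-contraction in the sup norm, so the gap
# between the two trajectories shrinks by at least a factor γ per step,
# which is why the log-scale plot above is (roughly) a straight line.
ratios = np.array(diffs[1:]) / np.array(diffs[:-1])
print("mean contraction ratio:", ratios.mean(), "| gamma:", DISCOUNT_FACTOR)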
# Part 2: Optimization
console.rule("[bold white]Part 2: Optimization")
# Exo 1: Bellman equation
console.rule("[bold yellow]Exo 1: Bellman equation")
# See the lecture notes, part 2, slide 15.
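# Bellman optimality equation (the fixed point computed below):
#   V*(s) = max_a [ R(s) + γ * Σ_{s'} P_a(s, s') * V*(s') ]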
# Exo 2: Value Iteration Algorithm
console.rule("[bold yellow]Exo 2: Value Iteration Algorithm")
P_g = np.array([
[
0.8, 0.1, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.7, 0.2, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.7, 0.1, 0.1,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.8, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.7, 0.1,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.8, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.7, 0.2, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.1, 0.0,
0.0, 0.7, 0.1, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.7, 0.2,
],
])
P_h = np.array([
[
0.8, 0.1, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.8, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.1, 0.7, 0.1,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.7, 0.0, 0.0, 0.0,
0.2, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.7, 0.0,
0.0, 0.1, 0.1,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.7, 0.0, 0.0,
0.2, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.8, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.7, 0.0,
0.0, 0.1, 0.1, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.7,
0.0, 0.0, 0.1, 0.2,
],
])
P_b = np.array([
[
0.2, 0.1, 0.0, 0.0,
0.7, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.8, 0.1, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.1, 0.1, 0.1,
0.0, 0.7, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.2, 0.0, 0.0,
0.7, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.1, 0.1,
0.0, 0.0, 0.7, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.8, 0.1, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.8, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.1, 0.0,
0.0, 0.1, 0.7, 0.1,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.1, 0.8,
],
])
P_d = np.array([
[
0.2, 0.7, 0.0, 0.0,
0.1, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.2, 0.7, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.1, 0.1, 0.7,
0.0, 0.1, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.1, 0.0, 0.0, 0.0,
0.8, 0.0, 0.0,
0.1, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.1, 0.0,
0.0, 0.1, 0.7,
0.0, 0.0, 0.1, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.1, 0.0, 0.0,
0.2, 0.7, 0.0, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0,
0.1, 0.2, 0.7, 0.0,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.1, 0.0,
0.0, 0.1, 0.1, 0.7,
],
[
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.1,
0.0, 0.0, 0.1, 0.8,
],
])
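# Value iteration: repeatedly apply the Bellman optimality operator,
# V <- max_a (R + γ P_a V), until V stops changing; the optimal policy is
# then the argmax over actions at each state.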
V_optimal = np.random.rand(11)
pi = np.zeros(11, dtype=int)
while True:
    V_g = REWARDS + DISCOUNT_FACTOR * P_g @ V_optimal
    V_h = REWARDS + DISCOUNT_FACTOR * P_h @ V_optimal
    V_b = REWARDS + DISCOUNT_FACTOR * P_b @ V_optimal
    V_d = REWARDS + DISCOUNT_FACTOR * P_d @ V_optimal
    V_new = np.max([V_g, V_h, V_b, V_d], axis=0)
    if np.allclose(V_new, V_optimal, atol=1e-6):
        pi = np.argmax([V_g, V_h, V_b, V_d], axis=0)
        break
    else:
        V_optimal = V_new
print(V_optimal)
pi_pretty = [ARROWS[i] for i in pi]
# Stand-in glyphs mark the special cells (positions follow the 3x4 grid):
pi_pretty.insert(5, "█")  # wall at (1, 1)
pi_pretty[3] = "+"        # +1 goal at (0, 3)
pi_pretty[7] = "-"        # -1 trap at (1, 3)
pi_pretty = np.array(pi_pretty).reshape(3, 4)
print(pi_pretty)
# Exo 4: Performance comparison
console.rule("[bold yellow]Exo 4: Performance comparison")
perf = np.abs(V_optimal - V_random)
print(perf)

2044
TP1/notebook.jl Normal file

File diff suppressed because it is too large

1
TP2/.envrc Normal file

@@ -0,0 +1 @@
use nix

7
TP2/.vscode/extensions.json vendored Normal file

@@ -0,0 +1,7 @@
{
  "recommendations": [
    "editorconfig.editorconfig",
    "njpwerner.autodocstring",
    "ms-python.python"
  ]
}

28
TP2/.vscode/settings.json vendored Normal file

@@ -0,0 +1,28 @@
{
  "python.defaultInterpreterPath": ".venv/bin/python",
  "python.analysis.typeCheckingMode": "basic",
  "python.formatting.provider": "black",
  "editor.formatOnSave": true,
  "python.linting.enabled": true,
  "python.linting.lintOnSave": true,
  "python.linting.flake8Enabled": true,
  "python.linting.mypyEnabled": true,
  "python.linting.banditEnabled": true,
  "python.languageServer": "Pylance",
  "[python]": {
    "editor.codeActionsOnSave": {
      "source.organizeImports": true
    }
  },
  "files.exclude": {
    "**/.git": true,
    "**/.svn": true,
    "**/.hg": true,
    "**/CVS": true,
    "**/.DS_Store": true,
    "**/Thumbs.db": true,
    "**/__pycache__": true,
    "**/.mypy_cache": true,
  },
  "nixEnvSelector.nixFile": "${workspaceRoot}/shell.nix",
}

673
TP2/TP2.ipynb Normal file

File diff suppressed because one or more lines are too long

12
TP2/shell.nix Normal file

@@ -0,0 +1,12 @@
{ pkgs ? import <nixpkgs> { } }:

pkgs.mkShell {
  buildInputs = with pkgs; [
    poetry
    python3
    python310Packages.numpy
    python310Packages.matplotlib
    python310Packages.ipykernel
    python310Packages.pip
  ];
}

1229
notebook_exos.jl Normal file

File diff suppressed because it is too large