init

2023-06-23 20:10:32 +02:00 · 2023-06-23 20:10:32 +02:00 · 468c9a9e1f
commit 468c9a9e1f
9 changed files with 4415 additions and 0 deletions
--- a/TP1/TP1.pdf
+++ b/TP1/TP1.pdf
--- a/TP1/main.py
+++ b/TP1/main.py
@ -0,0 +1,421 @@
 import time
 import matplotlib.pyplot as plt
 import numpy as np
 from rich import print
 from rich.console import Console
 # Shared rich Console; the script calls console.rule(...) on it to print section headers.
 console = Console()
 # Reward collected in an ordinary state, and in the two special states.
 REWARD_MOVE = -0.04
 REWARD_GOAL = 1
 REWARD_DEATH = -1
 # Weight applied to next-state values in every Bellman backup of this script.
 DISCOUNT_FACTOR = 0.9
 # One glyph per action; indexed by the argmax over the (left, up, down, right)
 # action values when the greedy policy is pretty-printed.
 ARROWS = "← ↑ ↓ →".split()
 # 4x4 table with 0.7 on the diagonal and 0.1 everywhere else.
 # NOTE(review): presumably row = chosen action, column = move actually
 # performed, in ARROWS order — confirm; the table is not read in this file.
 MOVEMENTS = np.full((4, 4), 0.1)
 np.fill_diagonal(MOVEMENTS, 0.7)
 # One reward per state (11 states): every state gets the step reward except
 # state 3 (goal) and state 6 (death).
 REWARDS = np.full(11, REWARD_MOVE)
 REWARDS[3] = REWARD_GOAL
 REWARDS[6] = REWARD_DEATH
 # ---- Part 1: Performance Prediction (value of one fixed transition matrix P) ----
 console.rule("[bold white]Part 1: Performance Prediction")
 # Exo 1: closed-form policy evaluation, followed by a fixed-point check.
 console.rule("[bold yellow]Exo 1: Random policy")
 # Transition matrix used for policy evaluation in Part 1: P[s, t] is the
 # probability of moving from state s to state t.
 # States are the 11 free cells of the 3x4 grid, numbered row by row with the
 # blocked cell skipped:
 #     0  1  2  3
 #     4  #  5  6
 #     7  8  9 10
 # Rows 3 (goal) and 6 (death) are left all-zero: no transition leaves them.
 P = np.zeros((11, 11))
 P[0, [0, 1, 4]] = 0.8, 0.1, 0.1
 P[1, [0, 1, 2]] = 0.7, 0.2, 0.1
 P[2, [1, 2, 3, 5]] = 0.7, 0.1, 0.1, 0.1
 P[4, [0, 4, 7]] = 0.1, 0.8, 0.1
 P[5, [2, 5, 6, 9]] = 0.1, 0.7, 0.1, 0.1
 P[7, [4, 7, 8]] = 0.1, 0.8, 0.1
 P[8, [7, 8, 9]] = 0.1, 0.8, 0.1
 P[9, [5, 8, 9, 10]] = 0.7, 0.1, 0.1, 0.1
 P[10, [6, 9, 10]] = 0.1, 0.7, 0.2
 # Closed-form policy evaluation: V is the solution of (I - gamma * P) V = R.
 # Solving the linear system directly is cheaper and numerically more accurate
 # than forming the inverse and multiplying; the size is taken from P instead
 # of being hard-coded.
 V = np.linalg.solve(np.eye(P.shape[0]) - DISCOUNT_FACTOR * P, REWARDS)
 print(V)
 # Sanity check: one more Bellman backup must leave V unchanged.
 V_new = REWARDS + DISCOUNT_FACTOR * P @ V
 if np.allclose(V, V_new):
    print("[green]V is a fixed point !")
 else:
    print("[red]You suck !")
 # Exo 2: Iterative Policy Evaluation
 console.rule("[bold yellow]Exo 2: Iterative Policy Evaluation")
 # Convergence threshold on the largest per-state change between two sweeps.
 DELTA = 1e-6
 # Start from an arbitrary value vector and apply the Bellman backup for P
 # until two consecutive iterates differ by less than DELTA in every state.
 V_random = np.random.rand(11)
 diff = np.inf
 while not diff < DELTA:
    V_new = REWARDS + DISCOUNT_FACTOR * P @ V_random
    diff = np.abs(V_new - V_random).max()
    V_random = V_new
 print(V_random)
 # Exo 3: Bellman operator contraction
 console.rule("[bold yellow]Exo 3: Bellman operator contraction")
 # Push two independent random value vectors through the same Bellman backup
 # and record the largest gap between them after every sweep.
 V1, V2 = np.random.rand(11), np.random.rand(11)
 diffs = []
 converged = False
 while not converged:
    V1 = REWARDS + DISCOUNT_FACTOR * P @ V1
    V2 = REWARDS + DISCOUNT_FACTOR * P @ V2
    diff = np.abs(V1 - V2).max()
    diffs.append(diff)
    converged = diff < DELTA
 print(V1)
 print(V2)
 # Gap per sweep on a logarithmic y-axis.
 plt.plot(diffs)
 plt.yscale("log")
 plt.xlabel("Iteration")
 plt.ylabel("Difference")
 plt.title("Convergence of Bellman operator")
 # Displaying the figure is left disabled, as in the original script.
 # plt.show()
 # ---- Part 2: Optimization ----
 console.rule("[bold white]Part 2: Optimization")
 # Exo 1: Bellman equation (header only, no computation in this script).
 console.rule("[bold yellow]Exo 1: Bellmann equation")
 # See the course, part 2, slide 15.
 # Exo 2: Value Iteration Algorithm
 console.rule("[bold yellow]Exo 2: Value Iteration Algorithm")
 # The four action-conditioned transition matrices are derived from the grid
 # geometry rather than typed out entry by entry, so they cannot drift apart.
 # State indices, row by row ("#" is the blocked cell):
 #     0  1  2  3
 #     4  #  5  6
 #     7  8  9 10
 _GRID_ROWS, _GRID_COLS = 3, 4
 _BLOCKED_CELL = (1, 1)
 # Goal (3) and death (6): their rows stay all-zero, nothing leaves them.
 _ABSORBING_STATES = (3, 6)
 # (row, col) displacement per direction, in ARROWS order: left, up, down, right.
 _DISPLACEMENTS = ((0, -1), (-1, 0), (1, 0), (0, 1))
 # Probability of moving in the chosen direction / in each of the three others
 # (same 0.7 / 0.1 split as the MOVEMENTS table near the top of the file).
 _P_CHOSEN, _P_OTHER = 0.7, 0.1
 def _action_transition_matrix(action):
    """Return the 11x11 transition matrix for always choosing ``action``.

    Args:
        action: Index of the chosen direction in ARROWS order
            (0 = left, 1 = up, 2 = down, 3 = right).

    Returns:
        A float array ``M`` where ``M[s, t]`` is the probability of ending in
        state ``t`` after choosing ``action`` in state ``s``. A move that
        would leave the grid or enter the blocked cell keeps the agent in
        ``s``, so its probability is added to ``M[s, s]``. Rows of the
        absorbing states are all zero.
    """
    cells = [
        (row, col)
        for row in range(_GRID_ROWS)
        for col in range(_GRID_COLS)
        if (row, col) != _BLOCKED_CELL
    ]
    index_of = {cell: index for index, cell in enumerate(cells)}
    matrix = np.zeros((len(cells), len(cells)))
    for (row, col), state in index_of.items():
        if state in _ABSORBING_STATES:
            continue
        for direction, (d_row, d_col) in enumerate(_DISPLACEMENTS):
            prob = _P_CHOSEN if direction == action else _P_OTHER
            # Off-grid and blocked targets are absent from index_of: stay put.
            target = index_of.get((row + d_row, col + d_col), state)
            matrix[state, target] += prob
    return matrix
 P_g = _action_transition_matrix(0)  # left
 P_h = _action_transition_matrix(1)  # up
 P_b = _action_transition_matrix(2)  # down
 P_d = _action_transition_matrix(3)  # right
 # Value iteration: apply the Bellman optimality backup until the largest
 # per-state change between two sweeps drops below DELTA, the same sup-norm
 # criterion as the policy-evaluation loops of Part 1. (np.allclose with only
 # atol set still applies its default rtol=1e-5, which made the previous stop
 # test about ten times looser than the stated 1e-6.)
 V_optimal = np.random.rand(11)
 # Placeholder; replaced by the greedy policy once the loop has converged.
 pi = np.zeros(11, dtype=int)
 while True:
    # Action values for left, up, down, right (ARROWS order), one per state.
    V_g = REWARDS + DISCOUNT_FACTOR * P_g @ V_optimal
    V_h = REWARDS + DISCOUNT_FACTOR * P_h @ V_optimal
    V_b = REWARDS + DISCOUNT_FACTOR * P_b @ V_optimal
    V_d = REWARDS + DISCOUNT_FACTOR * P_d @ V_optimal
    action_values = np.array([V_g, V_h, V_b, V_d])
    V_new = action_values.max(axis=0)
    diff = np.max(np.abs(V_new - V_optimal))
    # Always keep the newest iterate so the printed values are the converged ones.
    V_optimal = V_new
    if diff < DELTA:
        # Greedy action per state with respect to the final backup.
        pi = action_values.argmax(axis=0)
        break
 print(V_optimal)
 # Render the policy on the 3x4 grid: re-insert the blocked cell at flat
 # position 5, then overwrite the goal (3) and death (7) cells with symbols.
 pi_pretty = [ARROWS[i] for i in pi]
 pi_pretty.insert(5, "■")
 pi_pretty[3] = "✓"
 pi_pretty[7] = "☠"
 pi_pretty = np.array(pi_pretty).reshape(3, 4)
 print(pi_pretty)
 # Exo 4: Performance comparison
 console.rule("[bold yellow]Exo 4: Performance comparison")
 # Per-state absolute gap between the value-iteration result and the values
 # obtained by iterative evaluation of P in Part 1.
 perf = np.abs(V_random - V_optimal)
 print(perf)
--- a/TP1/notebook.jl
+++ b/TP1/notebook.jl
--- a/TP2/.envrc
+++ b/TP2/.envrc
@ -0,0 +1 @@
 use nix
--- a/TP2/.vscode/extensions.json
+++ b/TP2/.vscode/extensions.json
@ -0,0 +1,7 @@
 {
  "recommendations": [
    "editorconfig.editorconfig",
    "njpwerner.autodocstring",
    "ms-python.python"
  ]
 }
--- a/TP2/.vscode/settings.json
+++ b/TP2/.vscode/settings.json
@ -0,0 +1,28 @@
 {
  "python.defaultInterpreterPath": ".venv/bin/python",
  "python.analysis.typeCheckingMode": "basic",
  "python.formatting.provider": "black",
  "editor.formatOnSave": true,
  "python.linting.enabled": true,
  "python.linting.lintOnSave": true,
  "python.linting.flake8Enabled": true,
  "python.linting.mypyEnabled": true,
  "python.linting.banditEnabled": true,
  "python.languageServer": "Pylance",
  "[python]": {
    "editor.codeActionsOnSave": {
      "source.organizeImports": true
    }
  },
  "files.exclude": {
    "**/.git": true,
    "**/.svn": true,
    "**/.hg": true,
    "**/CVS": true,
    "**/.DS_Store": true,
    "**/Thumbs.db": true,
    "**/__pycache__": true,
    "**/.mypy_cache": true
  },
  "nixEnvSelector.nixFile": "${workspaceRoot}/shell.nix"
 }
--- a/TP2/TP2.ipynb
+++ b/TP2/TP2.ipynb
--- a/TP2/shell.nix
+++ b/TP2/shell.nix
@ -0,0 +1,12 @@
 # Development shell providing poetry, a Python 3 interpreter and a few Python packages.
 { pkgs ? import <nixpkgs> { } }:
 pkgs.mkShell {
  buildInputs = with pkgs; [
    poetry
    python3
    # Take the libraries from the package set that belongs to `python3`, so the
    # interpreter and its packages always target the same Python version
    # (a fixed python310Packages only matches while python3 happens to be 3.10).
    python3Packages.numpy
    python3Packages.matplotlib
    python3Packages.ipykernel
    python3Packages.pip
  ];
 }
--- a/notebook_exos.jl
+++ b/notebook_exos.jl