This commit is contained in:
Laureηt 2023-06-23 19:39:56 +02:00
commit fc3fd7ceb1
Signed by: Laurent
SSH key fingerprint: SHA256:kZEpW8cMJ54PDeCvOhzreNr4FSh6R13CMGH/POoO8DI
6 changed files with 8923 additions and 0 deletions

View file

@ -0,0 +1,415 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "4Vhg6pn2uDlW"
},
"source": [
"# Dataset des 2 lunes\n",
"\n",
"(Avec un nouvel affichage plus joli, merci Arthur !)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Hw8rHiTKuHL3"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import datasets\n",
"import matplotlib.pyplot as plt \n",
"\n",
"def generate_2moons_dataset(num_lab = 10, num_unlab=740, num_test=250):\n",
" num_samples = num_lab + num_unlab + num_test\n",
" # Génération de 1000 données du dataset des 2 lunes\n",
" x, y = datasets.make_moons(n_samples=num_samples, noise=0.1, random_state=1)\n",
"\n",
" x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=num_test/num_samples, random_state=1)\n",
" x_train_lab, x_train_unlab, y_train_lab, y_train_unlab = train_test_split(x_train, y_train, test_size=num_unlab/(num_unlab+num_lab), random_state=6)\n",
"\n",
" return x_train_lab, y_train_lab, x_train_unlab, y_train_unlab, x_test, y_test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ww95atT6uJ4D"
},
"outputs": [],
"source": [
"x_train_lab, y_train_lab, x_train_unlab, y_train_unlab, x_test, y_test = generate_2moons_dataset(num_lab = 10, num_unlab=740, num_test=250)\n",
"\n",
"print(x_train_lab.shape, x_train_unlab.shape, x_test.shape)\n",
"print(y_train_lab.shape, y_train_unlab.shape, y_test.shape)\n",
"\n",
"# Affichage des données\n",
"plt.plot(x_train_unlab[y_train_unlab==0,0], x_train_unlab[y_train_unlab==0,1], color=(0.5,0.5,0.5), marker='.', linestyle=' ')\n",
"plt.plot(x_train_unlab[y_train_unlab==1,0], x_train_unlab[y_train_unlab==1,1], color=(0.5,0.5,0.5), marker='.', linestyle=' ')\n",
"\n",
"plt.plot(x_test[y_test==0,0], x_test[y_test==0,1], 'b+')\n",
"plt.plot(x_test[y_test==1,0], x_test[y_test==1,1], 'r+')\n",
"\n",
"plt.plot(x_train_lab[y_train_lab==0,0], x_train_lab[y_train_lab==0,1], 'b.', markersize=30)\n",
"plt.plot(x_train_lab[y_train_lab==1,0], x_train_lab[y_train_lab==1,1], 'r.', markersize=30)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LKODCH2luSPM"
},
"outputs": [],
"source": [
"from tensorflow import keras\n",
"from tensorflow.keras.layers import Dense\n",
"\n",
"# Small MLP for the 2-moons problem: 2 inputs -> 20 ReLU units -> 1 sigmoid output\n",
"def create_model_2moons():\n",
"\n",
" inputs = keras.Input(shape=(2,))\n",
" x = Dense(20, activation=\"relu\")(inputs)\n",
" outputs = Dense(1, activation=\"sigmoid\")(x)\n",
" model = keras.Model(inputs=inputs, outputs=outputs)\n",
"\n",
" return model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ea7E3-6l3_le"
},
"source": [
"# $\\Pi$-Modèle\n",
"\n",
"Nous allons maintenant tenter d'utiliser un 2nd algorithme semi-supervisé supposé être plus efficace, il s'agit de l'algorithme du $\\Pi$-Modèle, dont la version détaillée est présentée ci-dessous (en VO).\n",
"\n",
"<img src=\"https://drive.google.com/uc?id=13VhlBYwA6YIYGzKI81Jom_jTiuhOypEg\">\n",
"<caption><center> Figure 1 : Pseudo-code de l'algorithme du $\\Pi$-Modèle</center></caption>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6vaWDKNpYxc0"
},
"source": [
"Ci-dessous, la boucle d'entraînement détaillée est reprise et contient un squelette du code à réaliser pour implémenter le $\\Pi$-Modèle. \n",
"\n",
"**Travail à faire :** Complétez le squelette de l'algorithme du $\\Pi$-Modèle pour pouvoir tester ce nouvel algorithme."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uVK8itsvD72s"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"\n",
"# Nombre d'epochs de l'apprentissage\n",
"epochs = 2000\n",
"# Nombre de données non-labellisées par batch\n",
"bs_unlab = 100\n",
"# Nombre de données labellisées par batch\n",
"bs_lab = 10\n",
"# Taille du batch\n",
"batch_size = bs_lab + bs_unlab\n",
"# Valeur initiale du paramètre de contrôle de l'importance de la régularisation non-supervisée\n",
"lambda_t = 0\n",
"\n",
"# Données et modèle du problème des 2 clusters\n",
"x_train_lab, y_train_lab, x_train_unlab, y_train_unlab, x_test, y_test = generate_2moons_dataset(num_lab = 10, num_unlab=740, num_test=250)\n",
"model = create_model_2moons()\n",
"\n",
"# Nombre de batches par epochs\n",
"steps_per_epochs = int(np.floor(x_train_lab.shape[0]/bs_lab))\n",
"# Instanciation d'un optimiseur et d'une fonction de coût.\n",
"optimizer = keras.optimizers.Adam(learning_rate=3e-2)\n",
"# ICI ON A BESOIN DE DEUX FONCTIONS DE COUT : \n",
"# L'une pour la partie supervisée de la perte\n",
"loss_sup = ...\n",
"# L'autre pour la partie non-supervisée de la perte\n",
"loss_unsup = ...\n",
"\n",
"# Préparation des métriques pour le suivi de la performance du modèle.\n",
"train_acc_metric = keras.metrics.BinaryAccuracy()\n",
"val_acc_metric = keras.metrics.BinaryAccuracy()\n",
"\n",
" # Indices de l'ensemble labellisé\n",
"indices_lab = np.arange(x_train_lab.shape[0]) \n",
"# Indices de l'ensemble non labellisé\n",
"indices_unlab = np.arange(x_train_unlab.shape[0]) \n",
"\n",
"for epoch in range(epochs):\n",
"\n",
" for b in range(steps_per_epochs):\n",
"\n",
" # Les données d'un batch sont constituées de l'intégralité de nos données labellisées...\n",
" x_batch_lab = x_train_lab[indices_lab[b*bs_lab:(b+1)*bs_lab]]\n",
" y_batch_lab = y_train_lab[indices_lab[b*bs_lab:(b+1)*bs_lab]]\n",
" y_batch_lab = np.expand_dims(y_batch_lab, 1)\n",
"\n",
" # ... ainsi que de données non-labellisées !\n",
" x_batch_unlab = x_train_unlab[indices_unlab[b*bs_unlab:(b+1)*bs_unlab]]\n",
"\n",
" # On forme notre batch en concaténant les données labellisées et non labellisées\n",
" x_batch = np.concatenate((x_batch_lab, x_batch_unlab), axis=0)\n",
"\n",
" # On forme également un batch alternatif constitué des mêmes données bruitées\n",
" # Le bruit ici sera simplement obtenu avec np.rand()\n",
" # Attention à l'échelle du bruit !\n",
" x_batch_noisy = ...\n",
"\n",
" # Les opérations effectuées par le modèle dans ce bloc sont suivies et permettront\n",
" # la différentiation automatique.\n",
" with tf.GradientTape() as tape:\n",
"\n",
" # Application du réseau aux données d'entrée\n",
" y_pred = model(x_batch, training=True)\n",
" # Ne pas oublier de le faire également sur le 2e batch ! \n",
" y_pred_noisy = model(x_batch_noisy, training=True) \n",
"\n",
" # Calcul de la fonction de perte sur ce batch\n",
" sup_term = ...\n",
" unsup_term = ...\n",
"\n",
" loss_value = ...\n",
"\n",
" # Calcul des gradients par différentiation automatique\n",
" grads = tape.gradient(loss_value, model.trainable_weights)\n",
"\n",
" # Réalisation d'une itération de la descente de gradient (mise à jour des paramètres du réseau)\n",
" optimizer.apply_gradients(zip(grads, model.trainable_weights))\n",
"\n",
" # Metric update: y_batch_lab was already expanded to shape (bs_lab, 1) above,\n",
" # so it must not be expanded a second time here\n",
" train_acc_metric.update_state(y_batch_lab, y_pred[0:bs_lab])\n",
"\n",
" \n",
" # Calcul de la précision à la fin de l'epoch\n",
" train_acc = train_acc_metric.result()\n",
" # Calcul de la précision sur l'ensemble de validation à la fin de l'epoch\n",
" val_logits = model(x_test, training=False)\n",
" val_acc_metric.update_state(np.expand_dims(y_test, 1), val_logits)\n",
" val_acc = val_acc_metric.result()\n",
"\n",
" print(\"Epoch %4d : Loss : %.4f, Acc : %.4f, Val Acc : %.4f\" % (epoch, float(loss_value), float(train_acc), float(val_acc)))\n",
"\n",
" # Remise à zéro des métriques pour la prochaine epoch\n",
" train_acc_metric.reset_states()\n",
" val_acc_metric.reset_states()\n",
"\n",
" # Mise à jour du paramètre de contrôle de l'importance de la régularisation non-supervisée\n",
" # Il augmente progressivement !\n",
" if lambda_t < 1:\n",
" if epoch > 100:\n",
" lambda_t = lambda_t + 0.001\n",
"\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "l1dZNTmKYjZs"
},
"outputs": [],
"source": [
"from mlxtend.plotting import plot_decision_regions\n",
"\n",
"# Affichage des données\n",
"plt.plot(x_train_unlab[y_train_unlab==0,0], x_train_unlab[y_train_unlab==0,1], 'b.')\n",
"plt.plot(x_train_unlab[y_train_unlab==1,0], x_train_unlab[y_train_unlab==1,1], 'r.')\n",
"\n",
"plt.plot(x_test[y_test==0,0], x_test[y_test==0,1], 'b+')\n",
"plt.plot(x_test[y_test==1,0], x_test[y_test==1,1], 'r+')\n",
"\n",
"plt.plot(x_train_lab[y_train_lab==0,0], x_train_lab[y_train_lab==0,1], 'b.', markersize=30)\n",
"plt.plot(x_train_lab[y_train_lab==1,0], x_train_lab[y_train_lab==1,1], 'r.', markersize=30)\n",
"\n",
"plt.show()\n",
"\n",
"# Plot decision boundary\n",
"plot_decision_regions(x_train_unlab, y_train_unlab, clf=model, legend=2)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "e2AnvQPl4YTb"
},
"source": [
"# MNIST"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "B_noJPS5f2Td"
},
"source": [
"Pour adapter l'algorithme du $\\Pi$-modèle à MNIST, nous allons devoir remplacer le bruitage des données par de l'augmentation de données.\n",
"\n",
"Commencez par remplir l'ImageDataGenerator (à vous de voir comment dans [la documentation](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator)) avec des transformations pertinentes. **Attention** cette étape est cruciale pour obtenir de bons résultats. Il faut intégrer les augmentations les plus fortes possibles, mais être certain qu'elles ne modifient pas le label du chiffre !"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "anl-QTIxgnwf"
},
"outputs": [],
"source": [
"from tensorflow.keras.datasets import mnist\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def generate_mnist_dataset(num_lab = 100):\n",
"\n",
" # Chargement et normalisation (entre 0 et 1) des données de la base de données MNIST\n",
" (x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"\n",
" x_train = np.expand_dims(x_train.astype('float32') / 255., 3)\n",
" x_test = np.expand_dims(x_test.astype('float32') / 255., 3)\n",
"\n",
" x_train_lab, x_train_unlab, y_train_lab, y_train_unlab = train_test_split(x_train, y_train, test_size=(x_train.shape[0]-num_lab)/x_train.shape[0], random_state=2)\n",
"\n",
" return x_train_lab, y_train_lab, x_train_unlab, y_train_unlab, x_test, y_test\n",
"\n",
"x_train_lab, y_train_lab, x_train_unlab, y_train_unlab, x_test, y_test = generate_mnist_dataset()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "OLKir7N1klkz"
},
"outputs": [],
"source": [
"from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
"import matplotlib.pyplot as plt\n",
"\n",
"train_datagen = ImageDataGenerator(\n",
" ### A COMPLETER\n",
")\n",
"\n",
"# Affichage d'une donnée et de son augmentation\n",
"x = x_train_lab[0:10]\n",
"plt.imshow(x[0, : ,: ,0])\n",
"plt.show()\n",
"x_aug = train_datagen.flow(x, shuffle=False, batch_size=10).next()\n",
"plt.imshow(x_aug[0, : ,: ,0])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nx9N8ZV-u_fX"
},
"source": [
"Implémentez le réseau LeNet-5 pour la classification des chiffres manuscrits, en suivant cet exemple : \n",
"<img src=\"https://www.datasciencecentral.com/wp-content/uploads/2021/10/1lvvWF48t7cyRWqct13eU0w.jpeg\">\n",
"<caption><center> Figure 2 : Schéma de l'architecture de LeNet-5</center></caption>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ASNuRBCVvHZe"
},
"outputs": [],
"source": [
"from tensorflow.keras.layers import *\n",
"from tensorflow.keras import Model, Input\n",
"\n",
"# A COMPLETER\n",
"# Ici, on implémentera le modèle LeNet-5 :\n",
"# 1 couche de convolution 5x5 à 6 filtres suivie d'un max pooling\n",
"# puis 1 couche de convolution 5x5 à 16 filtres suivie d'un max pooling et d'un Flatten\n",
"# Enfin 2 couches denses de 120 et 84 neurones, avant la couche de sortie à 10 neurones.\n",
"def create_model_mnist():\n",
"\n",
" inputs = keras.Input(shape=(...))\n",
"\n",
" ...\n",
" \n",
" outputs = \n",
"\n",
" model = keras.Model(inputs=inputs, outputs=outputs) \n",
"\n",
" return model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "D-v1X5Ypv4jz"
},
"source": [
"**Travail à faire**\n",
"\n",
"Commencez d'abord par entraîner LeNet-5 sur MNIST de manière supervisée, en **utilisant 100 données labellisées**.\n",
"\n",
"Attention, il va vous falloir modifier quelques élements par rapport à ce que nous avons fait dans la séance précédente, notamment la fonction de coût (*SparseCategoricalCrossEntropy*) et les métriques (*SparseCategoricalAccuracy*).\n",
"\n",
"Pour comparer de manière juste les versions supervisée et semi-supervisée, n'oubliez pas également d'intégrer l'augmentation de données dans votre apprentissage. Vous devriez obtenir environ 80\\% de bonnes classifications sur l'ensemble de test."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VAAFtjTv5U1n"
},
"source": [
"**Travail à faire**\n",
"\n",
"Reprenez ensuite le code du $\\Pi$-Modèle pour l'adapter à MNIST, en intégrant l'augmentation (à la place du bruitage des données). Vous devriez obtenir un gain significatif avec les bons hyperparamètres ! (jusqu'à environ 97\\%)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"machine_shape": "hm",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3.10.8 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
},
"vscode": {
"interpreter": {
"hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

456
IAM2022_TP_GAN_Sujet.ipynb Normal file
View file

@ -0,0 +1,456 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Ls4hgfTEHgGR"
},
"source": [
"# Réseaux Génératifs Antagonistes\n",
"\n",
"Dans ce TP nous allons mettre en place l'entraînement d'un réseau de neurone génératif, entraîné de manière antagoniste à l'aide d'un réseau discriminateur. \n",
"\n",
"<center> <img src=\"https://drive.google.com/uc?id=1_ADmA-Js37z6R-0o476dzX4jMG5WHLtr\" width=600></center>\n",
"<caption><center> Schéma global de fonctionnement d'un GAN ([Goodfellow 2014]) </center></caption>\n",
"\n",
"Dans un premier temps, nous allons illustrer le fonctionnement du GAN sur l'exemple simple, canonique, de la base de données MNIST. \n",
"Votre objectif sera par la suite d'adapter cet exemple à la base de données *Labelled Faces in the Wild*, et éventuellement d'implémenter quelques astuces permettant d'améliorer l'entrainement.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "TRziuDJMInpM"
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from keras import layers\n",
"import numpy as np\n",
"import os\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IFNzLxouIfwy"
},
"source": [
"On commence par définir les réseaux discriminateur et générateur, en suivant les recommandations de DCGAN (activation *LeakyReLU*, *stride*, *Batch Normalization*, activation de sortie *tanh* pour le générateur)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "IfPhxKGLHfD-"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"discriminator\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" conv2d_3 (Conv2D) (None, 14, 14, 64) 640 \n",
" \n",
" batch_normalization_5 (Batc (None, 14, 14, 64) 256 \n",
" hNormalization) \n",
" \n",
" leaky_re_lu_5 (LeakyReLU) (None, 14, 14, 64) 0 \n",
" \n",
" conv2d_4 (Conv2D) (None, 7, 7, 128) 73856 \n",
" \n",
" batch_normalization_6 (Batc (None, 7, 7, 128) 512 \n",
" hNormalization) \n",
" \n",
" leaky_re_lu_6 (LeakyReLU) (None, 7, 7, 128) 0 \n",
" \n",
" global_max_pooling2d_1 (Glo (None, 128) 0 \n",
" balMaxPooling2D) \n",
" \n",
" dense_2 (Dense) (None, 1) 129 \n",
" \n",
"=================================================================\n",
"Total params: 75,393\n",
"Trainable params: 75,009\n",
"Non-trainable params: 384\n",
"_________________________________________________________________\n",
"Model: \"generator\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" dense_3 (Dense) (None, 6272) 809088 \n",
" \n",
" batch_normalization_7 (Batc (None, 6272) 25088 \n",
" hNormalization) \n",
" \n",
" leaky_re_lu_7 (LeakyReLU) (None, 6272) 0 \n",
" \n",
" reshape_1 (Reshape) (None, 7, 7, 128) 0 \n",
" \n",
" conv2d_transpose_2 (Conv2DT (None, 14, 14, 128) 262272 \n",
" ranspose) \n",
" \n",
" batch_normalization_8 (Batc (None, 14, 14, 128) 512 \n",
" hNormalization) \n",
" \n",
" leaky_re_lu_8 (LeakyReLU) (None, 14, 14, 128) 0 \n",
" \n",
" conv2d_transpose_3 (Conv2DT (None, 28, 28, 128) 262272 \n",
" ranspose) \n",
" \n",
" batch_normalization_9 (Batc (None, 28, 28, 128) 512 \n",
" hNormalization) \n",
" \n",
" leaky_re_lu_9 (LeakyReLU) (None, 28, 28, 128) 0 \n",
" \n",
" conv2d_5 (Conv2D) (None, 28, 28, 1) 6273 \n",
" \n",
"=================================================================\n",
"Total params: 1,366,017\n",
"Trainable params: 1,352,961\n",
"Non-trainable params: 13,056\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"latent_dim = 128\n",
"discriminator = keras.Sequential(\n",
" [\n",
" keras.Input(shape=(28, 28, 1)),\n",
" layers.Conv2D(64, (3, 3), strides=(2, 2), padding=\"same\"),\n",
" layers.BatchNormalization(momentum = 0.8),\n",
" layers.LeakyReLU(alpha=0.2),\n",
" layers.Conv2D(128, (3, 3), strides=(2, 2), padding=\"same\"),\n",
" layers.BatchNormalization(momentum = 0.8),\n",
" layers.LeakyReLU(alpha=0.2),\n",
" layers.GlobalMaxPooling2D(),\n",
" layers.Dense(1, activation=\"sigmoid\"),\n",
" ],\n",
" name=\"discriminator\",\n",
")\n",
"discriminator.summary()\n",
"\n",
"generator = keras.Sequential(\n",
" [\n",
" keras.Input(shape=(latent_dim,)),\n",
" layers.Dense(7 * 7 * 128), \n",
" layers.BatchNormalization(momentum = 0.8),\n",
" layers.LeakyReLU(alpha=0.2),\n",
" layers.Reshape((7, 7, 128)),\n",
" layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n",
" layers.BatchNormalization(momentum = 0.8),\n",
" layers.LeakyReLU(alpha=0.2),\n",
" layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding=\"same\"),\n",
" layers.BatchNormalization(momentum = 0.8),\n",
" layers.LeakyReLU(alpha=0.2),\n",
" layers.Conv2D(1, (7, 7), padding=\"same\", activation=\"tanh\"),\n",
" ],\n",
" name=\"generator\",\n",
")\n",
"generator.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kZ0FTcu6yl56"
},
"source": [
"Le code suivant décrit ce qui se passe à chaque itération de l'algorithme, ce qui est également résumé dans le cours sur le slide suivant : \n",
"\n",
"<center> <img src=\"https://drive.google.com/uc?id=1I6KesJZeSN_p_mx5nkAsVUeMmUKfIYB_\" width=600></center>\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "_RnxhJX_KJxF"
},
"outputs": [],
"source": [
"# Instanciation de deux optimiseurs, l'un pour le discrimnateur et l'autre pour le générateur\n",
"d_optimizer = keras.optimizers.Adam(learning_rate=0.0008)\n",
"g_optimizer = keras.optimizers.Adam(learning_rate=0.0004)\n",
"\n",
"# Binary cross-entropy loss. The discriminator's last layer already applies a\n",
"# sigmoid activation, so its outputs are probabilities, not logits:\n",
"# from_logits must be False (True here would apply the sigmoid twice)\n",
"loss_fn = keras.losses.BinaryCrossentropy(from_logits=False)\n",
"\n",
"\n",
"# La fonction prend en entrée un mini-batch d'images réelles\n",
"@tf.function\n",
"def train_step(real_images):\n",
" batch_size = tf.shape(real_images)[0]\n",
"\n",
" # ENTRAINEMENT DU DISCRIMINATEUR\n",
" # Échantillonnage d'un mini-batch de bruit\n",
" random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim,))\n",
" # Création d'un mini-batch d'images générées à partir du bruit\n",
" generated_images = generator(random_latent_vectors)\n",
" # Échantillonnage d'un mini-batch de données combinant images générées et réelles\n",
" combined_images = tf.concat([generated_images, real_images], axis=0)\n",
"\n",
" # Création des labels associés au mini-batch de données créé précédemment\n",
" # Pour l'entraînement du discriminateur :\n",
" # - les données générées sont labellisées \"0\" \n",
" #  - les données réelles sont labellisées \"1\" \n",
" labels = tf.concat([tf.zeros((batch_size, 1)), tf.ones((batch_size, 1))], axis=0)\n",
"\n",
" # Entraînement du discriminateur\n",
" with tf.GradientTape() as tape:\n",
" # L'appel d'un modèle (ici discriminator) à l'intérieur de Tf.GradientTape\n",
" # permet de récupérer les gradients pour faire la mise à jour\n",
"\n",
" # Prédiction du discriminateur sur notre batch d'images réelles et générées\n",
" predictions = discriminator(combined_images)\n",
" # Calcul de la fonction de coût\n",
" d_loss = loss_fn(labels, predictions)\n",
"\n",
" # Récupération des gradients de la fonction de coût par rapport aux paramètres du discriminateur\n",
" grads = tape.gradient(d_loss, discriminator.trainable_weights)\n",
" # Mise à jour des paramètres par l'optimiseur grâce aux gradients de la fonction de coût\n",
" d_optimizer.apply_gradients(zip(grads, discriminator.trainable_weights))\n",
" ### NOTE : ON N'ENTRAINE PAS LE GENERATEUR A CE MOMENT !\n",
"\n",
" # ENTRAINEMENT DU GENERATEUR\n",
" # Échantillonnage d'un mini-batch de bruit\n",
" random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim,))\n",
" # Création des labels associés au mini-batch de données créé précédemment\n",
" # Pour l'entraînement du générateur :\n",
" # - les données générées sont labellisées ici \"1\" \n",
" misleading_labels = tf.ones((batch_size, 1))\n",
"\n",
" # Entraînement du générateur sans toucher aux paramètres du discriminateur !\n",
" with tf.GradientTape() as tape:\n",
" predictions = discriminator(generator(random_latent_vectors))\n",
" g_loss = loss_fn(misleading_labels, predictions)\n",
" \n",
" # Récupération des gradients de la fonction de coût par rapport aux paramètres du générateur\n",
" grads = tape.gradient(g_loss, generator.trainable_weights)\n",
" # Mise à jour des paramètres par l'optimiseur grâce aux gradients de la fonction de coût\n",
" g_optimizer.apply_gradients(zip(grads, generator.trainable_weights))\n",
"\n",
" return d_loss, g_loss, generated_images"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "all1LAF92h1u"
},
"source": [
"Il reste à écrire l'algorithme final qui va faire appel au code d'itération écrit précédemment"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "lQJWoazN2pwd"
},
"outputs": [],
"source": [
"# Préparation de la base de données : on utilise toutes les images (entraînement + test) de MNIST\n",
"batch_size = 32\n",
"(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()\n",
"all_digits = np.concatenate([x_train, x_test])\n",
"all_digits = (all_digits.astype(\"float32\")-127.5) / 127.5 # Images normalisées\n",
"all_digits = np.reshape(all_digits, (-1, 28, 28, 1))\n",
"dataset = tf.data.Dataset.from_tensor_slices(all_digits)\n",
"dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)\n",
"\n",
"epochs = 20 # Une 20aine d'epochs est nécessaire pour voir des chiffres qui semblent réalistes\n",
"\n",
"for epoch in range(epochs):\n",
" print(\"\\nStart epoch\", epoch)\n",
"\n",
" for step, real_images in enumerate(dataset):\n",
" # Descente de gradient simultanée du discrimnateur et du générateur\n",
" d_loss, g_loss, generated_images = train_step(real_images)\n",
"\n",
" # Affichage régulier d'images générées.\n",
" if step % 200 == 0:\n",
" # Métriques\n",
" print(\"Perte du discriminateur à l'étape %d: %.2f\" % (step, d_loss))\n",
" print(\"Perte du générateur à l'étape %d: %.2f\" % (step, g_loss))\n",
"\n",
" plt.figure(figsize=(20, 4))\n",
" for i in range(10):\n",
" plt.subplot(1,10, i+1)\n",
" plt.imshow(generated_images[i, :, :, 0]*128+128, cmap='gray')\n",
" \n",
" plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kwIc9354oNIV"
},
"source": [
"# Travail à faire :\n",
"\n",
"Prenez le temps de lire, de comprendre et de compléter le code qui vous est fourni. Observez attentivement l'évolution des métriques ainsi que les images générées au cours de l'entraînement. L'objectif de ce TP est d'abord de vous fournir un exemple de code implémentant les GANs, mais surtout de vous faire sentir la difficulté d'entraîner ces modèles.\n",
"\n",
"Dans la suite du TP, nous vous fournissons ci-dessous un code de chargement de la base de données de visages *Labelled Faces in the Wild*. Votre objectif est donc d'adapter le code précédent pour générer non plus des chiffres mais des visages.\n",
"\n",
"Quelques précisions importantes, et indications : \n",
"\n",
"\n",
"* MNIST est une base de données d'images noir et blanc de dimension 28 $\\times$ 28, LFW est une base de données d'images couleur de dimension 32 $\\times$ 32 $\\times$ 3\n",
"* La diversité des visages est bien plus grande que celle des chiffres ; votre générateur doit donc être un peu plus complexe que celui utilisé ici (plus de couches, et/ou plus de filtres par exemple) \n",
"* Pour faire fonctionner ce second exemple, il pourrait être nécessaire de modifier quelques hyperparamètres (dimension de l'espace latent, taux d'apprentissage des générateur et discriminateur, etc.)\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ohexDvCYrahC"
},
"source": [
"Le code suivant télécharge et prépare les données de la base LFW."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ot-zkfDBQUkl"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import tarfile, tqdm, cv2, os\n",
"from sklearn.model_selection import train_test_split\n",
"import numpy as np\n",
"\n",
"# Télécharger les données de la base de données \"Labelled Faces in the Wild\"\n",
"!wget http://www.cs.columbia.edu/CAVE/databases/pubfig/download/lfw_attributes.txt\n",
"!wget http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz\n",
"!wget http://vis-www.cs.umass.edu/lfw/lfw.tgz\n",
" \n",
"ATTRS_NAME = \"lfw_attributes.txt\"\n",
"IMAGES_NAME = \"lfw-deepfunneled.tgz\"\n",
"RAW_IMAGES_NAME = \"lfw.tgz\"\n",
"\n",
"def decode_image_from_raw_bytes(raw_bytes):\n",
" img = cv2.imdecode(np.asarray(bytearray(raw_bytes), dtype=np.uint8), 1)\n",
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
" return img\n",
"\n",
"def load_lfw_dataset(\n",
" use_raw=False,\n",
" dx=80, dy=80,\n",
" dimx=45, dimy=45):\n",
"\n",
" # Read attrs\n",
" df_attrs = pd.read_csv(ATTRS_NAME, sep='\\t', skiprows=1)\n",
" df_attrs = pd.DataFrame(df_attrs.iloc[:, :-1].values, columns=df_attrs.columns[1:])\n",
" imgs_with_attrs = set(map(tuple, df_attrs[[\"person\", \"imagenum\"]].values))\n",
"\n",
" # Read photos\n",
" all_photos = []\n",
" photo_ids = []\n",
"\n",
" # tqdm in used to show progress bar while reading the data in a notebook here, you can change\n",
" # tqdm_notebook to use it outside a notebook\n",
" with tarfile.open(RAW_IMAGES_NAME if use_raw else IMAGES_NAME) as f:\n",
" for m in tqdm.tqdm_notebook(f.getmembers()):\n",
" # Only process image files from the compressed data\n",
" if m.isfile() and m.name.endswith(\".jpg\"):\n",
" # Prepare image\n",
" img = decode_image_from_raw_bytes(f.extractfile(m).read())\n",
"\n",
" # Crop only faces and resize it\n",
" img = img[dy:-dy, dx:-dx]\n",
" img = cv2.resize(img, (dimx, dimy))\n",
"\n",
" # Parse person and append it to the collected data\n",
" fname = os.path.split(m.name)[-1]\n",
" fname_splitted = fname[:-4].replace('_', ' ').split()\n",
" person_id = ' '.join(fname_splitted[:-1])\n",
" photo_number = int(fname_splitted[-1])\n",
" if (person_id, photo_number) in imgs_with_attrs:\n",
" all_photos.append(img)\n",
" photo_ids.append({'person': person_id, 'imagenum': photo_number})\n",
"\n",
" photo_ids = pd.DataFrame(photo_ids)\n",
" all_photos = np.stack(all_photos).astype('uint8')\n",
"\n",
" # Preserve photo_ids order!\n",
" all_attrs = photo_ids.merge(df_attrs, on=('person', 'imagenum')).drop([\"person\", \"imagenum\"], axis=1)\n",
"\n",
" return all_photos, all_attrs\n",
"\n",
"# Prépare le dataset et le charge dans la variable X\n",
"X, attr = load_lfw_dataset(use_raw=True, dimx=32, dimy=32)\n",
"# Normalise les images\n",
"X = (X.astype(\"float32\")-127.5)/127.5\n"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"machine_shape": "hm",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.10.8 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
},
"vscode": {
"interpreter": {
"hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long