GuitarLSTM

Deep learning models for guitar amp/pedal emulation using LSTM with Keras

commit a790c8736525f942abf5487357514447a090a4f3
parent bc0faebae54a13352148a6a1d69171b9f70cd3c3
Author: jmiller656 <joshxmiller656@gmail.com>
Date:   Sat, 30 Jan 2021 23:46:42 -0500

Fix GPU OOM issues

Diffstat:
 M guitar_lstm_colab.ipynb | 511 +++++++++++++++++++++++++++++++++++------------------------------------
1 file changed, 254 insertions(+), 257 deletions(-)

diff --git a/guitar_lstm_colab.ipynb b/guitar_lstm_colab.ipynb
@@ -1,258 +1,256 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "name": "guitar_lstm_colab.ipynb",
-      "provenance": [],
-      "collapsed_sections": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "accelerator": "GPU"
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "RF2uyPfxgi8H"
+   },
+   "outputs": [],
+   "source": [
+    "# TO USE: \n",
+    "# 1. Upload your input and output wav files to the current directory in Colab\n",
+    "# 2. Edit the USER INPUTS section to point to your wav files, and choose a\n",
+    "#    model name, and number of epochs for training. If you experience \n",
+    "#    crashing due to low RAM, reduce the \"input_size\" parameter, or increase\n",
+    "#    the \"split_data\" parameter.\n",
+    "# 3. Run each section of code. The trained models and output wav files will be \n",
+    "#    added to the \"models\" directory.\n",
+    "#\n",
+    "# Note: Tested on CPU and GPU runtimes.\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "from tensorflow.keras import Sequential\n",
+    "from tensorflow.keras.layers import LSTM, Conv1D, Dense\n",
+    "from tensorflow.keras.optimizers import Adam\n",
+    "from tensorflow.keras.backend import clear_session\n",
+    "from tensorflow.keras.activations import tanh, elu, relu\n",
+    "from tensorflow.keras.models import load_model\n",
+    "import tensorflow.keras.backend as K\n",
+    "from tensorflow.keras.utils import Sequence\n",
+    "\n",
+    "import os\n",
+    "from scipy import signal\n",
+    "from scipy.io import wavfile\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import math\n",
+    "import h5py"
+   ]
   },
-  "cells": [
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "RF2uyPfxgi8H"
-      },
-      "source": [
-        "# TO USE: \n",
-        "# 1. Upload your input and output wav files to the current directory in Colab\n",
-        "# 2. Edit the USER INPUTS section to point to your wav files, and choose a\n",
-        "#    model name, and number of epochs for training. If you experience \n",
-        "#    crashing due to low RAM, reduce the \"input_size\" parameter, or increase\n",
-        "#    the \"split_data\" parameter.\n",
-        "# 3. Run each section of code. The trained models and output wav files will be \n",
-        "#    added to the \"models\" directory.\n",
-        "#\n",
-        "# Note: Tested on CPU and GPU runtimes.\n",
-        "\n",
-        "import tensorflow as tf\n",
-        "from tensorflow.keras import Sequential\n",
-        "from tensorflow.keras.layers import LSTM, Conv1D, Dense\n",
-        "from tensorflow.keras.optimizers import Adam\n",
-        "from tensorflow.keras.backend import clear_session\n",
-        "from tensorflow.keras.activations import tanh, elu, relu\n",
-        "from tensorflow.keras.models import load_model\n",
-        "import tensorflow.keras.backend as K\n",
-        "from tensorflow.keras.utils import Sequence\n",
-        "\n",
-        "import os\n",
-        "from scipy import signal\n",
-        "from scipy.io import wavfile\n",
-        "import numpy as np\n",
-        "import matplotlib.pyplot as plt\n",
-        "import math\n",
-        "import h5py\n",
-        "\n"
-      ],
-      "execution_count": 1,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "U22mDBe4jaf2"
-      },
-      "source": [
-        "# EDIT THIS SECTION FOR USER INPUTS\n",
-        "#\n",
-        "name = 'test'\n",
-        "in_file = 'ts9_test1_in_FP32.wav'\n",
-        "out_file = 'ts9_test1_out_FP32.wav'\n",
-        "epochs = 1\n",
-        "split_data=4 # **Increase this to reduce RAM usage **\n",
-        "\n",
-        "train_mode = 0    # 0 = speed training, \n",
-        "                  # 1 = accuracy training \n",
-        "                  # 2 = extended training\n",
-        "\n",
-        "input_size = 150 # !!!IMPORTANT !!!: The input_size is set at 150 for Colab notebook. \n",
-        "                 # A higher setting may result in crashing due to\n",
-        "                 # memory limitation of 8GB for the free version\n",
-        "                 # of Colab. This setting limits the accuracy of\n",
-        "                 # the training, especially for complex guitar signals\n",
-        "                 # such as high distortion.\n",
-        "                 # \n",
-        "                 # !!!IMPORTANT!!!: You will most likely need to cycle the runtime to \n",
-        "                 # free up RAM between training sessions.\n",
-        "                 #\n",
-        "                 # Increase the \"split_data\" parameter to reduce the RAM used and\n",
-        "                 # still allow for a higher \"input_size\" setting. \n",
-        "                 #\n",
-        "                 # Future dev note: Using a custom dataloader may be a good\n",
-        "                 #                  workaround for this limitation, at the cost\n",
-        "                 #                  of slower training.\n",
-        "\n",
-        "if not os.path.exists('models/'+name):\n",
-        "    os.makedirs('models/'+name)\n",
-        "else:\n",
-        "    print(\"A model with the same name already exists. Please choose a new name.\")\n",
-        "    exit\n"
-      ],
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "metadata": {
-        "id": "WqI-cGt1jaG2"
-      },
-      "source": [
-        "\n",
-        "def pre_emphasis_filter(x, coeff=0.95):\n",
-        "    return tf.concat([x, x - coeff * x], 1)\n",
-        "    \n",
-        "def error_to_signal(y_true, y_pred): \n",
-        "    \"\"\"\n",
-        "    Error to signal ratio with pre-emphasis filter:\n",
-        "    \"\"\"\n",
-        "    y_true, y_pred = pre_emphasis_filter(y_true), pre_emphasis_filter(y_pred)\n",
-        "    return K.sum(tf.pow(y_true - y_pred, 2), axis=0) / K.sum(tf.pow(y_true, 2), axis=0) + 1e-10\n",
-        "    \n",
-        "def save_wav(name, data):\n",
-        "    wavfile.write(name, 44100, data.flatten().astype(np.float32))\n",
-        "\n",
-        "def normalize(data):\n",
-        "    data_max = max(data)\n",
-        "    data_min = min(data)\n",
-        "    data_norm = max(data_max,abs(data_min))\n",
-        "    return data / data_norm\n",
-        "\n",
-        "\n",
-        "'''This is a similar Tensorflow/Keras implementation of the LSTM model from the paper:\n",
-        "    \"Real-Time Guitar Amplifier Emulation with Deep Learning\"\n",
-        "    https://www.mdpi.com/2076-3417/10/3/766/htm\n",
-        "\n",
-        "    Uses a stack of two 1-D Convolutional layers, followed by LSTM, followed by \n",
-        "    a Dense (fully connected) layer. Three preset training modes are available, \n",
-        "    with further customization by editing the code. A Sequential tf.keras model \n",
-        "    is implemented here.\n",
-        "\n",
-        "    Note: RAM may be a limiting factor for the parameter \"input_size\". The wav data\n",
-        "    is preprocessed and stored in RAM, which improves training speed but quickly runs out\n",
-        "    if using a large number for \"input_size\". Reduce this if you are experiencing\n",
-        "    RAM issues. \n",
-        "    \n",
-        "    --training_mode=0  Speed training (default)\n",
-        "    --training_mode=1  Accuracy training\n",
-        "    --training_mode=2  Extended training (set max_epochs as desired, for example 50+)\n",
-        "'''\n",
-        "\n",
-        "batch_size = 4096 \n",
-        "test_size = 0.2\n",
-        "\n",
-        "if train_mode == 0:         # Speed Training\n",
-        "    learning_rate = 0.01 \n",
-        "    conv1d_strides = 12 \n",
-        "    conv1d_filters = 16\n",
-        "    hidden_units = 36\n",
-        "elif train_mode == 1:       # Accuracy Training (~10x longer than Speed Training)\n",
-        "    learning_rate = 0.01 \n",
-        "    conv1d_strides = 4\n",
-        "    conv1d_filters = 36\n",
-        "    hidden_units= 64\n",
-        "else:                       # Extended Training (~60x longer than Accuracy Training)\n",
-        "    learning_rate = 0.0005 \n",
-        "    conv1d_strides = 3\n",
-        "    conv1d_filters = 36\n",
-        "    hidden_units= 96\n",
-        "\n",
-        "\n",
-        "# Create Sequential Model ###########################################\n",
-        "clear_session()\n",
-        "model = Sequential()\n",
-        "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same',input_shape=(input_size,1)))\n",
-        "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same'))\n",
-        "model.add(LSTM(hidden_units))\n",
-        "model.add(Dense(1, activation=None))\n",
-        "model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
-        "print(model.summary())\n",
-        "\n",
-        "# Load and Preprocess Data ###########################################\n",
-        "in_rate, in_data = wavfile.read(in_file)\n",
-        "out_rate, out_data = wavfile.read(out_file)\n",
-        "\n",
-        "X_all = in_data.astype(np.float32).flatten() \n",
-        "X_all = normalize(X_all).reshape(len(X_all),1) \n",
-        "y_all = out_data.astype(np.float32).flatten() \n",
-        "y_all = normalize(y_all).reshape(len(y_all),1) \n",
-        "\n",
-        "# If splitting the data for training, do this part\n",
-        "if split_data > 1:\n",
-        "    num_split = len(X_all) // split_data\n",
-        "    X = X_all[0:num_split*split_data]\n",
-        "    y = y_all[0:num_split*split_data]\n",
-        "    X_data = np.split(X, split_data)\n",
-        "    y_data = np.split(y, split_data)\n",
-        "\n",
-        "    # Perform training on each split dataset\n",
-        "    for i in range(len(X_data)):\n",
-        "        print(\"\\nTraining on split data \" + str(i+1) + \"/\" +str(len(X_data)))\n",
-        "        X_split = X_data[i]\n",
-        "        y_split = y_data[i]\n",
-        "\n",
-        "        y_ordered = y_split[input_size-1:] \n",
-        "\n",
-        "        indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis] \n",
-        "        X_ordered = tf.gather(X_split,indices) \n",
-        "\n",
-        "        shuffled_indices = np.random.permutation(len(X_ordered)) \n",
-        "        X_random = tf.gather(X_ordered,shuffled_indices)\n",
-        "        y_random = tf.gather(y_ordered, shuffled_indices)\n",
-        "\n",
-        "        # Train Model ###################################################\n",
-        "        model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2) \n",
-        "\n",
-        "\n",
-        "    model.save('models/'+name+'/'+name+'.h5')\n",
-        "\n",
-        "# If training on the full set of input data in one run, do this part\n",
-        "else:\n",
-        "    y_ordered = y_all[input_size-1:] \n",
-        "\n",
-        "    indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis] \n",
-        "    X_ordered = tf.gather(X_all,indices) \n",
-        "\n",
-        "    shuffled_indices = np.random.permutation(len(X_ordered)) \n",
-        "    X_random = tf.gather(X_ordered,shuffled_indices)\n",
-        "    y_random = tf.gather(y_ordered, shuffled_indices)\n",
-        "\n",
-        "    # Train Model ###################################################\n",
-        "    model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
-        "\n",
-        "    model.save('models/'+name+'/'+name+'.h5')\n",
-        "\n",
-        "# Run Prediction #################################################\n",
-        "print(\"Running prediction..\")\n",
-        "\n",
-        "# Get the last 20% of the wav data to run prediction and plot results\n",
-        "y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])\n",
-        "x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])\n",
-        "y_test = y_last_part[input_size-1:] \n",
-        "indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis] \n",
-        "X_test = tf.gather(x_last_part,indices) \n",
-        "\n",
-        "prediction = model.predict(X_test, batch_size=batch_size)\n",
-        "\n",
-        "save_wav('models/'+name+'/y_pred.wav', prediction)\n",
-        "save_wav('models/'+name+'/x_test.wav', x_last_part)\n",
-        "save_wav('models/'+name+'/y_test.wav', y_test)\n",
-        "\n",
-        "# Add additional data to the saved model (like input_size)\n",
-        "filename = 'models/'+name+'/'+name+'.h5'\n",
-        "f = h5py.File(filename, 'a')\n",
-        "grp = f.create_group(\"info\")\n",
-        "dset = grp.create_dataset(\"input_size\", (1,), dtype='int16')\n",
-        "dset[0] = input_size\n",
-        "f.close()"
-      ],
-      "execution_count": null,
-      "outputs": []
-    }
-  ]
-}
\ No newline at end of file
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "U22mDBe4jaf2"
+   },
+   "outputs": [],
+   "source": [
+    "# EDIT THIS SECTION FOR USER INPUTS\n",
+    "#\n",
+    "name = 'test'\n",
+    "in_file = 'data/ts9_test1_in_FP32.wav'\n",
+    "out_file = 'data/ts9_test1_out_FP32.wav'\n",
+    "epochs = 1\n",
+    "\n",
+    "train_mode = 0    # 0 = speed training, \n",
+    "                  # 1 = accuracy training \n",
+    "                  # 2 = extended training\n",
+    "\n",
+    "input_size = 150 # !!!IMPORTANT !!!: The input_size is set at 150 for Colab notebook. \n",
+    "                 # A higher setting may result in crashing due to\n",
+    "                 # memory limitation of 8GB for the free version\n",
+    "                 # of Colab. This setting limits the accuracy of\n",
+    "                 # the training, especially for complex guitar signals\n",
+    "                 # such as high distortion.\n",
+    "                 # \n",
+    "                 # !!!IMPORTANT!!!: You will most likely need to cycle the runtime to \n",
+    "                 # free up RAM between training sessions.\n",
+    "                 #\n",
+    "                 # Increase the \"split_data\" parameter to reduce the RAM used and\n",
+    "                 # still allow for a higher \"input_size\" setting. \n",
+    "                 #\n",
+    "                 # Future dev note: Using a custom dataloader may be a good\n",
+    "                 #                  workaround for this limitation, at the cost\n",
+    "                 #                  of slower training.\n",
+    "\n",
+    "if not os.path.exists('models/'+name):\n",
+    "    os.makedirs('models/'+name)\n",
+    "else:\n",
+    "    print(\"A model with the same name already exists. Please choose a new name.\")\n",
+    "    exit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "WqI-cGt1jaG2"
+   },
+   "outputs": [],
+   "source": [
+    "def pre_emphasis_filter(x, coeff=0.95):\n",
+    "    return tf.concat([x, x - coeff * x], 1)\n",
+    "    \n",
+    "def error_to_signal(y_true, y_pred): \n",
+    "    \"\"\"\n",
+    "    Error to signal ratio with pre-emphasis filter:\n",
+    "    \"\"\"\n",
+    "    y_true, y_pred = pre_emphasis_filter(y_true), pre_emphasis_filter(y_pred)\n",
+    "    return K.sum(tf.pow(y_true - y_pred, 2), axis=0) / K.sum(tf.pow(y_true, 2), axis=0) + 1e-10\n",
+    "    \n",
+    "def save_wav(name, data):\n",
+    "    wavfile.write(name, 44100, data.flatten().astype(np.float32))\n",
+    "\n",
+    "def normalize(data):\n",
+    "    data_max = max(data)\n",
+    "    data_min = min(data)\n",
+    "    data_norm = max(data_max,abs(data_min))\n",
+    "    return data / data_norm\n",
+    "\n",
+    "\n",
+    "'''This is a similar Tensorflow/Keras implementation of the LSTM model from the paper:\n",
+    "    \"Real-Time Guitar Amplifier Emulation with Deep Learning\"\n",
+    "    https://www.mdpi.com/2076-3417/10/3/766/htm\n",
+    "\n",
+    "    Uses a stack of two 1-D Convolutional layers, followed by LSTM, followed by \n",
+    "    a Dense (fully connected) layer. Three preset training modes are available, \n",
+    "    with further customization by editing the code. A Sequential tf.keras model \n",
+    "    is implemented here.\n",
+    "\n",
+    "    Note: RAM may be a limiting factor for the parameter \"input_size\". The wav data\n",
+    "    is preprocessed and stored in RAM, which improves training speed but quickly runs out\n",
+    "    if using a large number for \"input_size\". Reduce this if you are experiencing\n",
+    "    RAM issues. \n",
+    "    \n",
+    "    --training_mode=0  Speed training (default)\n",
+    "    --training_mode=1  Accuracy training\n",
+    "    --training_mode=2  Extended training (set max_epochs as desired, for example 50+)\n",
+    "'''\n",
+    "\n",
+    "batch_size = 4096 \n",
+    "test_size = 0.2\n",
+    "\n",
+    "if train_mode == 0:         # Speed Training\n",
+    "    learning_rate = 0.01 \n",
+    "    conv1d_strides = 12 \n",
+    "    conv1d_filters = 16\n",
+    "    hidden_units = 36\n",
+    "elif train_mode == 1:       # Accuracy Training (~10x longer than Speed Training)\n",
+    "    learning_rate = 0.01 \n",
+    "    conv1d_strides = 4\n",
+    "    conv1d_filters = 36\n",
+    "    hidden_units= 64\n",
+    "else:                       # Extended Training (~60x longer than Accuracy Training)\n",
+    "    learning_rate = 0.0005 \n",
+    "    conv1d_strides = 3\n",
+    "    conv1d_filters = 36\n",
+    "    hidden_units= 96\n",
+    "\n",
+    "\n",
+    "# Create Sequential Model ###########################################\n",
+    "clear_session()\n",
+    "model = Sequential()\n",
+    "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same',input_shape=(input_size,1)))\n",
+    "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same'))\n",
+    "model.add(LSTM(hidden_units))\n",
+    "model.add(Dense(1, activation=None))\n",
+    "model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
+    "model.summary()\n",
+    "\n",
+    "# Load and Preprocess Data ###########################################\n",
+    "in_rate, in_data = wavfile.read(in_file)\n",
+    "out_rate, out_data = wavfile.read(out_file)\n",
+    "\n",
+    "X_all = in_data.astype(np.float32).flatten() \n",
+    "X_all = normalize(X_all).reshape(len(X_all),1) \n",
+    "y_all = out_data.astype(np.float32).flatten() \n",
+    "y_all = normalize(y_all).reshape(len(y_all),1)\n",
+    "\n",
+    "y_ordered = y_all[input_size-1:] \n",
+    "indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis] \n",
+    "x_ordered = np.take(X_all, indices)[:,:, np.newaxis]\n",
+    "\n",
+    "# Train Model ###################################################\n",
+    "model.fit(x_ordered,y_ordered, epochs=epochs, batch_size=batch_size, validation_split=test_size, shuffle=True) \n",
+    "model.save('models/'+name+'/'+name+'.h5')\n",
+    "\n",
+    "# Run Prediction #################################################\n",
+    "print(\"Running prediction..\")\n",
+    "\n",
+    "# Get the last 20% of the wav data to run prediction and plot results\n",
+    "y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])\n",
+    "x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])\n",
+    "y_test = y_last_part[input_size-1:] \n",
+    "indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis] \n",
+    "X_test = np.take(x_last_part,indices)[:, :, np.newaxis]\n",
+    "\n",
+    "prediction = model.predict(X_test, batch_size=batch_size)\n",
+    "\n",
+    "save_wav('models/'+name+'/y_pred.wav', prediction)\n",
+    "save_wav('models/'+name+'/x_test.wav', x_last_part)\n",
+    "save_wav('models/'+name+'/y_test.wav', y_test)\n",
+    "\n",
+    "# Add additional data to the saved model (like input_size)\n",
+    "filename = 'models/'+name+'/'+name+'.h5'\n",
+    "f = h5py.File(filename, 'a')\n",
+    "grp = f.create_group(\"info\")\n",
+    "dset = grp.create_dataset(\"input_size\", (1,), dtype='int16')\n",
+    "dset[0] = input_size\n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import IPython.display as ipd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ipd.Audio('models/'+name+'/y_pred.wav')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "collapsed_sections": [],
+   "name": "guitar_lstm_colab.ipynb",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}