commit a790c8736525f942abf5487357514447a090a4f3
parent bc0faebae54a13352148a6a1d69171b9f70cd3c3
Author: jmiller656 <joshxmiller656@gmail.com>
Date: Sat, 30 Jan 2021 23:46:42 -0500
Fix GPU OOM issues
Diffstat:
1 file changed, 254 insertions(+), 257 deletions(-)
diff --git a/guitar_lstm_colab.ipynb b/guitar_lstm_colab.ipynb
@@ -1,258 +1,256 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "name": "guitar_lstm_colab.ipynb",
- "provenance": [],
- "collapsed_sections": []
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "accelerator": "GPU"
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "RF2uyPfxgi8H"
+ },
+ "outputs": [],
+ "source": [
+ "# TO USE: \n",
+ "# 1. Upload your input and output wav files to the current directory in Colab\n",
+ "# 2. Edit the USER INPUTS section to point to your wav files, and choose a\n",
+ "# model name, and number of epochs for training. If you experience \n",
+ "# crashing due to low RAM, reduce the \"input_size\" parameter. (The\n",
+ "# \"split_data\" parameter is no longer available.)\n",
+ "# 3. Run each section of code. The trained models and output wav files will be \n",
+ "# added to the \"models\" directory.\n",
+ "#\n",
+ "# Note: Tested on CPU and GPU runtimes.\n",
+ "\n",
+ "import tensorflow as tf\n",
+ "from tensorflow.keras import Sequential\n",
+ "from tensorflow.keras.layers import LSTM, Conv1D, Dense\n",
+ "from tensorflow.keras.optimizers import Adam\n",
+ "from tensorflow.keras.backend import clear_session\n",
+ "from tensorflow.keras.activations import tanh, elu, relu\n",
+ "from tensorflow.keras.models import load_model\n",
+ "import tensorflow.keras.backend as K\n",
+ "from tensorflow.keras.utils import Sequence\n",
+ "\n",
+ "import os\n",
+ "from scipy import signal\n",
+ "from scipy.io import wavfile\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import math\n",
+ "import h5py"
+ ]
},
- "cells": [
- {
- "cell_type": "code",
- "metadata": {
- "id": "RF2uyPfxgi8H"
- },
- "source": [
- "# TO USE: \n",
- "# 1. Upload your input and output wav files to the current directory in Colab\n",
- "# 2. Edit the USER INPUTS section to point to your wav files, and choose a\n",
- "# model name, and number of epochs for training. If you experience \n",
- "# crashing due to low RAM, reduce the \"input_size\" parameter, or increase\n",
- "# the \"split_data\" parameter.\n",
- "# 3. Run each section of code. The trained models and output wav files will be \n",
- "# added to the \"models\" directory.\n",
- "#\n",
- "# Note: Tested on CPU and GPU runtimes.\n",
- "\n",
- "import tensorflow as tf\n",
- "from tensorflow.keras import Sequential\n",
- "from tensorflow.keras.layers import LSTM, Conv1D, Dense\n",
- "from tensorflow.keras.optimizers import Adam\n",
- "from tensorflow.keras.backend import clear_session\n",
- "from tensorflow.keras.activations import tanh, elu, relu\n",
- "from tensorflow.keras.models import load_model\n",
- "import tensorflow.keras.backend as K\n",
- "from tensorflow.keras.utils import Sequence\n",
- "\n",
- "import os\n",
- "from scipy import signal\n",
- "from scipy.io import wavfile\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "import math\n",
- "import h5py\n",
- "\n"
- ],
- "execution_count": 1,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "U22mDBe4jaf2"
- },
- "source": [
- "# EDIT THIS SECTION FOR USER INPUTS\n",
- "#\n",
- "name = 'test'\n",
- "in_file = 'ts9_test1_in_FP32.wav'\n",
- "out_file = 'ts9_test1_out_FP32.wav'\n",
- "epochs = 1\n",
- "split_data=4 # **Increase this to reduce RAM usage **\n",
- "\n",
- "train_mode = 0 # 0 = speed training, \n",
- " # 1 = accuracy training \n",
- " # 2 = extended training\n",
- "\n",
- "input_size = 150 # !!!IMPORTANT !!!: The input_size is set at 150 for Colab notebook. \n",
- " # A higher setting may result in crashing due to\n",
- " # memory limitation of 8GB for the free version\n",
- " # of Colab. This setting limits the accuracy of\n",
- " # the training, especially for complex guitar signals\n",
- " # such as high distortion.\n",
- " # \n",
- " # !!!IMPORTANT!!!: You will most likely need to cycle the runtime to \n",
- " # free up RAM between training sessions.\n",
- " #\n",
- " # Increase the \"split_data\" parameter to reduce the RAM used and\n",
- " # still allow for a higher \"input_size\" setting. \n",
- " #\n",
- " # Future dev note: Using a custom dataloader may be a good\n",
- " # workaround for this limitation, at the cost\n",
- " # of slower training.\n",
- "\n",
- "if not os.path.exists('models/'+name):\n",
- " os.makedirs('models/'+name)\n",
- "else:\n",
- " print(\"A model with the same name already exists. Please choose a new name.\")\n",
- " exit\n"
- ],
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "metadata": {
- "id": "WqI-cGt1jaG2"
- },
- "source": [
- "\n",
- "def pre_emphasis_filter(x, coeff=0.95):\n",
- " return tf.concat([x, x - coeff * x], 1)\n",
- " \n",
- "def error_to_signal(y_true, y_pred): \n",
- " \"\"\"\n",
- " Error to signal ratio with pre-emphasis filter:\n",
- " \"\"\"\n",
- " y_true, y_pred = pre_emphasis_filter(y_true), pre_emphasis_filter(y_pred)\n",
- " return K.sum(tf.pow(y_true - y_pred, 2), axis=0) / K.sum(tf.pow(y_true, 2), axis=0) + 1e-10\n",
- " \n",
- "def save_wav(name, data):\n",
- " wavfile.write(name, 44100, data.flatten().astype(np.float32))\n",
- "\n",
- "def normalize(data):\n",
- " data_max = max(data)\n",
- " data_min = min(data)\n",
- " data_norm = max(data_max,abs(data_min))\n",
- " return data / data_norm\n",
- "\n",
- "\n",
- "'''This is a similar Tensorflow/Keras implementation of the LSTM model from the paper:\n",
- " \"Real-Time Guitar Amplifier Emulation with Deep Learning\"\n",
- " https://www.mdpi.com/2076-3417/10/3/766/htm\n",
- "\n",
- " Uses a stack of two 1-D Convolutional layers, followed by LSTM, followed by \n",
- " a Dense (fully connected) layer. Three preset training modes are available, \n",
- " with further customization by editing the code. A Sequential tf.keras model \n",
- " is implemented here.\n",
- "\n",
- " Note: RAM may be a limiting factor for the parameter \"input_size\". The wav data\n",
- " is preprocessed and stored in RAM, which improves training speed but quickly runs out\n",
- " if using a large number for \"input_size\". Reduce this if you are experiencing\n",
- " RAM issues. \n",
- " \n",
- " --training_mode=0 Speed training (default)\n",
- " --training_mode=1 Accuracy training\n",
- " --training_mode=2 Extended training (set max_epochs as desired, for example 50+)\n",
- "'''\n",
- "\n",
- "batch_size = 4096 \n",
- "test_size = 0.2\n",
- "\n",
- "if train_mode == 0: # Speed Training\n",
- " learning_rate = 0.01 \n",
- " conv1d_strides = 12 \n",
- " conv1d_filters = 16\n",
- " hidden_units = 36\n",
- "elif train_mode == 1: # Accuracy Training (~10x longer than Speed Training)\n",
- " learning_rate = 0.01 \n",
- " conv1d_strides = 4\n",
- " conv1d_filters = 36\n",
- " hidden_units= 64\n",
- "else: # Extended Training (~60x longer than Accuracy Training)\n",
- " learning_rate = 0.0005 \n",
- " conv1d_strides = 3\n",
- " conv1d_filters = 36\n",
- " hidden_units= 96\n",
- "\n",
- "\n",
- "# Create Sequential Model ###########################################\n",
- "clear_session()\n",
- "model = Sequential()\n",
- "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same',input_shape=(input_size,1)))\n",
- "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same'))\n",
- "model.add(LSTM(hidden_units))\n",
- "model.add(Dense(1, activation=None))\n",
- "model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
- "print(model.summary())\n",
- "\n",
- "# Load and Preprocess Data ###########################################\n",
- "in_rate, in_data = wavfile.read(in_file)\n",
- "out_rate, out_data = wavfile.read(out_file)\n",
- "\n",
- "X_all = in_data.astype(np.float32).flatten() \n",
- "X_all = normalize(X_all).reshape(len(X_all),1) \n",
- "y_all = out_data.astype(np.float32).flatten() \n",
- "y_all = normalize(y_all).reshape(len(y_all),1) \n",
- "\n",
- "# If splitting the data for training, do this part\n",
- "if split_data > 1:\n",
- " num_split = len(X_all) // split_data\n",
- " X = X_all[0:num_split*split_data]\n",
- " y = y_all[0:num_split*split_data]\n",
- " X_data = np.split(X, split_data)\n",
- " y_data = np.split(y, split_data)\n",
- "\n",
- " # Perform training on each split dataset\n",
- " for i in range(len(X_data)):\n",
- " print(\"\\nTraining on split data \" + str(i+1) + \"/\" +str(len(X_data)))\n",
- " X_split = X_data[i]\n",
- " y_split = y_data[i]\n",
- "\n",
- " y_ordered = y_split[input_size-1:] \n",
- "\n",
- " indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis] \n",
- " X_ordered = tf.gather(X_split,indices) \n",
- "\n",
- " shuffled_indices = np.random.permutation(len(X_ordered)) \n",
- " X_random = tf.gather(X_ordered,shuffled_indices)\n",
- " y_random = tf.gather(y_ordered, shuffled_indices)\n",
- "\n",
- " # Train Model ###################################################\n",
- " model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2) \n",
- "\n",
- "\n",
- " model.save('models/'+name+'/'+name+'.h5')\n",
- "\n",
- "# If training on the full set of input data in one run, do this part\n",
- "else:\n",
- " y_ordered = y_all[input_size-1:] \n",
- "\n",
- " indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis] \n",
- " X_ordered = tf.gather(X_all,indices) \n",
- "\n",
- " shuffled_indices = np.random.permutation(len(X_ordered)) \n",
- " X_random = tf.gather(X_ordered,shuffled_indices)\n",
- " y_random = tf.gather(y_ordered, shuffled_indices)\n",
- "\n",
- " # Train Model ###################################################\n",
- " model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
- "\n",
- " model.save('models/'+name+'/'+name+'.h5')\n",
- "\n",
- "# Run Prediction #################################################\n",
- "print(\"Running prediction..\")\n",
- "\n",
- "# Get the last 20% of the wav data to run prediction and plot results\n",
- "y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])\n",
- "x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])\n",
- "y_test = y_last_part[input_size-1:] \n",
- "indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis] \n",
- "X_test = tf.gather(x_last_part,indices) \n",
- "\n",
- "prediction = model.predict(X_test, batch_size=batch_size)\n",
- "\n",
- "save_wav('models/'+name+'/y_pred.wav', prediction)\n",
- "save_wav('models/'+name+'/x_test.wav', x_last_part)\n",
- "save_wav('models/'+name+'/y_test.wav', y_test)\n",
- "\n",
- "# Add additional data to the saved model (like input_size)\n",
- "filename = 'models/'+name+'/'+name+'.h5'\n",
- "f = h5py.File(filename, 'a')\n",
- "grp = f.create_group(\"info\")\n",
- "dset = grp.create_dataset(\"input_size\", (1,), dtype='int16')\n",
- "dset[0] = input_size\n",
- "f.close()"
- ],
- "execution_count": null,
- "outputs": []
- }
- ]
-}
-\ No newline at end of file
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "U22mDBe4jaf2"
+ },
+ "outputs": [],
+ "source": [
+ "# EDIT THIS SECTION FOR USER INPUTS\n",
+ "#\n",
+ "name = 'test'\n",
+ "in_file = 'data/ts9_test1_in_FP32.wav'\n",
+ "out_file = 'data/ts9_test1_out_FP32.wav'\n",
+ "epochs = 1\n",
+ "\n",
+ "train_mode = 0 # 0 = speed training, \n",
+ " # 1 = accuracy training \n",
+ " # 2 = extended training\n",
+ "\n",
+ "input_size = 150 # !!!IMPORTANT !!!: The input_size is set at 150 for Colab notebook. \n",
+ " # A higher setting may result in crashing due to\n",
+ " # memory limitation of 8GB for the free version\n",
+ " # of Colab. This setting limits the accuracy of\n",
+ " # the training, especially for complex guitar signals\n",
+ " # such as high distortion.\n",
+ " # \n",
+ " # !!!IMPORTANT!!!: You will most likely need to cycle the runtime to \n",
+ " # free up RAM between training sessions.\n",
+ " #\n",
+ " # Note: the \"split_data\" parameter has been removed; lower the\n",
+ " # \"input_size\" setting if training runs out of RAM. \n",
+ " #\n",
+ " # Future dev note: Using a custom dataloader may be a good\n",
+ " # workaround for this limitation, at the cost\n",
+ " # of slower training.\n",
+ "\n",
+ "if not os.path.exists('models/'+name):\n",
+ " os.makedirs('models/'+name)\n",
+ "else:\n",
+ " print(\"A model with the same name already exists. Please choose a new name.\")\n",
+ " exit"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "WqI-cGt1jaG2"
+ },
+ "outputs": [],
+ "source": [
+ "def pre_emphasis_filter(x, coeff=0.95):\n",
+ " return tf.concat([x, x - coeff * x], 1)\n",
+ " \n",
+ "def error_to_signal(y_true, y_pred): \n",
+ " \"\"\"\n",
+ " Error to signal ratio with pre-emphasis filter:\n",
+ " \"\"\"\n",
+ " y_true, y_pred = pre_emphasis_filter(y_true), pre_emphasis_filter(y_pred)\n",
+ " return K.sum(tf.pow(y_true - y_pred, 2), axis=0) / K.sum(tf.pow(y_true, 2), axis=0) + 1e-10\n",
+ " \n",
+ "def save_wav(name, data):\n",
+ " wavfile.write(name, 44100, data.flatten().astype(np.float32))\n",
+ "\n",
+ "def normalize(data):\n",
+ " data_max = max(data)\n",
+ " data_min = min(data)\n",
+ " data_norm = max(data_max,abs(data_min))\n",
+ " return data / data_norm\n",
+ "\n",
+ "\n",
+ "'''This is a similar Tensorflow/Keras implementation of the LSTM model from the paper:\n",
+ " \"Real-Time Guitar Amplifier Emulation with Deep Learning\"\n",
+ " https://www.mdpi.com/2076-3417/10/3/766/htm\n",
+ "\n",
+ " Uses a stack of two 1-D Convolutional layers, followed by LSTM, followed by \n",
+ " a Dense (fully connected) layer. Three preset training modes are available, \n",
+ " with further customization by editing the code. A Sequential tf.keras model \n",
+ " is implemented here.\n",
+ "\n",
+ " Note: RAM may be a limiting factor for the parameter \"input_size\". The wav data\n",
+ " is preprocessed and stored in RAM, which improves training speed but quickly runs out\n",
+ " if using a large number for \"input_size\". Reduce this if you are experiencing\n",
+ " RAM issues. \n",
+ " \n",
+ " train_mode = 0 Speed training (default)\n",
+ " train_mode = 1 Accuracy training\n",
+ " train_mode = 2 Extended training (set epochs as desired, for example 50+)\n",
+ "'''\n",
+ "\n",
+ "batch_size = 4096 \n",
+ "test_size = 0.2\n",
+ "\n",
+ "if train_mode == 0: # Speed Training\n",
+ " learning_rate = 0.01 \n",
+ " conv1d_strides = 12 \n",
+ " conv1d_filters = 16\n",
+ " hidden_units = 36\n",
+ "elif train_mode == 1: # Accuracy Training (~10x longer than Speed Training)\n",
+ " learning_rate = 0.01 \n",
+ " conv1d_strides = 4\n",
+ " conv1d_filters = 36\n",
+ " hidden_units= 64\n",
+ "else: # Extended Training (~60x longer than Accuracy Training)\n",
+ " learning_rate = 0.0005 \n",
+ " conv1d_strides = 3\n",
+ " conv1d_filters = 36\n",
+ " hidden_units= 96\n",
+ "\n",
+ "\n",
+ "# Create Sequential Model ###########################################\n",
+ "clear_session()\n",
+ "model = Sequential()\n",
+ "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same',input_shape=(input_size,1)))\n",
+ "model.add(Conv1D(conv1d_filters, 12,strides=conv1d_strides, activation=None, padding='same'))\n",
+ "model.add(LSTM(hidden_units))\n",
+ "model.add(Dense(1, activation=None))\n",
+ "model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
+ "model.summary()\n",
+ "\n",
+ "# Load and Preprocess Data ###########################################\n",
+ "in_rate, in_data = wavfile.read(in_file)\n",
+ "out_rate, out_data = wavfile.read(out_file)\n",
+ "\n",
+ "X_all = in_data.astype(np.float32).flatten() \n",
+ "X_all = normalize(X_all).reshape(len(X_all),1) \n",
+ "y_all = out_data.astype(np.float32).flatten() \n",
+ "y_all = normalize(y_all).reshape(len(y_all),1)\n",
+ "\n",
+ "y_ordered = y_all[input_size-1:] \n",
+ "indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis] \n",
+ "x_ordered = np.take(X_all, indices)[:,:, np.newaxis]\n",
+ "\n",
+ "# Train Model ###################################################\n",
+ "model.fit(x_ordered,y_ordered, epochs=epochs, batch_size=batch_size, validation_split=test_size, shuffle=True) \n",
+ "model.save('models/'+name+'/'+name+'.h5')\n",
+ "\n",
+ "# Run Prediction #################################################\n",
+ "print(\"Running prediction..\")\n",
+ "\n",
+ "# Get the last 20% of the wav data to run prediction and save the results as wav files\n",
+ "y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])\n",
+ "x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])\n",
+ "y_test = y_last_part[input_size-1:] \n",
+ "indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis] \n",
+ "X_test = np.take(x_last_part,indices)[:, :, np.newaxis]\n",
+ "\n",
+ "prediction = model.predict(X_test, batch_size=batch_size)\n",
+ "\n",
+ "save_wav('models/'+name+'/y_pred.wav', prediction)\n",
+ "save_wav('models/'+name+'/x_test.wav', x_last_part)\n",
+ "save_wav('models/'+name+'/y_test.wav', y_test)\n",
+ "\n",
+ "# Add additional data to the saved model (like input_size)\n",
+ "filename = 'models/'+name+'/'+name+'.h5'\n",
+ "f = h5py.File(filename, 'a')\n",
+ "grp = f.create_group(\"info\")\n",
+ "dset = grp.create_dataset(\"input_size\", (1,), dtype='int16')\n",
+ "dset[0] = input_size\n",
+ "f.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import IPython.display as ipd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ipd.Audio('models/'+name+'/y_pred.wav')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "collapsed_sections": [],
+ "name": "guitar_lstm_colab.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}