GuitarLSTM

Deep learning models for guitar amp/pedal emulation using LSTM with Keras

commit 3c1dc9e379241c0f43369f2e60e88ffa0653df94
parent 5fd1fa74b9f07c526d6e7ad0b0ef4e96c49602d3
Author: Keith Bloemer <32459398+GuitarML@users.noreply.github.com>
Date:   Sun,  6 Dec 2020 14:44:34 -0600

Merge pull request #4 from GuitarML/feature-add-split-data-arg

Feature add split data arg
Diffstat:
 M README.md               |   8 ++++++++
 M guitar_lstm_colab.ipynb | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
 M train.py                |  93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
3 files changed, 142 insertions(+), 63 deletions(-)

diff --git a/README.md b/README.md
@@ -54,6 +54,7 @@ python predict.py data/ts9_test1_in_FP32.wav output models/ts9_model.h5
 --training_mode=0   # enter 0, 1, or 2 for speed training, accuracy training, or extended training, respectively
 --input_size=150    # sets the number of previous samples to consider for each output sample of audio
+--split_data=3      # splits the input data by X amount to reduce RAM usage; trains the model on each split separately
 --max_epochs=1      # sets the number of epochs to train for; intended to be increased dramatically for extended training
 --batch_size=4096   # sets the batch size of data for training
@@ -93,6 +94,13 @@ which requires about 8GB of RAM.
 Increasing this setting will improve training accuracy, but
 the size of the preprocessed wav data in RAM will increase as well.
 
+You can also use the "--split_data" parameter with train.py to
+train the same model on separate sections of the data. This
+will reduce RAM usage while still allowing a high input_size
+setting. For example, "--split_data=5" would split the data
+into 5 sections, and train each section separately. The default
+is 1, or no splitting.
+
 Adding a custom dataloader would reduce RAM usage at the cost
 of training speed, and will be a focus of future work.
diff --git a/guitar_lstm_colab.ipynb b/guitar_lstm_colab.ipynb
@@ -24,7 +24,8 @@
     "# 1. Upload your input and output wav files to the current directory in Colab\n",
     "# 2. Edit the USER INPUTS section to point to your wav files, and choose a\n",
     "#    model name, and number of epochs for training. If you experience \n",
-    "#    crashing due to low RAM, reduce the \"input_size\" parameter.\n",
+    "#    crashing due to low RAM, reduce the \"input_size\" parameter, or increase\n",
+    "#    the \"split_data\" parameter.\n",
     "# 3. Run each section of code. The trained models and output wav files will be \n",
     "#    added to the \"models\" directory.\n",
     "#\n",
@@ -49,7 +50,7 @@
     "import h5py\n",
     "\n"
    ],
-   "execution_count": null,
+   "execution_count": 1,
    "outputs": []
   },
   {
@@ -64,14 +65,14 @@
     "in_file = 'ts9_test1_in_FP32.wav'\n",
     "out_file = 'ts9_test1_out_FP32.wav'\n",
     "epochs = 1\n",
-    "\n",
+    "split_data = 4     # **Increase this to reduce RAM usage**\n",
     "\n",
     "train_mode = 0     # 0 = speed training, \n",
     "                   # 1 = accuracy training \n",
     "                   # 2 = extended training\n",
     "\n",
-    "input_size = 75    # !!!IMPORTANT!!!: The input_size is set at 75 for the Colab notebook. \n",
-    "                   #   a higher setting may result in crashing due to\n",
+    "input_size = 150   # !!!IMPORTANT!!!: The input_size is set at 150 for the Colab notebook. \n",
+    "                   #   A higher setting may result in crashing due to\n",
     "                   #   memory limitation of 8GB for the free version\n",
     "                   #   of Colab. This setting limits the accuracy of\n",
     "                   #   the training, especially for complex guitar signals\n",
@@ -80,6 +81,9 @@
     "                   # !!!IMPORTANT!!!: You will most likely need to cycle the runtime to \n",
     "                   #    free up RAM between training sessions.\n",
     "                   #\n",
+    "                   # Increase the \"split_data\" parameter to reduce the RAM used and\n",
+    "                   #    still allow for a higher \"input_size\" setting.\n",
+    "                   #\n",
     "                   # Future dev note: Using a custom dataloader may be a good\n",
     "                   #                  workaround for this limitation, at the cost\n",
     "                   #                  of slower training.\n",
@@ -159,24 +163,6 @@
     "    hidden_units= 96\n",
     "\n",
     "\n",
-    "# Load and Preprocess Data ###########################################\n",
-    "in_rate, in_data = wavfile.read(in_file)\n",
-    "out_rate, out_data = wavfile.read(out_file)\n",
-    "\n",
-    "X = in_data.astype(np.float32).flatten()  \n",
-    "X = normalize(X).reshape(len(X),1)   \n",
-    "y = out_data.astype(np.float32).flatten() \n",
-    "y = normalize(y).reshape(len(y),1)   \n",
-    "\n",
-    "y_ordered = y[input_size-1:] \n",
-    "\n",
-    "indices = np.arange(input_size) + np.arange(len(X)-input_size+1)[:,np.newaxis] \n",
-    "X_ordered = tf.gather(X,indices) \n",
-    "\n",
-    "shuffled_indices = np.random.permutation(len(X_ordered)) \n",
-    "X_random = tf.gather(X_ordered,shuffled_indices)\n",
-    "y_random = tf.gather(y_ordered, shuffled_indices)\n",
-    "\n",
     "# Create Sequential Model ###########################################\n",
     "clear_session()\n",
     "model = Sequential()\n",
@@ -187,27 +173,75 @@
     "model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
     "print(model.summary())\n",
     "\n",
-    "# Train Model ###################################################\n",
-    "model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
+    "# Load and Preprocess Data ###########################################\n",
+    "in_rate, in_data = wavfile.read(in_file)\n",
+    "out_rate, out_data = wavfile.read(out_file)\n",
+    "\n",
+    "X_all = in_data.astype(np.float32).flatten()  \n",
+    "X_all = normalize(X_all).reshape(len(X_all),1)   \n",
+    "y_all = out_data.astype(np.float32).flatten() \n",
+    "y_all = normalize(y_all).reshape(len(y_all),1)   \n",
+    "\n",
+    "# If splitting the data for training, do this part\n",
+    "if split_data > 1:\n",
+    "    num_split = len(X_all) // split_data\n",
+    "    X = X_all[0:num_split*split_data]\n",
+    "    y = y_all[0:num_split*split_data]\n",
+    "    X_data = np.split(X, split_data)\n",
+    "    y_data = np.split(y, split_data)\n",
+    "\n",
+    "    # Perform training on each split dataset\n",
+    "    for i in range(len(X_data)):\n",
+    "        print(\"\\nTraining on split data \" + str(i+1) + \"/\" +str(len(X_data)))\n",
+    "        X_split = X_data[i]\n",
+    "        y_split = y_data[i]\n",
+    "\n",
+    "        y_ordered = y_split[input_size-1:] \n",
+    "\n",
+    "        indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis] \n",
+    "        X_ordered = tf.gather(X_split,indices) \n",
+    "\n",
+    "        shuffled_indices = np.random.permutation(len(X_ordered)) \n",
+    "        X_random = tf.gather(X_ordered,shuffled_indices)\n",
+    "        y_random = tf.gather(y_ordered, shuffled_indices)\n",
+    "\n",
+    "        # Train Model ###################################################\n",
+    "        model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2) \n",
     "\n",
-    "model.save('models/'+name+'/'+name+'.h5')\n",
     "\n",
-    "#model.save('model_data/')\n",
-    "#model = load_model('new_model_'+name+'.h5', custom_objects={'error_to_signal' : error_to_signal})\n",
-    "#learning_rate = 0.005\n",
-    "#model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])\n",
+    "    model.save('models/'+name+'/'+name+'.h5')\n",
+    "\n",
+    "# If training on the full set of input data in one run, do this part\n",
+    "else:\n",
+    "    y_ordered = y_all[input_size-1:] \n",
+    "\n",
+    "    indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis] \n",
+    "    X_ordered = tf.gather(X_all,indices) \n",
+    "\n",
+    "    shuffled_indices = np.random.permutation(len(X_ordered)) \n",
+    "    X_random = tf.gather(X_ordered,shuffled_indices)\n",
+    "    y_random = tf.gather(y_ordered, shuffled_indices)\n",
+    "\n",
+    "    # Train Model ###################################################\n",
+    "    model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size) \n",
+    "\n",
+    "    model.save('models/'+name+'/'+name+'.h5')\n",
     "\n",
     "# Run Prediction #################################################\n",
     "print(\"Running prediction..\")\n",
-    "y_the_rest, y_last_part = np.split(y_ordered, [int(len(y_ordered)*.8)])\n",
-    "x_the_rest, x_last_part = np.split(X, [int(len(X)*.8)])\n",
     "\n",
-    "x_the_rest, x_ordered_last_part = np.split(X_ordered, [int(len(X_ordered)*.8)])\n",
-    "prediction = model.predict(x_ordered_last_part, batch_size=batch_size)\n",
+    "# Get the last 20% of the wav data to run prediction and plot results\n",
+    "y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])\n",
+    "x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])\n",
+    "y_test = y_last_part[input_size-1:] \n",
+    "indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis] \n",
+    "X_test = tf.gather(x_last_part,indices) \n",
+    "\n",
+    "prediction = model.predict(X_test, batch_size=batch_size)\n",
     "\n",
     "save_wav('models/'+name+'/y_pred.wav', prediction)\n",
     "save_wav('models/'+name+'/x_test.wav', x_last_part)\n",
-    "save_wav('models/'+name+'/y_test.wav', y_last_part)\n",
+    "save_wav('models/'+name+'/y_test.wav', y_test)\n",
     "\n",
     "# Add additional data to the saved model (like input_size)\n",
     "filename = 'models/'+name+'/'+name+'.h5'\n",
diff --git a/train.py b/train.py
@@ -50,7 +50,9 @@ def main(args):
     Note: RAM may be a limiting factor for the parameter "input_size". The wav data
       is preprocessed and stored in RAM, which improves training speed but quickly runs out
       if using a large number for "input_size". Reduce this if you are experiencing
-      RAM issues.
+      RAM issues. Also, you can use the "--split_data" option to divide the data by the
+      specified amount and train the model on each set. Doing this will allow for a higher
+      input_size setting (more accurate results).
 
     --training_mode=0   Speed training (default)
     --training_mode=1   Accuracy training
@@ -89,25 +91,6 @@ def main(args):
         conv1d_filters = 36
         hidden_units= 96
 
-
-    # Load and Preprocess Data ###########################################
-    in_rate, in_data = wavfile.read(args.in_file)
-    out_rate, out_data = wavfile.read(args.out_file)
-
-    X = in_data.astype(np.float32).flatten()
-    X = normalize(X).reshape(len(X),1)
-    y = out_data.astype(np.float32).flatten()
-    y = normalize(y).reshape(len(y),1)
-
-    y_ordered = y[input_size-1:]
-
-    indices = np.arange(input_size) + np.arange(len(X)-input_size+1)[:,np.newaxis]
-    X_ordered = tf.gather(X,indices)
-
-    shuffled_indices = np.random.permutation(len(X_ordered))
-    X_random = tf.gather(X_ordered,shuffled_indices)
-    y_random = tf.gather(y_ordered, shuffled_indices)
-
     # Create Sequential Model ###########################################
     clear_session()
     model = Sequential()
@@ -118,22 +101,75 @@ def main(args):
     model.compile(optimizer=Adam(learning_rate=learning_rate), loss=error_to_signal, metrics=[error_to_signal])
     print(model.summary())
 
-    # Train Model ###################################################
-    model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size)
+    # Load and Preprocess Data ###########################################
+    in_rate, in_data = wavfile.read(args.in_file)
+    out_rate, out_data = wavfile.read(args.out_file)
+
+    X_all = in_data.astype(np.float32).flatten()
+    X_all = normalize(X_all).reshape(len(X_all),1)
+    y_all = out_data.astype(np.float32).flatten()
+    y_all = normalize(y_all).reshape(len(y_all),1)
 
-    model.save('models/'+name+'/'+name+'.h5')
+    # If splitting the data for training, do this part
+    if args.split_data > 1:
+        num_split = len(X_all) // args.split_data
+        X = X_all[0:num_split*args.split_data]
+        y = y_all[0:num_split*args.split_data]
+        X_data = np.split(X, args.split_data)
+        y_data = np.split(y, args.split_data)
+
+        # Perform training on each split dataset
+        for i in range(len(X_data)):
+            print("\nTraining on split data " + str(i+1) + "/" +str(len(X_data)))
+            X_split = X_data[i]
+            y_split = y_data[i]
+
+            y_ordered = y_split[input_size-1:]
+
+            indices = np.arange(input_size) + np.arange(len(X_split)-input_size+1)[:,np.newaxis]
+            X_ordered = tf.gather(X_split,indices)
+
+            shuffled_indices = np.random.permutation(len(X_ordered))
+            X_random = tf.gather(X_ordered,shuffled_indices)
+            y_random = tf.gather(y_ordered, shuffled_indices)
+
+            # Train Model ###################################################
+            model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=0.2)
+
+
+        model.save('models/'+name+'/'+name+'.h5')
+
+    # If training on the full set of input data in one run, do this part
+    else:
+        y_ordered = y_all[input_size-1:]
+
+        indices = np.arange(input_size) + np.arange(len(X_all)-input_size+1)[:,np.newaxis]
+        X_ordered = tf.gather(X_all,indices)
+
+        shuffled_indices = np.random.permutation(len(X_ordered))
+        X_random = tf.gather(X_ordered,shuffled_indices)
+        y_random = tf.gather(y_ordered, shuffled_indices)
+
+        # Train Model ###################################################
+        model.fit(X_random,y_random, epochs=epochs, batch_size=batch_size, validation_split=test_size)
+
+        model.save('models/'+name+'/'+name+'.h5')
 
     # Run Prediction #################################################
     print("Running prediction..")
-    y_the_rest, y_last_part = np.split(y_ordered, [int(len(y_ordered)*.8)])
-    x_the_rest, x_last_part = np.split(X, [int(len(X)*.8)])
 
-    x_the_rest, x_ordered_last_part = np.split(X_ordered, [int(len(X_ordered)*.8)])
-    prediction = model.predict(x_ordered_last_part, batch_size=batch_size)
+    # Get the last 20% of the wav data to run prediction and plot results
+    y_the_rest, y_last_part = np.split(y_all, [int(len(y_all)*.8)])
+    x_the_rest, x_last_part = np.split(X_all, [int(len(X_all)*.8)])
+    y_test = y_last_part[input_size-1:]
+    indices = np.arange(input_size) + np.arange(len(x_last_part)-input_size+1)[:,np.newaxis]
+    X_test = tf.gather(x_last_part,indices)
+
+    prediction = model.predict(X_test, batch_size=batch_size)
 
     save_wav('models/'+name+'/y_pred.wav', prediction)
     save_wav('models/'+name+'/x_test.wav', x_last_part)
-    save_wav('models/'+name+'/y_test.wav', y_last_part)
+    save_wav('models/'+name+'/y_test.wav', y_test)
 
     # Add additional data to the saved model (like input_size)
     filename = 'models/'+name+'/'+name+'.h5'
@@ -166,5 +202,6 @@ if __name__ == "__main__":
     parser.add_argument("--max_epochs", type=int, default=1)
    parser.add_argument("--create_plots", type=int, default=1)
     parser.add_argument("--input_size", type=int, default=100)
+    parser.add_argument("--split_data", type=int, default=1)
     args = parser.parse_args()
     main(args)
\ No newline at end of file