diff --git a/Network and Training.ipynb b/Network and Training.ipynb
new file mode 100644
index 0000000..9c22ac1
--- /dev/null
+++ b/Network and Training.ipynb
@@ -0,0 +1,537 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "jyLwghAJz7KB"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\"\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"\n",
+    "import midi\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import copy\n",
+    "import keras\n",
+    "import warnings\n",
+    "import time\n",
+    "import random\n",
+    "import itertools\n",
+    "import pickle\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, TimeDistributed, LSTM, Dropout, CuDNNLSTM, Embedding, Input, Conv1D\n",
+    "from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "7xm5Xp8uudy-"
+   },
+   "outputs": [],
+   "source": [
+    "mid_jingle = midi.read_midifile('data/jingle-bells-guitar-glenn-jarrett.mid')\n",
+    "mid_jingle.make_ticks_abs()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "fOhQgAAfpqZj"
+   },
+   "outputs": [],
+   "source": [
+    "def get_max_tick(track):\n",
+    "    '''\n",
+    "    track: list of MIDI events\n",
+    "    \n",
+    "    returns: absolute tick of the last note event in the track\n",
+    "    '''\n",
+    "    max_tick = None\n",
+    "    \n",
+    "    for event in reversed(track):\n",
+    "        if type(event) in [midi.NoteOnEvent, midi.NoteOffEvent]:\n",
+    "            max_tick = event.tick\n",
+    "            break\n",
+    "    \n",
+    "    return max_tick\n",
+    "\n",
+    "def find_pitch(notes, pitch):\n",
+    "    '''\n",
+    "    notes: list/set of (pitch, velocity) tuples\n",
+    "    pitch: pitch value to find\n",
+    "    \n",
+    "    returns: first (pitch, velocity) tuple that matches the input pitch\n",
+    "    '''\n",
+    "    \n",
+    "    for note in notes:\n",
+    "        if note[0] == pitch:\n",
+    "            return note\n",
+    "    \n",
+    "    print(notes, pitch)\n",
+    "\n",
+    "def parse_data(track):\n",
+    "    '''\n",
+    "    track: list of MIDI events\n",
+    "    \n",
+    "    returns: time series list of lists, each of fixed length,\n",
+    "    containing the notes active at the corresponding tick\n",
+    "    '''\n",
+    "    events = []\n",
+    "    activated_notes = set([])\n",
+    "    max_tick = get_max_tick(track)\n",
+    "    \n",
+    "    if max_tick is None:\n",
+    "        raise ValueError('track contains no note events')\n",
+    "    \n",
+    "    note_starts = {}\n",
+    "    note_ends = {}\n",
+    "    \n",
+    "    for event in track:\n",
+    "        if type(event) not in [midi.NoteOnEvent, midi.NoteOffEvent]:\n",
+    "            continue\n",
+    "        \n",
+    "        tick = event.tick\n",
+    "        pitch, velocity = event.data\n",
+    "        \n",
+    "        if velocity != 0:\n",
+    "            try:\n",
+    "                note_starts[tick].add((pitch, velocity))\n",
+    "            except KeyError:\n",
+    "                note_starts[tick] = set([(pitch, velocity)])\n",
+    "            finally:\n",
+    "                if (pitch, velocity) in activated_notes:\n",
+    "                    print((pitch, velocity), activated_notes)\n",
+    "                    velocity += np.random.choice([-1, 1])\n",
+    "\n",
+    "                activated_notes.add((pitch, velocity))\n",
+    "\n",
+    "        else:\n",
+    "            pitch, velocity = find_pitch(activated_notes, pitch)\n",
+    "            \n",
+    "            try:\n",
+    "                note_ends[tick].add((pitch, velocity))\n",
+    "            except KeyError:\n",
+    "                note_ends[tick] = set([(pitch, velocity)])\n",
+    "            finally:\n",
+    "                activated_notes.remove((pitch, velocity))\n",
+    "    \n",
+    "    for tick in range(max_tick):\n",
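+    "        # apply the note-on/note-off events recorded for this tick, then snapshot the active notes\n",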
try:\n", + " for note in note_starts[tick]:\n", + " activated_notes.add(note)\n", + " except KeyError:\n", + " pass\n", + " \n", + " try:\n", + " for note in note_ends[tick]:\n", + " activated_notes.remove(note)\n", + " except KeyError:\n", + " pass\n", + " \n", + " events.append(list(activated_notes))\n", + " \n", + " return keras.preprocessing.sequence.pad_sequences(events)\n", + "\n", + "def process_data(data, timestep):\n", + " '''\n", + " data: time series MIDI data\n", + " timestep: specifies length of convolution\n", + " \n", + " returns: 1-D convolution of time series data with window \n", + " specified by timestep\n", + " '''\n", + " X, y = [], []\n", + " for i in range(len(data)-timestep-1):\n", + " X.append(np.array([data[i:(i+timestep)]]))\n", + " y.append(np.array([data[(i+timestep)]]))\n", + " \n", + " X, y = np.array(X), np.array(y)\n", + " return X.reshape(*[_ for _ in X.shape if _ != 1]), \\\n", + " y.reshape(*[_ for _ in y.shape if _ != 1])" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "LNRepQCkwmz9" + }, + "outputs": [], + "source": [ + "class MidiScaler():\n", + " '''\n", + " Custom scaler for MIDI time series data\n", + " '''\n", + " def __init__(self):\n", + " pass\n", + " \n", + " def fit(self, data):\n", + " pass\n", + " \n", + " def fit_transform(self, data):\n", + " return ((data-64)/128).astype(np.float128)\n", + " \n", + " def transform(self, data):\n", + " return ((data-64)/128).astype(np.float128)\n", + " \n", + " def inverse_transform(self, data):\n", + " return ((data*128)+64).astype(np.float128)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ugNPvma2wn6s" + }, + "outputs": [], + "source": [ + "class RNN:\n", + " '''\n", + " Custom RNN class/data container\n", + " '''\n", + " def __init__(self, X, train_test_split=0.8, epochs=100, batch_size=32, lstm_units=128, timestep=256):\n", + " self.X = X\n", + " self.split = int(self.X.shape[0]*train_test_split)\n", + " self.X_train, self.X_test = self.X[:self.split], self.X[self.split:]\n", + " self.epochs = epochs\n", + " self.batch_size = batch_size\n", + " self.lstm_units = lstm_units\n", + " self.timestep = timestep\n", + " self.scaler = MidiScaler()\n", + " self.scale_data()\n", + " \n", + " self.model = Sequential()\n", + " self.model.add(\n", + " CuDNNLSTM(self.lstm_units, input_shape=(\n", + " self.timestep, self.X_train_processed.shape[-1]\n", + " ), \n", + " return_sequences=True)\n", + " )\n", + " self.model.add(Dropout(0.5))\n", + " self.model.add(CuDNNLSTM(self.lstm_units))\n", + " self.model.add(Dropout(0.5))\n", + " self.model.add(Dense(self.lstm_units, activation='relu'))\n", + " self.model.add(Dense(self.X_train_processed.shape[-1], activation='softmax'))\n", + " self.model.compile(optimizer='adam', loss=['mse'], metrics=['accuracy'])\n", + " \n", + " def flatten_data(self):\n", + " try:\n", + " self.X_train_flattened = self.X_train[:,:,0]\n", + " self.X_test_flattened = self.X_test[:,:,0]\n", + " except IndexError:\n", + " self.X_train_flattened = self.X_train\n", + " self.X_test_flattened = self.X_test\n", + " \n", + " def scale_data(self):\n", + " self.flatten_data()\n", + " self.scaler.fit(self.X_train_flattened)\n", + " self.X_train_scaled = self.scaler.transform(self.X_train_flattened)\n", + " self.X_test_scaled = self.scaler.transform(self.X_test_flattened)\n", + " \n", + " self.X_train_processed, self.y_train_processed = \\\n", + " 
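+    "    # scale the note data, then build sliding-window inputs with next-step targets\n",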
+    "    def scale_data(self):\n",
+    "        self.flatten_data()\n",
+    "        self.scaler.fit(self.X_train_flattened)\n",
+    "        self.X_train_scaled = self.scaler.transform(self.X_train_flattened)\n",
+    "        self.X_test_scaled = self.scaler.transform(self.X_test_flattened)\n",
+    "\n",
+    "        self.X_train_processed, self.y_train_processed = \\\n",
+    "            process_data(self.X_train_scaled, self.timestep)\n",
+    "        self.X_test_processed, self.y_test_processed = \\\n",
+    "            process_data(self.X_test_scaled, self.timestep)\n",
+    "    \n",
+    "    def train(self, epochs, validation_split=0.25, verbose=0):\n",
+    "        history = self.model.fit(self.X_train_processed, self.y_train_processed,\n",
+    "                                 batch_size=self.batch_size, epochs=epochs,\n",
+    "                                 validation_split=validation_split, verbose=verbose)\n",
+    "\n",
+    "        return history"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "dj9mGuF1YpMY"
+   },
+   "outputs": [],
+   "source": [
+    "def detokenize_data(pred):\n",
+    "    '''\n",
+    "    pred: predicted MIDI time series data\n",
+    "    \n",
+    "    returns: corresponding MIDI pattern\n",
+    "    '''\n",
+    "    pattern = midi.Pattern()\n",
+    "    track = midi.Track()\n",
+    "    prev = []\n",
+    "\n",
+    "    for tick, note_arr in enumerate(pred[:-1].tolist()):\n",
+    "        for note in note_arr:\n",
+    "            if note != 0:\n",
+    "                if note in prev:\n",
+    "                    if note not in pred[tick+1]:\n",
+    "                        track.append(midi.NoteOffEvent(tick=tick, channel=10, data=[note, 0]))\n",
+    "                else:\n",
+    "                    if note in pred[tick+1]:\n",
+    "                        track.append(midi.NoteOnEvent(tick=tick, channel=10, data=[note, 60]))\n",
+    "                    else:\n",
+    "                        track.append(midi.NoteOnEvent(tick=tick, channel=10, data=[note, 60]))\n",
+    "                        track.append(midi.NoteOffEvent(tick=tick, channel=10, data=[note, 0]))\n",
+    "\n",
+    "        prev = note_arr\n",
+    "\n",
+    "    for i, event in reversed(list(enumerate(track))):\n",
+    "        if i == 0:\n",
+    "            continue\n",
+    "\n",
+    "        event.tick = (event.tick - track[i-1].tick)\n",
+    "\n",
+    "    pattern.append(track)\n",
+    "    \n",
+    "    return pattern"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "P-nlqvepSVRn"
+   },
+   "outputs": [],
+   "source": [
+    "track_data = parse_data(mid_jingle[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "ayoFg5k6wqvB"
+   },
+   "outputs": [],
+   "source": [
+    "rnn = RNN(track_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 357
+    },
+    "colab_type": "code",
+    "id": "VQakuhnx7t2r",
+    "outputId": "5348e975-2295-4652-d1d6-6d3cbf3b73ef"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #\n",
+      "=================================================================\n",
+      "cu_dnnlstm_3 (CuDNNLSTM)     (None, 256, 128)          70144\n",
+      "_________________________________________________________________\n",
+      "dropout_3 (Dropout)          (None, 256, 128)          0\n",
+      "_________________________________________________________________\n",
+      "cu_dnnlstm_4 (CuDNNLSTM)     (None, 128)               132096\n",
+      "_________________________________________________________________\n",
+      "dropout_4 (Dropout)          (None, 128)               0\n",
+      "_________________________________________________________________\n",
+      "dense_3 (Dense)              (None, 128)               16512\n",
+      "_________________________________________________________________\n",
+      "dense_4 (Dense)              (None, 7)                 903\n",
+      "=================================================================\n",
+      "Total params: 219,655\n",
+      "Trainable params: 219,655\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
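+    "# quick look at the stacked CuDNNLSTM architecture and parameter counts\n",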
"print(rnn.model.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 714 + }, + "colab_type": "code", + "id": "uH-iiwtb__e7", + "outputId": "52686753-5411-4c94-b430-774eff939026" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 22770 samples, validate on 7590 samples\n", + "Epoch 1/20\n", + "22770/22770 [==============================] - 72s 3ms/step - loss: 0.1930 - acc: 0.6531 - val_loss: 0.1622 - val_acc: 0.4864\n", + "Epoch 2/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1915 - acc: 0.8441 - val_loss: 0.1618 - val_acc: 0.6331\n", + "Epoch 3/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1914 - acc: 0.8711 - val_loss: 0.1615 - val_acc: 0.7838\n", + "Epoch 4/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1914 - acc: 0.8899 - val_loss: 0.1616 - val_acc: 0.6556\n", + "Epoch 5/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1913 - acc: 0.8966 - val_loss: 0.1614 - val_acc: 0.7560\n", + "Epoch 6/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1913 - acc: 0.9018 - val_loss: 0.1615 - val_acc: 0.5750\n", + "Epoch 7/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1913 - acc: 0.9054 - val_loss: 0.1615 - val_acc: 0.7885\n", + "Epoch 8/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1913 - acc: 0.9079 - val_loss: 0.1617 - val_acc: 0.6889\n", + "Epoch 9/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1913 - acc: 0.9139 - val_loss: 0.1615 - val_acc: 0.6250\n", + "Epoch 10/20\n", + "22770/22770 [==============================] - 68s 3ms/step - loss: 0.1913 - acc: 0.9209 - val_loss: 0.1620 - val_acc: 0.6630\n", + "Epoch 11/20\n", + "22770/22770 [==============================] - 70s 3ms/step - loss: 0.1912 - acc: 0.9283 - val_loss: 0.1620 - val_acc: 0.6914\n", + "Epoch 12/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9270 - val_loss: 0.1620 - val_acc: 0.5444\n", + "Epoch 13/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9296 - val_loss: 0.1619 - val_acc: 0.6246\n", + "Epoch 14/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9307 - val_loss: 0.1618 - val_acc: 0.6246\n", + "Epoch 15/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9334 - val_loss: 0.1619 - val_acc: 0.6577\n", + "Epoch 16/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9389 - val_loss: 0.1620 - val_acc: 0.6242\n", + "Epoch 17/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9417 - val_loss: 0.1622 - val_acc: 0.6586\n", + "Epoch 18/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9344 - val_loss: 0.1621 - val_acc: 0.6253\n", + "Epoch 19/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9434 - val_loss: 0.1621 - val_acc: 0.6252\n", + "Epoch 20/20\n", + "22770/22770 [==============================] - 69s 3ms/step - loss: 0.1912 - acc: 0.9460 - val_loss: 0.1620 - val_acc: 0.7190\n" + ] + } + ], + "source": [ + "history = rnn.train(epochs=20, verbose=1)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "d9tC9sZLMJfe" + }, + "outputs": [], + "source": [ + "rnd = np.vectorize(round)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Zq9anpm1YztV" + }, + "outputs": [], + "source": [ + "scaled_pred = rnn.model.predict(rnn.X_train_processed)\n", + "pred = rnn.scaler.inverse_transform(scaled_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "s1m30PQ_uxnF" + }, + "outputs": [], + "source": [ + "pattern = detokenize_data(rnd(pred).astype(int))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "bwR1X4R_BIvr" + }, + "outputs": [], + "source": [ + "midi.write_midifile('data/output/jingle-bells-pred-train.mid', pattern)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "uy4bOFcuNd9E" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Copy of Untitled1.ipynb", + "provenance": [], + "version": "0.3.2" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/README.md b/README.md index 3cd6ff4..9fd6a68 100644 --- a/README.md +++ b/README.md @@ -1 +1,7 @@ # music-gen-tool + +The `Exploring MIDI` notebook fleshes out our process of wrangling the data and experimenting with the `python-midi` library we used. + +The `Network Definition and Test` notebook delves into our implementation of the tokenizer and RNN. + +We have several MIDI songs under the `data` folder, as well as a sample output file in the `data/output` folder diff --git a/data/.DS_Store b/data/.DS_Store index 30e8580..5665329 100644 Binary files a/data/.DS_Store and b/data/.DS_Store differ diff --git a/data/output/jingle-bells-pred-train.mid b/data/output/jingle-bells-pred-train.mid new file mode 100644 index 0000000..7754cce Binary files /dev/null and b/data/output/jingle-bells-pred-train.mid differ