【Day20】 Piano Music Generation with WavenetGan, BidirectionalLSTMGAN, and WaveGan

  • Since I'd like to spend some time later sharing my experience reading and implementing the Transformer, this post skips the Transformer part, but the music it generates is still attached at the bottom for you to hear.

  • Picking up from yesterday, let's start writing!

WavenetGan

Generator
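The generator code for this model does not appear in this excerpt. Below is a minimal sketch of what a WaveNet-style generator could look like, assuming a noise vector of length seq_len and a stack of dilated causal Conv1D blocks (the defining WaveNet ingredient). The layer widths and dilation rates are illustrative assumptions, not the original architecture.

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Reshape

def build_generator(seq_len):
    # hedged sketch: map noise to a note sequence with dilated causal convolutions
    model = Sequential([
        Dense(seq_len, activation='relu', input_shape=(seq_len,)),
        Reshape((seq_len, 1)),
        # stacked dilated causal convolutions: each layer doubles the
        # dilation rate, widening the receptive field exponentially
        Conv1D(64, 3, padding='causal', dilation_rate=1, activation='relu'),
        Conv1D(64, 3, padding='causal', dilation_rate=2, activation='relu'),
        Conv1D(64, 3, padding='causal', dilation_rate=4, activation='relu'),
        Conv1D(64, 3, padding='causal', dilation_rate=8, activation='relu'),
        # project back to one feature per timestep, squashed to 0 ~ 1
        Conv1D(1, 1, activation='sigmoid'),
    ])
    return model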

Discriminator

from tensorflow.keras import Sequential
from tensorflow.keras.layers import (Conv1D, LeakyReLU, BatchNormalization,
                                     Flatten, Dense)

def build_discriminator():
    # a plain strided Conv1D stack that downsamples the note sequence and
    # outputs a single real/fake logit (no sigmoid: the loss below is
    # built with from_logits=True)
    model = Sequential([
        Conv1D(32, 3, strides=2, input_shape=(SEQ_LEN, 1)),
        LeakyReLU(),
        Conv1D(64, 3, strides=2, padding='same', use_bias=False),
        BatchNormalization(),
        LeakyReLU(),
        Conv1D(64, 3, strides=2, padding='same', use_bias=False),
        BatchNormalization(),
        LeakyReLU(),
        Flatten(),
        Dense(1)
    ])
    return model
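As a quick usage check (not in the original post), you can instantiate the discriminator and inspect its shapes; the value of SEQ_LEN here is an assumed placeholder, since the post doesn't state what was used:

SEQ_LEN = 100  # assumed placeholder value
disc = build_discriminator()
disc.summary()  # ends in a single real/fake logit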

BidirectionalLSTMGAN

Generator

from tensorflow.keras import Sequential, Model, Input
from tensorflow.keras.layers import (Bidirectional, LSTM, LeakyReLU, RepeatVector,
                                     TimeDistributed, Dense, Dropout, Activation)

def build_generator():
    model = Sequential([
        # encoder: stacked BiLSTMs compress the noise into a single vector
        Bidirectional(LSTM(128, return_sequences=True), input_shape=(gen_len, gen_len)),
        LeakyReLU(alpha=0.2),
        Bidirectional(LSTM(128, return_sequences=True)),
        LeakyReLU(alpha=0.2),
        Bidirectional(LSTM(128)),
        LeakyReLU(alpha=0.2),
        # stretch that vector back out to seq_len timesteps
        RepeatVector(seq_len),
        # decoder: BiLSTMs produce one feature per timestep
        Bidirectional(LSTM(128, return_sequences=True, dropout=0.2)),
        LeakyReLU(alpha=0.2),
        Bidirectional(LSTM(128, return_sequences=True, dropout=0.2)),
        LeakyReLU(alpha=0.2),
        Bidirectional(LSTM(128, return_sequences=True, dropout=0.2)),
        LeakyReLU(alpha=0.2),
        Dropout(0.3),
        TimeDistributed(Dense(128)),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        TimeDistributed(Dense(128)),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        TimeDistributed(Dense(1)),
        # squash the notes back to the 0 ~ 1 range
        Activation("sigmoid"),
    ])
    noise = Input(shape=(gen_len, gen_len))
    img = model(noise)
    return Model(noise, img)

Discriminator

def build_discriminator():
    # the original listing stacked Activation("relu") (and sigmoid/relu Dense
    # activations) directly before LeakyReLU, which makes the LeakyReLU a
    # no-op; the redundant activations are collapsed to one per layer here
    model = Sequential([
        Bidirectional(LSTM(128, return_sequences=True), input_shape=(seq_len, 1)),
        LeakyReLU(alpha=0.2),
        Bidirectional(LSTM(128)),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        RepeatVector(1),
        TimeDistributed(Dense(128)),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        TimeDistributed(Dense(128)),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        # a single linear logit per sequence (the loss uses from_logits=True)
        TimeDistributed(Dense(1))
    ])
    img = Input(shape=(seq_len, 1))
    validity = model(img)
    return Model(img, validity)

WaveGan

Generator

import tensorflow as tf
from tensorflow.keras.layers import (Dense, Reshape, Conv1D, BatchNormalization,
                                     ReLU, Flatten)

def WaveGANGenerator():
    model = tf.keras.Sequential([
        Dense(seq_len, activation='relu', input_shape=(seq_len,)),
        # put the samples on the time axis so the Conv1Ds below actually
        # slide along the sequence; the original Reshape((1, seq_len))
        # left a time axis of length 1, so nothing was convolved over
        Reshape((seq_len, 1)),

        Conv1D(64, kernel_size=25, strides=4, padding='same'),
        BatchNormalization(momentum=0.8),
        ReLU(),

        Conv1D(128, kernel_size=25, strides=4, padding='same'),
        BatchNormalization(momentum=0.8),
        ReLU(),

        Conv1D(seq_len, kernel_size=25, strides=4, padding='same'),
        BatchNormalization(momentum=0.8),
        ReLU(),

        Conv1D(seq_len, kernel_size=25, strides=4, padding='same'),
        BatchNormalization(momentum=0.8),
        ReLU(),

        Flatten(),
        # one value per note, squashed to 0 ~ 1
        Dense(seq_len, activation='sigmoid')
    ])
    return model

Discriminator

def WaveGANDiscriminator():
    model = tf.keras.Sequential([
        Dense(seq_len, activation='relu', input_shape=(seq_len,)),
        # same fix as the generator: samples go on the time axis
        Reshape((seq_len, 1)),

        Conv1D(64, kernel_size=25, strides=4, padding='same'),
        BatchNormalization(momentum=0.8),
        ReLU(),

        Conv1D(seq_len, kernel_size=25, strides=4, padding='same'),
        BatchNormalization(momentum=0.8),
        ReLU(),

        Conv1D(seq_len, kernel_size=25, strides=4, padding='same'),
        BatchNormalization(momentum=0.8),
        ReLU(),

        Flatten(),
        # a single real/fake logit; the original Dense(seq_len,
        # activation='sigmoid') conflicted with the from_logits=True loss
        Dense(1)
    ])
    return model
    

The training procedure is the same for every model.

Defining the loss

cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def generator_loss(fake_output):
    # the generator wants the discriminator to label its output as real (1)
    return cross_entropy(tf.ones_like(fake_output), fake_output)

def discriminator_loss(real_output, fake_output):
    # the discriminator wants real samples labeled 1 and fakes labeled 0
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return real_loss + fake_loss
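One detail the excerpt never shows: train_step below uses generator_optimizer and discriminator_optimizer, which must exist before training starts. A common choice for GANs (an assumption here, not taken from the post) is Adam with a small learning rate:

# assumed: the post does not show how the optimizers were configured
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)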

Train Loop

import time
from IPython.display import clear_output

total_Gloss = []
total_Dloss = []

def train(dataset, epochs):
    for epoch in range(epochs):
        start = time.time()
        G_loss = 0
        D_loss = 0
        for i, image_batch in enumerate(dataset):
            # train_step is defined below
            gen_loss, disc_loss = train_step(image_batch)
            print(f"Step:{i} | G_loss:{gen_loss} D_loss:{disc_loss}|")
            G_loss += gen_loss
            D_loss += disc_loss
        clear_output(wait=True)
        print(f'Time for epoch {epoch + 1} is {time.time()-start} sec\n')
        print(f'G_AVE_Loss:{G_loss/len(dataset)}')
        print(f'D_AVE_loss:{D_loss/len(dataset)}')
        total_Gloss.append(G_loss/len(dataset))
        total_Dloss.append(D_loss/len(dataset))

Train Step

@tf.function
def train_step(music):
    noise = tf.random.normal([BATCH_SIZE, seq_len])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_music = generator(noise, training=True)
        real_output = discriminator(music, training=True)
        fake_output = discriminator(generated_music, training=True)
        gen_loss = generator_loss(fake_output)
        # note the argument order: the original listing passed
        # (fake_output, real_output), which silently swaps the labels
        disc_loss = discriminator_loss(real_output, fake_output)
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    return gen_loss, disc_loss
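To tie the pieces together, here is a hedged usage sketch. train_sequences, BATCH_SIZE, and EPOCHS are placeholders I'm assuming; the post does not show how the dataset was batched:

BATCH_SIZE = 64   # assumed
EPOCHS = 100      # assumed
# train_sequences: the preprocessed note data (assumed); its shape must
# match whichever discriminator you are training
train_dataset = (tf.data.Dataset.from_tensor_slices(train_sequences)
                 .shuffle(10000)
                 .batch(BATCH_SIZE, drop_remainder=True))
train(train_dataset, EPOCHS)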

Loss Plot
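The original loss figure is not reproduced here, but it can be regenerated from the per-epoch averages collected in train():

import matplotlib.pyplot as plt

plt.plot(total_Gloss, label='Generator loss')
plt.plot(total_Dloss, label='Discriminator loss')
plt.xlabel('Epoch')
plt.ylabel('Average loss')
plt.legend()
plt.show()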

Generating MIDI

import random
import numpy as np
from mido import MidiFile, MidiTrack, Message

noise = np.random.normal(0, 1, (1, seq_len))
predict = generator.predict(noise)
# scale the 0 ~ 1 output back to MIDI note numbers (0 ~ 127)
predict = predict * 127

midler = MidiFile()
track = MidiTrack()
midler.tracks.append(track)

track.append(Message('program_change', program=2, time=0))
for x in range(seq_len):
    # as mentioned earlier, only the arrangement of notes was trained; the
    # timing and control events were never learned, so they are generated randomly
    on_interval = random.randint(0, 127)
    off_interval = random.randint(0, 127)
    change_interval = random.randint(0, 127)
    change_value = random.randint(0, 127)
    isControl = random.randint(0, 1)
    track.append(Message('note_on', channel=1, note=int(predict[0][x]), velocity=64, time=on_interval))
    if isControl:
        track.append(Message('control_change', channel=1, control=64, value=change_value, time=change_interval))
    track.append(Message('note_off', channel=1, note=int(predict[0][x]), velocity=64, time=off_interval))

# save once, after all messages have been appended (the original saved
# inside the loop on every iteration)
midler.save('WaveGan.mid')

Wrap-up

Although rhythm and control events were not used during training (we only trained on the arrangement of notes), the results still sound noticeably different from randomly generated music. The criteria for judging music remain rather fuzzy (at least to a musical muggle like me), so you really have to listen for yourself to appreciate it. My generated results are attached below. XD

Generated samples (audio players embedded in the original post):

  • WavenetGan
  • BidirectionalLSTMGAN
  • WaveGan
  • TransformerGan


