深度學習一行一行敲vae-npvc網路-tensorflow版(convert.py)

源碼地址:vae-npvc

論文地址:Voice Conversion from Non-parallel Corpora Using Variational Auto-encoder

def main():
    """Convert the source speaker's test utterances to the target speaker.

    Builds the conversion graph from a trained checkpoint (``args.checkpoint``),
    then repeatedly runs it and writes one wav file per run into a fresh
    output directory under ``args.output_dir``. The loop ends when running
    the graph raises.
    """
    logdir, ckpt = os.path.split(args.checkpoint)
    arch = tf.gfile.Glob(os.path.join(logdir, 'architecture*.json'))[0]  # should only be 1 file
    with open(arch) as fp:
        arch = json.load(fp)  # architecture / hyper-parameters used for training

    # Normalization parameters.
    normalizer = Tanhize(
        xmax=np.fromfile('./etc/xmax.npf'),
        xmin=np.fromfile('./etc/xmin.npf'),
    )

    # Features of the speech that is to be converted.
    features = read_whole_features(args.file_pattern.format(args.src))

    x = normalizer.forward_process(features['sp'])  # normalize
    x = nh_to_nchw(x)  # add the channel and width dimensions
    y_s = features['speaker']  # source speaker labels
    y_t_id = tf.placeholder(dtype=tf.int64, shape=[1,])
    # Target speaker labels: the fed id repeated once per row of x.
    y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0],], dtype=tf.int64)

    machine = MODEL(arch)
    # Note: `encode` returns a single value (`_encode` returns two).
    z = machine.encode(x)
    x_t = machine.decode(z, y_t)  # NOTE: the API yields NHWC format
    x_t = tf.squeeze(x_t)
    x_t = normalizer.backward_process(x_t)  # converted features, de-normalized

    # For sanity check (validation)
    x_s = machine.decode(z, y_s)
    x_s = tf.squeeze(x_s)
    x_s = normalizer.backward_process(x_s)  # reconstruction as the source speaker

    f0_s = features['f0']
    f0_t = convert_f0(f0_s, args.src, args.trg)  # convert f0

    output_dir = get_default_output(args.output_dir)

    saver = tf.train.Saver()
    sv = tf.train.Supervisor(logdir=output_dir)
    with sv.managed_session() as sess:
        load(saver, sess, logdir, ckpt=ckpt)
        while True:
            try:
                feat, f0, sp = sess.run(
                    [features, f0_t, x_t],
                    feed_dict={y_t_id: np.asarray([SPEAKERS.index(args.trg)])}
                )
                feat.update({'sp': sp, 'f0': f0})  # overwrite with converted values
                y = pw2wav(feat)
                oFilename = make_output_wav_name(output_dir, feat['filename'])
                sf.write(oFilename, y, FS)
            except tf.errors.OutOfRangeError:
                # Presumably how the input pipeline signals that every file
                # has been consumed -- TODO confirm against read_whole_features.
                break
            except Exception as err:
                # Still stop the loop as before, but report the cause instead
                # of hiding it; KeyboardInterrupt/SystemExit now propagate.
                print('Conversion stopped: {!r}'.format(err))
                break

# Checkpoint path used in this walkthrough.
args.checkpoint = 'logdir/train/2018_01_10_16_14_03/model.ckpt-191200'
# `{}` in the file pattern is filled with the source speaker name.
tf.app.flags.DEFINE_string('file_pattern', './dataset/vcc2016/bin/Testing Set/{}/*.bin', 'file pattern')
tf.app.flags.DEFINE_string('src', 'SF1', 'source speaker [SF1 - SM2]')

def nh_to_nchw(x):
    """Insert singleton channel and width axes into a rank-2 tensor.

    Args:
        x: tensor laid out as [b, h].

    Returns:
        A tensor laid out as [b, c=1, h, w=1].
    """
    with tf.name_scope('NH_to_NCHW'):
        x = tf.expand_dims(x, 1)      # [b, h] => [b, c=1, h]
        return tf.expand_dims(x, -1)  # => [b, c=1, h, w=1]

def convert_f0(f0, src, trg):
    """Map the source speaker's f0 onto the target speaker's log-f0 statistics.

    Entries with ``f0 > 1`` are moved to the log domain, re-scaled from the
    source statistics to the target statistics, and moved back with ``exp``.
    All other entries are returned unchanged.

    Args:
        f0: tensor of f0 values.
        src: source speaker name; ``./etc/<src>.npf`` must hold exactly two
            float32 values, read as (mean, std).
        trg: target speaker name; ``./etc/<trg>.npf`` has the same layout.

    Returns:
        A tensor shaped like ``f0`` holding the converted values.
    """
    mu_s, std_s = np.fromfile(os.path.join('./etc', '{}.npf'.format(src)), np.float32)
    mu_t, std_t = np.fromfile(os.path.join('./etc', '{}.npf'.format(trg)), np.float32)

    # Decide once, on the raw input, which entries get converted. Re-deriving
    # the mask from intermediate log-domain values (as `lf0 > 1.`) would leave
    # any entry whose log-f0 is <= 1 stuck in the log domain.
    convert = f0 > 1.

    # Feed 1.0 to log() for the entries that are not converted so no -inf is
    # produced; those positions are discarded by the final where().
    lf0 = tf.log(tf.where(convert, f0, tf.ones_like(f0)))
    lf0 = (lf0 - mu_s) / std_s * std_t + mu_t
    return tf.where(convert, tf.exp(lf0), f0)

def get_default_output(logdir_root):
    """Return a timestamped output directory path under ``logdir_root``.

    The path is ``<logdir_root>/output/<MMDD-HHMM-SS-YYYY>`` built from the
    current local time. The directory is not created here; the path is only
    printed and returned.

    Args:
        logdir_root: root directory for outputs.

    Returns:
        The composed path as a string.
    """
    # Plain %m/%d/%H/%M/%S are already zero-padded; the `%0m`-style flag is a
    # platform-specific extension and is not portable.
    started = datetime.now().strftime('%m%d-%H%M-%S-%Y')
    logdir = os.path.join(logdir_root, 'output', started)
    print('Using default logdir: {}'.format(logdir))
    return logdir

歡迎關注公眾號:huangxiaobai880

https://www.zhihu.com/video/934906903159504896
推薦閱讀:

如何評價深度學習之父Hinton發布的Capsule論文?
如何評價重磅論文《Stopping GAN Violence》?
關於這些用於深度學習的機器配置,合理嗎,哪個好?
卷積神經網路可以用於小目標檢測嗎?
Google 人工智慧引擎 TensorFlow 開源會帶來哪些影響?

TAG:深度学习DeepLearning | 机器学习 | 源码阅读 |