[fix] `bitrate` is not an actual bitrate when working with STFT data

255BITS · Mar 30, 2016 · 259886a · 259886a
1 parent 2b2bbc6
commit 259886a
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 13 deletions.
diff --git a/composer.py b/composer.py
@@ -11,9 +11,9 @@
 dataset="audio"
 wav_size=64
 is_crop=False
-batch_size=10
+batch_size=128
 checkpoint_dir="checkpoint"
-bitrate=2048
+bitrate=4096 # NOTE: misnomer - with STFT data this is not an audio bitrate; it only sizes the slice taken from each generated batch
 
 with tf.Session() as sess:
     with tf.device('/cpu:0'):
@@ -24,19 +24,21 @@
         data = glob(os.path.join("./training", "*.wav"))
         sample_file = data[0]
         sample =tensorflow_wav.get_wav(sample_file)
+        print(sample)
 
         full_audio = []
-        for i in range(120):
+        for i in range(1):
             audio = dcgan.sample()
 
             audio = np.reshape(audio,[-1])
             print("Audio shape", np.shape(audio))
-            full_audio += audio[:bitrate].tolist()
+            full_audio += audio[:bitrate*batch_size].tolist()
             print("Full audio shape", np.shape(full_audio))
 
         samplewav = sample.copy()
         samplewav
-        samplewav['data']=np.array(full_audio)
+        print("Generated stats 'min', 'max', 'mean', 'stddev'", np.min(full_audio), np.max(full_audio), np.mean(full_audio), np.std(full_audio))
+        samplewav['data']=np.reshape(np.array(full_audio), [-1, 64])
         print("samplewav shape", np.shape(samplewav['data']))
 
         filename = "./compositions/song.wav.stft"

diff --git a/convert_to_istft.py b/convert_to_istft.py
@@ -22,11 +22,12 @@
     wav= tensorflow_wav.get_stft(file)
     nframes = wav['nframes']
     print('shape', wav['data'].shape)
-    time = 192
-    print(np.min(wav['data']), np.max(wav['data']), np.mean(wav['data']), np.std(wav['data']))
+    time = np.shape(np.reshape(wav['data'],[-1]))[0]/4096
+    print(np.shape(wav['data']))
+    #print(np.min(wav['data']), np.max(wav['data']), np.mean(wav['data']), np.std(wav['data']))
     #wav['data'] = np.exp(wav['data'])
     data = istft(wav['data'],fs, time, hop)
-    wav['data'] = data*3
+    wav['data'] = data*6
     #print(wav)
     #wav['data'] = np.sign(wav['data'])*np.sqrt(wav['data'])
     res= tensorflow_wav.save_wav(wav, file+".istft")

diff --git a/model.py b/model.py
@@ -9,7 +9,7 @@
 
 WAV_SIZE=64
 WAV_HEIGHT=64
-BITRATE=4096
+BITRATE=4096  # NOTE: misnomer - with STFT data this value is not an audio bitrate
 class DCGAN(object):
     def __init__(self, sess, wav_size=WAV_SIZE, is_crop=True,
                  batch_size=64, sample_size = 2, wav_shape=[WAV_SIZE, WAV_HEIGHT, 1],
@@ -225,8 +225,8 @@ def get_wav_content(files):
                         % (epoch, idx, batch_idxs,
                             time.time() - start_time, errD_fake, errD_real, errG))
 
-                    SAVE_COUNT=500
-                    SAMPLE_COUNT=100
+                    SAVE_COUNT=10
+                    SAMPLE_COUNT=1e10
 
                     print("Batch ", counter)
                     if np.mod(counter, SAVE_COUNT) == SAVE_COUNT-3:
@@ -261,7 +261,7 @@ def get_wav_content(files):
 
     def sample(self, bz=None):
         if(bz == None):
-            bz = np.random.normal(0, 0.5, [self.batch_size, self.z_dim]) 
+            bz = np.random.normal(0, 1, [self.batch_size, self.z_dim]) 
         result = self.sess.run(
             self.sampler,
             feed_dict={self.z: bz}

diff --git a/tensorflow_wav.py b/tensorflow_wav.py
@@ -22,7 +22,7 @@ def get_wav(path):
     results['framerate']=wav.getframerate()
     results['nframes']=wav.getnframes()
     results['compname']=wav.getcompname()
-    processed = np.array(data).astype(np.complex64, copy=False)
+    processed = np.array(data).astype(np.int16, copy=False)
     results['data']=processed
     return results