Skip to content

0.6. Visualization

Jim Schwoebel edited this page Aug 17, 2018 · 15 revisions

This section covers all the scripts in the Chapter_6_visualization folder.

Visualizing audio features

audio streams

audio_stream.py

import sounddevice as sd
import soundfile as sf
import random, time, librosa, os 
import numpy as np

def visualize_data(sample, minimum, maximum):
    """Render ``sample`` as a text gauge scaled between ``minimum`` and ``maximum``.

    The range is divided into ten equal steps. The gauge contains one
    ``==.==.`` bar per step reached (upper step edges are inclusive), with a
    floor of one bar for samples at or below ``minimum`` and a ceiling of ten
    bars for samples above the ninth step. The trailing ``.`` separator is
    stripped from the result.

    Args:
        sample: value to display.
        minimum: value mapped to the bottom of the gauge.
        maximum: value mapped to the top of the gauge.

    Returns:
        The gauge string, e.g. ``'==.==.==.=='`` for two bars. If ``sample``
        cannot be ordered against the thresholds (e.g. NaN), the sample is
        printed and the string ``'error'`` is returned.
    """
    bar = '==.==.'
    delta = (maximum - minimum) / 10

    count = None
    if sample <= minimum:
        # anything at or below the floor still shows a single bar
        count = 1
    else:
        # first step whose (inclusive) upper edge contains the sample
        for step in range(1, 10):
            if sample <= minimum + delta * step:
                count = step
                break
        else:
            # past the ninth edge: everything up to and beyond maximum is a full gauge
            if sample > minimum + delta * 9 or sample >= maximum:
                count = 10

    if count is None:
        # every comparison was False (e.g. NaN); report it without slicing
        # the message, which previously came back truncated as 'erro'
        print(sample)
        return 'error'

    # drop the trailing '.' separator left by the last bar
    return (bar * count)[:-1]

def record_data(filename, duration, fs, channels):
    """Record a short clip and return its mean RMS energy scaled by 1000.

    The clip is written to ``filename`` as a scratch file, reloaded with
    librosa for feature extraction, and the scratch file is removed again.

    Args:
        filename: path used for the temporary recording.
        duration: recording length in seconds.
        fs: sample rate passed to the recorder and the file writer.
        channels: number of input channels to record.

    Returns:
        Mean of the first row of librosa's RMS feature, multiplied by 1000.
    """
    # synchronous recording: start capture, then wait for it to finish
    myrecording = sd.rec(int(duration * fs), samplerate=fs, channels=channels)
    sd.wait()
    sf.write(filename, myrecording, fs)
    try:
        y, sr = librosa.load(filename)
        # librosa renamed feature.rmse to feature.rms; use whichever exists
        rms = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
        rmse = np.mean(rms(y=y)[0])
    finally:
        # the wav is only a scratch buffer; remove it even if analysis fails
        os.remove(filename)

    return rmse*1000

# take a streaming sample and then put that data as it is being recorded
# stream short recordings and draw each one as a bar gauge as it arrives
minimum = 0
maximum = 70
samples = []

for i in range(100):
    # record 20ms of data and get its scaled loudness
    sample = record_data('sample.wav', 0.02, 44100, 1)
    samples.append(sample)
    if sample > maximum:
        # raise the gauge ceiling so louder samples still fit the scale
        maximum = sample
        print('new max is %s'%(maximum))
    print(visualize_data(sample, minimum, maximum))
    # to try this without a microphone, the original notes suggest using
    # random.randint(0,30) as the sample and sleeping 0.2s between draws

# summarize the range seen over the whole run
samples = np.array(samples)
minval = samples.min()
maxval = samples.max()
print('minimum val: %s'%(str(minval)))
print('max val: %s'%(str(maxval)))

results in a stream like this:

new max is 157.97308087348938
==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==
==.==.==.==
==.==
==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==
==.==.==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==.==.==.==.==.==.==.==.==.==.==
==.==.==.==.==.==
==.==.==.==.==.==
==.==.==.==
==.==.==.==.==.==
==.==.==.==
==.==.==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==
==.==.==.==.==.==.==.==
==.==
==.==
==.==
==.==
==.==
==.==.==.==.==.==.==.==
==.==
==.==.==.==.==.==
==.==.==.==
==.==
==.==
==.==.==.==.==.==
minimum val: 0.057202301832148805
max val: 157.97308087348938

audio path plots

audio_path.py

import sounddevice as sd
import soundfile as sf
import random, time, librosa, os 
import numpy as np
import matplotlib.pyplot as plt
from drawnow import drawnow

def make_fig():
    """Scatter-plot the module-level ``x`` and ``y`` lists (passed to drawnow as its redraw callback)."""
    plt.scatter(x=x, y=y)
def record_data(filename, duration, fs, channels):
    """Record a short clip and return its mean RMS energy scaled by 1000.

    The clip is written to ``filename`` as a scratch file, reloaded with
    librosa for feature extraction, and the scratch file is removed again.

    Args:
        filename: path used for the temporary recording.
        duration: recording length in seconds.
        fs: sample rate passed to the recorder and the file writer.
        channels: number of input channels to record.

    Returns:
        Mean of the first row of librosa's RMS feature, multiplied by 1000.
    """
    # synchronous recording: start capture, then wait for it to finish
    myrecording = sd.rec(int(duration * fs), samplerate=fs, channels=channels)
    sd.wait()
    sf.write(filename, myrecording, fs)
    try:
        y, sr = librosa.load(filename)
        # librosa renamed feature.rmse to feature.rms; use whichever exists
        rms = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
        rmse = np.mean(rms(y=y)[0])
    finally:
        # the wav is only a scratch buffer; remove it even if analysis fails
        os.remove(filename)

    return rmse*1000
# set up an interactive matplotlib figure so it can be redrawn on every sample
plt.ion()
fig = plt.figure()
x = []
y = []
for i in range(100):
    # record 20ms of data, then add one (index, loudness) point to the plot
    sample = record_data('sample.wav', 0.02, 44100, 1)
    x += [i]
    y += [sample]
    drawnow(make_fig)
# keep the final plot on disk and hand it to the system viewer
# NOTE(review): `open` is presumably the macOS launcher; confirm for other platforms
plt.savefig('stream.png')
os.system('open stream.png')

results in an image like this:

spectrograms

audio_spectrograms.py

import librosa, os
import matplotlib.pyplot as plt
import numpy as np
import librosa.display
import audio_plot as ap 

# switch into the data folder under the current working directory;
# os.path.join builds the path with the platform's own separator
data_dir = os.path.join(os.getcwd(), 'data')
os.chdir(data_dir)
filename = 'Voice.wav'
# plot_spectrogram comes from the local audio_plot module (not shown here);
# presumably it renders the spectrogram and returns the image file name - confirm there
imgfile = ap.plot_spectrogram(filename)

results in an image like this:

Frequency spectrums and oscillograms

audio_plotmany.py

results in an image like this:

Spectral power density

audio_plotspd.py

results in an image like this:

as k-means clusters

audio_cluster.py

import os, librosa 
import scipy.io.wavfile as wav
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np 
########################################################################
##     MAIN CODE BASE    					       ##
########################################################################
# work inside the folder that holds the .wav samples
os.chdir('data/samples')
wavfiles = sorted(name for name in os.listdir() if name.endswith('.wav'))

# librosa renamed feature.rmse to feature.rms; use whichever this version has
rms = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse

# one 2-d feature vector per file: [mean RMS energy, mean of the first MFCC row]
samples = []
for wavfile in wavfiles:
    y, sr = librosa.load(wavfile)
    rmse = np.mean(rms(y=y)[0])
    mfcc = np.mean(librosa.feature.mfcc(y=y)[0])
    samples.append(np.array([rmse, mfcc]))

# cluster the files into 3 groups; fit_predict fits and labels in one call
kmeans = KMeans(3, max_iter=1000, n_init=100)
predictions = kmeans.fit_predict(samples)

# plot sample index against assigned cluster, one colour per cluster label
for label, color in enumerate(('black', 'blue', 'red')):
    members = [i for i, predicted in enumerate(predictions) if predicted == label]
    plt.scatter(members, [label] * len(members), marker='o', c=color)
plt.xlabel('sample number')
plt.ylabel('k means class')
plt.savefig('kmeans.png')

results in an image like this:

Visualizing text features

text streams

text_stream.py

import os, json, shutil
import speech_recognition as sr_audio
import sounddevice as sd
import soundfile as sf 

def transcribe_pocket(filename):
    """Transcribe the audio file ``filename`` with the Sphinx recognizer.

    Loads the file through speech_recognition's ``AudioFile`` and returns
    whatever ``recognize_sphinx`` produces for it (only meaningful when the
    file is a voice sample).
    """
    recognizer = sr_audio.Recognizer()
    # load the recording into an audio object the recognizer can consume
    with sr_audio.AudioFile(filename) as source:
        audio = recognizer.record(source)

    return recognizer.recognize_sphinx(audio)

def sync_record(filename, duration, fs, channels):
    """Record ``duration`` seconds of audio and write it to ``filename``.

    The call waits for the recording to finish before the file is written.
    ``fs`` is the sample rate and ``channels`` the number of input channels.
    """
    frame_count = int(duration * fs)
    recording = sd.rec(frame_count, samplerate=fs, channels=channels)
    sd.wait()
    sf.write(filename, recording, fs)

# set recording params
duration = 1
fs = 44100
channels = 1

# always start from an empty ./recordings folder: if one is left over from a
# previous run, wipe and recreate it. Only the "already exists" case is
# handled; any other failure (permissions, Ctrl-C, ...) propagates.
try:
    os.mkdir('recordings')
except FileExistsError:
    shutil.rmtree('recordings')
    os.mkdir('recordings')
os.chdir('recordings')

# record 30 consecutive clips and transcribe each one right after recording
filenames = []
transcripts = []
for i in range(30):
    filename = '%s.wav'%(str(i))
    sync_record(filename, duration, fs, channels)
    transcript = transcribe_pocket(filename)
    # print transcript on screen
    print(transcript)
    filenames.append(filename)
    transcripts.append(transcript)

data = {
    'filenames': filenames,
    'transcripts': transcripts,
}

# the context manager closes the file even if serialization fails
with open('recordings.json', 'w') as jsonfile:
    json.dump(data, jsonfile)

results in a .JSON output like this:

1 lines (1 sloc)  672 Bytes
{"filenames": ["0.wav", "1.wav", "2.wav", "3.wav", "4.wav", "5.wav", "6.wav", "7.wav", "8.wav", "9.wav", "10.wav", "11.wav", "12.wav", "13.wav", "14.wav", "15.wav", "16.wav", "17.wav", "18.wav", "19.wav", "20.wav", "21.wav", "22.wav", "23.wav", "24.wav", "25.wav", "26.wav", "27.wav", "28.wav", "29.wav"], "transcripts": ["oh", "it is gorgeous", "testing on this", "return to go", "oh good and it's", "", "", "where", "it can be", "", "", "runny nose", "", "", "the", "oh", "the", "we're then were", "so good", "", "", "", "resting to", "testing testing", "asking because too", "testing testing", "interesting times", "testing testing", "deemed resting", "talking to us"]}

text path

text_path.py

results in an image like this:

frequency distribution plots

text_freqplot.py

results in an image like this:

wordclouds

text_wordcloud.py

results in an image like this:

parsed tree plots

text_tree.py

results in an image like this:

named entity visualizations

text_entity.py

results in an image like this (in browser):

network plots

text_network.py

tSNE plots

text_tsne.py

results in an image like this:

text_tsne_many.py

results in an image like this:

Visualizing mixed features

mixed streams

mixed_stream.py (sample output)

==.==.==.==.==.==.==.==.==.== as things were
==.== 
==.==.==.==.==.==.==.==.==.== not sure who
==.==.==.==.==.== and it's all gone
==.== 
==.== 
==.== good
==.==.==.==.==.==.==.==.==.== exactly what
==.== air
==.== oh
==.== 
==.== 
==.==.==.==.==.==.==.==.==.==.==.== i was too good at all
==.== 
==.== 
==.==.==.==.==.==.==.==.==.== uh huh
==.==.==.==.==.==.==.==.==.==.==.== in the mud
==.==.==.==.==.==.==.==.==.== aren't you
==.== 
==.==.==.==.==.==.==.==.==.==.==.== so
==.== mm
==.== 
==.== 
==.== 
==.==.==.==.==.== this is
==.== oh
==.== 
==.==.==.==.==.== oh
==.== 
==.== 
minimum val: 0.05180590960662812
max val: 39.191748946905136
transcripts: 
['as things were', '', 'not sure who', "and it's all gone", '', '', 'good', 'exactly what', 'air', 'oh', '', '', 'i was too good at all', '', '', 'uh huh', 'in the mud', "aren't you", '', 'so', 'mm', '', '', '', 'this is', 'oh', '', 'oh', '', '']

mixed path plots

mixed_path.py

results in an image like this:

mixed_video.py

results in a video as shown here

Looks like this:

Visualizing meta features

streaming meta features

meta_stream.py

Looks like this:

multiple streaming meta features

meta_multi.py

Looks like this:

non-streaming meta features

meta_nonstream.py

Looks like this:

Resources

If you are interested in reading more about any of these topics, check out the documentation below.

Visualization libraries

Audio data

Text data

Mixed data

Meta data