This repository was archived by the owner on Dec 27, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: live_testing.py
More file actions
73 lines (58 loc) · 2.35 KB
/
Copy path: live_testing.py
File metadata and controls
73 lines (58 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from pyKey import press
import librosa
import numpy as np
import torch
import pyaudio
from time import time
import os
import argparse
from utils import SiameseNet
# Command-line interface of the live test script. Every option now carries a
# help string so that `--help` explains how the values are used below.
parser = argparse.ArgumentParser(description='Live test SiameseNet')
parser.add_argument(
    '--model_location', '-l', type=str, default='model/model-epoch-{}.pth',
    help='path template of the weights file; {} is replaced by --epoch',
)
# NOTE(review): with the default of None the template above is formatted with
# the text "None" -- confirm whether an epoch is meant to be mandatory.
parser.add_argument(
    '--epoch', '-e', type=int, default=None,
    help='epoch number substituted into --model_location',
)
parser.add_argument(
    '--device', '-d', type=str, default=None,
    help='torch device to run on; defaults to cuda when available, else cpu',
)
parser.add_argument(
    '--ref', '-r', type=str, default='references/',
    help='directory holding up.wav, down.wav, sil.wav and quit.wav',
)
parser.add_argument(
    '--verbose', '-v', action='store_true',
    help='print the scores and processing time of every non-silence chunk',
)
args = parser.parse_args()

# Fall back to automatic device selection when no device was requested.
if not args.device:
    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Audio capture settings shared by the PyAudio input stream and the
# preprocessing code.
# Sample format handed to the input stream; the read loop decodes the raw
# bytes as little-endian 16-bit integers ('<i2').
FORMAT = pyaudio.paInt16
# Channel count handed to the input stream and used to reshape each chunk.
CHANNELS = 1
# Sampling rate in Hz: used for the stream, for librosa.load and for the MFCC.
RATE = 16000
# Frames requested per stream read (4000 / 16000 = 0.25 s of audio per chunk).
CHUNK = 4000
def preprocess(audio=None):
    """Convert a waveform into the MFCC tensor handed to the model.

    The signal is trimmed with ``librosa.effects.trim(top_db=7)``, cut to at
    most 4000 samples, centre-padded back to exactly 4000 samples and turned
    into MFCC features at the module-level ``RATE``.

    Args:
        audio: array of audio samples (callers in this file pass mono
            waveforms). The ``None`` default is only a placeholder; a real
            array has to be supplied.

    Returns:
        A ``torch`` tensor holding the MFCC matrix with two leading singleton
        axes added, moved to ``args.device``.
    """
    n_samples = 4000

    trimmed = librosa.effects.trim(audio, top_db=7)[0]
    # `size` is keyword-only in librosa >= 0.10; passing it by keyword also
    # works on older releases, whereas the positional form raises a TypeError
    # on current ones.
    centered = librosa.util.pad_center(trimmed[:n_samples], size=n_samples)
    mfcc = librosa.feature.mfcc(y=centered, sr=RATE)
    # NOTE(review): the tensor keeps whatever dtype the MFCC has; live chunks
    # are float64 after the division in the read loop -- confirm the model
    # accepts that.
    features = torch.tensor(mfcc[None, None])
    return features.to(device=args.device)
# Reference recordings, one per command, loaded from the --ref directory and
# preprocessed the same way as the live audio.
# NOTE(review): the key order (up, down, sil, quit) presumably determines the
# score indices the read loop compares against -- confirm in SiameseNet.
refs = {
    label: preprocess(
        librosa.load(os.path.join(args.ref, label + '.wav'), sr=RATE)[0]
    )
    for label in ('up', 'down', 'sil', 'quit')
}

print('Loading model')
# The weights path is the --model_location template filled with --epoch.
weights_path = args.model_location.format(args.epoch)
model = SiameseNet(
    mode='inference',
    weights_path=weights_path,
    refs_dict=refs,
    device=args.device,
)
# NOTE(review): `previous` is not read anywhere in the visible code.
previous = np.zeros((CHUNK, 1))

# Open an input stream (no device index is given) that delivers CHUNK frames
# per read, using the capture settings defined above.
audio = pyaudio.PyAudio()
stream_options = {
    'format': FORMAT,
    'channels': CHANNELS,
    'rate': RATE,
    'input': True,
    'frames_per_buffer': CHUNK,
}
stream = audio.open(**stream_options)
# Main loop: read one chunk, score it against the references and act on the
# best-scoring class (0 -> press UP, 1 -> press DOWN, 3 -> stop; 2 does
# nothing). The try/finally releases the audio device however the loop ends
# (class 3, Ctrl-C, or an error while reading or scoring).
print("Recording...")
try:
    while True:
        start = time()
        data = stream.read(CHUNK)
        # Raw bytes -> little-endian int16 samples -> floats scaled by 2**15.
        samples = np.frombuffer(data, dtype='<i2').reshape(-1, CHANNELS) / (2**15)
        scores = model(preprocess(np.squeeze(samples)))

        # Compute the winning class once instead of once per comparison.
        best = np.argmax(scores)
        if best == 0:
            press('UP')
        elif best == 1:
            press('DOWN')
        elif best == 3:
            break

        if args.verbose and best != 2:
            print('Up : ',scores[0],' Down : ', scores[1], ' Silence : ', scores[2], ' time : ', time()-start)
finally:
    stream.stop_stream()
    stream.close()
    audio.terminate()