asr_vosk: add comments
[ros_wild_thumper.git] / scripts / asr_vosk.rb
1 #!/usr/bin/ruby
2 # export AUDIODEV=plughw:CARD=ArrayUAC10,0
3 # rec -q -t alsa -c 1 -b 16 -r 16000 -t wav - silence -l 1 0.1 0.3% -1 2.0 0.3% | ./asr_vosk.rb -
4
5 require 'logger'
6 require 'websocket-eventmachine-client'
7 require 'json'
8 require 'ros'
9 require 'std_msgs/String'
10
# Phrases that enable command handling when recognized on their own
KEYWORDS = ["wild thumper"].freeze

# Configuration message sent as JSON to the recognizer once the websocket
# is open. Frozen so the shared constants cannot be modified by accident.
CONFIG = {
        config: {
                phrase_list: [
                        "angle", "backward", "by", "centimeter", "compass", "current",
                        "decrease", "default", "degree", "down", "eight", "eighteen",
                        "eighty", "eleven", "fifteen", "fifty", "five", "forty",
                        "forward", "four", "fourteen", "get", "go", "hundred",
                        "increase", "left", "light", "lights", "meter", "mic",
                        "minus", "motion", "mute", "nine", "nineteen", "ninety",
                        "off", "on", "one", "position", "pressure", "right",
                        "secure", "set", "seven", "seventeen", "seventy", "silence",
                        "six", "sixteen", "sixty", "speed", "stop", "temp",
                        "temperature", "ten", "thirteen", "thirty", "three", "to",
                        "turn", "twelve", "twenty", "two", "up", "velocity",
                        "voltage", "volume", "wild thumper", "zero"
                ].freeze,
                sample_rate: 16000.0
        }.freeze
}.freeze
18
# Streams audio read from ARGF to a speech recognition server over a
# websocket and publishes the recognized text on the 'asr_result' topic.
#
# A result is only published after one of KEYWORDS was recognized; every
# published result disables command handling again until the next keyword.
class Speak
        # Server used when no uri is passed to the constructor
        DEFAULT_URI = 'ws://192.168.36.4:2700'
        # Number of bytes read from ARGF per websocket message
        CHUNK_SIZE = 16000

        # Advertises the result topic, connects to the server and runs the
        # EventMachine reactor. EM.run blocks, so this constructor does not
        # return while the reactor is running.
        #
        # node:: ROS node used to advertise 'asr_result'
        # uri::  websocket URI of the recognition server
        def initialize(node, uri = DEFAULT_URI)
                @logger = Logger.new(STDOUT)
                @commands_enabled = false
                @publisher = node.advertise('asr_result', Std_msgs::String)

                # Websocket handling
                EM.run do
                        Signal.trap("INT")  { send_eof }
                        @ws = WebSocket::EventMachine::Client.connect(:uri => uri)

                        @ws.onopen do
                                @logger.info "Running.."
                                @ws.send CONFIG.to_json

                                # ARGF.read blocks, so the input is read in its own thread
                                Thread.new { run }
                        end

                        @ws.onmessage do |msg, _type|
                                begin
                                        handle_result(JSON.parse(msg))
                                rescue JSON::ParserError => e
                                        # A single unparsable message must not take down the reactor
                                        @logger.warn "Ignoring unparsable message: #{e.message}"
                                end
                        end

                        @ws.onclose do |code, _reason|
                                puts "Disconnected with status code: #{code}"
                                exit
                        end
                end
        end

        # Tells the server that no more audio data will follow
        def send_eof
                @ws.send '{"eof" : 1}'
        end

        # Loop over all input data: forwards ARGF in binary chunks and sends
        # the end-of-stream message once the input is exhausted.
        #
        # NOTE(review): @ws.send is called from the reader thread here, not
        # from the reactor thread - confirm this is safe with EventMachine.
        def run
                while (data = ARGF.read(CHUNK_SIZE))
                        @ws.send data, :type => :binary
                end
                send_eof
        end

        # Handles one decoded server message.
        #
        # Messages without a "result" key are ignored. A text matching one of
        # KEYWORDS enables command handling; any other text is published when
        # command handling is enabled.
        #
        # msg:: Hash parsed from the JSON sent by the server
        def handle_result(msg)
                return unless msg.key?("result")

                # Interpolation instead of String#+ so a missing value cannot raise
                msg["result"].each do |result|
                        @logger.debug "word=#{result['word']}"
                end

                text = msg["text"]
                @logger.debug "text=#{text}"
                return if text.nil?

                # check for keywords first, otherwise handle command if enabled
                if KEYWORDS.include?(text)
                        keyword_detect(text)
                elsif @commands_enabled
                        final_result(text)
                end
        end

        # Enables/Disables the speech command
        def enable_commands(bEnable)
                @commands_enabled = bEnable
        end

        # Resulting speech command: disables further commands and publishes
        # the recognized text.
        def final_result(hyp)
                @logger.info "final: #{hyp}"
                enable_commands(false)

                # Publish vosk result as ros message
                msg = Std_msgs::String.new
                msg.data = hyp
                @publisher.publish(msg)
        end

        # Called when a keyword was recognized: the next result is treated as a command
        def keyword_detect(hyp)
                @logger.debug "Got keyword: #{hyp}"
                enable_commands(true)
        end
end
110
# Script entry point: create the ROS node and run the recognizer client.
if __FILE__ == $0
        node = ROS::Node.new('asr_vosk')
        begin
                # Speak.new runs the EventMachine reactor and leaves it through
                # `exit` when the websocket closes, so it has to be inside the
                # begin/ensure for node.shutdown to run on that path as well.
                Speak.new(node)
                node.spin
        rescue Interrupt
                # Interrupted while spinning: continue with the shutdown below
        ensure
                node.shutdown
        end
end