asr_vosk: Allow to handle keyword and command in one sentence
[ros_wild_thumper.git] / scripts / asr_vosk.rb
1 #!/usr/bin/ruby
2 # export AUDIODEV=plughw:CARD=ArrayUAC10,0
3 # rec -q -t alsa -c 1 -b 16 -r 16000 -t wav - silence -l 1 0.1 0.3% -1 2.0 0.3% | ./asr_vosk.rb -
4
5 require 'logger'
6 require 'websocket-eventmachine-client'
7 require 'json'
8 require 'ros'
9 require 'std_msgs/String'
10
# Wake words: Speak#handle_result only forwards a command after one of these
# phrases has been recognized.
KEYWORDS = ["wild thumper"].freeze

# Configuration message, sent to the recognition server as JSON right after
# the websocket connection has been opened.
CONFIG = {
        config: {
                # Phrases handed to the recognizer. The wake words are spliced in
                # from KEYWORDS so that both lists cannot drift apart.
                phrase_list: [
                        "angle", "backward", "by", "centimeter", "compass", "current",
                        "decrease", "default", "degree", "down",
                        "eight", "eighteen", "eighty", "eleven",
                        "fifteen", "fifty", "five", "forty", "forward", "four", "fourteen",
                        "get", "go", "hundred", "increase",
                        "left", "light", "lights",
                        "meter", "mic", "minus", "motion", "mute",
                        "nine", "nineteen", "ninety",
                        "off", "on", "one",
                        "position", "pressure", "right",
                        "secure", "set", "seven", "seventeen", "seventy", "silence",
                        "six", "sixteen", "sixty", "speed", "stop",
                        "temp", "temperature", "ten", "thirteen", "thirty", "three",
                        "to", "turn", "twelve", "twenty", "two",
                        "up", "velocity", "voltage", "volume",
                        *KEYWORDS,
                        "zero"
                ].freeze,
                # Sample rate of the audio stream in Hz
                sample_rate: 16000.0
        }.freeze
}.freeze
18
# Speech recognition front end: streams the audio read from ARGF to a websocket
# recognition server and publishes recognized commands as Std_msgs::String
# messages on the 'asr_result' ROS topic.
#
# A command is only published after one of KEYWORDS was heard. The keyword may
# form a sentence of its own (the next sentence is then taken as the command)
# or share one sentence with the command.
class Speak
        # Address of the recognition server
        SERVER_URI = 'ws://192.168.36.4:2700'
        # Number of bytes read from ARGF per binary websocket message
        CHUNK_SIZE = 16000

        # Advertises the result topic and runs the EventMachine reactor.
        # EM.run is invoked from here, so this constructor does not return
        # while the reactor is running.
        #
        # node:: ROS node used to advertise the 'asr_result' topic
        def initialize(node)
                @logger = Logger.new(STDOUT)
                @commands_enabled = false
                @publisher = node.advertise('asr_result', Std_msgs::String)

                # Websocket handling
                EM.run do
                        # Ctrl-C only announces the end of the stream; the process
                        # itself exits from the onclose handler below.
                        Signal.trap("INT") { send_eof }
                        @ws = WebSocket::EventMachine::Client.connect(:uri => SERVER_URI)

                        @ws.onopen do
                                @logger.info "Running.."
                                @ws.send CONFIG.to_json

                                # Read the audio input in a separate thread so the blocking
                                # ARGF reads do not stall the reactor.
                                # NOTE(review): #run calls @ws.send from outside the reactor
                                # thread - confirm the websocket client tolerates that.
                                Thread.new { run }
                        end

                        @ws.onmessage do |msg, _type|
                                handle_result(JSON.parse(msg))
                        end

                        @ws.onclose do |code, _reason|
                                puts "Disconnected with status code: #{code}"
                                exit
                        end
                end
        end

        # Tells the server that no more audio will follow.
        def send_eof
                @ws.send '{"eof" : 1}'
        end

        # Loop over all input data: forward it chunk by chunk as binary
        # messages and send the EOF marker once the input is exhausted.
        def run
                while (data = ARGF.read(CHUNK_SIZE))
                        @ws.send data, :type => :binary
                end
                send_eof
        end

        # Evaluates one decoded server message.
        #
        # Messages without a "result" key are ignored. Otherwise every keyword
        # contained in the text enables command handling and is cut out of the
        # text; whatever remains is published as the command if commands are
        # enabled.
        #
        # msg:: Hash parsed from the JSON sent by the server
        def handle_result(msg)
                return unless msg.key?("result")

                msg["result"].each do |result|
                        @logger.debug "word=#{result['word']}"
                end

                # check for keywords first; a missing text is treated as empty
                text = msg["text"].to_s
                @logger.debug "text=#{text}"
                KEYWORDS.each do |keyword|
                        next unless text.include?(keyword)

                        keyword_detected(keyword)
                        text = text.gsub(keyword, "")
                end

                # Cutting out a keyword can leave leading, trailing or doubled
                # blanks behind, normalize them before publishing.
                text = text.split.join(" ")

                # handle what is left as command if enabled
                final_result(text) if @commands_enabled && !text.empty?
        end

        # Enables/Disables the speech command
        #
        # bEnable:: true to accept the next recognized text as command
        def enable_commands(bEnable)
                @commands_enabled = bEnable
        end

        # Resulting speech command: disables further commands until the next
        # keyword and publishes the text as ROS message.
        #
        # hyp:: recognized command text
        def final_result(hyp)
                @logger.info "final: #{hyp}"
                enable_commands(false)

                # Publish vosk result as ros message
                msg = Std_msgs::String.new
                msg.data = hyp
                @publisher.publish(msg)
        end

        # Called for every keyword found in a result, enables command handling.
        #
        # hyp:: the keyword that was recognized
        def keyword_detected(hyp)
                @logger.debug "Got keyword: #{hyp}"
                enable_commands(true)
        end
end
112
# Script entry point: create the ROS node, start the speech front end and
# spin the node; the node is shut down however the spin ends.
if $PROGRAM_NAME == __FILE__
        ros_node = ROS::Node.new('asr_vosk')
        Speak.new(ros_node)
        begin
                ros_node.spin
        rescue Interrupt
                # Nothing to do for Ctrl-C, the shutdown happens below
        ensure
                ros_node.shutdown
        end
end