discordmuffin/voice.go
Bruce Marriner c8d01b7f60 Reconnect now does a full disconnect and reconnect
This should hopefully provide a more stable voice connection at the cost
of making reconnects slightly more expensive for both the client and
Discord. This could be changed later once the Discord voice
documentation more clearly defines reconnect rules. Also, a bit of cleanup :)
2016-06-14 10:57:43 -05:00

868 lines
22 KiB
Go

// Discordgo - Discord bindings for Go
// Available at https://github.com/bwmarrin/discordgo
// Copyright 2015-2016 Bruce Marriner <bruce@sqls.net>. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file contains code related to Discord voice suppport
package discordgo
import (
"encoding/binary"
"encoding/json"
"fmt"
"log"
"net"
"runtime"
"strings"
"sync"
"time"
"github.com/gorilla/websocket"
"golang.org/x/crypto/nacl/secretbox"
)
// ------------------------------------------------------------------------------------------------
// Code related to both VoiceConnection Websocket and UDP connections.
// ------------------------------------------------------------------------------------------------
// A VoiceConnection struct holds all the data and functions related to a Discord Voice Connection.
type VoiceConnection struct {
sync.RWMutex
Debug bool // If true, print extra logging -- DEPRECATED
LogLevel int
Ready bool // If true, voice is ready to send/receive audio
UserID string
GuildID string
ChannelID string
deaf bool
mute bool
speaking bool
reconnecting bool // If true, voice connection is trying to reconnect
OpusSend chan []byte // Chan for sending opus audio
OpusRecv chan *Packet // Chan for receiving opus audio
wsConn *websocket.Conn
wsMutex sync.Mutex
udpConn *net.UDPConn
session *Session
sessionID string
token string
endpoint string
// Used to send a close signal to goroutines
close chan struct{}
// Used to allow blocking until connected
connected chan bool
// Used to pass the sessionid from onVoiceStateUpdate
// sessionRecv chan string UNUSED ATM
op4 voiceOP4
op2 voiceOP2
voiceSpeakingUpdateHandlers []VoiceSpeakingUpdateHandler
}
// VoiceSpeakingUpdateHandler type provides a function defination for the
// VoiceSpeakingUpdate event
type VoiceSpeakingUpdateHandler func(vc *VoiceConnection, vs *VoiceSpeakingUpdate)
// Speaking sends a speaking notification to Discord over the voice websocket.
// This must be sent as true prior to sending audio and should be set to false
// once finished sending audio.
// b : Send true if speaking, false if not.
func (v *VoiceConnection) Speaking(b bool) (err error) {
v.log(LogDebug, "called (%t)", b)
type voiceSpeakingData struct {
Speaking bool `json:"speaking"`
Delay int `json:"delay"`
}
type voiceSpeakingOp struct {
Op int `json:"op"` // Always 5
Data voiceSpeakingData `json:"d"`
}
if v.wsConn == nil {
return fmt.Errorf("No VoiceConnection websocket.")
}
data := voiceSpeakingOp{5, voiceSpeakingData{b, 0}}
v.wsMutex.Lock()
err = v.wsConn.WriteJSON(data)
v.wsMutex.Unlock()
if err != nil {
v.speaking = false
log.Println("Speaking() write json error:", err)
return
}
v.speaking = b
return
}
// ChangeChannel sends Discord a request to change channels within a Guild
// !!! NOTE !!! This function may be removed in favour of just using ChannelVoiceJoin
func (v *VoiceConnection) ChangeChannel(channelID string, mute, deaf bool) (err error) {
data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, &channelID, mute, deaf}}
v.wsMutex.Lock()
err = v.session.wsConn.WriteJSON(data)
v.wsMutex.Unlock()
if err != nil {
return
}
v.ChannelID = channelID
v.deaf = deaf
v.mute = mute
v.speaking = false
return
}
// Disconnect disconnects from this voice channel and closes the websocket
// and udp connections to Discord.
// !!! NOTE !!! this function may be removed in favour of ChannelVoiceLeave
func (v *VoiceConnection) Disconnect() (err error) {
// Send a OP4 with a nil channel to disconnect
if v.sessionID != "" {
data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, nil, true, true}}
v.wsMutex.Lock()
err = v.session.wsConn.WriteJSON(data)
v.wsMutex.Unlock()
v.sessionID = ""
}
// Close websocket and udp connections
v.Close()
v.log(LogInformational, "Deleting VoiceConnection %s", v.GuildID)
delete(v.session.VoiceConnections, v.GuildID)
return
}
// Close closes the voice ws and udp connections
func (v *VoiceConnection) Close() {
v.log(LogInformational, "called")
v.Lock()
defer v.Unlock()
v.Ready = false
v.speaking = false
if v.close != nil {
close(v.close)
v.close = nil
}
if v.udpConn != nil {
err := v.udpConn.Close()
if err != nil {
log.Println("error closing udp connection: ", err)
}
v.udpConn = nil
}
if v.wsConn != nil {
err := v.wsConn.Close()
if err != nil {
log.Println("error closing websocket connection: ", err)
}
v.wsConn = nil
}
}
// AddHandler adds a Handler for VoiceSpeakingUpdate events.
func (v *VoiceConnection) AddHandler(h VoiceSpeakingUpdateHandler) {
v.Lock()
defer v.Unlock()
v.voiceSpeakingUpdateHandlers = append(v.voiceSpeakingUpdateHandlers, h)
}
// VoiceSpeakingUpdate is a struct for a VoiceSpeakingUpdate event.
type VoiceSpeakingUpdate struct {
UserID string `json:"user_id"`
SSRC int `json:"ssrc"`
Speaking bool `json:"speaking"`
}
// ------------------------------------------------------------------------------------------------
// Unexported Internal Functions Below.
// ------------------------------------------------------------------------------------------------
// A voiceOP4 stores the data for the voice operation 4 websocket event
// which provides us with the NaCl SecretBox encryption key
type voiceOP4 struct {
SecretKey [32]byte `json:"secret_key"`
Mode string `json:"mode"`
}
// A voiceOP2 stores the data for the voice operation 2 websocket event
// which is sort of like the voice READY packet
type voiceOP2 struct {
SSRC uint32 `json:"ssrc"`
Port int `json:"port"`
Modes []string `json:"modes"`
HeartbeatInterval time.Duration `json:"heartbeat_interval"`
}
// WaitUntilConnected waits for the Voice Connection to
// become ready, if it does not become ready it retuns an err
func (v *VoiceConnection) waitUntilConnected() error {
i := 0
for {
if v.Ready {
return nil
}
if i > 10 {
return fmt.Errorf("Timeout waiting for voice.")
}
time.Sleep(1 * time.Second)
i++
}
}
// Open opens a voice connection. This should be called
// after VoiceChannelJoin is used and the data VOICE websocket events
// are captured.
func (v *VoiceConnection) open() (err error) {
v.log(LogInformational, "called")
v.Lock()
defer v.Unlock()
// Don't open a websocket if one is already open
if v.wsConn != nil {
v.log(LogWarning, "refusing to overwrite non-nil websocket")
return
}
// TODO temp? loop to wait for the SessionID
i := 0
for {
if v.sessionID != "" {
break
}
if i > 20 { // only loop for up to 1 second total
return fmt.Errorf("Did not receive voice Session ID in time.")
}
time.Sleep(50 * time.Millisecond)
i++
}
// Connect to VoiceConnection Websocket
vg := fmt.Sprintf("wss://%s", strings.TrimSuffix(v.endpoint, ":80"))
v.log(LogInformational, "connecting to voice endpoint %s", vg)
v.wsConn, _, err = websocket.DefaultDialer.Dial(vg, nil)
if err != nil {
v.log(LogWarning, "error connecting to voice endpoint %s, %s", vg, err)
v.log(LogDebug, "voice struct: %#v\n", v)
return
}
type voiceHandshakeData struct {
ServerID string `json:"server_id"`
UserID string `json:"user_id"`
SessionID string `json:"session_id"`
Token string `json:"token"`
}
type voiceHandshakeOp struct {
Op int `json:"op"` // Always 0
Data voiceHandshakeData `json:"d"`
}
data := voiceHandshakeOp{0, voiceHandshakeData{v.GuildID, v.UserID, v.sessionID, v.token}}
err = v.wsConn.WriteJSON(data)
if err != nil {
log.Println("VOICE error sending init packet:", err)
return
}
v.close = make(chan struct{})
go v.wsListen(v.wsConn, v.close)
// add loop/check for Ready bool here?
// then return false if not ready?
// but then wsListen will also err.
return
}
// wsListen listens on the voice websocket for messages and passes them
// to the voice event handler. This is automatically called by the Open func
func (v *VoiceConnection) wsListen(wsConn *websocket.Conn, close <-chan struct{}) {
for {
_, message, err := v.wsConn.ReadMessage()
if err != nil {
// Detect if we have been closed manually. If a Close() has already
// happened, the websocket we are listening on will be different to the
// current session.
v.RLock()
sameConnection := v.wsConn == wsConn
v.RUnlock()
if sameConnection {
v.log(LogError, "voice endpoint %s websocket closed unexpectantly, %s", v.endpoint, err)
// Start reconnect goroutine then exit.
go v.reconnect()
}
return
}
// Pass received message to voice event handler
select {
case <-close:
return
default:
go v.onEvent(message)
}
}
}
// wsEvent handles any voice websocket events. This is only called by the
// wsListen() function.
func (v *VoiceConnection) onEvent(message []byte) {
v.log(LogDebug, "received: %s", string(message))
var e Event
if err := json.Unmarshal(message, &e); err != nil {
v.log(LogError, "unmarshall error, %s", err)
return
}
switch e.Operation {
case 2: // READY
if err := json.Unmarshal(e.RawData, &v.op2); err != nil {
v.log(LogError, "OP2 unmarshall error, %s, %s", err, string(e.RawData))
return
}
// Start the voice websocket heartbeat to keep the connection alive
go v.wsHeartbeat(v.wsConn, v.close, v.op2.HeartbeatInterval)
// TODO monitor a chan/bool to verify this was successful
// Start the UDP connection
err := v.udpOpen()
if err != nil {
v.log(LogError, "error opening udp connection, %s", err)
return
}
// Start the opusSender.
// TODO: Should we allow 48000/960 values to be user defined?
if v.OpusSend == nil {
v.OpusSend = make(chan []byte, 2)
}
go v.opusSender(v.udpConn, v.close, v.OpusSend, 48000, 960)
// Start the opusReceiver
if !v.deaf {
if v.OpusRecv == nil {
v.OpusRecv = make(chan *Packet, 2)
}
go v.opusReceiver(v.udpConn, v.close, v.OpusRecv)
}
// Send the ready event
v.connected <- true
return
case 3: // HEARTBEAT response
// add code to use this to track latency?
return
case 4: // udp encryption secret key
v.op4 = voiceOP4{}
if err := json.Unmarshal(e.RawData, &v.op4); err != nil {
v.log(LogError, "OP4 unmarshall error, %s, %s", err, string(e.RawData))
return
}
return
case 5:
if len(v.voiceSpeakingUpdateHandlers) == 0 {
return
}
voiceSpeakingUpdate := &VoiceSpeakingUpdate{}
if err := json.Unmarshal(e.RawData, voiceSpeakingUpdate); err != nil {
v.log(LogError, "OP5 unmarshall error, %s, %s", err, string(e.RawData))
return
}
for _, h := range v.voiceSpeakingUpdateHandlers {
h(v, voiceSpeakingUpdate)
}
default:
v.log(LogError, "unknown voice operation, %d, %s", e.Operation, string(e.RawData))
}
return
}
type voiceHeartbeatOp struct {
Op int `json:"op"` // Always 3
Data int `json:"d"`
}
// NOTE :: When a guild voice server changes how do we shut this down
// properly, so a new connection can be setup without fuss?
//
// wsHeartbeat sends regular heartbeats to voice Discord so it knows the client
// is still connected. If you do not send these heartbeats Discord will
// disconnect the websocket connection after a few seconds.
func (v *VoiceConnection) wsHeartbeat(wsConn *websocket.Conn, close <-chan struct{}, i time.Duration) {
if close == nil || wsConn == nil {
return
}
var err error
ticker := time.NewTicker(i * time.Millisecond)
for {
v.log(LogDebug, "sending heartbeat packet")
v.wsMutex.Lock()
err = wsConn.WriteJSON(voiceHeartbeatOp{3, int(time.Now().Unix())})
v.wsMutex.Unlock()
if err != nil {
v.log(LogError, "error sending heartbeat to voice endpoint %s, %s", v.endpoint, err)
return
}
select {
case <-ticker.C:
// continue loop and send heartbeat
case <-close:
return
}
}
}
// ------------------------------------------------------------------------------------------------
// Code related to the VoiceConnection UDP connection
// ------------------------------------------------------------------------------------------------
type voiceUDPData struct {
Address string `json:"address"` // Public IP of machine running this code
Port uint16 `json:"port"` // UDP Port of machine running this code
Mode string `json:"mode"` // always "xsalsa20_poly1305"
}
type voiceUDPD struct {
Protocol string `json:"protocol"` // Always "udp" ?
Data voiceUDPData `json:"data"`
}
type voiceUDPOp struct {
Op int `json:"op"` // Always 1
Data voiceUDPD `json:"d"`
}
// udpOpen opens a UDP connection to the voice server and completes the
// initial required handshake. This connection is left open in the session
// and can be used to send or receive audio. This should only be called
// from voice.wsEvent OP2
func (v *VoiceConnection) udpOpen() (err error) {
v.Lock()
defer v.Unlock()
if v.wsConn == nil {
return fmt.Errorf("nil voice websocket")
}
if v.udpConn != nil {
return fmt.Errorf("udp connection already open")
}
if v.close == nil {
return fmt.Errorf("nil close channel")
}
if v.endpoint == "" {
return fmt.Errorf("empty endpoint")
}
host := fmt.Sprintf("%s:%d", strings.TrimSuffix(v.endpoint, ":80"), v.op2.Port)
addr, err := net.ResolveUDPAddr("udp", host)
if err != nil {
v.log(LogWarning, "error resolving udp host %s, %s", host, err)
return
}
v.log(LogInformational, "connecting to udp addr %s", addr.String())
v.udpConn, err = net.DialUDP("udp", nil, addr)
if err != nil {
v.log(LogWarning, "error connecting to udp addr %s, %s", addr.String(), err)
return
}
// Create a 70 byte array and put the SSRC code from the Op 2 VoiceConnection event
// into it. Then send that over the UDP connection to Discord
sb := make([]byte, 70)
binary.BigEndian.PutUint32(sb, v.op2.SSRC)
_, err = v.udpConn.Write(sb)
if err != nil {
v.log(LogWarning, "udp write error to %s, %s", addr.String(), err)
return
}
// Create a 70 byte array and listen for the initial handshake response
// from Discord. Once we get it parse the IP and PORT information out
// of the response. This should be our public IP and PORT as Discord
// saw us.
rb := make([]byte, 70)
rlen, _, err := v.udpConn.ReadFromUDP(rb)
if err != nil {
v.log(LogWarning, "udp read error, %s, %s", addr.String(), err)
return
}
if rlen < 70 {
v.log(LogWarning, "received udp packet too small")
return fmt.Errorf("received udp packet too small")
}
// Loop over position 4 though 20 to grab the IP address
// Should never be beyond position 20.
var ip string
for i := 4; i < 20; i++ {
if rb[i] == 0 {
break
}
ip += string(rb[i])
}
// Grab port from position 68 and 69
port := binary.LittleEndian.Uint16(rb[68:70])
// Take the data from above and send it back to Discord to finalize
// the UDP connection handshake.
data := voiceUDPOp{1, voiceUDPD{"udp", voiceUDPData{ip, port, "xsalsa20_poly1305"}}}
v.wsMutex.Lock()
err = v.wsConn.WriteJSON(data)
v.wsMutex.Unlock()
if err != nil {
v.log(LogWarning, "udp write error, %#v, %s", data, err)
return
}
// start udpKeepAlive
go v.udpKeepAlive(v.udpConn, v.close, 5*time.Second)
// TODO: find a way to check that it fired off okay
return
}
// udpKeepAlive sends a udp packet to keep the udp connection open
// This is still a bit of a "proof of concept"
func (v *VoiceConnection) udpKeepAlive(udpConn *net.UDPConn, close <-chan struct{}, i time.Duration) {
if udpConn == nil || close == nil {
return
}
var err error
var sequence uint64
packet := make([]byte, 8)
ticker := time.NewTicker(i)
for {
binary.LittleEndian.PutUint64(packet, sequence)
sequence++
_, err = udpConn.Write(packet)
if err != nil {
v.log(LogError, "write error, %s")
return
}
select {
case <-ticker.C:
// continue loop and send keepalive
case <-close:
return
}
}
}
// opusSender will listen on the given channel and send any
// pre-encoded opus audio to Discord. Supposedly.
func (v *VoiceConnection) opusSender(udpConn *net.UDPConn, close <-chan struct{}, opus <-chan []byte, rate, size int) {
if udpConn == nil || close == nil {
return
}
runtime.LockOSThread()
// VoiceConnection is now ready to receive audio packets
// TODO: this needs reviewed as I think there must be a better way.
v.Ready = true
defer func() { v.Ready = false }()
var sequence uint16
var timestamp uint32
var recvbuf []byte
var ok bool
udpHeader := make([]byte, 12)
var nonce [24]byte
// build the parts that don't change in the udpHeader
udpHeader[0] = 0x80
udpHeader[1] = 0x78
binary.BigEndian.PutUint32(udpHeader[8:], v.op2.SSRC)
// start a send loop that loops until buf chan is closed
ticker := time.NewTicker(time.Millisecond * time.Duration(size/(rate/1000)))
for {
// Get data from chan. If chan is closed, return.
select {
case <-close:
return
case recvbuf, ok = <-opus:
if !ok {
return
}
// else, continue loop
}
if !v.speaking {
err := v.Speaking(true)
if err != nil {
v.log(LogError, "error sending speaking packet, %s", err)
}
}
// Add sequence and timestamp to udpPacket
binary.BigEndian.PutUint16(udpHeader[2:], sequence)
binary.BigEndian.PutUint32(udpHeader[4:], timestamp)
// encrypt the opus data
copy(nonce[:], udpHeader)
sendbuf := secretbox.Seal(udpHeader, recvbuf, &nonce, &v.op4.SecretKey)
// block here until we're exactly at the right time :)
// Then send rtp audio packet to Discord over UDP
select {
case <-close:
return
case <-ticker.C:
// continue
}
_, err := udpConn.Write(sendbuf)
if err != nil {
v.log(LogError, "udp write error, %s", err)
v.log(LogDebug, "voice struct: %#v\n", v)
return
}
if (sequence) == 0xFFFF {
sequence = 0
} else {
sequence++
}
if (timestamp + uint32(size)) >= 0xFFFFFFFF {
timestamp = 0
} else {
timestamp += uint32(size)
}
}
}
// A Packet contains the headers and content of a received voice packet.
type Packet struct {
SSRC uint32
Sequence uint16
Timestamp uint32
Type []byte
Opus []byte
PCM []int16
}
// opusReceiver listens on the UDP socket for incoming packets
// and sends them across the given channel
// NOTE :: This function may change names later.
func (v *VoiceConnection) opusReceiver(udpConn *net.UDPConn, close <-chan struct{}, c chan *Packet) {
if udpConn == nil || close == nil {
return
}
p := Packet{}
recvbuf := make([]byte, 1024)
var nonce [24]byte
for {
rlen, err := udpConn.Read(recvbuf)
if err != nil {
// Detect if we have been closed manually. If a Close() has already
// happened, the udp connection we are listening on will be different
// to the current session.
v.RLock()
sameConnection := v.udpConn == udpConn
v.RUnlock()
if sameConnection {
v.log(LogError, "udp read error, %s, %s", v.endpoint, err)
v.log(LogDebug, "voice struct: %#v\n", v)
go v.reconnect()
}
return
}
select {
case <-close:
return
default:
// continue loop
}
// For now, skip anything except audio.
if rlen < 12 || recvbuf[0] != 0x80 {
continue
}
// build a audio packet struct
p.Type = recvbuf[0:2]
p.Sequence = binary.BigEndian.Uint16(recvbuf[2:4])
p.Timestamp = binary.BigEndian.Uint32(recvbuf[4:8])
p.SSRC = binary.BigEndian.Uint32(recvbuf[8:12])
// decrypt opus data
copy(nonce[:], recvbuf[0:12])
p.Opus, _ = secretbox.Open(nil, recvbuf[12:rlen], &nonce, &v.op4.SecretKey)
if c != nil {
c <- &p
}
}
}
// Reconnect will close down a voice connection then immediately try to
// reconnect to that session.
// NOTE : This func is messy and a WIP while I find what works.
// It will be cleaned up once a proven stable option is flushed out.
// aka: this is ugly shit code, please don't judge too harshly.
func (v *VoiceConnection) reconnect() {
v.log(LogInformational, "called")
v.Lock()
if v.reconnecting {
v.log(LogInformational, "already reconnecting, exiting.")
return
}
v.reconnecting = true
v.Unlock()
defer func() { v.reconnecting = false }()
if v.session == nil {
v.log(LogInformational, "cannot reconnect with nil session")
v.log(LogInformational, "Deleting VoiceConnection %s", v.GuildID)
delete(v.session.VoiceConnections, v.GuildID)
return
}
// Send a OP4 with a nil channel to disconnect
if v.sessionID != "" {
data := voiceChannelJoinOp{4, voiceChannelJoinData{&v.GuildID, nil, true, true}}
v.wsMutex.Lock()
err := v.session.wsConn.WriteJSON(data)
if err != nil {
v.log(LogError, "error sending disconnect packet, %s", err)
}
v.wsMutex.Unlock()
v.sessionID = ""
}
// Close websocket and udp connections
v.Close()
// Take a short nap to allow everything to close.
// may not be needed but just extra protection for now.
time.Sleep(1 * time.Second)
wait := time.Duration(1)
i := 0
for {
if v.session == nil {
v.log(LogInformational, "cannot reconnect with nil session")
v.log(LogInformational, "Deleting VoiceConnection %s", v.GuildID)
delete(v.session.VoiceConnections, v.GuildID)
return
}
if v.session.DataReady == false {
v.log(LogInformational, "cannot reconenct with unready session")
continue
}
v.log(LogInformational, "trying to reconnect to voice")
// Below is required because ChannelVoiceJoin checks the GuildID
// to decide if we should change channels or open a new connection.
// TODO: Maybe find a better method.
gID := v.GuildID
v.GuildID = ""
_, err := v.session.ChannelVoiceJoin(gID, v.ChannelID, v.mute, v.deaf)
if err == nil {
v.log(LogInformational, "successfully reconnected to voice")
return
}
v.log(LogInformational, "error reconnecting to voice, %s", err)
if i >= 10 {
// NOTE: this will probably change but it's a safety net
// here to prevent this goroutine from becomming abandoned.
v.log(LogInformational, "timeout reconnecting, I give up.")
v.log(LogInformational, "Deleting VoiceConnection %s", v.GuildID)
delete(v.session.VoiceConnections, v.GuildID)
return
}
<-time.After(wait * time.Second)
wait *= 2
if wait > 600 {
wait = 600
}
i++
}
}