DEV Community

Cover image for Exploring WebSocket Part 1: Building Minimal Echo WebSocket Server From Scratch (RFC 6455 Fundamentals)
Syukur
Syukur

Posted on

Exploring WebSocket Part 1: Building Minimal Echo WebSocket Server From Scratch (RFC 6455 Fundamentals)

Introduction

The WebSocket protocol enables full-duplex (two-way) communication between a client and a remote host over a single, long-lived TCP connection. It is designed to supersede older bidirectional communication technologies that use HTTP as a transport layer in order to benefit from existing infrastructure.

This article explores RFC 6455, the official WebSocket specification: performing the handshake, sending a client request with Postman, understanding the request frame, and echoing user input back over WebSocket. The main target audience is first-timers or anyone who is unfamiliar with RFC documents. We'll create a minimal working WebSocket server in Go with several constraints, which will be explained as we progress.

Starting WebSocket Handshake

Section 1.3 of the RFC describes the WebSocket opening handshake. The request headers for the handshake are as follows:

GET /chat HTTP/1.1
Host: server.example.com
Upgrade: websocket
Connection: Upgrade
Sec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==
Origin: http://example.com
Sec-WebSocket-Protocol: chat, superchat
Sec-WebSocket-Version: 13
Enter fullscreen mode Exit fullscreen mode
  • From headers above, header Sec-Websocket-Protocol and Origin are optional
  • The HTTP method used for the handshake must be GET
  • The Sec-WebSocket-Key header is used to prove that the server actually received the client's handshake. The server takes the header value without leading or trailing whitespace and concatenates it with 258EAFA5-E914-47DA-95CA-C5AB0DC85B11, resulting in dGhlIHNhbXBsZSBub25jZQ==258EAFA5-E914-47DA-95CA-C5AB0DC85B11. The resulting string is then hashed with SHA-1 (a hash function, not encryption), and the hash is encoded with base64.

Upon successful handshake, server respond as follows

HTTP/1.1 101 Switching Protocols
Upgrade: websocket
Connection: Upgrade
Sec-WebSocket-Accept: s3pPLMBiTxaQ9kYGzzhZRbK+xOo=
Enter fullscreen mode Exit fullscreen mode

If the HTTP response status is not 101, or the Sec-WebSocket-Accept value does not equal the base64-encoded SHA-1 hash of the Sec-WebSocket-Key value concatenated with the GUID above, the handshake between client and server has failed.

handshake function translates handshake steps above.

// handshake performs the server side of the WebSocket opening handshake on
// conn and, when it succeeds, hands the connection over to the frame loop.
//
// The request is read line by line until the blank line that terminates the
// HTTP header block; every line is checked by readHTTPUpgradeHeaderRequest.
// On success the "101" response carrying Sec-WebSocket-Accept is written and
// the connection is passed to ws.handleRequest. On any failure the error is
// logged and only this connection is closed, so one misbehaving client
// cannot terminate the whole server process.
func handshake(conn net.Conn) {
    var secWsAccept string

    reader := bufio.NewReader(conn)

    for {
        header, err := reader.ReadString('\n')
        if err != nil {
            // Any error here, io.EOF included, means the header block was
            // never completed. Looping again on EOF would spin forever
            // because ReadString keeps returning ("", io.EOF).
            if !errors.Is(err, io.EOF) {
                log.Println(err)
            }
            conn.Close()
            return
        }

        if header == "\r\n" || header == "\n" {
            break
        }

        secWsAcceptVal, err := readHTTPUpgradeHeaderRequest(header)
        if err != nil {
            log.Println(err)
            conn.Close()
            return
        }

        if secWsAcceptVal != "" {
            secWsAccept = secWsAcceptVal
        }
    }

    writer := bufio.NewWriter(conn)

    if secWsAccept == "" {
        // Without Sec-WebSocket-Key there is nothing to answer with; refuse
        // the upgrade instead of sending a 101 with an empty accept value.
        log.Println("missing Sec-WebSocket-Key header")

        // Best effort only: the connection is closed right after, so a
        // failed write changes nothing for us.
        _, _ = writer.WriteString("HTTP/1.1 400 Bad Request\r\nConnection: close\r\n\r\n")
        _ = writer.Flush()
        conn.Close()
        return
    }

    upgradeResp := []string{
        "HTTP/1.1 101 Web Socket Protocol Handshake",
        "Server: go/echoserver",
        "Upgrade: WebSocket",
        "Connection: Upgrade",
        "Sec-WebSocket-Accept: " + secWsAccept,
        "", // required for extra CRLF
        "", // required for extra CRLF
    }

    _, err := writer.Write([]byte(strings.Join(upgradeResp, "\r\n")))
    if err != nil {
        log.Println(err)
        conn.Close()
        return
    }

    err = writer.Flush()
    if err != nil {
        log.Println(err)
        conn.Close()
        return
    }

    // Reuse the same bufio.Reader: it may already hold bytes of the first
    // frame that arrived together with the handshake request.
    ws := ws{
        conn:   conn,
        reader: reader,
        writer: writer,
    }

    ws.handleRequest()
}

// readHTTPUpgradeHeaderRequest inspects a single line of the HTTP upgrade
// request (see https://datatracker.ietf.org/doc/html/rfc6455#section-1.3).
//
//   - Sec-WebSocket-Key: returns the matching Sec-WebSocket-Accept value,
//     i.e. base64(SHA-1(key + GUID)).
//   - Upgrade: returns an error unless the value is "websocket".
//   - Connection: returns an error unless the comma-separated value list
//     contains "Upgrade".
//
// Header names and the Upgrade/Connection values are compared
// case-insensitively. Every other line, including the request line and any
// line without a colon, is ignored and yields ("", nil).
func readHTTPUpgradeHeaderRequest(header string) (string, error) {
    // Split on the first colon only: values such as "Host: localhost:8083"
    // contain colons of their own, and a line without any colon must not
    // cause an out-of-range access.
    sep := strings.IndexByte(header, ':')
    if sep < 0 {
        return "", nil
    }

    name := strings.TrimSpace(header[:sep])
    value := strings.TrimSpace(header[sep+1:])

    switch {
    case strings.EqualFold(name, upgradeHeaderKey):
        if !strings.EqualFold(value, upgradeConnHeaderVal) {
            return "", errors.New("upgrade header value is not websocket")
        }

        return "", nil
    case strings.EqualFold(name, connHeaderKey):
        // Connection is a token list; some browsers send
        // "keep-alive, Upgrade", so look for the token instead of
        // comparing the whole value.
        for _, token := range strings.Split(value, ",") {
            if strings.EqualFold(strings.TrimSpace(token), connHeaderVal) {
                return "", nil
            }
        }

        return "", errors.New("connection header value is not upgrade")
    case strings.EqualFold(name, secWsKey):
        if value == "" {
            return "", errors.New("sec-websocket-key header value is empty")
        }

        digest := sha1.Sum([]byte(value + "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"))

        return b64.StdEncoding.EncodeToString(digest[:]), nil
    }

    return "", nil
}
Enter fullscreen mode Exit fullscreen mode

Understanding WebSocket Request Format

Section 5.2 of this RFC discusses about data framing. Data transmitted to WebSocket protocol is a sequence of frames as follows.

  1. FIN (1 Bit): Indicates that this is the final fragment of a message. The first fragment MAY also be the final fragment. The purpose of the FIN flag is to support message fragmentation, i.e. splitting the payload data of one message across several frames. We may discuss fragmentation in another article
  2. RSV1, RSV2, RSV3 (1 Bit each): Reserved bits. These bits MUST be 0 unless an extension that defines their meaning has been negotiated
  3. opcode (4 Bit): Interprets payload data. If unknown opcode received, endpoint MUST fail websocket connection.
    • %x0 denotes continuation frame
    • %x1 denotes text frame
    • %x2 denotes binary frame
    • %x3-%x7 reserved for further non-control frames
    • %x8 denotes connection close
    • %x9 denotes ping
    • %xA denotes pong
    • %xB-F reserved for further control frame
  4. MASK (1 Bit): Defines whether the payload data is masked. If set to 1, a masking key is present in the masking-key field and is used to unmask the payload data. Masking prevents proxies from misinterpreting WebSocket traffic as HTTP; it is not an encryption mechanism. All frames sent from the client MUST set this bit to 1.
  5. Payload length is encoded as:
    • 7 bits (0-125)
    • 7 bit value 126 followed by a 16 bit unsigned integer
    • 7 bit value 127 followed by a 64 bit unsigned integer
  6. Masking Key (0 or 4 bytes): All frames sent from client to server are masked by 32 bit value that is contained within the frame. This field is present if the mask bit is set to 1 and is absent if the mask bit set to 0

Control frames are used to communicate state between the client and the WebSocket server. The currently defined opcodes for control frames are 0x8 (close), 0x9 (ping), and 0xA (pong). All control frames MUST have a payload length of 125 bytes or less, and MUST NOT be fragmented.

Non-control frames are identified by opcodes where the most significant bit (the leftmost bit) of the opcode is 0. The currently defined non-control frames are 0x1 (text frame) and 0x2 (binary frame). A text frame carries text data encoded as valid UTF-8. A binary frame carries arbitrary binary data whose interpretation is solely up to the application layer.

Furthermore, we can divide a frame as several byte/bit parts (begin at 0th index).

  1. 1st byte (0th - 7th): FIN + RSVs + opcode
  2. 2nd byte (8th - 15th): Mask + Payload length
  3. If payload length < 126:
    • Next 4 bytes are masking key
    • Following bytes are payload data
  4. If payload length == 126:
    • Next 2 bytes is extended payload length
    • Next 4 bytes are masking key
    • Following bytes are payload data
  5. If payload length == 127:
    • Next 8 bytes is extended payload length
    • Next 4 bytes are masking key
    • Following bytes are payload data

The extended payload length field exists only if the length of the payload data is >= 126. In this article we only cover payload lengths < 126; we may discuss how to handle payload lengths >= 126 in another article.

Receiving and Read Client Frame

Send Request To Server

We are going to use Postman to perform request to WebSocket server. Picture below shows how creating a handshake to localhost:8083

Upon successful handshake attempt, Postman will shows like this

Note there is a dropdown above Response tab. It consists of frame types that can be use as request payload. By default, the frame type is text frame.

Receive Request From Client

After taking a look at how data is transmitted over WebSocket, we can step into how to receive and read a request. The code below demonstrates receiving and reading a request. (For demonstration purposes, all code in this article works only if the payload length is < 126, and it assumes the request payload is not fragmented.)

// handleRequest runs the frame loop for an upgraded connection and closes
// the connection when the loop ends.
//
// Constraints of this minimal server: only payload lengths < 126 are
// supported, messages are assumed to be unfragmented, and every client frame
// is assumed to be masked (the mask bit itself is not available in frame).
//
// Each iteration reads the 2-byte frame header and then consumes the rest of
// the frame, so the next iteration always starts on a frame boundary:
//
//   - text (1): echoed back to the client
//   - binary (2): logged, payload discarded
//   - close (8): a close frame is sent back and the loop ends
//   - ping (9): answered with a pong (10) carrying the same payload
//   - pong (10): payload discarded
//   - anything else: the connection is closed
//
// Errors are logged and end the loop for this connection only.
func (ws *ws) handleRequest() {
    defer ws.conn.Close()

    for {
        frame, err := ws.readRequest(2)
        if err != nil {
            // io.EOF is the normal "client went away" case; anything else
            // is worth a log line. Neither must terminate the process.
            if !errors.Is(err, io.EOF) {
                log.Println(err)
            }
            return
        }

        // 126 and 127 announce an extended length field that this server
        // does not parse; continuing would misread the stream.
        if frame.payloadLength > 125 {
            log.Println("unsupported payload length")
            return
        }

        switch frame.opcode {
        case 1:
            if err := ws.writeTextFrameResponse(frame); err != nil {
                log.Println(err)
                return
            }
        case 2:
            log.Println("binary frame")

            // Skip the 4-byte masking key and the payload so the next read
            // starts at the following frame header.
            if _, err := ws.reader.Discard(4 + frame.payloadLength); err != nil {
                log.Println(err)
                return
            }
        case 8:
            log.Println("disconnected from client")

            // Answer with an empty close frame (FIN + opcode 8, length 0).
            // Best effort: the connection is closed right after.
            if _, err := ws.writer.Write([]byte{0x88, 0x00}); err != nil {
                log.Println(err)
            } else if err := ws.writer.Flush(); err != nil {
                log.Println(err)
            }
            return
        case 9:
            // A pong echoes the ping's payload, which is exactly what the
            // echo writer does once the opcode is switched to pong.
            frame.opcode = 10
            if err := ws.writeTextFrameResponse(frame); err != nil {
                log.Println(err)
                return
            }
        case 10:
            // Unsolicited pong: nothing to answer, just consume it.
            if _, err := ws.reader.Discard(4 + frame.payloadLength); err != nil {
                log.Println(err)
                return
            }
        default:
            // The frame body cannot be interpreted, so the stream position
            // is no longer trustworthy: fail the connection.
            log.Println("invalid opcode")
            return
        }
    }
}

// readRequest reads the next requestSize bytes from the connection and
// decodes the frame header fields found in the first two of them.
//
// requestSize must be at least 2, because the opcode lives in the low 4 bits
// of the first byte and the 7-bit payload length in the low 7 bits of the
// second byte. The FIN/RSV bits and the mask bit are dropped.
//
// It returns an error when requestSize is too small or when the bytes cannot
// be read in full (io.EOF if the stream ended before any byte was read).
func (ws *ws) readRequest(requestSize int) (frame, error) {
    if requestSize < 2 {
        return frame{}, errors.New("request size must be at least 2 bytes")
    }

    data := make([]byte, requestSize)

    // A plain Read may return fewer bytes than requested; ReadFull keeps
    // reading until the buffer is filled or the stream fails.
    if _, err := io.ReadFull(ws.reader, data); err != nil {
        return frame{}, err
    }

    opcode := data[0] & 0xf
    payloadLength := data[1] & 0x7f

    return frame{
        opcode:        opcode,
        payloadLength: int(payloadLength),
    }, nil
}
Enter fullscreen mode Exit fullscreen mode

The handleRequest() method calls readRequest(2). This means that upon receiving a request, the server reads the first 2 bytes (16 bits) of the request. When debugging handleRequest() using VSCode, we'll see something like this.

Interpreting First Byte

First byte is data[0] that contains 129 decimal. Convert decimal value to binary, we'll get 10000001 binary value. Recall that first byte of frame contains FIN, RSV, and opcode fields. From left of binary value, FIN occupies first bit (1), RSV1 occupies second bit (0), RSV2 occupies third bit (0), RSV3 occupies fourth bit (0), and opcode occupies fifth to eighth bit (0001).

We retrieve opcode by perform bitwise AND (&) operation between first byte against 0xf/00001111b. Result is follows.

The result of the operation above is 0001b: only the LSB (Least Significant Bit, the rightmost bit) is set, which gives opcode 1. This means that the server received a text frame request from the client.
source

If we don't use & 0x0F, we'll read FIN and RSV bits as part of opcode, which makes opcode invalid.

Interpreting Second Byte

The second byte is data[1], which contains 128 decimal. Converting 128 decimal to binary gives 10000000. Reading the binary value from left to right, the first bit (1) is the mask bit, and the second to eighth bits (0000000) are the payload length.

Send Response To Client

writeTextFrameResponse() method demonstrate reading request payload and echoing request payload.

// writeTextFrameResponse reads the remainder of the client frame described
// by frame (masking key and payload), unmasks the payload, and writes it
// back to the client as a single unmasked, final frame that carries
// frame.opcode.
//
// Only payload lengths of 0-125 are supported, since the response length is
// written into the 7-bit length field; any other length returns an error
// before anything is read. Read, write and flush errors are returned as-is.
func (ws *ws) writeTextFrameResponse(frame frame) error {
    if frame.payloadLength < 0 || frame.payloadLength > 125 {
        return errors.New("payload length must be between 0 and 125")
    }

    // The masking key occupies the 3rd - 6th byte of the request; the first
    // 2 bytes were already consumed to get the opcode and payload length.
    // ReadFull is used because a plain Read may return fewer bytes than
    // requested.
    maskingKey := make([]byte, 4)
    if _, err := io.ReadFull(ws.reader, maskingKey); err != nil {
        return err
    }

    requestData := make([]byte, frame.payloadLength)
    if _, err := io.ReadFull(ws.reader, requestData); err != nil {
        return err
    }

    // unmasking request payload, refer to https://datatracker.ietf.org/doc/html/rfc6455#section-5.3
    for i := range requestData {
        requestData[i] ^= maskingKey[i%4]
    }

    // Header: FIN bit set + opcode, then the 7-bit length with the mask bit
    // clear (this server sends its frames unmasked).
    responseFrame := make([]byte, 0, 2+len(requestData))
    responseFrame = append(responseFrame, 0x80|frame.opcode, byte(len(requestData)))
    responseFrame = append(responseFrame, requestData...)

    if _, err := ws.writer.Write(responseFrame); err != nil {
        return err
    }

    return ws.writer.Flush()
}
Enter fullscreen mode Exit fullscreen mode

Pay attention to section 5.3 of RFC 6455. It states that a frame sent from client to server MUST be masked with a 32-bit (4-byte) random value chosen by the client. That means we MUST unmask the request payload to retrieve its original content.

Formula to retrieve original request payload as follows.

j                   = i MOD 4
transformed-octet-i = original-octet-i XOR masking-key-octet-j
Enter fullscreen mode Exit fullscreen mode

This snippet translates formula above

    // read masking key where masking key located
    // 3rd - 6th byte of request, first 2 already read
    // to retrieve opcode and payload length.
    maskingKey := make([]byte, 4)
    _, err := ws.reader.Read(maskingKey)
    if err != nil {
        return err
    }

    requestData := make([]byte, frame.payloadLength)
    _, err = ws.reader.Read(requestData)
    if err != nil {
        return err
    }

    // unmasking request payload, refer to https://datatracker.ietf.org/doc/html/rfc6455#autoid-24
    for i := 0; i < len(requestData); i++ {
        requestData[i] = requestData[i] ^ maskingKey[i%4]
    }
Enter fullscreen mode Exit fullscreen mode

Take a look on these screenshot on given breakpoint.

When execution reaches _, err := ws.reader.Read(maskingKey), where maskingKey is 4 bytes long, maskingKey is filled with the 3rd - 6th byte of the request. This is due to the behavior of Read(): it reads a sequence of bytes while maintaining its position in the stream. Since we've already read the 1st - 2nd byte to retrieve FIN, RSV, opcode, and payload length, calling Read() at this moment reads the 3rd - 6th byte. The read continues right after the 2nd byte instead of starting again from the 1st. The same applies when reading the payload at the _, err = ws.reader.Read(requestData) part of the code.

This snippet demonstrates building response frame.

    responseByte := []byte(requestData)

    responseFrame := make([]byte, 2)
    responseFrame[0] = 0x80 | frame.opcode
    responseFrame[1] = byte(len(responseByte))
    responseFrame = append(responseFrame, responseByte...)

    _, err = ws.writer.Write(responseFrame)
    if err != nil {
        return err
    }

    err = ws.writer.Flush()
    if err != nil {
        return err
    }

    return nil
Enter fullscreen mode Exit fullscreen mode

Recall that 1st byte of frame contains FIN, RSV, and opcode, 2nd byte contains payload length. Based on section 5, we MUST NOT mask response data to client, that is why we just append response to defined response frame without masking as client to server does.

Response from server as follows.

Here is full code

package main

import (
    "bufio"
    "crypto/sha1"
    "errors"
    "io"
    "log"
    "net"
    "strings"

    b64 "encoding/base64"
)

// Header names and expected values checked during the opening handshake.
const (
    // upgradeHeaderKey must carry upgradeConnHeaderVal.
    upgradeHeaderKey     = "Upgrade"
    upgradeConnHeaderVal = "websocket"

    // connHeaderKey must carry connHeaderVal.
    connHeaderKey = "Connection"
    connHeaderVal = "Upgrade"

    // secWsKey names the header whose value is turned into the
    // Sec-WebSocket-Accept response value.
    secWsKey = "Sec-WebSocket-Key"
)

// main listens for TCP connections on port 8083 and starts the WebSocket
// handshake for every accepted connection in its own goroutine. A listen or
// accept error terminates the process.
func main() {
    const addr = ":8083"

    ln, listenErr := net.Listen("tcp", addr)
    if listenErr != nil {
        log.Fatal(listenErr)
    }

    log.Println("running on ", addr)

    for {
        client, acceptErr := ln.Accept()
        if acceptErr != nil {
            log.Fatalln(acceptErr)
        }

        go handshake(client)
    }
}

// handshake performs the server side of the WebSocket opening handshake on
// conn and, when it succeeds, hands the connection over to the frame loop.
//
// The request is read line by line until the blank line that terminates the
// HTTP header block; every line is checked by readHTTPUpgradeHeaderRequest.
// On success the "101" response carrying Sec-WebSocket-Accept is written and
// the connection is passed to ws.handleRequest. On any failure the error is
// logged and only this connection is closed, so one misbehaving client
// cannot terminate the whole server process.
func handshake(conn net.Conn) {
    var secWsAccept string

    reader := bufio.NewReader(conn)

    for {
        header, err := reader.ReadString('\n')
        if err != nil {
            // Any error here, io.EOF included, means the header block was
            // never completed. Looping again on EOF would spin forever
            // because ReadString keeps returning ("", io.EOF).
            if !errors.Is(err, io.EOF) {
                log.Println(err)
            }
            conn.Close()
            return
        }

        if header == "\r\n" || header == "\n" {
            break
        }

        secWsAcceptVal, err := readHTTPUpgradeHeaderRequest(header)
        if err != nil {
            log.Println(err)
            conn.Close()
            return
        }

        if secWsAcceptVal != "" {
            secWsAccept = secWsAcceptVal
        }
    }

    writer := bufio.NewWriter(conn)

    if secWsAccept == "" {
        // Without Sec-WebSocket-Key there is nothing to answer with; refuse
        // the upgrade instead of sending a 101 with an empty accept value.
        log.Println("missing Sec-WebSocket-Key header")

        // Best effort only: the connection is closed right after, so a
        // failed write changes nothing for us.
        _, _ = writer.WriteString("HTTP/1.1 400 Bad Request\r\nConnection: close\r\n\r\n")
        _ = writer.Flush()
        conn.Close()
        return
    }

    upgradeResp := []string{
        "HTTP/1.1 101 Web Socket Protocol Handshake",
        "Server: go/echoserver",
        "Upgrade: WebSocket",
        "Connection: Upgrade",
        "Sec-WebSocket-Accept: " + secWsAccept,
        "", // required for extra CRLF
        "", // required for extra CRLF
    }

    _, err := writer.Write([]byte(strings.Join(upgradeResp, "\r\n")))
    if err != nil {
        log.Println(err)
        conn.Close()
        return
    }

    err = writer.Flush()
    if err != nil {
        log.Println(err)
        conn.Close()
        return
    }

    // Reuse the same bufio.Reader: it may already hold bytes of the first
    // frame that arrived together with the handshake request.
    ws := ws{
        conn:   conn,
        reader: reader,
        writer: writer,
    }

    ws.handleRequest()
}


// readHTTPUpgradeHeaderRequest inspects a single line of the HTTP upgrade
// request (see https://datatracker.ietf.org/doc/html/rfc6455#section-1.3).
//
//   - Sec-WebSocket-Key: returns the matching Sec-WebSocket-Accept value,
//     i.e. base64(SHA-1(key + GUID)).
//   - Upgrade: returns an error unless the value is "websocket".
//   - Connection: returns an error unless the comma-separated value list
//     contains "Upgrade".
//
// Header names and the Upgrade/Connection values are compared
// case-insensitively. Every other line, including the request line and any
// line without a colon, is ignored and yields ("", nil).
func readHTTPUpgradeHeaderRequest(header string) (string, error) {
    // Split on the first colon only: values such as "Host: localhost:8083"
    // contain colons of their own, and a line without any colon must not
    // cause an out-of-range access.
    sep := strings.IndexByte(header, ':')
    if sep < 0 {
        return "", nil
    }

    name := strings.TrimSpace(header[:sep])
    value := strings.TrimSpace(header[sep+1:])

    switch {
    case strings.EqualFold(name, upgradeHeaderKey):
        if !strings.EqualFold(value, upgradeConnHeaderVal) {
            return "", errors.New("upgrade header value is not websocket")
        }

        return "", nil
    case strings.EqualFold(name, connHeaderKey):
        // Connection is a token list; some browsers send
        // "keep-alive, Upgrade", so look for the token instead of
        // comparing the whole value.
        for _, token := range strings.Split(value, ",") {
            if strings.EqualFold(strings.TrimSpace(token), connHeaderVal) {
                return "", nil
            }
        }

        return "", errors.New("connection header value is not upgrade")
    case strings.EqualFold(name, secWsKey):
        if value == "" {
            return "", errors.New("sec-websocket-key header value is empty")
        }

        digest := sha1.Sum([]byte(value + "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"))

        return b64.StdEncoding.EncodeToString(digest[:]), nil
    }

    return "", nil
}

// ws bundles an upgraded WebSocket connection with the buffered reader and
// writer that the handshake created around it.
type ws struct {
    // reader and writer are the buffered halves used for frame I/O.
    reader *bufio.Reader
    writer *bufio.Writer

    // conn is the underlying network connection.
    conn net.Conn
}

// handleRequest runs the frame loop for an upgraded connection and closes
// the connection when the loop ends.
//
// Constraints of this minimal server: only payload lengths < 126 are
// supported, messages are assumed to be unfragmented, and every client frame
// is assumed to be masked (the mask bit itself is not available in frame).
//
// Each iteration reads the 2-byte frame header and then consumes the rest of
// the frame, so the next iteration always starts on a frame boundary:
//
//   - text (1): echoed back to the client
//   - binary (2): logged, payload discarded
//   - close (8): a close frame is sent back and the loop ends
//   - ping (9): answered with a pong (10) carrying the same payload
//   - pong (10): payload discarded
//   - anything else: the connection is closed
//
// Errors are logged and end the loop for this connection only.
func (ws *ws) handleRequest() {
    defer ws.conn.Close()

    for {
        frame, err := ws.readRequest(2)
        if err != nil {
            // io.EOF is the normal "client went away" case; anything else
            // is worth a log line. Neither must terminate the process.
            if !errors.Is(err, io.EOF) {
                log.Println(err)
            }
            return
        }

        // 126 and 127 announce an extended length field that this server
        // does not parse; continuing would misread the stream.
        if frame.payloadLength > 125 {
            log.Println("unsupported payload length")
            return
        }

        switch frame.opcode {
        case 1:
            if err := ws.writeTextFrameResponse(frame); err != nil {
                log.Println(err)
                return
            }
        case 2:
            log.Println("binary frame")

            // Skip the 4-byte masking key and the payload so the next read
            // starts at the following frame header.
            if _, err := ws.reader.Discard(4 + frame.payloadLength); err != nil {
                log.Println(err)
                return
            }
        case 8:
            log.Println("disconnected from client")

            // Answer with an empty close frame (FIN + opcode 8, length 0).
            // Best effort: the connection is closed right after.
            if _, err := ws.writer.Write([]byte{0x88, 0x00}); err != nil {
                log.Println(err)
            } else if err := ws.writer.Flush(); err != nil {
                log.Println(err)
            }
            return
        case 9:
            // A pong echoes the ping's payload, which is exactly what the
            // echo writer does once the opcode is switched to pong.
            frame.opcode = 10
            if err := ws.writeTextFrameResponse(frame); err != nil {
                log.Println(err)
                return
            }
        case 10:
            // Unsolicited pong: nothing to answer, just consume it.
            if _, err := ws.reader.Discard(4 + frame.payloadLength); err != nil {
                log.Println(err)
                return
            }
        default:
            // The frame body cannot be interpreted, so the stream position
            // is no longer trustworthy: fail the connection.
            log.Println("invalid opcode")
            return
        }
    }
}

// readRequest reads the next requestSize bytes from the connection and
// decodes the frame header fields found in the first two of them.
//
// requestSize must be at least 2, because the opcode lives in the low 4 bits
// of the first byte and the 7-bit payload length in the low 7 bits of the
// second byte. The FIN/RSV bits and the mask bit are dropped.
//
// It returns an error when requestSize is too small or when the bytes cannot
// be read in full (io.EOF if the stream ended before any byte was read).
func (ws *ws) readRequest(requestSize int) (frame, error) {
    if requestSize < 2 {
        return frame{}, errors.New("request size must be at least 2 bytes")
    }

    data := make([]byte, requestSize)

    // A plain Read may return fewer bytes than requested; ReadFull keeps
    // reading until the buffer is filled or the stream fails.
    if _, err := io.ReadFull(ws.reader, data); err != nil {
        return frame{}, err
    }

    opcode := data[0] & 0xf
    payloadLength := data[1] & 0x7f

    return frame{
        opcode:        opcode,
        payloadLength: int(payloadLength),
    }, nil
}

// writeTextFrameResponse reads the remainder of the client frame described
// by frame (masking key and payload), unmasks the payload, and writes it
// back to the client as a single unmasked, final frame that carries
// frame.opcode.
//
// Only payload lengths of 0-125 are supported, since the response length is
// written into the 7-bit length field; any other length returns an error
// before anything is read. Read, write and flush errors are returned as-is.
func (ws *ws) writeTextFrameResponse(frame frame) error {
    if frame.payloadLength < 0 || frame.payloadLength > 125 {
        return errors.New("payload length must be between 0 and 125")
    }

    // The masking key occupies the 3rd - 6th byte of the request; the first
    // 2 bytes were already consumed to get the opcode and payload length.
    // ReadFull is used because a plain Read may return fewer bytes than
    // requested.
    maskingKey := make([]byte, 4)
    if _, err := io.ReadFull(ws.reader, maskingKey); err != nil {
        return err
    }

    requestData := make([]byte, frame.payloadLength)
    if _, err := io.ReadFull(ws.reader, requestData); err != nil {
        return err
    }

    // unmasking request payload, refer to https://datatracker.ietf.org/doc/html/rfc6455#section-5.3
    for i := range requestData {
        requestData[i] ^= maskingKey[i%4]
    }

    // Header: FIN bit set + opcode, then the 7-bit length with the mask bit
    // clear (this server sends its frames unmasked).
    responseFrame := make([]byte, 0, 2+len(requestData))
    responseFrame = append(responseFrame, 0x80|frame.opcode, byte(len(requestData)))
    responseFrame = append(responseFrame, requestData...)

    if _, err := ws.writer.Write(responseFrame); err != nil {
        return err
    }

    return ws.writer.Flush()
}

// frame holds the header fields that readRequest extracts from the first two
// bytes of a WebSocket frame.
type frame struct {
    // payloadLength is the 7-bit length taken from the second header byte.
    payloadLength int

    // opcode is the 4-bit opcode taken from the first header byte.
    opcode uint8
}
Enter fullscreen mode Exit fullscreen mode

References

  1. https://datatracker.ietf.org/doc/html/rfc6455
  2. https://hassansin.github.io/implementing-websocket-protocol-in-go

Top comments (0)