A binary compiler takes a data format and produces pure, flat binary data.
data -> buffer
The reason these things exist (protobufs, flatbuffers, you name it) is simple: the current standard for internet communication is bloated. Yes, JSON is insanely inefficient, but that’s the tax we pay for debuggability and readability.
This number right here → 1, once it's quoted as a JSON string value, is 200% larger than in pure binary:
'1' = 8 bits
'"' = 8 bits
'"' = 8 bits
----------------
Total: 24 bits
In pure binary it’s just 8 bits.
But that’s not even the crazy part.
Every value in JSON also requires a key:
{
  "key": "1"
}
Take a second and guess how many bits that is now.
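You can check the math straight from Node (1 byte = 8 bits):
console.log(Buffer.byteLength('1') * 8);           // 8  -> the raw digit
console.log(Buffer.byteLength('"1"') * 8);         // 24 -> quoted as a JSON string
console.log(Buffer.byteLength('{"key":"1"}') * 8); // 88 -> with a key attached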
Don’t take my word for it, here’s proof.
This object:
const obj = {
  age: 26,
  randNum16: 16000,
  randNum32: 33000,
  hello: "world",
  thisisastr: "a long string lowkey",
}
Size comparison:
obj.json   98 bytes
obj.bin    86 bytes  # <- no protocol (keys + values serialized)
obj2.bin   41 bytes  # <- with protocol (values only, protocol owns the keys)
Even with keys encoded, pure binary is still way leaner.
And this compounds fast as payloads grow.
It’s also pretty fast, near native:
Execution time comparison (milliseconds):
Name | Count | Min(ms) | Max(ms) | Mean(ms) | Median(ms) | StdDev(ms)
-----+-------+---------+---------+----------+------------+-----------
JSON | 100 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000
TEMU | 100 | 0.000 | 1.000 | 0.010 | 0.000 | 0.099
The longest run took 1 ms, which is misleading: it's a single outlier across 100 samples. My guess? Node allocating the initial buffer.
So… why a binary compiler?
Why a Binary Compiler?
I’ve been building more real-time systems and tools for Node.js lately, and JSON is just a bandwidth hog. It’s borderline inconceivable to build a low-latency system with JSON as your primary transport.
That’s why things like protobuf exist, especially for server-to-server communication. They’re insanely fast and way leaner.
Instead of using a generic solution, I’m experimenting with rolling my own, mainly as groundwork for the stuff I want to bring to Node.js:
- tessera.js – a C++ N-API renderer powered by raw bytes (SharedArrayBuffers) (How I built a renderer for Node.js)
- shard – a sub-100 nanosecond latency profiler (native C/C++ usually sits around 5–40ns)
- nexus – a Godot-like game engine for Node.js
This experiment is really about preparing solid binary encoders/decoders for those projects.
Building the Compiler
Note: this is experimental. I literally opened VS Code and just started coding, no research, no reading papers.
Honestly, this is the best way to learn anything:
build it badly from intuition first, then go see how experts do it.
You’ll notice there’s zero thought put into naming, just pure flow. That’s intentional. This is how I prototype.
Utils and Setup
import { writeFileSync, fstatSync, openSync, closeSync } from "fs";
const obj = {
  age: 26,
  randNum16: 16000,
  randNum32: 33000,
  hello: "world",
  thisisastr: "a long string lowkey",
  // stack: ['c++', "js", "golang"],
  // hobbies: ["competitive gaming", "hacking node.js", "football"]
}
const TYPES = {
  numF: 1,   // float
  numI8: 2,  // int8
  numI16: 3, // int16
  numI32: 4, // int32
  string: 5,
  array: 6,  // reserved, not implemented yet
}
function getObjectKeysAndValues(obj) {
  // JS preserves property order per spec
  const keys = Object.keys(obj);
  const values = Object.values(obj);
  return [keys, values];
}
function isFloat(num) {
  return !Number.isInteger(num);
}
Serializing Keys
Simple protocol:
[allKeysLen | keyLen | key] -> buffer
function serKeys(keys) {
  let len = 0;
  for (let i = 0; i < keys.length; i++) {
    len += Buffer.byteLength(keys[i], "utf8");
  }
  const finalLen = len + keys.length + 2;
  const buf = Buffer.allocUnsafe(finalLen);
  buf.writeUInt16BE(finalLen); // total key section length
  let writer = 2;
  for (const k of keys) {
    const kLen = Buffer.byteLength(k, "utf8"); // byte length, not char count
    if (kLen > 255)
      throw new Error(`Key too long: "${k}" (${kLen} bytes)`);
    buf.writeUInt8(kLen, writer++);
    const written = buf.write(k, writer, "utf8");
    writer += written;
  }
  return buf;
}
Deserializing is just the reverse: read length → read key → move pointer.
function deserKeys(buf) {
  let reader = 2;
  let keys = [];
  while (reader < buf.length) {
    const keyLen = buf.readUInt8(reader++);
    const key = buf.subarray(reader, reader + keyLen);
    keys.push(key.toString("utf8"));
    reader += keyLen;
  }
  return keys;
}
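A quick round trip to make sure both directions line up (the keys here are just sample values):
const keyBuf = serKeys(["age", "hello"]);
console.log(keyBuf.length);     // 12 -> 2 (section length) + (1 + 3) + (1 + 5)
console.log(deserKeys(keyBuf)); // [ 'age', 'hello' ]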
Supported Types (For Now)
Only numbers and strings.
Strings
function seString(str) {
  const byteLen = Buffer.byteLength(str, "utf8");
  const buf = Buffer.allocUnsafe(byteLen + 3);
  buf.writeInt8(TYPES.string, 0);
  buf.writeInt16BE(byteLen, 1); // length prefix in bytes, not chars
  buf.write(str, 3, "utf8");
  return buf;
}
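For example, "world" comes out as eight bytes: a type tag, a two-byte length, then the UTF-8 data:
console.log(seString("world"));
// <Buffer 05 00 05 77 6f 72 6c 64>
// type = 05 (string), length = 00 05, then the 5 bytes of "world"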
Numbers
function seNumber(num) {
  let buf;
  if (isFloat(num)) {
    buf = Buffer.allocUnsafe(5);
    buf.writeInt8(TYPES.numF, 0);
    buf.writeFloatBE(num, 1);
    return buf;
  }
  let type;
  if (num >= -128 && num <= 127) type = "8";
  else if (num >= -32768 && num <= 32767) type = "16";
  else type = "32";
  buf = Buffer.allocUnsafe(type === "8" ? 2 : type === "16" ? 3 : 5);
  switch (type) {
    case "8":
      buf.writeInt8(TYPES.numI8, 0);
      buf.writeInt8(num, 1);
      break;
    case "16":
      buf.writeInt8(TYPES.numI16, 0);
      buf.writeInt16BE(num, 1);
      break;
    case "32":
      buf.writeInt8(TYPES.numI32, 0);
      buf.writeInt32BE(num, 1);
      break;
  }
  return buf;
}
You’ll notice the same pattern everywhere:
[type | len? | data]
Length is optional; numbers encode it implicitly through the type tag:
i8 -> 8 bits
i16 -> 16 bits
i32 -> 32 bits
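A quick check of the width selection, using values from the demo object:
console.log(seNumber(26));    // <Buffer 02 1a>          -> 2 bytes (int8)
console.log(seNumber(16000)); // <Buffer 03 3e 80>       -> 3 bytes (int16)
console.log(seNumber(33000)); // <Buffer 04 00 00 80 e8> -> 5 bytes (int32)
console.log(seNumber(3.5));   // <Buffer 01 40 60 00 00> -> 5 bytes (float32)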
The Compiler
Serialize:
function gen(obj, protocol = false) {
  if (typeof obj !== "object" || obj === null)
    throw new Error("Must be Object");
  let cache = new Map();
  const [keys] = getObjectKeysAndValues(obj);
  let serk;
  if (!protocol) {
    serk = serKeys(keys);
  }
  let length = 0;
  for (const key of keys) {
    let buf;
    switch (typeof obj[key]) {
      case "number":
        buf = seNumber(obj[key]);
        break;
      case "string":
        buf = seString(obj[key]);
        break;
      default:
        continue; // unsupported types (arrays, nested objects) are skipped for now
    }
    length += buf.length;
    cache.set(key, buf);
  }
  const dataBuf = Buffer.allocUnsafe(length);
  let writer = 0;
  for (const key of keys) {
    const b = cache.get(key);
    if (b) {
      b.copy(dataBuf, writer);
      writer += b.length;
    }
  }
  return protocol ? dataBuf : Buffer.concat([serk, dataBuf]);
}
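With gen in place, here's roughly how the size comparison from the top of the post can be reproduced (the file names mirror the ones above; writeFileSync comes from the imports in the setup):
const withKeys = gen(obj);         // keys + values
const valuesOnly = gen(obj, true); // protocol mode: values only

console.log(Buffer.byteLength(JSON.stringify(obj))); // JSON size in bytes
console.log(withKeys.length);                        // binary, keys included
console.log(valuesOnly.length);                      // binary, values only

// Or dump them to disk and compare the files directly:
writeFileSync("obj.json", JSON.stringify(obj));
writeFileSync("obj.bin", withKeys);
writeFileSync("obj2.bin", valuesOnly);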
Deserialize:
function unserData(buf) {
  let reader = 0;
  let data = [];
  while (reader < buf.length) {
    const t = buf.readInt8(reader++);
    switch (t) {
      case 1: // float32
        data.push(buf.readFloatBE(reader));
        reader += 4;
        break;
      case 2: // int8
        data.push(buf.readInt8(reader++));
        break;
      case 3: // int16
        data.push(buf.readInt16BE(reader));
        reader += 2;
        break;
      case 4: // int32
        data.push(buf.readInt32BE(reader));
        reader += 4;
        break;
      case 5: { // string: [len | utf8 bytes]
        const len = buf.readInt16BE(reader);
        reader += 2;
        data.push(buf.subarray(reader, reader + len).toString("utf8"));
        reader += len;
        break;
      }
    }
  }
  return data;
}
Unified parser:
function ungen(buf, protocol = false) {
  if (!protocol) {
    const keysLen = buf.readUInt16BE(0);
    const keys = deserKeys(buf.subarray(0, keysLen));
    const values = unserData(buf.subarray(keysLen));
    // Re-pair keys with values (assumes every key was actually serialized)
    return Object.fromEntries(keys.map((k, i) => [k, values[i]]));
  }
  return unserData(buf);
}
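A quick round trip in both modes, using the demo obj:
const packed = gen(obj);        // keys + values in one buffer
console.log(ungen(packed));     // -> a plain object with the original keys

const lean = gen(obj, true);    // values only, the protocol owns the keys
console.log(ungen(lean, true)); // -> just the value list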
Running It
Sanity check:
let samples = { JSON: [], TEMU: [] };
function J() {
  const start = process.hrtime.bigint();
  JSON.parse(JSON.stringify(obj));
  const end = process.hrtime.bigint();
  // Integer BigInt division truncates sub-millisecond runs to 0n,
  // which is why most rows in the table above read 0.000.
  samples.JSON.push((end - start) / 1_000_000n);
}
function T() {
  const start = process.hrtime.bigint();
  const b = gen(obj, true);
  ungen(b, true); // protocol mode on both ends
  const end = process.hrtime.bigint();
  samples.TEMU.push((end - start) / 1_000_000n);
}
Sampling:
const WARMUP = 100_000;
const SAMPLE = 100;
// Run each path through the warmup loop first so the JIT has settled before sampling.
for (let i = 0; i < WARMUP; i++) JSON.parse(JSON.stringify(obj));
for (let i = 0; i < SAMPLE; i++) J();
for (let i = 0; i < WARMUP; i++) ungen(gen(obj, true), true);
for (let i = 0; i < SAMPLE; i++) T();
console.dir(samples.TEMU);
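The min/max/mean/median/stddev columns in the table above aren't computed by this script; a small helper along these lines would do it (the Number() conversion is needed because the samples are BigInts):
function stats(raw) {
  const ms = raw.map(Number).sort((a, b) => a - b);
  const mean = ms.reduce((s, x) => s + x, 0) / ms.length;
  const variance = ms.reduce((s, x) => s + (x - mean) ** 2, 0) / ms.length;
  return {
    count: ms.length,
    min: ms[0],
    max: ms[ms.length - 1],
    mean,
    median: ms[Math.floor(ms.length / 2)],
    stdDev: Math.sqrt(variance),
  };
}

console.table({ JSON: stats(samples.JSON), TEMU: stats(samples.TEMU) });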
It works.
The real question is:
after doing proper research, how much better can this get?