Forem

Stanislav Berkov
Stanislav Berkov

Posted on

Protobuf fake data generator

Suppose you have Protobuf .proto files and the C# message classes generated from them, and you want to fill those messages with fake data for demo purposes. You can do this with the reflection API in Google.Protobuf.Reflection, as follows:

using System;
using System.Collections.Generic;
using System.Linq;
using Google.Protobuf;
using Google.Protobuf.Reflection;
using Google.Protobuf.Collections;
using Google.Protobuf.WellKnownTypes;
using System.Collections;

/// <summary>
/// Fills generated Google.Protobuf message classes with random demo data by walking
/// their descriptors through the protobuf reflection API.
/// </summary>
/// <remarks>
/// Instances are not thread-safe: every call shares one <see cref="Random"/> and the
/// recursion bookkeeping used while a message is being populated.
/// Timestamps are generated relative to <see cref="DateTime.UtcNow"/>, so they are not
/// reproducible even when a seed is supplied.
/// </remarks>
public class ProtobufFaker {
    private const string TimestampFullName = "google.protobuf.Timestamp";
    private const string WellKnownTypesPackage = "google.protobuf";
    private const string WrappersProtoFile = "google/protobuf/wrappers.proto";
    private const string RandomStringAlphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

    // Map entries are synthetic messages: key is always field 1, value is always field 2.
    private const int MapKeyFieldNumber = 1;
    private const int MapValueFieldNumber = 2;

    // Wrapper messages (Int32Value, StringValue, ...) carry their payload in field 1.
    private const int WrapperValueFieldNumber = 1;

    private readonly Random _random;
    private readonly int _maxCollectionSize;
    private readonly int _stringMaxLength;
    private readonly int _maxRecursionDepth;

    // Message type full name -> number of instances of that type currently being
    // populated on the active call path. Used to stop self-referential schemas.
    private readonly Dictionary<string, int> _activeMessageTypes = new();

    /// <summary>Creates a faker.</summary>
    /// <param name="seed">Seed for the random generator; <c>null</c> uses a time-dependent seed.</param>
    /// <param name="maxCollectionSize">Upper bound (inclusive) on the number of elements generated for repeated and map fields.</param>
    /// <param name="stringMaxLength">Upper bound (inclusive) on the length of generated strings and byte strings.</param>
    /// <param name="maxRecursionDepth">
    /// Maximum number of times a single message type may appear on one nesting path.
    /// Once reached, further nested fields of that type are left unset, which keeps
    /// self-referential messages (trees, linked lists, <c>google.protobuf.Struct</c>) finite.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxRecursionDepth"/> is less than 1.</exception>
    public ProtobufFaker(int? seed = null, int maxCollectionSize = 5, int stringMaxLength = 20, int maxRecursionDepth = 3) {
        if (maxRecursionDepth < 1) {
            throw new ArgumentOutOfRangeException(nameof(maxRecursionDepth), maxRecursionDepth, "Recursion depth must be at least 1.");
        }

        _random = seed.HasValue ? new Random(seed.Value) : new Random();
        _maxCollectionSize = maxCollectionSize;
        _stringMaxLength = stringMaxLength;
        _maxRecursionDepth = maxRecursionDepth;
    }

    /// <summary>Creates a new <typeparamref name="T"/> and fills it with random data.</summary>
    /// <typeparam name="T">A generated protobuf message class.</typeparam>
    /// <returns>The populated message.</returns>
    public T Generate<T>() where T : IMessage, new() {
        var message = new T();
        PopulateMessage(message);
        return message;
    }

    /// <summary>Fills every field of <paramref name="message"/> with random data, in declaration order.</summary>
    /// <param name="message">The message instance to mutate.</param>
    /// <exception cref="ArgumentNullException"><paramref name="message"/> is <c>null</c>.</exception>
    public void PopulateMessage(IMessage message) {
        if (message is null) {
            throw new ArgumentNullException(nameof(message));
        }

        var descriptor = message.Descriptor;
        var typeName = descriptor.FullName;

        // Record that one more instance of this type is on the current path.
        _activeMessageTypes.TryGetValue(typeName, out var enclosingCount);
        _activeMessageTypes[typeName] = enclosingCount + 1;
        try {
            foreach (var field in descriptor.Fields.InDeclarationOrder()) {
                // IsMap must be tested first: map fields also report IsRepeated,
                // but their CLR value is a dictionary, not an IList.
                if (field.IsMap) {
                    PopulateMapField(message, field);
                }
                else if (field.IsRepeated) {
                    PopulateRepeatedField(message, field);
                }
                else {
                    PopulateSingularField(message, field);
                }
            }
        }
        finally {
            if (enclosingCount == 0) {
                _activeMessageTypes.Remove(typeName);
            }
            else {
                _activeMessageTypes[typeName] = enclosingCount;
            }
        }
    }

    // Adds 1.._maxCollectionSize random entries to the map that already lives on the message.
    private void PopulateMapField(IMessage message, FieldDescriptor field) {
        // Map accessors do not support SetValue, so the existing map is mutated in place.
        var map = (IDictionary)field.Accessor.GetValue(message);
        var entryDescriptor = field.MessageType;
        var keyDescriptor = entryDescriptor.FindFieldByNumber(MapKeyFieldNumber);
        var valueDescriptor = entryDescriptor.FindFieldByNumber(MapValueFieldNumber);

        var count = _random.Next(1, _maxCollectionSize + 1);
        for (int i = 0; i < count; i++) {
            var key = GenerateSingleValue(keyDescriptor);
            var value = GenerateSingleValue(valueDescriptor);

            if (key is not null && value is not null) {
                // Indexer instead of Add: random keys can collide (bool keys almost
                // always do) and Add would throw on a duplicate.
                map[key] = value;
            }
        }
    }

    // Appends 1.._maxCollectionSize random elements to the repeated field on the message.
    private void PopulateRepeatedField(IMessage message, FieldDescriptor field) {
        var list = (IList)field.Accessor.GetValue(message);
        var count = _random.Next(1, _maxCollectionSize + 1);
        for (int i = 0; i < count; i++) {
            var value = GenerateSingleValue(field);
            if (value is not null) {
                list.Add(value);
            }
        }
    }

    // Sets a singular field; members of a oneof are only set about half of the time.
    private void PopulateSingularField(IMessage message, FieldDescriptor field) {
        if (field.ContainingOneof is not null && _random.Next(2) == 0) {
            return;
        }

        var value = GenerateSingleValue(field);
        if (value is not null) {
            field.Accessor.SetValue(message, value);
        }
    }

    // Produces one random value for the field's element type, or null when the type is
    // unsupported or a nested message was suppressed by the recursion limit.
    private object? GenerateSingleValue(FieldDescriptor field) {
        if (field.FieldType == FieldType.Message) {
            var messageType = field.MessageType;
            if (messageType.FullName == TimestampFullName) {
                return GenerateTimestamp();
            }

            if (IsWrapperType(messageType)) {
                // Wrapper-typed fields are exposed as nullable primitives (int?, string, ...)
                // in generated C#, so the accessor needs the wrapped value, not the wrapper message.
                return GenerateSingleValue(messageType.FindFieldByNumber(WrapperValueFieldNumber));
            }
        }

        return field.FieldType switch {
            FieldType.Double => _random.NextDouble() * 1000,
            FieldType.Float => (float)(_random.NextDouble() * 1000),
            FieldType.Int64 or FieldType.SInt64 or FieldType.SFixed64 => (long)_random.Next(1, 1000),
            FieldType.UInt64 or FieldType.Fixed64 => (ulong)_random.Next(1, 1000),
            FieldType.Int32 or FieldType.SInt32 or FieldType.SFixed32 => _random.Next(1, 1000),
            FieldType.Fixed32 or FieldType.UInt32 => (uint)_random.Next(1, 1000),
            FieldType.Bool => _random.Next(2) == 1,
            FieldType.String => GenerateRandomString(),
            FieldType.Bytes => GenerateRandomBytes(),
            FieldType.Enum => GenerateEnumValue(field.EnumType),
            FieldType.Message => GenerateNestedMessage(field.MessageType),
            _ => null,
        };
    }

    // True for the messages declared in google/protobuf/wrappers.proto.
    private static bool IsWrapperType(MessageDescriptor descriptor) =>
        descriptor.File.Package == WellKnownTypesPackage && descriptor.File.Name == WrappersProtoFile;

    // Instantiates and populates a nested message, or returns null when the same type is
    // already nested _maxRecursionDepth times on the current path.
    private IMessage? GenerateNestedMessage(MessageDescriptor messageDescriptor) {
        if (_activeMessageTypes.TryGetValue(messageDescriptor.FullName, out var depth) && depth >= _maxRecursionDepth) {
            return null;
        }

        var message = (IMessage?)Activator.CreateInstance(messageDescriptor.ClrType);
        if (message is null) { return null; }
        PopulateMessage(message);
        return message;
    }

    // Picks one of the enum's declared values uniformly and returns its numeric value.
    private object GenerateEnumValue(EnumDescriptor enumDescriptor) {
        var values = enumDescriptor.Values;
        var randomIndex = _random.Next(values.Count);
        return values[randomIndex].Number;
    }

    // Returns a timestamp within roughly two years either side of the current UTC time.
    private Timestamp GenerateTimestamp() {
        var now = DateTime.UtcNow;
        var daysOffset = _random.Next(-365 * 2, 365 * 2); // ±2 years from now
        var hoursOffset = _random.Next(-24, 24);
        var minutesOffset = _random.Next(-60, 60);

        // The Add* methods keep DateTimeKind.Utc, which Timestamp.FromDateTime requires.
        var randomTime = now
            .AddDays(daysOffset)
            .AddHours(hoursOffset)
            .AddMinutes(minutesOffset);

        return Timestamp.FromDateTime(randomTime);
    }

    // Returns an alphanumeric string of 1.._stringMaxLength characters.
    private string GenerateRandomString() {
        var length = _random.Next(1, _stringMaxLength + 1);
        var buffer = new char[length];
        for (int i = 0; i < buffer.Length; i++) {
            buffer[i] = RandomStringAlphabet[_random.Next(RandomStringAlphabet.Length)];
        }
        return new string(buffer);
    }

    // Returns 1.._stringMaxLength random bytes.
    private ByteString GenerateRandomBytes() {
        var length = _random.Next(1, _stringMaxLength + 1);
        var bytes = new byte[length];
        _random.NextBytes(bytes);
        return ByteString.CopyFrom(bytes);
    }
}

// Example usage:
/*
public class Program
{
    public static void Main()
    {
        var faker = new ProtobufFaker();

        // Generate fake data for any protobuf message
        var message = faker.Generate<YourProtobufMessage>();

        // The faker will properly handle:
        // - All protobuf field types
        // - Repeated fields
        // - Map fields
        // - Oneof fields
        // - Nested messages
        // - Enums
        // - Optional fields

        Console.WriteLine(message.ToString());
    }
}
*/
Enter fullscreen mode Exit fullscreen mode

https://gist.github.com/stasberkov/2d12925ea1d30c712fe9240bd5c7ea8b

Image of Timescale

🚀 pgai Vectorizer: SQLAlchemy and LiteLLM Make Vector Search Simple

We built pgai Vectorizer to simplify embedding management for AI applications—without needing a separate database or complex infrastructure. Since launch, developers have created over 3,000 vectorizers on Timescale Cloud, with many more self-hosted.

Read more →

Top comments (0)

AWS Security LIVE!

Tune in for AWS Security LIVE!

Join AWS Security LIVE! for expert insights and actionable tips to protect your organization and keep security teams prepared.

Learn More