DEV Community

Cover image for Database Internals: Building a File Manager in Go from Scratch 🗄️
Anthony4m
Anthony4m

Posted on

Database Internals: Building a File Manager in Go from Scratch 🗄️

Have you ever wondered how databases actually manage all those gigabytes of data under the hood? 🤔 Today, we're going to peek behind the curtain by building one of the most crucial components of a database engine: the File Manager.

What's a File Manager and Why Should You Care? 🎯

Think of a File Manager as your database's personal secretary for disk operations. Its job? Breaking up storage into manageable chunks (blocks), handling reads and writes efficiently, and making sure nothing gets lost or corrupted along the way.

flowchart of file Manager

The Building Blocks 🧱

Let's start with our core structure. Here's what makes our File Manager tick:

// FileMgr holds the state of the block-oriented file manager: where the
// database files live, how large a block is, which files are currently open,
// and the bookkeeping kept about reads and writes.
type FileMgr struct {
    // dbDirectory is the directory that contains the database files.
    dbDirectory string
    // blocksize is the size of one block in bytes; a block's byte offset in
    // its file is its block number multiplied by blocksize.
    blocksize int
    // NOTE(review): isNew is not used by any code shown here; presumably it
    // records whether dbDirectory had to be created — confirm.
    isNew bool

    // openFiles maps a file name to its open handle.
    openFiles map[string]*os.File
    // mutex is locked by the FileMgr methods before they touch the fields
    // of this struct or the open files.
    mutex sync.RWMutex

    // blocksRead and blocksWritten count the blocks successfully read and
    // written.
    blocksRead, blocksWritten int
    // readLog and writeLog keep one entry per completed read and write
    // (presumably appended by addToReadLog / addToWriteLog — confirm).
    readLog, writeLog []ReadWriteLogEntry

    // metaData holds the tracked file statistics (see FileMetadata).
    metaData FileMetadata
}
Enter fullscreen mode Exit fullscreen mode

πŸ” Key Components:

  • dbDirectory: Home base for all our files
  • blocksize: How big each chunk of data should be
  • mutex: Our traffic controller for thread safety
  • openFiles: A catalog of files currently in use
  • metaData: The keeper of important file stats

Making It Thread-Safe 🔒

One of the coolest parts of our implementation is how it handles multiple users trying to access the same data. We use a sync.RWMutex which allows:

  • Multiple readers to access data simultaneously (yay efficiency!)
  • Only one writer at a time (preventing data corruption)

Here's how we handle reads:

// Read loads the block identified by blk from its file into the page p.
//
// The block's byte offset is blk.Number() * fm.blocksize. Read fails if the
// file cannot be obtained, if the seek or the read fails, or if fewer than
// fm.blocksize bytes were read. On success it increments fm.blocksRead and
// records the operation in the read log.
func (fm *FileMgr) Read(blk *BlockId, p *Page) error {
    // An exclusive lock is required even though this is a "read": the method
    // moves the shared file offset (Seek followed by Read) and mutates
    // fm.blocksRead and the read log. Under RLock two concurrent readers
    // would race on all of them.
    fm.mutex.Lock()
    defer fm.mutex.Unlock()

    f, err := fm.getFile(blk.GetFileName())
    if err != nil {
        return fmt.Errorf("failed to get file for block %v: %w", blk, err)
    }

    // Widen both operands before multiplying so the offset cannot overflow
    // the platform's int for large files.
    offset := int64(blk.Number()) * int64(fm.blocksize)
    // Whence 0 means "relative to the start of the file" (io.SeekStart).
    if _, err := f.Seek(offset, 0); err != nil {
        return fmt.Errorf("failed to seek to offset %d in file %s: %w", offset, blk.GetFileName(), err)
    }

    bytesRead, err := f.Read(p.Contents())
    if err != nil {
        return fmt.Errorf("failed to read block %v: %w", blk, err)
    }
    if bytesRead != fm.blocksize {
        return fmt.Errorf("incomplete read: expected %d bytes, got %d", fm.blocksize, bytesRead)
    }

    fm.blocksRead++
    fm.addToReadLog(ReadWriteLogEntry{
        Timestamp:   time.Now(),
        BlockId:     blk,
        BytesAmount: bytesRead,
    })

    return nil
}
Enter fullscreen mode Exit fullscreen mode

And writes:

// Write stores the contents of page p into the block identified by blk.
//
// The block's byte offset is blk.Number() * fm.blocksize. After the data is
// written the file is synced before the write is counted. Write fails if the
// file cannot be obtained, if the seek, write or sync fails, or if fewer than
// fm.blocksize bytes were written. On success it increments fm.blocksWritten
// and records the operation in the write log.
func (fm *FileMgr) Write(blk *BlockId, p *Page) error {
    fm.mutex.Lock()
    defer fm.mutex.Unlock()

    f, err := fm.getFile(blk.GetFileName())
    if err != nil {
        return fmt.Errorf("failed to get file for block %v: %w", blk, err)
    }

    // Widen both operands before multiplying so the offset cannot overflow
    // the platform's int for large files.
    offset := int64(blk.Number()) * int64(fm.blocksize)
    // Whence 0 means "relative to the start of the file" (io.SeekStart).
    if _, err := f.Seek(offset, 0); err != nil {
        return fmt.Errorf("failed to seek to offset %d in file %s: %w", offset, blk.GetFileName(), err)
    }

    bytesWritten, err := f.Write(p.Contents())
    if err != nil {
        return fmt.Errorf("failed to write block %v: %w", blk, err)
    }
    if bytesWritten != fm.blocksize {
        return fmt.Errorf("incomplete write: expected %d bytes, wrote %d", fm.blocksize, bytesWritten)
    }

    // Sync before reporting success so a successful Write means the block
    // has been handed to stable storage, not just to the OS cache.
    if err := f.Sync(); err != nil {
        return fmt.Errorf("failed to sync file %s: %w", blk.GetFileName(), err)
    }

    fm.blocksWritten++
    fm.addToWriteLog(ReadWriteLogEntry{
        Timestamp:   time.Now(),
        BlockId:     blk,
        BytesAmount: bytesWritten,
    })

    return nil
}
Enter fullscreen mode Exit fullscreen mode

The Secret Sauce: Metadata Tracking 📊

Every good database needs to keep track of its files. Our metadata system handles this beautifully:

// FileMetadata is the set of statistics the file manager tracks: timestamps
// for creation, modification and last access, plus size figures.
type FileMetadata struct {
    // CreatedAt and ModifiedAt are the creation and last-modification times.
    CreatedAt, ModifiedAt time.Time

    // SizeLimit is the size cap to enforce and FileSize the current size.
    // NOTE(review): both are presumably in bytes — confirm against the code
    // that maintains them.
    SizeLimit, FileSize int64

    // BlockCount is the number of blocks.
    BlockCount int

    // LastAccessed is the time of the most recent access.
    LastAccessed time.Time
}
Enter fullscreen mode Exit fullscreen mode

This gives us powerful capabilities like:

  • Enforcing size limits
  • Tracking file access patterns
  • Managing file lifecycles

Cool Features You Can Add 🚀

  1. Block Caching
   // BlockCache is a starting point for an in-memory cache of pages.
   // No eviction or lookup logic is shown here; only the storage is declared.
   type BlockCache struct {
       // capacity is presumably the maximum number of pages to keep cached —
       // nothing in this snippet enforces it yet.
       capacity int
       // blocks holds the cached pages under a string key.
       // NOTE(review): the key format (e.g. file name plus block number) is
       // not defined here — decide and document it when implementing.
       blocks   map[string]*Page
   }
Enter fullscreen mode Exit fullscreen mode
  2. Compression
   // WriteCompressed gzip-compresses the file that blk belongs to and then
   // deletes the uncompressed original.
   //
   // The compressed copy is written next to the source as
   // <file name> + compressionExt. If the file is currently open in
   // fm.openFiles it is closed and removed from the map first. The original
   // is deleted only after the gzip stream has been finished and the
   // destination synced and closed without error, so a failed compression
   // never costs the source data. p is not used; the whole file is
   // compressed, not a single page.
   func (fm *FileMgr) WriteCompressed(blk *BlockId, p *Page) error {
       fm.mutex.Lock()
       defer fm.mutex.Unlock()

       filename := blk.GetFileName()

       // Close file if open
       if f, exists := fm.openFiles[filename]; exists {
           if err := f.Close(); err != nil {
               return fmt.Errorf("failed to close file before compression: %w", err)
           }
           delete(fm.openFiles, filename)
       }

       srcPath := filepath.Join(fm.dbDirectory, filename)
       dstPath := srcPath + compressionExt

       if err := gzipFile(srcPath, dstPath); err != nil {
           return err
       }

       // Remove original file only now that the compressed copy is complete.
       if err := os.Remove(srcPath); err != nil {
           return fmt.Errorf("failed to remove original file: %w", err)
       }

       return nil
   }

   // gzipFile streams srcPath through a gzip writer into dstPath. On any
   // failure the partially written destination is closed and removed.
   func gzipFile(srcPath, dstPath string) (err error) {
       src, err := os.Open(srcPath)
       if err != nil {
           return fmt.Errorf("failed to open source file: %w", err)
       }
       defer src.Close()

       dst, err := os.Create(dstPath)
       if err != nil {
           return fmt.Errorf("failed to create compressed file: %w", err)
       }
       defer func() {
           if err != nil {
               // Best-effort cleanup; the original error is what matters.
               _ = dst.Close()
               _ = os.Remove(dstPath)
           }
       }()

       gw := gzip.NewWriter(dst)

       // Copy data using buffer
       buf := make([]byte, bufferSize)
       if _, err = io.CopyBuffer(gw, src, buf); err != nil {
           return fmt.Errorf("compression failed: %w", err)
       }

       // Close flushes the remaining compressed data and writes the gzip
       // footer; its error must be checked or the output may be truncated.
       if err = gw.Close(); err != nil {
           return fmt.Errorf("compression failed: %w", err)
       }
       if err = dst.Sync(); err != nil {
           return fmt.Errorf("failed to sync compressed file: %w", err)
       }
       if err = dst.Close(); err != nil {
           return fmt.Errorf("failed to close compressed file: %w", err)
       }

       return nil
   }
Enter fullscreen mode Exit fullscreen mode
  3. Checksum Verification
// CalculateChecksum hashes the whole file named filename inside
// fm.dbDirectory with the algorithm selected by checksumType (MD5 or SHA256).
//
// It returns a FileChecksum carrying the file name, the checksum type and the
// digest as a lower-case hex string. It returns an error for an unsupported
// checksum type, or if the file cannot be opened or read.
func (fm *FileMgr) CalculateChecksum(filename string, checksumType ChecksumType) (*FileChecksum, error) {
    fm.mutex.RLock()
    defer fm.mutex.RUnlock()

    var h hash.Hash
    switch checksumType {
    case MD5:
        h = md5.New()
    case SHA256:
        h = sha256.New()
    default:
        // %v formats correctly whether ChecksumType is string- or int-based.
        return nil, fmt.Errorf("unsupported checksum type: %v", checksumType)
    }

    // Named path rather than filepath so the path/filepath package is not
    // shadowed for the rest of the function.
    path := filepath.Join(fm.dbDirectory, filename)
    file, err := os.Open(path)
    if err != nil {
        return nil, fmt.Errorf("failed to open file for checksum: %w", err)
    }
    defer file.Close()

    // Stream the file through the hash in 32 KiB chunks instead of loading
    // it into memory.
    buf := make([]byte, 32*1024)
    if _, err := io.CopyBuffer(h, file, buf); err != nil {
        return nil, fmt.Errorf("failed to calculate checksum: %w", err)
    }

    return &FileChecksum{
        Filename: filename,
        Type:     checksumType,
        Hash:     hex.EncodeToString(h.Sum(nil)),
    }, nil
}

   // verifyChecksum recomputes the checksum of filename using checksumType
   // and reports whether it equals expectedChecksum.
   //
   // expectedChecksum must be the lower-case hex string form that
   // CalculateChecksum produces; the comparison is an exact string match.
   // The error from CalculateChecksum is returned unchanged, in which case
   // the boolean result is false.
   func (fm *FileMgr) verifyChecksum(filename string, checksumType ChecksumType, expectedChecksum string) (bool, error) {
       actual, err := fm.CalculateChecksum(filename, checksumType)
       if err != nil {
           return false, err
       }
       return actual.Hash == expectedChecksum, nil
   }
Enter fullscreen mode Exit fullscreen mode

Real-World Applications 🌍

This File Manager isn't just a toy project. Similar systems are used in:

  • SQLite's pager
  • PostgreSQL's buffer manager
  • LevelDB's table files

Let's Talk Performance 📈

Our implementation provides:

  • O(1) block access time
  • Thread-safe operations
  • Predictable memory usage
  • Crash recovery support

Try It Yourself! 💻

Want to experiment? Here's a quick way to get started:

// main creates a file manager rooted at ./data and writes one zero-filled
// block to test.db, exiting with a logged error if any step fails.
func main() {
    // One constant for both the manager's block size and the page buffer,
    // so the two cannot drift apart.
    const blockSize = 4096

    fm, err := NewFileMgr("./data", blockSize)
    if err != nil {
        log.Fatal(err)
    }

    // Start experimenting!
    block := NewBlockId("test.db", 0)
    page := NewPage(make([]byte, blockSize))

    // Write some data
    if err := fm.Write(block, page); err != nil {
        log.Fatal(err)
    }
}
Enter fullscreen mode Exit fullscreen mode

What's Next? 🎯

You could extend this File Manager with:

  • Write-ahead logging (WAL)
  • Buffer pool management
  • Compression algorithms
  • Encryption support

Share Your Thoughts! 💭

What features would you add to this implementation? Have you built something similar? Let's discuss in the comments!


If you enjoyed this deep dive into database internals, follow me for more articles about Go, databases, and system design! And don't forget to drop a ❤️ if you found this helpful!

Top comments (0)