DEV Community

Paul Robertson
Paul Robertson

Posted on

MLOps for JavaScript Developers: Deploy and Monitor Your First AI Model in Production

This article contains affiliate links. I may earn a commission at no extra cost to you.


title: "MLOps for JavaScript Developers: Deploy and Monitor Your First AI Model in Production"
published: true
description: "Learn to build a complete MLOps pipeline with Docker, Node.js, and real-time monitoring for production AI deployments"
tags: mlops, javascript, ai, production, monitoring

cover_image: https://dev-to-uploads.s3.amazonaws.com/uploads/articles/mlops-javascript-cover.png

You've trained your first machine learning model, tested it locally, and it's working great. But now comes the real challenge: how do you deploy it to production, monitor its performance, and maintain it over time? If you're a JavaScript developer stepping into the world of MLOps, this tutorial will guide you through building a complete production pipeline.

We'll build a sentiment analysis API that demonstrates real-world MLOps practices, from containerized deployment to automated monitoring and rollback strategies.

What We're Building

Our MLOps pipeline will include:

  • A Node.js API serving a pre-trained sentiment analysis model
  • Docker containerization for consistent deployments
  • Real-time performance monitoring and alerting
  • Model versioning with automated rollback capabilities
  • A monitoring dashboard to track model drift and performance metrics

Prerequisites

Before we start, make sure you have:

  • Node.js 18+ installed
  • Docker and Docker Compose
  • Basic familiarity with Express.js
  • A text editor (VS Code recommended)

Step 1: Setting Up the Base Model Service

Let's start by creating a simple sentiment analysis service using a pre-trained model from Hugging Face's Transformers.js library.

mkdir mlops-sentiment-api
cd mlops-sentiment-api
npm init -y
npm install express @xenova/transformers cors helmet morgan
npm install --save-dev nodemon jest supertest
Enter fullscreen mode Exit fullscreen mode

Create src/models/sentimentModel.js:

const { pipeline } = require('@xenova/transformers');

/**
 * Thin wrapper around a Hugging Face Transformers.js sentiment-analysis
 * pipeline. Tracks load state and a version string so callers can
 * health-check the service and tag every prediction with the serving
 * model version.
 */
class SentimentModel {
  constructor() {
    this.model = null;
    this.modelVersion = '1.0.0';
    this.isLoaded = false;
  }

  /**
   * Loads the pretrained DistilBERT SST-2 pipeline (downloads weights on
   * first run). Must resolve before predict() may be called.
   * @throws re-throws any pipeline-loading failure after logging it.
   */
  async initialize() {
    console.log('Loading sentiment analysis model...');
    try {
      const task = 'sentiment-analysis';
      const checkpoint = 'Xenova/distilbert-base-uncased-finetuned-sst-2-english';
      this.model = await pipeline(task, checkpoint);
      this.isLoaded = true;
      console.log(`Model v${this.modelVersion} loaded successfully`);
    } catch (error) {
      console.error('Failed to load model:', error);
      throw error;
    }
  }

  /**
   * Runs sentiment analysis on one text input.
   * @param {string} text - raw text to classify
   * @returns {Promise<{prediction: object, inferenceTime: number, modelVersion: string, timestamp: string}>}
   * @throws {Error} 'Model not loaded' if initialize() has not completed
   */
  async predict(text) {
    if (!this.isLoaded) {
      throw new Error('Model not loaded');
    }

    // Wall-clock latency of the single inference call, in milliseconds.
    const begin = Date.now();
    const output = await this.model(text);
    const elapsed = Date.now() - begin;

    return {
      prediction: output[0],
      inferenceTime: elapsed,
      modelVersion: this.modelVersion,
      timestamp: new Date().toISOString()
    };
  }

  /** Snapshot of load state, version, and process uptime for /health. */
  getHealth() {
    const { isLoaded, modelVersion } = this;
    return { isLoaded, modelVersion, uptime: process.uptime() };
  }
}

module.exports = SentimentModel;
Enter fullscreen mode Exit fullscreen mode

Now create the main API in src/app.js:

const express = require('express');
const cors = require('cors');
const helmet = require('helmet');
const morgan = require('morgan');
const SentimentModel = require('./models/sentimentModel');
const MetricsCollector = require('./monitoring/metricsCollector');

/**
 * HTTP service exposing the sentiment model with /health, /predict, and
 * /metrics endpoints. Wires security/logging middleware and records
 * per-request metrics for monitoring.
 */
class MLOpsAPI {
  constructor() {
    this.app = express();
    this.model = new SentimentModel();
    this.metrics = new MetricsCollector();
    this.server = null; // set by start(); kept so callers can close() for graceful shutdown
    this.setupMiddleware();
    this.setupRoutes();
  }

  // Security headers, CORS, request logging, and JSON body parsing.
  setupMiddleware() {
    this.app.use(helmet());
    this.app.use(cors());
    this.app.use(morgan('combined'));
    // 10mb cap guards the model against oversized payloads.
    this.app.use(express.json({ limit: '10mb' }));
  }

  setupRoutes() {
    // Health check endpoint. Returns 503 while the model is loading so
    // orchestrator liveness/readiness probes (which key off the status
    // code, not the body) treat the pod as not-ready.
    this.app.get('/health', (req, res) => {
      const health = this.model.getHealth();
      res.status(health.isLoaded ? 200 : 503).json({
        status: health.isLoaded ? 'healthy' : 'unhealthy',
        ...health
      });
    });

    // Prediction endpoint
    this.app.post('/predict', async (req, res) => {
      try {
        const { text } = req.body;

        if (!text || typeof text !== 'string') {
          return res.status(400).json({
            error: 'Text input is required and must be a string'
          });
        }

        // 503 (not 500) while the model is still loading, so load
        // balancers retry instead of treating it as a server bug.
        if (!this.model.getHealth().isLoaded) {
          return res.status(503).json({ error: 'Model not loaded' });
        }

        const result = await this.model.predict(text);

        // Record metrics
        this.metrics.recordPrediction({
          input: text,
          output: result.prediction,
          inferenceTime: result.inferenceTime,
          modelVersion: result.modelVersion
        });

        res.json(result);
      } catch (error) {
        console.error('Prediction error:', error);
        this.metrics.recordError(error);
        res.status(500).json({
          error: 'Internal server error',
          message: error.message
        });
      }
    });

    // Metrics endpoint
    this.app.get('/metrics', (req, res) => {
      res.json(this.metrics.getMetrics());
    });
  }

  /** Loads the model; resolves with this instance for chaining. */
  async initialize() {
    await this.model.initialize();
    return this;
  }

  /**
   * Starts listening on the given port.
   * @param {number} [port=3000]
   * @returns {import('http').Server} the server handle, so callers can
   *   close() it for graceful shutdown or tests.
   */
  start(port = 3000) {
    this.server = this.app.listen(port, () => {
      console.log(`MLOps API running on port ${port}`);
    });
    return this.server;
  }
}

module.exports = MLOpsAPI;
Enter fullscreen mode Exit fullscreen mode

Step 2: Implementing Real-time Monitoring

Create src/monitoring/metricsCollector.js to track model performance:

/**
 * In-memory metrics store for the model service: prediction/error counts,
 * running latency averages, sentiment distribution, bounded histories,
 * and per-hour stats. Also provides a simple drift check over recent
 * predictions. All state is process-local and lost on restart.
 */
class MetricsCollector {
  constructor() {
    this.metrics = {
      totalPredictions: 0,
      totalErrors: 0,
      averageInferenceTime: 0,
      predictionHistory: [],
      errorHistory: [],
      sentimentDistribution: { POSITIVE: 0, NEGATIVE: 0 },
      hourlyStats: new Map()
    };

    // Keep only last 1000 predictions in memory
    this.maxHistorySize = 1000;
  }

  /**
   * Records one successful prediction.
   * @param {{input: string, output: {label: string}, inferenceTime: number,
   *          modelVersion: string}} data
   */
  recordPrediction(data) {
    this.metrics.totalPredictions++;

    // Incremental running mean — avoids storing every latency value.
    const total = this.metrics.totalPredictions;
    const prevAvg = this.metrics.averageInferenceTime;
    this.metrics.averageInferenceTime =
      (prevAvg * (total - 1) + data.inferenceTime) / total;

    // Track sentiment distribution. Default missing labels to 0 so an
    // unexpected label (e.g. NEUTRAL from a future model) increments
    // cleanly instead of producing NaN via undefined++.
    const label = data.output.label;
    this.metrics.sentimentDistribution[label] =
      (this.metrics.sentimentDistribution[label] ?? 0) + 1;

    // Store prediction history
    this.metrics.predictionHistory.push({
      timestamp: new Date().toISOString(),
      input: data.input.substring(0, 100), // Truncate for storage
      output: data.output,
      inferenceTime: data.inferenceTime,
      modelVersion: data.modelVersion
    });

    // Maintain history size limit
    if (this.metrics.predictionHistory.length > this.maxHistorySize) {
      this.metrics.predictionHistory.shift();
    }

    // Update hourly stats, including the hourly latency average.
    this.updateHourlyStats(data.inferenceTime);
  }

  /** Records a failed request; keeps only the last 100 errors. */
  recordError(error) {
    this.metrics.totalErrors++;
    this.metrics.errorHistory.push({
      timestamp: new Date().toISOString(),
      error: error.message,
      stack: error.stack
    });

    // Maintain error history size
    if (this.metrics.errorHistory.length > 100) {
      this.metrics.errorHistory.shift();
    }

    // Count the error against the current hour's bucket as well.
    this.getHourBucket().errors++;
  }

  /** Returns (creating if needed) the stats bucket for the current hour. */
  getHourBucket() {
    const now = new Date();
    const hourKey = `${now.toDateString()}-${now.getHours()}`;

    if (!this.metrics.hourlyStats.has(hourKey)) {
      this.metrics.hourlyStats.set(hourKey, {
        predictions: 0,
        errors: 0,
        avgInferenceTime: 0
      });
    }
    return this.metrics.hourlyStats.get(hourKey);
  }

  /**
   * Folds one prediction into the current hour's bucket.
   * @param {number} [inferenceTime=0] latency of the prediction in ms
   */
  updateHourlyStats(inferenceTime = 0) {
    const bucket = this.getHourBucket();
    bucket.predictions++;
    // Running mean; previously this field was initialized but never
    // updated, so it always read 0.
    bucket.avgInferenceTime +=
      (inferenceTime - bucket.avgInferenceTime) / bucket.predictions;
  }

  /**
   * Snapshot of all metrics plus derived values. hourlyStats is
   * converted from a Map to a plain object so JSON.stringify (used by
   * the /metrics endpoint) doesn't serialize it as {}.
   */
  getMetrics() {
    const errorRate = this.metrics.totalPredictions > 0
      ? (this.metrics.totalErrors / this.metrics.totalPredictions) * 100
      : 0;

    return {
      ...this.metrics,
      hourlyStats: Object.fromEntries(this.metrics.hourlyStats),
      errorRate: parseFloat(errorRate.toFixed(2)),
      uptime: process.uptime(),
      memoryUsage: process.memoryUsage(),
      recentPredictions: this.metrics.predictionHistory.slice(-10)
    };
  }

  /**
   * Simple drift check: compares the POSITIVE rate of the last 100
   * predictions against the all-time rate. Returns null until at least
   * 50 recent predictions exist.
   */
  checkModelDrift() {
    const recent = this.metrics.predictionHistory.slice(-100);
    if (recent.length < 50) return null;

    const recentPositive = recent.filter(
      p => p.output.label === 'POSITIVE'
    ).length;
    const recentPositiveRate = recentPositive / recent.length;

    // Simple drift detection: significant change in sentiment distribution
    const overallPositiveRate = this.metrics.sentimentDistribution.POSITIVE /
      this.metrics.totalPredictions;

    const drift = Math.abs(recentPositiveRate - overallPositiveRate);

    return {
      driftScore: parseFloat(drift.toFixed(3)),
      isDrifting: drift > 0.2, // Alert if >20% change
      recentPositiveRate: parseFloat(recentPositiveRate.toFixed(3)),
      overallPositiveRate: parseFloat(overallPositiveRate.toFixed(3))
    };
  }
}

module.exports = MetricsCollector;
Enter fullscreen mode Exit fullscreen mode

Step 3: Model Versioning and Rollback Strategy

Create src/deployment/modelManager.js for handling model versions:


The following JavaScript (CommonJS) module implements the model manager:
const fs = require('fs').promises;
const path = require('path');

class ModelManager {
  constructor() {
    this.modelsPath = path.join(__dirname, '../../models');
    this.currentModel = null;
    this.previousModel = null;
    this.deploymentHistory = [];
  }

  async deployModel(modelConfig) {
    try {
      // Store previous model for rollback
      this.previousModel = this.currentModel;

      // Deploy new model
      const newModel = await this.loadModel(modelConfig);

      // Run validation tests
      const validationResults = await this.validateModel(newModel);

      if (!validationResults.isValid) {
        throw new Error(`Model validation failed: ${validationResults.errors.join(', ')}`);
      }

      this.currentModel = newModel;

      // Record deployment
      this.deploymentHistory.push({
        version: modelConfig.version,
        timestamp: new Date().toISOString(),
        status: 'deployed',
        validationResults
      });

      console.log(`Successfully deployed model version ${modelConfig.version}`);
      return { success: true, version: modelConfig.version };

    } catch (error) {
      console.error('Model deployment failed:', error);

      // Record failed deployment
      this.deploymentHistory.push({
        version: modelConfig.version,
        timestamp: new Date().toISOString(),
        status: 'failed',
        error: error.message
      });

      throw error;
    }
  }

  async rollback() {
    if (!this.previousModel) {
      throw new Error('No previous model available for rollback');
    }

    console.log('Rolling back to previous model version...');

    const temp = this.currentModel;
    this.currentModel = this.previousModel;
    this.previousModel = temp;

    this.deploymentHistory.push({
      version: this.currentModel.modelVersion,
      timestamp: new Date().toISOString(),
      status: 'rollback'
    });

    return { success: true, version: this.currentModel.modelVersion };
  }

  async validateModel(model) {
    const testCases = [
      { input: 'I love this product!', expected: 'POSITIVE' },
      { input: 'This is terrible', expected: 'NEGATIVE' },
      { input: 'It\'s okay, nothing special', expected: 'NEGATIVE' }
    ];

    const errors = [];
    let passedTests = 0;

    for (const testCase of testCases) {
      try {
        const result = await model.predict(testCase.input);
        if (result.prediction.label === testCase.expected) {
          passedTests++;
        } else {
          errors.push(`Test failed for "${testCase.input}": expected ${testCase.expected}, got ${result.prediction.label}`);
        }
      } catch (error) {
        errors.push(`Test error for "${testCase.input}": ${error.message}`);
      }
    }

    return {
      isValid: passedTests >= testCases.length * 0.8, // 80% pass rate required
      passedTests,
      totalTests: testCases.length,
      errors
    };
  }

  getDeploymentHistory() {
    return this.deploymen

---

**Tools mentioned:**
- [Amazon](https://www.amazon.com/?tag=practicalai06-20)
Enter fullscreen mode Exit fullscreen mode

Top comments (0)