DEV Community

Piyush Chauhan
Piyush Chauhan

Posted on

Algorithmic Concepts in MongoDB Design

1. Sliding Window Concept

Application in MongoDB

// Sliding Window for Time-Series Data
db.userActivity.aggregate([
  // Sliding window for last 30 days of user engagement
  {
    $match: {
      timestamp: {
        $gte: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000)
      }
    }
  },
  {
    $group: {
      _id: {
        // Group by day
        day: { $dateToString: { 
          format: "%Y-%m-%d", 
          date: "$timestamp" 
        }}
      },
      dailyActiveUsers: { $addToSet: "$userId" },
      totalEvents: { $sum: 1 }
    }
  },
  // Sliding window aggregation to track trends
  {
    $setWindowFields: {
      sortBy: { "_id.day": 1 },
      output: {
        movingAverageUsers: { 
          $avg: "$dailyActiveUsers.length", 
          window: {
            range: [-7, 0],
            unit: "day"
          }
        }
      }
    }
  }
])
Enter fullscreen mode Exit fullscreen mode

Key Benefits

  • Track rolling metrics
  • Analyze time-based trends
  • Efficient memory usage

2. Two-Pointer Technique

Schema Design Example

// Optimized Social Graph Schema
{
  _id: ObjectId("user1"),
  followers: [
    { 
      userId: ObjectId("user2"),
      followedAt: ISODate(),
      interaction: {
        // Two-pointer like tracking
        mutualFollows: Boolean,
        lastInteractionScore: Number
      }
    }
  ],
  following: [
    { 
      userId: ObjectId("user3"),
      followedAt: ISODate()
    }
  ]
}

// Efficient Friend Recommendation
function findPotentialConnections(userId) {
  return db.users.aggregate([
    { $match: { _id: userId } },
    // Expand followers and following
    { $project: {
        potentialConnections: {
          $setIntersection: [
            "$followers.userId", 
            "$following.userId"
          ]
        }
      }
    }
  ]);
}
Enter fullscreen mode Exit fullscreen mode

Optimization Techniques

  • Reduce computational complexity
  • Efficient relationship tracking
  • Minimize full collection scans

3. Dynamic Programming (DP) Approach

Caching and Memoization

// DP-Inspired Caching Strategy
{
  _id: "user_analytics_cache",
  userId: ObjectId("user1"),
  // Memoized computation results
  cachedMetrics: {
    last30DaysEngagement: {
      computedAt: ISODate(),
      totalViews: 1000,
      avgSessionDuration: 5.5
    },
    yearlyTrends: {
      // Cached computation results
      computedAt: ISODate(),
      metrics: { /* pre-computed data */ }
    }
  },
  // Invalidation timestamp
  lastUpdated: ISODate()
}

// DP-like Incremental Computation
function updateUserAnalytics(userId) {
  // Check if cached result is valid
  const cachedResult = db.analyticsCache.findOne({ userId });

  if (shouldRecompute(cachedResult)) {
    const newMetrics = computeComplexMetrics(userId);

    // Atomic update with incremental computation
    db.analyticsCache.updateOne(
      { userId },
      { 
        $set: {
          cachedMetrics: newMetrics,
          lastUpdated: new Date()
        }
      },
      { upsert: true }
    );
  }
}
Enter fullscreen mode Exit fullscreen mode

4. Greedy Approach in Indexing

Indexing Strategy

// Greedy Index Selection
db.products.createIndex(
  { 
    category: 1, 
    price: -1, 
    soldCount: -1 
  },
  {
    // Greedy optimization
    partialFilterExpression: {
      inStock: true,
      price: { $gt: 100 }
    }
  }
)

// Query Optimization Example
function greedyQueryOptimization(filters) {
  // Dynamically select best index
  const indexes = db.products.getIndexes();

  const bestIndex = indexes.reduce((best, current) => {
    // Greedy selection of most selective index
    const selectivityScore = computeIndexSelectivity(current, filters);
    return selectivityScore > best.selectivityScore 
      ? { index: current, selectivityScore }
      : best;
  }, { selectivityScore: -1 });

  return bestIndex.index;
}
Enter fullscreen mode Exit fullscreen mode

5. Heap/Priority Queue Concepts

Distributed Ranking System

// Priority Queue-like Document Structure
{
  _id: "global_leaderboard",
  topUsers: [
    // Maintained like a min-heap
    { 
      userId: ObjectId("user1"),
      score: 1000,
      lastUpdated: ISODate()
    },
    // Continuously maintained top K users
  ],
  updateStrategy: {
    maxSize: 100,
    evictionPolicy: "lowest_score"
  }
}

// Efficient Leaderboard Management
function updateLeaderboard(userId, newScore) {
  db.leaderboards.findOneAndUpdate(
    { _id: "global_leaderboard" },
    {
      $push: {
        topUsers: {
          $each: [{ userId, score: newScore }],
          $sort: { score: -1 },
          $slice: 100  // Maintain top 100
        }
      }
    }
  );
}
Enter fullscreen mode Exit fullscreen mode

6. Graph Algorithms Inspiration

Social Network Schema

// Graph-like User Connections
{
  _id: ObjectId("user1"),
  connections: [
    {
      userId: ObjectId("user2"),
      type: "friend",
      strength: 0.85,
      // Inspired by PageRank-like scoring
      connectionScore: {
        mutualFriends: 10,
        interactions: 25
      }
    }
  ]
}

// Connection Recommendation
function recommendConnections(userId) {
  return db.users.aggregate([
    { $match: { _id: userId } },
    // Graph traversal-like recommendation
    { $graphLookup: {
        from: "users",
        startWith: "$connections.userId",
        connectFromField: "connections.userId",
        connectToField: "_id",
        as: "potentialConnections",
        maxDepth: 2,
        restrictSearchWithMatch: {
          // Avoid already connected users
          _id: { $nin: existingConnections }
        }
      }
    }
  ]);
}
Enter fullscreen mode Exit fullscreen mode

Scalability Considerations

Key Principles

  1. Algorithmic Efficiency

    • Minimize collection scans
    • Use indexing strategically
    • Implement efficient aggregation
  2. Distributed Computing

    • Leverage sharding
    • Implement smart partitioning
    • Use aggregation pipeline for distributed computing
  3. Caching and Memoization

    • Cache complex computations
    • Use time-based invalidation
    • Implement incremental updates

Key Skills

  • Understand data access patterns
  • Know indexing strategies
  • Recognize query complexity
  • Think about horizontal scaling

Top comments (0)