MongoDB - Lý Thuyết Cơ Bản

📚 Giới Thiệu

MongoDB là một NoSQL database hướng document (document-oriented), lưu trữ dữ liệu dưới dạng các document BSON (Binary JSON). MongoDB được thiết kế hướng tới scalability, performance và ease of development.

🏗️ Kiến Trúc MongoDB

1. Logical Architecture

Database
    ↓
Collections (equivalent to tables)
    ↓
Documents (equivalent to rows)
    ↓
Fields (equivalent to columns)

2. Physical Architecture

mongod (Database Server)
    ↓
Storage Engine (WiredTiger)
    ↓
Data Files + Journal Files
    ↓
File System

3. Deployment Architectures

Standalone

Application → mongod instance

Replica Set

Application → Primary → Secondary
                    → Secondary
                    → Arbiter (optional)

Sharded Cluster

Application → mongos Router
                ↓
            Config Servers
                ↓
        Shard1   Shard2   Shard3
        (RS)     (RS)     (RS)

📄 Document Structure

BSON Data Types

{
  // String
  "name": "John Doe",

  // Number (Int32, Int64, Double)
  "age": 30,
  "salary": 75000.50,

  // Boolean
  "isActive": true,

  // Date
  "createdAt": ISODate("2024-01-15T10:30:00Z"),

  // Array
  "tags": ["developer", "senior", "javascript"],

  // Object/Document
  "address": {
    "street": "123 Main St",
    "city": "New York",
    "zipCode": "10001"
  },

  // ObjectId
  "_id": ObjectId("507f1f77bcf86cd799439011"),

  // Null
  "middleName": null,

  // Binary Data
  "profileImage": BinData(0, "...")
}

Schema Design Patterns

Embedding Pattern

// User với embedded addresses
{
  "_id": ObjectId("..."),
  "name": "John Doe",
  "addresses": [
    {
      "type": "home",
      "street": "123 Main St",
      "city": "New York"
    },
    {
      "type": "work", 
      "street": "456 Office Blvd",
      "city": "Boston"
    }
  ]
}

Referencing Pattern

// User document
{
  "_id": ObjectId("user1"),
  "name": "John Doe",
  "addressIds": [
    ObjectId("addr1"),
    ObjectId("addr2")
  ]
}

// Address documents
{
  "_id": ObjectId("addr1"),
  "userId": ObjectId("user1"),
  "type": "home",
  "street": "123 Main St"
}

🔍 Indexing

Index Types

Single Field Index

// Create index on email field
db.users.createIndex({ "email": 1 })

// Compound index
db.users.createIndex({ "name": 1, "age": -1 })

Multikey Index

// An index on an array field automatically becomes a multikey index
db.posts.createIndex({ "tags": 1 })

// Query với array
db.posts.find({ "tags": "mongodb" })

Text Index

// Full-text search index
db.articles.createIndex({ 
  "title": "text", 
  "content": "text" 
})

// Search
db.articles.find({ $text: { $search: "mongodb tutorial" }})

Geospatial Index

// 2dsphere index for GeoJSON
db.places.createIndex({ "location": "2dsphere" })

// Query nearby locations
db.places.find({
  location: {
    $near: {
      $geometry: { type: "Point", coordinates: [-73.9857, 40.7484] },
      $maxDistance: 1000
    }
  }
})

Partial Index

// Index only active users
db.users.createIndex(
  { "email": 1 },
  { partialFilterExpression: { "status": "active" }}
)

Index Optimization

// Check index usage
db.users.find({ "email": "john@example.com" }).explain("executionStats")

// Index hints
db.users.find({ "name": "John" }).hint({ "name": 1 })

// List indexes
db.users.getIndexes()

🔄 CRUD Operations

Create (Insert)

// Insert one document
db.users.insertOne({
  "name": "John Doe",
  "email": "john@example.com",
  "age": 30
})

// Insert multiple documents
db.users.insertMany([
  { "name": "Alice", "age": 25 },
  { "name": "Bob", "age": 35 }
])

Read (Find)

// Find all documents
db.users.find()

// Find with condition
db.users.find({ "age": { $gte: 25 }})

// Find with projection
db.users.find(
  { "status": "active" },
  { "name": 1, "email": 1, "_id": 0 }
)

// Find with sorting, skipping and limiting (skip is always applied before limit, regardless of chaining order)
db.users.find({ "age": { $gte: 21 }})
        .sort({ "age": -1 })
        .limit(10)
        .skip(5)

Update

// Update one document
db.users.updateOne(
  { "_id": ObjectId("...") },
  { $set: { "status": "inactive" }}
)

// Update multiple documents
db.users.updateMany(
  { "lastLogin": { $lt: ISODate("2024-01-01") }},
  { $set: { "status": "dormant" }}
)

// Upsert (update or insert)
db.users.updateOne(
  { "email": "new@example.com" },
  { $set: { "name": "New User" }},
  { upsert: true }
)

Delete

// Delete one document
db.users.deleteOne({ "_id": ObjectId("...") })

// Delete multiple documents
db.users.deleteMany({ "status": "inactive" })

📊 Aggregation Framework

Pipeline Stages

$match - Filtering

db.orders.aggregate([
  { $match: { "status": "completed" }}
])

$group - Grouping

// Group by customer and sum total
db.orders.aggregate([
  {
    $group: {
      "_id": "$customerId",
      "totalAmount": { $sum: "$amount" },
      "orderCount": { $sum: 1 }
    }
  }
])

$project - Field Selection

db.users.aggregate([
  {
    $project: {
      "name": 1,
      "email": 1,
      "fullName": { $concat: ["$firstName", " ", "$lastName"] }
    }
  }
])

$lookup - Joins

// Left outer join
db.orders.aggregate([
  {
    $lookup: {
      from: "customers",
      localField: "customerId",
      foreignField: "_id",
      as: "customer"
    }
  }
])

$unwind - Array Deconstruction

db.posts.aggregate([
  { $unwind: "$tags" },
  { $group: { "_id": "$tags", "count": { $sum: 1 }}}
])

Complex Aggregation Example

// Sales analytics pipeline
db.sales.aggregate([
  // Filter sales from calendar year 2023
  {
    $match: {
      "date": { 
        $gte: ISODate("2023-01-01"),
        $lt: ISODate("2024-01-01")
      }
    }
  },

  // Group by month and product category
  {
    $group: {
      "_id": {
        "month": { $month: "$date" },
        "category": "$productCategory"
      },
      "totalSales": { $sum: "$amount" },
      "averageOrderValue": { $avg: "$amount" },
      "orderCount": { $sum: 1 }
    }
  },

  // Sort by month and total sales
  {
    $sort: {
      "_id.month": 1,
      "totalSales": -1
    }
  },

  // Reshape output
  {
    $project: {
      "_id": 0,
      "month": "$_id.month",
      "category": "$_id.category",
      "totalSales": 1,
      "averageOrderValue": { $round: ["$averageOrderValue", 2] },
      "orderCount": 1
    }
  }
])

⚡ Performance Optimization

Query Optimization

// Use explain() to analyze query performance
db.users.find({ "email": "john@example.com" })
        .explain("executionStats")

// Key metrics to check (under executionStats):
// - executionTimeMillis
// - totalKeysExamined
// - totalDocsExamined
// - nReturned
// - winningPlan stage (IXSCAN vs COLLSCAN) and its indexName

Indexing Best Practices

// ESR Rule: Equality, Sort, Range
db.users.createIndex({ 
  "status": 1,      // Equality
  "createdAt": -1,  // Sort
  "age": 1          // Range
})

// Compound index field order matters
db.users.find({ "status": "active", "age": { $gte: 25 }})
        .sort({ "createdAt": -1 })

Memory Management

// Monitor memory usage
db.serverStatus().wiredTiger.cache

// Configure WiredTiger cache
// In mongod.conf:
storage:
  wiredTiger:
    engineConfig:
      cacheSizeGB: 8

🔄 Replication

Replica Set Setup

// Initialize replica set
rs.initiate({
  "_id": "myReplicaSet",
  "members": [
    { "_id": 0, "host": "mongo1:27017", "priority": 3 },
    { "_id": 1, "host": "mongo2:27017", "priority": 2 },
    { "_id": 2, "host": "mongo3:27017", "priority": 1 }
  ]
})

// Check replica set status
rs.status()

// Add member
rs.add("mongo4:27017")

// Step down primary (for maintenance)
rs.stepDown(60)

Read Preferences

// Primary (default) - read from primary only
db.users.find().readPref("primary")

// Secondary - read from secondary only
db.users.find().readPref("secondary")

// Primary Preferred - primary if available, else secondary
db.users.find().readPref("primaryPreferred")

// Secondary Preferred - secondary if available, else primary
db.users.find().readPref("secondaryPreferred")

// Nearest - lowest network latency
db.users.find().readPref("nearest")

Write Concerns

// w: 1 - acknowledge from primary only (since MongoDB 5.0 the implicit default is w: "majority" in most deployments)
db.users.insertOne(doc, { writeConcern: { w: 1 }})

// Majority - acknowledge from majority of replica set
db.users.insertOne(doc, { writeConcern: { w: "majority" }})

// w: 3 - acknowledge from 3 members (every node of a 3-member set; not recommended, writes block if any member is down)
db.users.insertOne(doc, { writeConcern: { w: 3 }})

// With journal - wait for journal write
db.users.insertOne(doc, { 
  writeConcern: { w: "majority", j: true }
})

🔀 Sharding

Sharding Architecture

// Config servers (replica set)
mongod --configsvr --replSet configReplSet --port 27019

// Query routers (mongos)
mongos --configdb configReplSet/config1:27019,config2:27019,config3:27019

// Shard servers (replica sets)
mongod --shardsvr --replSet shard1ReplSet --port 27018

Shard Key Selection

// Range-based sharding (good for queries with ranges)
sh.shardCollection("mydb.users", { "userId": 1 })

// Hash-based sharding (good for even distribution)
sh.shardCollection("mydb.orders", { "orderId": "hashed" })

// Compound shard key
sh.shardCollection("mydb.posts", { "authorId": 1, "timestamp": 1 })

Shard Management

// Enable sharding on database
sh.enableSharding("mydb")

// Check shard status
sh.status()

// Add shard
sh.addShard("shard2ReplSet/shard2a:27018,shard2b:27018,shard2c:27018")

// Balance chunks
sh.startBalancer()
sh.stopBalancer()

📋 Transactions

Single Document Transactions

// Writes to a single document are always atomic; no explicit transaction is needed
db.accounts.updateOne(
  { "_id": "account1" },
  { $inc: { "balance": -100 }}
)

Multi-Document Transactions

// Start session
const session = db.getMongo().startSession()

try {
  session.startTransaction()

  // Debit from account A
  session.getDatabase("bank").accounts.updateOne(
    { "_id": "accountA" },
    { $inc: { "balance": -100 }},
    { session: session }
  )

  // Credit to account B
  session.getDatabase("bank").accounts.updateOne(
    { "_id": "accountB" },
    { $inc: { "balance": 100 }},
    { session: session }
  )

  // Log transaction
  session.getDatabase("bank").transactions.insertOne(
    {
      "from": "accountA",
      "to": "accountB", 
      "amount": 100,
      "timestamp": new Date()
    },
    { session: session }
  )

  session.commitTransaction()
} catch (error) {
  session.abortTransaction()
  throw error
} finally {
  session.endSession()
}

🔒 Security

Authentication

// Create admin user
use admin
db.createUser({
  user: "admin",
  pwd: "securePassword",
  roles: [ { role: "userAdminAnyDatabase", db: "admin" } ]
})

// Create database user
use myapp
db.createUser({
  user: "appUser",
  pwd: "appPassword", 
  roles: [ { role: "readWrite", db: "myapp" } ]
})

Authorization (RBAC)

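// Built-in roles
"read"              // Read data in the database the role is granted on (readAnyDatabase covers all databases)
"readWrite"         // Read and write data in the database the role is granted on (readWriteAnyDatabase covers all)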
"dbAdmin"           // Database administration
"userAdmin"         // User and role management
"clusterAdmin"      // Cluster administration
"backup"            // Backup operations
"restore"           // Restore operations

// Custom role
db.createRole({
  role: "customRole",
  privileges: [
    {
      resource: { db: "myapp", collection: "users" },
      actions: [ "find", "insert", "update" ]
    }
  ],
  roles: []
})

SSL/TLS Configuration

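# mongod.conf
# NOTE: net.ssl options are deprecated since MongoDB 4.2; on current versions use
# net.tls with mode: requireTLS and certificateKeyFile instead of PEMKeyFile.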
net:
  ssl:
    mode: requireSSL
    PEMKeyFile: /path/to/server.pem
    CAFile: /path/to/ca.pem

Kiến thức MongoDB này cung cấp foundation vững chắc cho việc development và phỏng vấn.