MongoDB - Lý Thuyết Cơ Bản
📚 Giới Thiệu
MongoDB là document-oriented NoSQL database, lưu trữ dữ liệu dưới dạng BSON (Binary JSON) documents. Được thiết kế cho scalability, performance và ease of development.
🏗️ Kiến Trúc MongoDB
1. Logical Architecture
Database
↓
Collections (equivalent to tables)
↓
Documents (equivalent to rows)
↓
Fields (equivalent to columns)
2. Physical Architecture
mongod (Database Server)
↓
Storage Engine (WiredTiger)
↓
Data Files + Journal Files
↓
File System
3. Deployment Architectures
Standalone
Application → mongod instance
Replica Set
Application → Primary → Secondary
→ Secondary
→ Arbiter (optional)
Sharded Cluster
Application → mongos Router
↓
Config Servers
↓
Shard1 Shard2 Shard3
(RS) (RS) (RS)
📄 Document Structure
BSON Data Types
{
// String
"name": "John Doe",
// Number (Int32, Int64, Double)
"age": 30,
"salary": 75000.50,
// Boolean
"isActive": true,
// Date
"createdAt": ISODate("2024-01-15T10:30:00Z"),
// Array
"tags": ["developer", "senior", "javascript"],
// Object/Document
"address": {
"street": "123 Main St",
"city": "New York",
"zipCode": "10001"
},
// ObjectId
"_id": ObjectId("507f1f77bcf86cd799439011"),
// Null
"middleName": null,
// Binary Data
"profileImage": BinData(0, "...")
}
Schema Design Patterns
Embedding Pattern
// User với embedded addresses
{
"_id": ObjectId("..."),
"name": "John Doe",
"addresses": [
{
"type": "home",
"street": "123 Main St",
"city": "New York"
},
{
"type": "work",
"street": "456 Office Blvd",
"city": "Boston"
}
]
}
Referencing Pattern
// User document
{
"_id": ObjectId("user1"),
"name": "John Doe",
"addressIds": [
ObjectId("addr1"),
ObjectId("addr2")
]
}
// Address documents
{
"_id": ObjectId("addr1"),
"userId": ObjectId("user1"),
"type": "home",
"street": "123 Main St"
}
🔍 Indexing
Index Types
Single Field Index
// Create index on email field
db.users.createIndex({ "email": 1 })
// Compound index
db.users.createIndex({ "name": 1, "age": -1 })
Multikey Index
// Creating an index on an array field automatically makes it a multikey index (one index entry per array element)
db.posts.createIndex({ "tags": 1 })
// Query với array
db.posts.find({ "tags": "mongodb" })
Text Index
// Full-text search index
db.articles.createIndex({
"title": "text",
"content": "text"
})
// Search
db.articles.find({ $text: { $search: "mongodb tutorial" }})
Geospatial Index
// 2dsphere index for GeoJSON
db.places.createIndex({ "location": "2dsphere" })
// Query nearby locations
db.places.find({
location: {
$near: {
$geometry: { type: "Point", coordinates: [-73.9857, 40.7484] },
$maxDistance: 1000
}
}
})
Partial Index
// Index only active users
db.users.createIndex(
{ "email": 1 },
{ partialFilterExpression: { "status": "active" }}
)
Index Optimization
// Check index usage
db.users.find({ "email": "john@example.com" }).explain("executionStats")
// Index hints
db.users.find({ "name": "John" }).hint({ "name": 1 })
// List indexes
db.users.getIndexes()
🔄 CRUD Operations
Create (Insert)
// Insert one document
db.users.insertOne({
"name": "John Doe",
"email": "john@example.com",
"age": 30
})
// Insert multiple documents
db.users.insertMany([
{ "name": "Alice", "age": 25 },
{ "name": "Bob", "age": 35 }
])
Read (Find)
// Find all documents
db.users.find()
// Find with condition
db.users.find({ "age": { $gte: 25 }})
// Find with projection
db.users.find(
{ "status": "active" },
{ "name": 1, "email": 1, "_id": 0 }
)
// Find with sorting and limiting
db.users.find({ "age": { $gte: 21 }})
.sort({ "age": -1 })
.limit(10)
.skip(5)
Update
// Update one document
db.users.updateOne(
{ "_id": ObjectId("...") },
{ $set: { "status": "inactive" }}
)
// Update multiple documents
db.users.updateMany(
{ "lastLogin": { $lt: ISODate("2024-01-01") }},
{ $set: { "status": "dormant" }}
)
// Upsert (update or insert)
db.users.updateOne(
{ "email": "new@example.com" },
{ $set: { "name": "New User" }},
{ upsert: true }
)
Delete
// Delete one document
db.users.deleteOne({ "_id": ObjectId("...") })
// Delete multiple documents
db.users.deleteMany({ "status": "inactive" })
📊 Aggregation Framework
Pipeline Stages
$match - Filtering
db.orders.aggregate([
{ $match: { "status": "completed" }}
])
$group - Grouping
// Group by customer and sum total
db.orders.aggregate([
{
$group: {
"_id": "$customerId",
"totalAmount": { $sum: "$amount" },
"orderCount": { $sum: 1 }
}
}
])
$project - Field Selection
db.users.aggregate([
{
$project: {
"name": 1,
"email": 1,
"fullName": { $concat: ["$firstName", " ", "$lastName"] }
}
}
])
$lookup - Joins
// Left outer join
db.orders.aggregate([
{
$lookup: {
from: "customers",
localField: "customerId",
foreignField: "_id",
as: "customer"
}
}
])
$unwind - Array Deconstruction
db.posts.aggregate([
{ $unwind: "$tags" },
{ $group: { "_id": "$tags", "count": { $sum: 1 }}}
])
Complex Aggregation Example
// Sales analytics pipeline
db.sales.aggregate([
// Filter sales from last year
{
$match: {
"date": {
$gte: ISODate("2023-01-01"),
$lt: ISODate("2024-01-01")
}
}
},
// Group by month and product category
{
$group: {
"_id": {
"month": { $month: "$date" },
"category": "$productCategory"
},
"totalSales": { $sum: "$amount" },
"averageOrderValue": { $avg: "$amount" },
"orderCount": { $sum: 1 }
}
},
// Sort by month and total sales
{
$sort: {
"_id.month": 1,
"totalSales": -1
}
},
// Reshape output
{
$project: {
"_id": 0,
"month": "$_id.month",
"category": "$_id.category",
"totalSales": 1,
"averageOrderValue": { $round: ["$averageOrderValue", 2] },
"orderCount": 1
}
}
])
⚡ Performance Optimization
Query Optimization
// Use explain() to analyze query performance
db.users.find({ "email": "john@example.com" })
.explain("executionStats")
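// Key metrics to check (in the executionStats section):
// - executionTimeMillis
// - totalKeysExamined
// - totalDocsExamined
// - nReturned
// - winning plan stage: IXSCAN (index used) vs COLLSCAN (full collection scan)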
Indexing Best Practices
// ESR Rule: Equality, Sort, Range
db.users.createIndex({
"status": 1, // Equality
"createdAt": -1, // Sort
"age": 1 // Range
})
// Compound index field order matters
db.users.find({ "status": "active", "age": { $gte: 25 }})
.sort({ "createdAt": -1 })
Memory Management
// Monitor memory usage
db.serverStatus().wiredTiger.cache
// Configure WiredTiger cache
// In mongod.conf:
storage:
wiredTiger:
engineConfig:
cacheSizeGB: 8
🔄 Replication
Replica Set Setup
// Initialize replica set
rs.initiate({
"_id": "myReplicaSet",
"members": [
{ "_id": 0, "host": "mongo1:27017", "priority": 3 },
{ "_id": 1, "host": "mongo2:27017", "priority": 2 },
{ "_id": 2, "host": "mongo3:27017", "priority": 1 }
]
})
// Check replica set status
rs.status()
// Add member
rs.add("mongo4:27017")
// Step down primary (for maintenance)
rs.stepDown(60)
Read Preferences
// Primary (default) - read from primary only
db.users.find().readPref("primary")
// Secondary - read from secondary only
db.users.find().readPref("secondary")
// Primary Preferred - primary if available, else secondary
db.users.find().readPref("primaryPreferred")
// Secondary Preferred - secondary if available, else primary
db.users.find().readPref("secondaryPreferred")
// Nearest - lowest network latency
db.users.find().readPref("nearest")
Write Concerns
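// w: 1 - acknowledge from primary only (implicit default before MongoDB 5.0; since 5.0 the implicit default is w: "majority" in most deployments)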
db.users.insertOne(doc, { writeConcern: { w: 1 }})
// Majority - acknowledge from majority of replica set
db.users.insertOne(doc, { writeConcern: { w: "majority" }})
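// w: 3 - acknowledge from three members (all nodes only in a 3-member replica set; not recommended, because without wtimeout the write blocks while any member is down)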
db.users.insertOne(doc, { writeConcern: { w: 3 }})
// With journal - wait for journal write
db.users.insertOne(doc, {
writeConcern: { w: "majority", j: true }
})
🔀 Sharding
Sharding Architecture
// Config servers (replica set)
mongod --configsvr --replSet configReplSet --port 27019
// Query routers (mongos)
mongos --configdb configReplSet/config1:27019,config2:27019,config3:27019
// Shard servers (replica sets)
mongod --shardsvr --replSet shard1ReplSet --port 27018
Shard Key Selection
// Range-based sharding (good for queries with ranges)
sh.shardCollection("mydb.users", { "userId": 1 })
// Hash-based sharding (good for even distribution)
sh.shardCollection("mydb.orders", { "orderId": "hashed" })
// Compound shard key
sh.shardCollection("mydb.posts", { "authorId": 1, "timestamp": 1 })
Shard Management
// Enable sharding on database
sh.enableSharding("mydb")
// Check shard status
sh.status()
// Add shard
sh.addShard("shard2ReplSet/shard2a:27018,shard2b:27018,shard2c:27018")
// Enable / disable the balancer (the background process that migrates chunks between shards)
sh.startBalancer()
sh.stopBalancer()
📋 Transactions
Single Document Transactions
// A write to a single document is always atomic, so no explicit transaction is needed
db.accounts.updateOne(
{ "_id": "account1" },
{ $inc: { "balance": -100 }}
)
Multi-Document Transactions
// Start session
const session = db.getMongo().startSession()
try {
session.startTransaction()
// Debit from account A
session.getDatabase("bank").accounts.updateOne(
{ "_id": "accountA" },
{ $inc: { "balance": -100 }},
{ session: session }
)
// Credit to account B
session.getDatabase("bank").accounts.updateOne(
{ "_id": "accountB" },
{ $inc: { "balance": 100 }},
{ session: session }
)
// Log transaction
session.getDatabase("bank").transactions.insertOne(
{
"from": "accountA",
"to": "accountB",
"amount": 100,
"timestamp": new Date()
},
{ session: session }
)
session.commitTransaction()
} catch (error) {
session.abortTransaction()
throw error
} finally {
session.endSession()
}
🔒 Security
Authentication
// Create admin user
use admin
db.createUser({
user: "admin",
pwd: "securePassword",
roles: [ { role: "userAdminAnyDatabase", db: "admin" } ]
})
// Create database user
use myapp
db.createUser({
user: "appUser",
pwd: "appPassword",
roles: [ { role: "readWrite", db: "myapp" } ]
})
Authorization (RBAC)
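// Built-in roles
"read" // Read data in the database the role is granted on (readAnyDatabase covers all databases)
"readWrite" // Read and write data in the database the role is granted on (readWriteAnyDatabase covers all databases)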
"dbAdmin" // Database administration
"userAdmin" // User and role management
"clusterAdmin" // Cluster administration
"backup" // Backup operations
"restore" // Restore operations
// Custom role
db.createRole({
role: "customRole",
privileges: [
{
resource: { db: "myapp", collection: "users" },
actions: [ "find", "insert", "update" ]
}
],
roles: []
})
SSL/TLS Configuration
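# mongod.conf (note: net.ssl options are deprecated since MongoDB 4.2; newer versions use net.tls with mode: requireTLS, certificateKeyFile and CAFile)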
net:
ssl:
mode: requireSSL
PEMKeyFile: /path/to/server.pem
CAFile: /path/to/ca.pem
Kiến thức MongoDB này cung cấp foundation vững chắc cho việc development và phỏng vấn.