This post is based on a query by one of my readers regarding the example of a clustered app in the book.
var cluster = require('cluster');

/**
 * Master-side logic: fork one worker per CPU core and fork a replacement
 * every time a worker exits.
 */
function runMaster() {
  var coreCount = require('os').cpus().length;
  for (var n = 0; n < coreCount; n++) {
    cluster.fork();
  }

  // A worker that exits is replaced with a fresh one.
  cluster.on('exit', function () {
    cluster.fork();
  });
}

/**
 * Worker-side logic: build the Express app and serve it on port 3000.
 * http and express are loaded here, so only worker processes require them.
 */
function runWorker() {
  var worker_id = 'Worker' + cluster.worker.id;
  var http = require('http');
  var express = require('express');
  var app = express();

  app.get('/', function (req, res) {
    // A resource-intensive operation: collect the integers 0..99999.
    var numbers = [];
    var i = 0;
    while (i < 100000) {
      numbers.push(i);
      i += 1;
    }
    // A large chunk of data: the numbers as one comma-separated string.
    res.send(numbers.join(','));
  });

  // Start the app.
  var server = http.createServer(app);
  server.listen(3000, function () {
    console.log('Express app started by %s', worker_id);
  });
}

// The same script runs in the master and in every worker; pick the role here.
if (cluster.isMaster) {
  runMaster();
} else {
  runWorker();
}
One might wonder if the code above could be 'optimized' in the following manner.
// 'Optimized' variant: http, express and the app are set up at the top of the
// script, before the master/worker branch, so every process that runs this
// script executes these lines.
var cluster = require('cluster');
var http = require('http');
var express = require('express');
var app = express();
// Route handler for '/'; its body is elided in this listing.
app.get('/', function (req, res) {
...
});
// Master process - starts the workers (body elided in this listing)
if (cluster.isMaster) {
...
}
// Code for the worker processes to execute
else {
// Label used in the startup log message, built from the cluster worker id
var worker_id = 'Worker' + cluster.worker.id;
// Start the app, using the app object created at the top of the script
http.createServer(app).listen(3000, function() {
console.log('Express app started by %s', worker_id);
});
}
The idea behind the 'optimization' comes from the intention of sharing one Express instance between the workers, instead of creating multiple instances of Express. Will it work? Is it a good idea?
The following information will help you understand why the 'optimization' introduces redundancy, instead of actually optimizing the code.
When you cluster an app, the script is executed (NUMBER_OF_FORKS + 1) times, and each execution runs as a separate process that holds no references to the objects created in any of the other instances. Think of it as manually running `$ node app` that many times, except that they will all magically listen on the same port number without any conflict.
Hence, putting the initialization code outside the `if` construct does not help in optimizing the code or its performance, because the app instances will not be shared by the forks. Rather, you add redundant initialization to every process, creating objects even in the master process, which does not need them.