graceful-shutdown-nodejs

star 0

Graceful shutdown patterns for Node.js services including HTTP, WebSocket, BullMQ, Prisma, Redis, and grammY bot

artvision-agency By artvision-agency schedule Updated 2/28/2026

name: graceful-shutdown-nodejs description: Graceful shutdown patterns for Node.js services including HTTP, WebSocket, BullMQ, Prisma, Redis, and grammY bot

Graceful Shutdown Patterns for Node.js

Overview

Graceful shutdown ensures a service stops cleanly: no dropped requests, no corrupted state, no orphaned connections. The shutdown sequence is always: stop accepting new work, drain in-flight work, close resources, exit.

Signal Handling (SIGTERM, SIGINT)

// shutdown/signals.ts
/** An asynchronous cleanup callback registered through onShutdown(). */
type ShutdownHook = () => Promise<void>

// Registered hooks in registration order; the shutdown sequence walks this array from the end.
const hooks: ShutdownHook[] = []
// Becomes true when the shutdown sequence starts; guards against running it twice.
let isShuttingDown = false

/**
 * Registers a cleanup hook for the shutdown sequence.
 *
 * Hooks are executed in reverse registration order, so register the resources
 * that must stay available longest (e.g. the database) first.
 *
 * @param hook Async callback that releases one resource.
 */
export function onShutdown(hook: ShutdownHook): void {
  // Append at the end; the shutdown loop iterates from the last index down to 0.
  hooks[hooks.length] = hook
}

/**
 * Installs the process-level handlers that run the registered shutdown hooks.
 *
 * SIGTERM and SIGINT trigger a graceful shutdown that exits with code 0.
 * An uncaught exception or unhandled rejection also runs the hooks, but the
 * process then exits with code 1 so that supervisors (Docker restart policies,
 * Kubernetes, systemd) can tell a crash from a requested stop.
 *
 * @param options.timeout Maximum time in milliseconds the hooks may take
 *   before the process is force-exited with code 1. Defaults to 30 000.
 */
export function setupSignalHandlers(options: { timeout?: number } = {}): void {
  const timeout = options.timeout ?? 30_000

  async function shutdown(reason: string, exitCode = 0): Promise<void> {
    // Re-entry guard: a second signal (e.g. rapid Ctrl+C) must not restart the sequence.
    if (isShuttingDown) return
    isShuttingDown = true
    console.log(`\n[shutdown] Received ${reason}, starting graceful shutdown...`)

    // Force exit after timeout. unref() stops this timer from keeping the
    // event loop alive on its own.
    const forceTimer = setTimeout(() => {
      console.error('[shutdown] Timeout exceeded, forcing exit')
      process.exit(1)
    }, timeout)
    forceTimer.unref()

    // Run all hooks in reverse registration order (LIFO). A failing hook is
    // logged and skipped so the remaining resources still get released.
    for (let i = hooks.length - 1; i >= 0; i--) {
      try {
        await hooks[i]()
      } catch (err) {
        console.error(`[shutdown] Hook ${i} failed:`, err)
      }
    }

    console.log(
      exitCode === 0 ? '[shutdown] Clean exit' : `[shutdown] Exiting with code ${exitCode}`
    )
    process.exit(exitCode)
  }

  // `void` marks the promise as intentionally not awaited; shutdown() handles
  // hook errors itself and always ends in process.exit().
  process.on('SIGTERM', () => void shutdown('SIGTERM'))
  process.on('SIGINT', () => void shutdown('SIGINT'))

  // Fatal errors: clean up, then exit non-zero. Errors raised while a shutdown
  // is already running are only logged so they cannot interrupt the cleanup.
  process.on('uncaughtException', (err) => {
    console.error('[shutdown] Uncaught exception:', err)
    if (!isShuttingDown) void shutdown('uncaughtException', 1)
  })

  process.on('unhandledRejection', (reason) => {
    console.error('[shutdown] Unhandled rejection:', reason)
    if (!isShuttingDown) void shutdown('unhandledRejection', 1)
  })
}

/**
 * Reports whether the shutdown sequence has started.
 *
 * @returns The current value of the module-level `isShuttingDown` flag.
 */
export function isTerminating(): boolean {
  return isShuttingDown
}

Shutdown Sequence Ordering

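The order of shutdown hooks matters. Hooks run LIFO (the last registered hook runs first), so register them in the reverse of the order in which they should run: register the resources that must stay available longest (the database) first, and the entry points that must stop first (the HTTP server) last.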

// index.ts
import { setupSignalHandlers, onShutdown } from './shutdown/signals'

/**
 * Starts every component and registers its shutdown hook immediately after
 * creating it. Because hooks run LIFO, the components close in the opposite
 * order: HTTP server, WebSocket server, BullMQ worker, Redis, Prisma.
 */
async function main() {
  setupSignalHandlers({ timeout: 30_000 })

  // 1. First registered = last to close (database is the last thing to shut down)
  const prisma = new PrismaClient()
  await prisma.$connect()
  onShutdown(async () => {
    console.log('[shutdown] Disconnecting Prisma...')
    await prisma.$disconnect()
  })

  // 2. Redis (registered before the worker so it closes after the worker has drained)
  const redis = createRedisClient()
  onShutdown(async () => {
    console.log('[shutdown] Closing Redis...')
    await redis.quit()
  })

  // 3. BullMQ workers
  const worker = createWorker(redis)
  onShutdown(async () => {
    console.log('[shutdown] Closing BullMQ worker...')
    await worker.close()
  })

  // 4. WebSocket server
  const wss = createWebSocketServer()
  onShutdown(async () => {
    console.log('[shutdown] Closing WebSocket server...')
    await closeWebSocketServer(wss)
  })

  // 5. HTTP server (last registered = first to close = stop accepting requests first)
  const server = createHttpServer()
  onShutdown(async () => {
    console.log('[shutdown] Closing HTTP server...')
    await closeHttpServer(server)
  })

  server.listen(3000, () => {
    console.log('Server listening on :3000')
  })
}

// A startup failure (e.g. the database is unreachable) must be reported as such
// and end the process with a non-zero exit code instead of surfacing as an
// anonymous unhandled rejection.
main().catch((err) => {
  console.error('Failed to start:', err)
  process.exit(1)
})

HTTP Server Shutdown with Keep-Alive Handling

// shutdown/http.ts
import http from 'node:http'

// Every socket currently open on a tracked server; closeHttpServer() walks this set.
const connections = new Set<import('node:net').Socket>()

/**
 * Starts recording the sockets accepted by `server` so they can be closed
 * during shutdown. Each socket removes itself from the set when it closes.
 *
 * @param server HTTP server whose connections should be tracked.
 */
export function trackConnections(server: http.Server): void {
  server.on('connection', (socket) => {
    const forget = (): void => {
      connections.delete(socket)
    }

    connections.add(socket)
    socket.on('close', forget)
  })
}

/**
 * Gracefully closes an HTTP server whose sockets are recorded in `connections`
 * (see trackConnections()).
 *
 * Sequence: stop accepting new connections, destroy idle keep-alive sockets,
 * let in-flight requests finish while asking their clients not to reuse the
 * connection, and destroy whatever is still open after `drainTimeout`.
 *
 * @param server The server to close.
 * @param options.drainTimeout Milliseconds to wait for in-flight requests
 *   before the remaining sockets are destroyed. Defaults to 10 000.
 * @returns A promise that resolves once the server has closed, or rejects with
 *   the error passed to the server.close() callback.
 */
export function closeHttpServer(
  server: http.Server,
  options: { drainTimeout?: number } = {}
): Promise<void> {
  const drainTimeout = options.drainTimeout ?? 10_000

  return new Promise((resolve, reject) => {
    // Force-close remaining connections after the drain timeout.
    const forceTimer = setTimeout(() => {
      for (const socket of connections) {
        socket.destroy()
      }
    }, drainTimeout)

    // Stop accepting new connections. The callback runs once the server has
    // closed, i.e. after all its connections have ended.
    server.close((err) => {
      // Without this the timer would keep the event loop alive for the full
      // drainTimeout even after a clean close.
      clearTimeout(forceTimer)
      if (err) reject(err)
      else resolve()
    })

    for (const socket of connections) {
      // `_httpMessage` is an undocumented Node.js internal holding the
      // ServerResponse currently attached to the socket; it is unset while the
      // connection is idle between keep-alive requests.
      const response = (
        socket as import('node:net').Socket & { _httpMessage?: http.ServerResponse | null }
      )._httpMessage

      if (!response) {
        // Idle connection: nothing in flight, end it immediately.
        socket.destroy()
        continue
      }

      // Active connection: let the request finish, but make the response carry
      // `Connection: close` so the client does not send further requests on it.
      if (!response.headersSent) {
        response.setHeader('Connection', 'close')
      }
      socket.setTimeout(drainTimeout)
    }
  })
}

Express/Fastify Integration

// With Express
import express from 'express'
import { trackConnections, closeHttpServer } from './shutdown/http'

const app = express()

// While the service is terminating, answer 503 and ask the client to drop the
// connection; otherwise hand the request to the next middleware.
app.use((_req, res, next) => {
  if (!isTerminating()) {
    next()
    return
  }

  res
    .status(503)
    .set('Connection', 'close')
    .json({ error: 'Server shutting down' })
})

const server = app.listen(3000)
trackConnections(server)

// Drain and close the HTTP server as part of the shutdown sequence.
onShutdown(async () => {
  await closeHttpServer(server)
})
// With Fastify
import Fastify from 'fastify'

const fastify = Fastify({ logger: true })

// Fastify ships its own graceful close, so the hook only has to delegate to it.
onShutdown(() => fastify.close())

WebSocket Graceful Disconnect

// shutdown/websocket.ts
import { WebSocketServer, WebSocket } from 'ws'

/**
 * Notifies all connected clients, closes their sockets with code 1001 and then
 * closes the WebSocket server.
 *
 * Clients get `options.forceTimeout` milliseconds to complete the closing
 * handshake; sockets still open after that are terminated. The server itself
 * is closed exactly once, and both timers are cleared as soon as that happens.
 *
 * @param wss The WebSocket server to close.
 * @param options.forceTimeout Milliseconds to wait before terminating
 *   remaining clients. Defaults to 5000.
 * @returns A promise that resolves when the wss.close() callback has run.
 */
export function closeWebSocketServer(
  wss: WebSocketServer,
  options: { forceTimeout?: number } = {}
): Promise<void> {
  const forceTimeout = options.forceTimeout ?? 5000

  return new Promise((resolve) => {
    let finished = false

    // Notify all clients that server is shutting down
    for (const client of wss.clients) {
      if (client.readyState === WebSocket.OPEN) {
        client.send(JSON.stringify({
          type: 'server_shutdown',
          message: 'Server is restarting, please reconnect shortly.',
          reconnectAfter: 5000,
        }))
        // Close with code 1001 (Going Away)
        client.close(1001, 'Server shutting down')
      }
    }

    // Poll until every client has disconnected.
    const pollTimer = setInterval(() => {
      if (wss.clients.size === 0) finish()
    }, 100)

    // Deadline: terminate clients that did not complete the closing handshake.
    const forceTimer = setTimeout(() => {
      for (const client of wss.clients) {
        client.terminate()
      }
      finish()
    }, forceTimeout)

    // Closes the server once, whichever path gets here first, and stops both
    // timers so neither fires (or keeps the event loop alive) afterwards.
    function finish(): void {
      if (finished) return
      finished = true
      clearInterval(pollTimer)
      clearTimeout(forceTimer)
      wss.close(() => resolve())
    }

    // Nothing to wait for when no client is connected.
    if (wss.clients.size === 0) finish()
  })
}

BullMQ Worker Shutdown

// workers/gameWorker.ts
import { Worker, Queue } from 'bullmq'
import type { RedisOptions } from 'ioredis'

// Connection settings shared by the BullMQ worker and queue in this module.
const redisOpts: RedisOptions = {
  host: process.env.REDIS_HOST ?? 'localhost',
  // Explicit radix so the string's prefix can never select the base.
  port: Number.parseInt(process.env.REDIS_PORT ?? '6379', 10),
  maxRetriesPerRequest: null, // Required by BullMQ
}

// Worker for the 'game-tasks' queue; dispatches on the job name.
const worker = new Worker(
  'game-tasks',
  async (job) => {
    switch (job.name) {
      case 'processMove':
        await processGameMove(job.data)
        break
      case 'endRound':
        await endGameRound(job.data)
        break
      default:
        // Fail loudly: an unknown job name would otherwise be marked as
        // completed without any work having been done.
        throw new Error(`Unknown job name: ${job.name}`)
    }
  },
  {
    connection: redisOpts,
    concurrency: 5,
    // Lock held on a job while it is being processed, in milliseconds.
    // NOTE(review): this equals the 30 s shutdown timeout used in this guide;
    // confirm the intended relation between the two values.
    lockDuration: 30_000,
  }
)

// Log the outcome of every job.
worker.on('completed', ({ id }) => {
  console.log(`Job ${id} completed`)
})

// `job` may be undefined here, hence the optional chaining.
worker.on('failed', (job, { message }) => {
  console.error(`Job ${job?.id} failed:`, message)
})

// Graceful shutdown for BullMQ worker. worker.close():
// 1. Stops picking up new jobs
// 2. Waits for the jobs currently being processed to finish. It has no deadline
//    of its own, so the effective limit is the timeout given to setupSignalHandlers()
// 3. Closes the worker's Redis connection
onShutdown(async () => {
  console.log('[shutdown] Draining BullMQ worker...')
  await worker.close()
  console.log('[shutdown] BullMQ worker closed')
})

Queue Scheduler and Flow Producer

// Queue instances hold their own Redis connection and must be closed as well.
// Do the same for any FlowProducer (or QueueScheduler, on BullMQ versions that
// still ship it) the service creates.
const queue = new Queue('game-tasks', { connection: redisOpts })

onShutdown(() => queue.close())

Prisma / Database Disconnect

// db/prisma.ts
import { PrismaClient } from '@prisma/client'

// Query logging is only enabled in development; warnings and errors are always logged.
const isDevelopment = process.env.NODE_ENV === 'development'

export const prisma = new PrismaClient({
  log: isDevelopment ? ['query', 'warn', 'error'] : ['warn', 'error'],
})

/** Shutdown hook: releases the Prisma connection. */
async function disconnectPrisma(): Promise<void> {
  console.log('[shutdown] Disconnecting Prisma...')
  await prisma.$disconnect()
  console.log('[shutdown] Prisma disconnected')
}

onShutdown(disconnectPrisma)

Handling In-Flight Transactions

// Long-running work: check isTerminating() between batches so shutdown only waits for the current transaction
import { isTerminating } from './shutdown/signals'

/**
 * Marks items as processed in batches of 100, one transaction per batch.
 *
 * Before each batch the task checks isTerminating() and stops early when a
 * shutdown has been requested, so shutdown only has to wait for the batch
 * that is currently committing.
 */
export async function longRunningTask() {
  const batchSize = 100
  // Upper bound for the loop and for the progress log message.
  const totalCount = await prisma.item.count()
  let processed = 0

  while (processed < totalCount) {
    if (isTerminating()) {
      console.log(`[task] Shutdown requested, stopping at ${processed}/${totalCount}`)
      break
    }

    const handled = await prisma.$transaction(async (tx) => {
      const batch = await tx.item.findMany({
        // skip/take paging is only stable with a deterministic order.
        orderBy: { id: 'asc' },
        skip: processed,
        take: batchSize,
      })
      for (const item of batch) {
        await tx.item.update({ where: { id: item.id }, data: { processed: true } })
      }
      return batch.length
    })

    // Rows may have been deleted since the count was taken; stop instead of
    // looping over empty pages.
    if (handled === 0) break

    // Advance by what was actually handled so the progress figure stays accurate.
    processed += handled
  }
}

Redis Connection Cleanup

// db/redis.ts
import Redis from 'ioredis'

/**
 * Creates an ioredis client configured from REDIS_HOST / REDIS_PORT that stops
 * reconnecting once the service is shutting down.
 *
 * @param options.maxRetriesPerRequest Retry limit per command. Defaults to 3.
 *   Pass `null` for a connection that is handed to BullMQ, which requires it.
 * @returns The new client, with an 'error' listener attached.
 */
export function createRedisClient(
  options: { maxRetriesPerRequest?: number | null } = {}
): Redis {
  // Compare against undefined rather than using `??`, because `null` is a
  // meaningful value here and must not be replaced by the default.
  const maxRetriesPerRequest =
    options.maxRetriesPerRequest === undefined ? 3 : options.maxRetriesPerRequest

  const redis = new Redis({
    host: process.env.REDIS_HOST ?? 'localhost',
    // Explicit radix so the string's prefix can never select the base.
    port: Number.parseInt(process.env.REDIS_PORT ?? '6379', 10),
    // Do not reconnect during shutdown
    retryStrategy(times) {
      if (isTerminating()) return null
      // Linear backoff, capped at 5 seconds.
      return Math.min(times * 200, 5000)
    },
    enableReadyCheck: true,
    maxRetriesPerRequest,
  })

  // Log connection errors through this listener.
  redis.on('error', (err) => {
    console.error('[redis] Error:', err.message)
  })

  return redis
}

// Shutdown hook.
// quit() sends the QUIT command and waits for the reply, whereas disconnect()
// forces the connection closed immediately, so quit() is the graceful choice.
async function closeRedis(): Promise<void> {
  console.log('[shutdown] Closing Redis...')
  await redis.quit()
  console.log('[shutdown] Redis closed')
}

onShutdown(closeRedis)

Pub/Sub Cleanup

// Dedicated connection for the subscription, duplicated from the main client.
const subscriber = redis.duplicate()

// Attach the listener before subscribing so no message can arrive unobserved.
subscriber.on('message', (channel, message) => {
  try {
    handleGameEvent(JSON.parse(message))
  } catch (err) {
    // A malformed payload must not escape as an uncaught exception: that would
    // trigger a shutdown of the whole service because of one bad message.
    console.error(`[redis] Failed to handle message on ${channel}:`, err)
  }
})

await subscriber.subscribe('game:events')

// Unsubscribe first, then close the subscriber connection.
onShutdown(async () => {
  await subscriber.unsubscribe()
  await subscriber.quit()
})

grammY Bot Stop (Polling + Webhook Modes)

Polling Mode

// bot/index.ts
import { Bot } from 'grammy'

const bot = new Bot(process.env.BOT_TOKEN!)

bot.command('start', (ctx) => ctx.reply('Hello!'))

// Start polling
bot.start({
  onStart() {
    console.log('Bot started (polling)')
  },
})

/**
 * Shutdown hook for polling mode. Per the guide, bot.stop():
 * 1. stops fetching new updates,
 * 2. finishes processing the current update batch,
 * 3. closes the bot instance.
 */
async function stopPollingBot(): Promise<void> {
  console.log('[shutdown] Stopping bot (polling)...')
  await bot.stop()
  console.log('[shutdown] Bot stopped')
}

onShutdown(stopPollingBot)

Webhook Mode (with Express)

import express from 'express'
import { Bot, webhookCallback } from 'grammy'

const bot = new Bot(process.env.BOT_TOKEN!)
const app = express()

// Updates arrive as JSON POST requests on /webhook.
app.use(express.json())
app.post('/webhook', webhookCallback(bot, 'express'))

const server = app.listen(3000)
trackConnections(server)

// In webhook mode, bot.stop() is not needed since updates come via HTTP.
// Just stop the HTTP server.
onShutdown(async () => {
  // First delete the webhook so Telegram stops sending updates.
  // This is best-effort: if the Telegram API call fails, the HTTP server
  // must still be drained and closed below.
  try {
    await bot.api.deleteWebhook()
    console.log('[shutdown] Webhook deleted')
  } catch (err) {
    console.error('[shutdown] Failed to delete webhook:', err)
  }

  // Then close the HTTP server (drain in-flight webhook requests)
  await closeHttpServer(server)
})

grammY Runner (Long Polling with Concurrency)

import { Bot } from 'grammy'
import { run } from '@grammyjs/runner'

const bot = new Bot(process.env.BOT_TOKEN!)
const runner = run(bot)

/**
 * Shutdown hook for the grammY runner: runner.stop() takes the place of
 * bot.stop() and is only called while the runner is still running.
 */
async function stopRunner(): Promise<void> {
  console.log('[shutdown] Stopping grammY runner...')
  const stillRunning = runner.isRunning()
  if (stillRunning) {
    await runner.stop()
  }
  console.log('[shutdown] grammY runner stopped')
}

onShutdown(stopRunner)

Docker Stop Signal and Timeout Configuration

# Dockerfile
FROM node:20-alpine

WORKDIR /app
COPY package*.json ./
# Install production dependencies only (--omit=dev replaces the deprecated --production flag)
RUN npm ci --omit=dev
COPY dist/ ./dist/

# Docker sends this signal on `docker stop`.
# With the exec-form CMD below, node runs as PID 1, so the app must register
# its own SIGTERM handler (not just SIGINT) for the signal to have any effect.
STOPSIGNAL SIGTERM

# Run as non-root
USER node

# Exec form: node is started directly, without a wrapping shell
CMD ["node", "dist/index.js"]
# docker-compose.yml
services:
  app:
    build: .
    stop_signal: SIGTERM
    # Time Docker waits before SIGKILL after sending stop_signal
    # Must be GREATER than your app's shutdown timeout (30s in this guide)
    stop_grace_period: 45s
    deploy:
      restart_policy:
        condition: on-failure
        max_attempts: 3
    healthcheck:
      # Probe the liveness endpoint exposed by the health router (/health/live);
      # the app defines no bare /health route.
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:3000/health/live"]
      interval: 15s
      timeout: 5s
      retries: 3
      start_period: 10s

Important: Node.js in Docker

# The three fragments below are alternatives; use only one of them.

# BAD: npm becomes PID 1 and starts node as a child process,
# so the SIGTERM sent by Docker may never reach your app
CMD ["npm", "start"]

# GOOD: run node directly so it receives SIGTERM
CMD ["node", "dist/index.js"]

# ALSO GOOD: use tini as init process (PID 1) to forward signals to node
RUN apk add --no-cache tini
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["node", "dist/index.js"]

Health Check Endpoints During Shutdown

// health/index.ts
import { Router } from 'express'
import { isTerminating } from '../shutdown/signals'

const healthRouter = Router()

// Liveness probe: answers 200 for as long as the process can serve requests.
healthRouter.get('/health/live', (_req, res) => {
  res.status(200).json({ status: 'ok' })
})

// Readiness probe: 503 while shutting down or when a dependency check fails,
// so load balancers stop routing traffic to this instance.
healthRouter.get('/health/ready', (_req, res) => {
  if (isTerminating()) {
    res.status(503).json({ status: 'shutting_down' })
    return
  }

  // Run the dependency checks in a fixed order and report all of them.
  const database = checkDatabase()
  const redis = checkRedis()
  const queue = checkQueue()
  const checks = { database, redis, queue }

  const anyUnhealthy = Object.values(checks).some((check) => !check.healthy)
  if (anyUnhealthy) {
    res.status(503).json({ status: 'degraded', checks })
    return
  }

  res.status(200).json({ status: 'ok', checks })
})

export { healthRouter }

Kubernetes Integration

# k8s/deployment.yaml
# Fragment showing only the shutdown-related fields of the Deployment.
apiVersion: apps/v1
kind: Deployment
spec:
  template:
    spec:
      # Time allowed between the start of termination and SIGKILL.
      # Must cover the preStop sleep (5s) plus the app's shutdown timeout (30s in this guide).
      terminationGracePeriodSeconds: 45
      containers:
        - name: app
          # Liveness: the app's /health/live route always answers 200
          livenessProbe:
            httpGet:
              path: /health/live
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 10
          # Readiness: the app's /health/ready route answers 503 once shutdown has started
          readinessProbe:
            httpGet:
              path: /health/ready
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 5
          lifecycle:
            preStop:
              exec:
                # Give the load balancer time to remove this pod
                command: ["sleep", "5"]

Common Mistakes

1. Exceeding Kill Timeout

// BAD: Shutdown takes longer than Docker's stop_grace_period,
// so the container is killed before the hook reaches the disconnect
onShutdown(async () => {
  await processAllRemainingItems() // Could take minutes
  await prisma.$disconnect()
})

// GOOD: Respect the timeout, stop early if needed
// (presumably maxDuration is in milliseconds, i.e. 20s, below the 30s shutdown timeout used in this guide)
onShutdown(async () => {
  await processRemainingItems({ maxDuration: 20_000 })
  await prisma.$disconnect()
})

2. Resource Leaks from Missing Cleanup

// BAD: setInterval prevents clean exit (the handle is discarded, so the timer can never be cleared)
setInterval(() => syncStats(), 60_000)

// GOOD: track and clear intervals (keep the handle and clear it in a shutdown hook)
const statsInterval = setInterval(() => syncStats(), 60_000)
onShutdown(async () => {
  clearInterval(statsInterval)
})

3. Not Handling Duplicate Signals

// BAD: shutdown runs twice on rapid Ctrl+C (every SIGINT starts a new cleanup)
process.on('SIGINT', async () => {
  await cleanup() // Runs twice, may cause errors
  process.exit(0)
})

// GOOD: guard against re-entry (already shown in the signals module above)
// The flag is set before any cleanup starts, so a second call returns immediately.
let isShuttingDown = false
async function shutdown() {
  if (isShuttingDown) return
  isShuttingDown = true
  // ...
}

4. Using process.exit() Without Cleanup

// BAD: skips all cleanup (process.exit() ends the process without running any shutdown hook)
if (criticalError) process.exit(1)

// GOOD: trigger graceful shutdown by sending SIGTERM to this process,
// which invokes the SIGTERM handler registered by setupSignalHandlers()
// NOTE(review): on this path the signals module exits with code 0 - confirm that is acceptable for a critical error
if (criticalError) {
  console.error('Critical error, shutting down')
  process.kill(process.pid, 'SIGTERM')
}

5. Not Draining the Event Loop

// BAD: process exits while async operations are in flight
// (the exit happens as soon as the server has closed, without waiting for anything else)
server.close(() => {
  process.exit(0) // Pending database writes may be lost
})

// GOOD: wait for all shutdown hooks to complete before exiting
// (handled by the shutdown module pattern shown above)

6. Ignoring npm start Signal Forwarding in Docker

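When the container command is npm start, npm runs as PID 1 and starts Node.js as a child process. Docker sends SIGTERM to npm, which may not forward it to your Node.js process, so the shutdown hooks never run and the container is killed when the grace period expires. Always run node directly, or use an init process such as tini.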

Complete Example: Full Service Shutdown

// index.ts - putting it all together
import { Bot } from 'grammy'

import { createApp } from './app'
import { prisma } from './db/prisma'
import { createRedisClient } from './db/redis'
import { closeHttpServer } from './shutdown/http'
import { setupSignalHandlers, onShutdown } from './shutdown/signals'
import { createWorker } from './workers'

/**
 * Boots every layer of the service and registers its shutdown hook right after
 * the layer is created. Hooks run LIFO, so the layers close in reverse order:
 * HTTP server first, database last.
 */
async function main() {
  setupSignalHandlers({ timeout: 30_000 })

  // Layer 1: Database (closes last)
  await prisma.$connect()
  onShutdown(() => prisma.$disconnect())

  // Layer 2: Redis
  const redis = createRedisClient()
  onShutdown(async () => {
    await redis.quit()
  })

  // Layer 3: Background workers
  const worker = createWorker(redis)
  onShutdown(async () => {
    await worker.close()
  })

  // Layer 4: Telegram bot
  const bot = new Bot(process.env.BOT_TOKEN!)
  bot.start()
  onShutdown(() => bot.stop())

  // Layer 5: HTTP server (closes first)
  const { server } = createApp(prisma, redis)
  onShutdown(() => closeHttpServer(server))

  console.log('All services started')
}

/** Logs a startup failure and ends the process with a non-zero exit code. */
function reportStartupFailure(err: unknown): never {
  console.error('Failed to start:', err)
  process.exit(1)
}

main().catch(reportStartupFailure)
Install via CLI
npx skills add https://github.com/artvision-agency/claude-code-settings --skill graceful-shutdown-nodejs
Repository Details
star Stars 0
call_split Forks 0
navigation Branch main
article Path SKILL.md
More from Creator
artvision-agency
artvision-agency Explore all skills →