From 993976a7f765277b64b0d3581f6cee26d77b6b88 Mon Sep 17 00:00:00 2001 From: samuel-p Date: Fri, 28 Feb 2020 23:54:32 +0100 Subject: [PATCH] major improvements and bug fixes added retry options (closes #1) --- README.md | 40 ++++++++++++++++++++++++++++++---------- index.js | 53 +++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index a3abe61..54bcb05 100644 --- a/README.md +++ b/README.md @@ -6,34 +6,54 @@ Simple monitor to watch URLs (`HTTP`) or ports (`TCP`, `UDP`) and update [Cachet ## Configuration -cachet-monitor can monitor a list of services. Therefore it requires to setup all services in `./data/config.json`. __The id of each service has to match the cachet component id you want to update!__ You also can specify a custom timeout in seconds for each service. If the service timeout is passed the status will be `SLOW` (Cachet `Performance Issues`). +cachet-monitor can monitor a list of services. Therefore it requires to setup all services in `./data/config.json`. __The id of each service has to match the cachet component id you want to update!__ Each service needs the following attributes (additionally to `id` and `type`): -You also need to specify the interval (`cron`) your services should be checked. You can use the cron syntax from [`node-cron`](https://www.npmjs.com/package/node-cron). You also have to set `offlineTimeUntilMajor` which is the offline time in seconds until the state of an offline service turns from partial to major outage. Finally you need to provide the information to your cachet instance (`api` and `token`). The "global" `timeout` value will be used as a final request timeout for each service. If the check request does not complete in this time the service will be marked as offline. 
+* type `HTTP` + * `url` +* type `TCP` or `UDP` + * `host` + * `port` + +Optionally you can add the following options to each service, or change the default values globally: + +* `retry` - number of times the check should be retried if the service is offline (default value `0`) +* `waitUntilRetry` - number of seconds the retry should be delayed (default value `5`) +* `performanceTimeout` - time in seconds in which the request has to be completed, otherwise the status will be `SLOW` (Cachet `Performance Issues`) (default value `1`) +* `requestTimeout` - time in seconds in which the request has to be completed, otherwise the status will be offline (default value `30`) +* `offlineTimeUntilMajor` - time in seconds a service has to be offline until it turns from partial to major outage (default value `300`) + +You can specify the interval (`cron`) your services should be checked. You can use the cron syntax from [`node-cron`](https://www.npmjs.com/package/node-cron). Finally you need to provide the information to your cachet instance (`api` and `token`). + +To change the default values globally you can set them in the `defaults` object. 
Example: ```json { + "cron": "0 * * * * *", + "api": "https:///api/v1", + "token": "", "services": [ { "id": 1, "type": "HTTP", "url": "https://sp-codes.de", - "timeout": 60 + "performanceTimeout": 1 }, { "id": 2, "type": "TCP", "host": "sp-codes.de", - "port": 443, - "timeout": 60 + "port": 443 } ], - "cron": "0 * * * * *", - "timeout": 30, - "offlineTimeUntilMajor": 300, - "api": "https:///api/v1", - "token": "" + "defaults": { + "retry": 1, + "waitUntilRetry": 5, + "performanceTimeout": 2, + "requestTimeout": 10, + "offlineTimeUntilMajor": 600 + } } ``` diff --git a/index.js b/index.js index 9253742..f4f306f 100644 --- a/index.js +++ b/index.js @@ -25,21 +25,28 @@ const cachetStatusMapping = { "INCIDENT": 4 }; -const checkHttp = async (url, performanceTimeout, requestTimeout = 60) => { +config.cron = config.cron || "* * * * *"; +config.defaults = config.defaults || {}; +config.defaults.retry = config.defaults.retry || 0; +config.defaults.waitUntilRetry = config.defaults.waitUntilRetry || 5; +config.defaults.performanceTimeout = config.defaults.performanceTimeout || 1; +config.defaults.requestTimeout = config.defaults.requestTimeout || 30; +config.defaults.offlineTimeUntilMajor = config.defaults.offlineTimeUntilMajor || 300; + +const checkHttp = async (url, performanceTimeout, requestTimeout) => { const controller = new abort.AbortController(); - const timeout = setTimeout(() => controller.abort(), requestTimeout); + const timeout = setTimeout(() => controller.abort(), requestTimeout * 1000); try { const start = new Date().getTime(); const response = await fetch(url, {signal: controller.signal}); const stop = new Date().getTime(); if (response.ok) { - if (stop - start > performanceTimeout) { + if (stop - start > performanceTimeout * 1000) { return {status: "SLOW", message: response.statusText}; } return {status: "ONLINE", message: response.statusText}; - } else { - return {status: "OFFLINE", message: response.statusText}; } + return {status: "OFFLINE", message: 
response.statusText}; } catch (e) { return {status: "OFFLINE", message: e.message}; } finally { @@ -47,12 +54,12 @@ const checkHttp = async (url, performanceTimeout, requestTimeout = 60) => { } }; -const checkPort = async (host, port, type, performanceTimeout, requestTimeout = 60) => { +const checkPort = async (host, port, type, performanceTimeout, requestTimeout) => { return await new Promise(resolve => { nmap.scan({ range: [host], ports: port.toString(), - timeout: requestTimeout / 1000, + timeout: requestTimeout, udp: type === 'udp' }, (error, report) => { if (error) { @@ -62,7 +69,7 @@ const checkPort = async (host, port, type, performanceTimeout, requestTimeout = const time = parseInt(result.item.endtime) - parseInt(result.item.starttime); const status = result.ports[0].port[0].state[0].item; if (status.state.includes('open')) { - if (time > performanceTimeout) { + if (time > performanceTimeout * 1000) { resolve({status: "SLOW", message: status.state}); } else { resolve({status: "ONLINE", message: status.state}); @@ -76,24 +83,38 @@ const checkPort = async (host, port, type, performanceTimeout, requestTimeout = }; async function checkStatus(service) { + const performanceTimeout = service.performanceTimeout || config.defaults.performanceTimeout; + const requestTimeout = service.requestTimeout || config.defaults.requestTimeout; switch (service.type) { case 'HTTP': - return await checkHttp(service.url, service.timeout * 1000, config.timeout); + return await checkHttp(service.url, performanceTimeout, requestTimeout); case 'TCP': - return await checkPort(service.host, service.port, 'tcp', service.timeout * 1000, config.timeout); + return await checkPort(service.host, service.port, 'tcp', performanceTimeout, requestTimeout); case 'UDP': - return await checkPort(service.host, service.port, 'udp', service.timeout * 1000, config.timeout); + return await checkPort(service.host, service.port, 'udp', performanceTimeout, requestTimeout); default: throw new 
Error('unsupported type "' + type + '"') } } const checkService = async (service, oldStatus) => { - const newStatus = await checkStatus(service); - newStatus.changed = new Date().getTime(); - if (newStatus.status === "OFFLINE" && oldStatus && ["OFFLINE", "INCIDENT"].includes(oldStatus.status) && - oldStatus.changed + config.offlineTimeUntilMajor * 1000 < newStatus.changed) { - newStatus.status = "INCIDENT"; + const retry = service.retry || config.defaults.retry; + const waitUntilRetry = (service.waitUntilRetry || config.defaults.waitUntilRetry) * 1000; + let newStatus; + for (let i = retry; i >= 0; i--) { + newStatus = await checkStatus(service); + console.log(i); + newStatus.changed = new Date().getTime(); + if (newStatus.status !== "OFFLINE") { + return newStatus; + } + const offlineTimeUntilMajor = (service.offlineTimeUntilMajor || config.defaults.offlineTimeUntilMajor) * 1000; + if (oldStatus && ["OFFLINE", "INCIDENT"].includes(oldStatus.status) && oldStatus.changed + offlineTimeUntilMajor < newStatus.changed) { + newStatus.status = "INCIDENT"; + } + if (i >= 0 && waitUntilRetry > 0) { + await new Promise(r => setTimeout(r, waitUntilRetry)); + } } return newStatus; };