major improvements and bug fixes
All checks were successful
continuous-integration/drone/push Build is passing

added retry options (closes #1)
This commit is contained in:
Samuel Philipp 2020-02-28 23:54:32 +01:00
parent 2b09aaef19
commit 993976a7f7
2 changed files with 67 additions and 26 deletions

View file

@ -6,34 +6,54 @@ Simple monitor to watch URLs (`HTTP`) or ports (`TCP`, `UDP`) and update [Cachet
## Configuration ## Configuration
cachet-monitor can monitor a list of services. Therefore it requires to setup all services in `./data/config.json`. __The id of each service has to match the cachet component id you want to update!__ You also can specify a custom timeout in seconds for each service. If the service timeout is passed the status will be `SLOW` (Cachet `Performance Issues`). cachet-monitor can monitor a list of services. Therefore it requires to setup all services in `./data/config.json`. __The id of each service has to match the cachet component id you want to update!__ Each service needs the following attributes (additionally to `id` and `type`):
You also need to specify the interval (`cron`) your services should be checked. You can use the cron syntax from [`node-cron`](https://www.npmjs.com/package/node-cron). You also have to set `offlineTimeUntilMajor` which is the offline time in seconds until the state of an offline service turns from partial to major outage. Finally you need to provide the information to your cachet instance (`api` and `token`). The "global" `timeout` value will be used as a final request timeout for each service. If the check request does not complete in this time the service will be marked as offline. * type `HTTP`
* `url`
* type `TCP` or `UDP`
* `host`
* `port`
Optionally you can add the following options to each service, or change the default values globally:
* `retry` - number of times the check is retried if the service is offline (default value `0`)
* `waitUntilRetry` - number of seconds the retry should be delayed (default value `5`)
* `performanceTimeout` - time in seconds in which the request has to be completed, otherwise the status will be `SLOW` (Cachet `Performance Issues`) (default value `1`)
* `requestTimeout` - time in seconds in which the request has to be completed, otherwise the status will be offline (default value `30`)
* `offlineTimeUntilMajor` - time in seconds a service has to be offline until it turns from partial to major outage (default value `300`)
You can specify the interval (`cron`) at which your services should be checked. You can use the cron syntax from [`node-cron`](https://www.npmjs.com/package/node-cron). Finally, you need to provide the connection information for your cachet instance (`api` and `token`).
To change the default values globally, you can set them in the `defaults` object.
Example: Example:
```json ```json
{ {
"cron": "0 * * * * *",
"api": "https://<cachet-url>/api/v1",
"token": "<user-token>",
"services": [ "services": [
{ {
"id": 1, "id": 1,
"type": "HTTP", "type": "HTTP",
"url": "https://sp-codes.de", "url": "https://sp-codes.de",
"timeout": 60 "performanceTimeout": 1
}, },
{ {
"id": 2, "id": 2,
"type": "TCP", "type": "TCP",
"host": "sp-codes.de", "host": "sp-codes.de",
"port": 443, "port": 443
"timeout": 60
} }
], ],
"cron": "0 * * * * *", "defaults": {
"timeout": 30, "retry": 1,
"offlineTimeUntilMajor": 300, "waitUntilRetry": 5,
"api": "https://<cachet-url>/api/v1", "performanceTimeout": 2,
"token": "<user-token>" "requestTimeout": 10,
"offlineTimeUntilMajor": 600
}
} }
``` ```

View file

@ -25,21 +25,28 @@ const cachetStatusMapping = {
"INCIDENT": 4 "INCIDENT": 4
}; };
const checkHttp = async (url, performanceTimeout, requestTimeout = 60) => { config.cron = config.cron || "* * * * *";
config.defaults = config.defaults || {};
config.defaults.retry = config.defaults.retry || 0;
config.defaults.waitUntilRetry = config.defaults.waitUntilRetry || 5;
config.defaults.performanceTimeout = config.defaults.performanceTimeout || 1;
config.defaults.requestTimeout = config.defaults.requestTimeout || 30;
config.defaults.offlineTimeUntilMajor = config.defaults.offlineTimeUntilMajor || 300;
const checkHttp = async (url, performanceTimeout, requestTimeout) => {
const controller = new abort.AbortController(); const controller = new abort.AbortController();
const timeout = setTimeout(() => controller.abort(), requestTimeout); const timeout = setTimeout(() => controller.abort(), requestTimeout * 1000);
try { try {
const start = new Date().getTime(); const start = new Date().getTime();
const response = await fetch(url, {signal: controller.signal}); const response = await fetch(url, {signal: controller.signal});
const stop = new Date().getTime(); const stop = new Date().getTime();
if (response.ok) { if (response.ok) {
if (stop - start > performanceTimeout) { if (stop - start > performanceTimeout * 1000) {
return {status: "SLOW", message: response.statusText}; return {status: "SLOW", message: response.statusText};
} }
return {status: "ONLINE", message: response.statusText}; return {status: "ONLINE", message: response.statusText};
} else {
return {status: "OFFLINE", message: response.statusText};
} }
return {status: "OFFLINE", message: response.statusText};
} catch (e) { } catch (e) {
return {status: "OFFLINE", message: e.message}; return {status: "OFFLINE", message: e.message};
} finally { } finally {
@ -47,12 +54,12 @@ const checkHttp = async (url, performanceTimeout, requestTimeout = 60) => {
} }
}; };
const checkPort = async (host, port, type, performanceTimeout, requestTimeout = 60) => { const checkPort = async (host, port, type, performanceTimeout, requestTimeout) => {
return await new Promise(resolve => { return await new Promise(resolve => {
nmap.scan({ nmap.scan({
range: [host], range: [host],
ports: port.toString(), ports: port.toString(),
timeout: requestTimeout / 1000, timeout: requestTimeout,
udp: type === 'udp' udp: type === 'udp'
}, (error, report) => { }, (error, report) => {
if (error) { if (error) {
@ -62,7 +69,7 @@ const checkPort = async (host, port, type, performanceTimeout, requestTimeout =
const time = parseInt(result.item.endtime) - parseInt(result.item.starttime); const time = parseInt(result.item.endtime) - parseInt(result.item.starttime);
const status = result.ports[0].port[0].state[0].item; const status = result.ports[0].port[0].state[0].item;
if (status.state.includes('open')) { if (status.state.includes('open')) {
if (time > performanceTimeout) { if (time > performanceTimeout * 1000) {
resolve({status: "SLOW", message: status.state}); resolve({status: "SLOW", message: status.state});
} else { } else {
resolve({status: "ONLINE", message: status.state}); resolve({status: "ONLINE", message: status.state});
@ -76,25 +83,39 @@ const checkPort = async (host, port, type, performanceTimeout, requestTimeout =
}; };
async function checkStatus(service) { async function checkStatus(service) {
const performanceTimeout = service.performanceTimeout || config.defaults.performanceTimeout;
const requestTimeout = service.requestTimeout || config.defaults.requestTimeout;
switch (service.type) { switch (service.type) {
case 'HTTP': case 'HTTP':
return await checkHttp(service.url, service.timeout * 1000, config.timeout); return await checkHttp(service.url, performanceTimeout, requestTimeout);
case 'TCP': case 'TCP':
return await checkPort(service.host, service.port, 'tcp', service.timeout * 1000, config.timeout); return await checkPort(service.host, service.port, 'tcp', performanceTimeout, requestTimeout);
case 'UDP': case 'UDP':
return await checkPort(service.host, service.port, 'udp', service.timeout * 1000, config.timeout); return await checkPort(service.host, service.port, 'udp', performanceTimeout, requestTimeout);
default: default:
throw new Error('unsupported type "' + type + '"') throw new Error('unsupported type "' + type + '"')
} }
} }
const checkService = async (service, oldStatus) => { const checkService = async (service, oldStatus) => {
const newStatus = await checkStatus(service); const retry = service.retry || config.defaults.retry;
const waitUntilRetry = (service.waitUntilRetry || config.defaults.waitUntilRetry) * 1000;
let newStatus;
for (let i = retry; i >= 0; i--) {
newStatus = await checkStatus(service);
console.log(i);
newStatus.changed = new Date().getTime(); newStatus.changed = new Date().getTime();
if (newStatus.status === "OFFLINE" && oldStatus && ["OFFLINE", "INCIDENT"].includes(oldStatus.status) && if (newStatus.status !== "OFFLINE") {
oldStatus.changed + config.offlineTimeUntilMajor * 1000 < newStatus.changed) { return newStatus;
}
const offlineTimeUntilMajor = (service.offlineTimeUntilMajor || config.defaults.offlineTimeUntilMajor) * 1000;
if (oldStatus && ["OFFLINE", "INCIDENT"].includes(oldStatus.status) && oldStatus.changed + offlineTimeUntilMajor < newStatus.changed) {
newStatus.status = "INCIDENT"; newStatus.status = "INCIDENT";
} }
if (i >= 0 && waitUntilRetry > 0) {
await new Promise(r => setTimeout(r, waitUntilRetry));
}
}
return newStatus; return newStatus;
}; };