[IMP] Support sqlite db to store status and history

This commit is contained in:
2026-04-01 15:25:06 +02:00
parent 9c71e8d493
commit 425080dfec
+175 -133
View File
@@ -18,14 +18,12 @@ import logging
import re
import time
import datetime
import json
import signal
import sqlite3
from iniparse import RawConfigParser
from optparse import OptionParser
#from distutils.spawn import find_executable
usage="""\
%prog -c configfile action
@@ -37,9 +35,10 @@ action is either :
monitor : monitor in background all providers and enable/disable them
check [all,<provider>] : check all or one provider and display reachability
check-json [all,<provider>] : check providers and output state as json data
status : display current state from state file
"""
version = "0.0.1"
version = "0.0.2"
parser=OptionParser(usage=usage,version="%prog " + version)
parser.add_option("-i","--check-interval", dest="check_interval", type=int, default=60, help="Config file full path (default: %default)")
@@ -53,6 +52,66 @@ parser.add_option("-l","--loglevel", dest="loglevel", default='info', type='choi
REPORT = re.compile(r'\n(?P<transmitted>\d+)\s+packets transmitted,\s+(?P<received>\d+) received,\s+(?P<loss>\d+)%\s+packet loss')
RTT = re.compile(r'rtt min/avg/max/mdev = (?P<min>[0-9.]+)/(?P<avg>[0-9.]+)/(?P<max>[0-9.]+)/(?P<mdev>[0-9.]+) ms')
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
BASE_DIR = '/opt/check_providers'
DB_PATH = os.path.join(BASE_DIR, 'check-providers.db')
STATE_FILE = os.path.join(BASE_DIR, 'check-providers-state.json')
MONITOR_PID_FILE = os.path.join(BASE_DIR, 'check-providers.pid')
# ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------
def init_db():
"""Create the SQLite database and events table if not present."""
os.makedirs(BASE_DIR, exist_ok=True)
with sqlite3.connect(DB_PATH) as conn:
conn.execute('''
CREATE TABLE IF NOT EXISTS events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ts TEXT NOT NULL,
provider TEXT NOT NULL,
available INTEGER,
rtt REAL,
loss INTEGER,
status TEXT,
transition INTEGER DEFAULT 0
)
''')
conn.execute('CREATE INDEX IF NOT EXISTS idx_events_provider ON events(provider)')
conn.execute('CREATE INDEX IF NOT EXISTS idx_events_ts ON events(ts)')
conn.commit()
def purge_old_events(days=30):
"""Remove events older than `days` days."""
with sqlite3.connect(DB_PATH) as conn:
conn.execute(
"DELETE FROM events WHERE ts < datetime('now', '-{} days')".format(days)
)
conn.commit()
def write_state_file(providers):
"""Write the current state of all providers to the JSON state file."""
tmp = STATE_FILE + '.tmp'
with open(tmp, 'w') as f:
f.write(jsondumps([p.as_dict() for p in providers], indent=True))
os.replace(tmp, STATE_FILE) # atomic replace
def record_providers(providers):
"""Insert one row per provider into the events table."""
with sqlite3.connect(DB_PATH) as conn:
for provider in providers:
provider.record(conn)
conn.commit()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def run(cmd, dry_run=False):
try:
@@ -64,7 +123,6 @@ def run(cmd,dry_run=False):
else:
print("DRYRUN : {}".format(cmd))
return (0, "#### DRYRUN ### no output for {}".format(cmd))
except subprocess.CalledProcessError as e:
return (e.returncode, e.output)
@@ -79,50 +137,25 @@ def default_json(o):
else:
return u"{}".format(o)
def jsondumps(o, **kwargs):
"""extended json dump of o"""
"""Extended json dump of o."""
return json.dumps(o, default=default_json, **kwargs)
def arping(device,target_ip,ping_count=3):
# arping
#root@gw-pironniere:/opt/check-providers# arping -i eth0 -c4 192.168.149.254
"""\
ARPING 192.168.149.254
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=0 time=229.674 usec
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=1 time=258.173 usec
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=2 time=251.468 usec
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=3 time=252.305 usec
--- 192.168.149.254 statistics ---
4 packets transmitted, 4 packets received, 0% unanswered (0 extra)
"""
# iputils-arping
#root@gw-pironniere:/opt/check-providers# arping -c2 192.168.149.254
"""\
ARPING 192.168.149.254 from 192.168.149.184 eth0
Unicast reply from 192.168.149.254 [00:0A:FA:24:18:F7] 0.886ms
Unicast reply from 192.168.149.254 [00:0A:FA:24:18:F7] 0.777ms
Sent 2 probes (1 broadcast(s))
"""
def arping(device, target_ip, ping_count=3):
ARPING1 = re.compile(r'bytes from (?P<mac>\S+).*time=(?P<rtt>[0-9.]*) (?P<unit>.*)')
ARPING2 = re.compile(r'reply from.*\[(?P<mac>\S+)\]\s+(?P<rtt>[0-9.]*)(?P<unit>.*)')
# ARPING_PATH = find_executable('arping')
ARPING_PATH = "/usr/sbin/arping"
if ARPING_PATH == None:
if ARPING_PATH is None:
raise Exception('No arping command found')
elif "/usr/bin/arping" in ARPING_PATH:
(returncode, output) = run('arping -c{ping_count} -I{device} {target_ip}'.format(
ping_count = ping_count,
device = device,
target_ip = target_ip,
))
ping_count=ping_count, device=device, target_ip=target_ip))
packets = [p.groupdict() for p in ARPING2.finditer(output.decode('utf-8'))]
elif "/usr/sbin/arping" in ARPING_PATH:
(returncode, output) = run('arping -c{ping_count} -i{device} {target_ip}'.format(
ping_count = ping_count,
device = device,
target_ip = target_ip,
))
ping_count=ping_count, device=device, target_ip=target_ip))
packets = [p.groupdict() for p in ARPING1.finditer(output.decode('utf-8'))]
result = {}
if packets:
@@ -135,15 +168,9 @@ Sent 2 probes (1 broadcast(s))
result['alive'] = False
return result
def openvpn_local_sockets():
"""
Returns:
list of str of IP where openvpn is bound.
"""
(retcode, output) = run("/bin/netstat -lupnw | grep -E '(udp|tcp) .*/openvpn'")
"""
udp 0 0 192.168.1.254:1194 0.0.0.0:* 16919/openvpn
"""
result = []
listening = output.splitlines()
for conn in listening:
@@ -153,14 +180,15 @@ def openvpn_local_sockets():
result.append((proto, local_ip, local_port))
return result
def delete_conntrack(conn):
"""Remove conntrack entries matching the OpenVPN listening processes"""
for (proto, ip, port) in conn:
if ip != '0.0.0.0':
run('/usr/sbin/conntrack -D -p {proto} -s {src} --sport={port}'.format(src=ip, proto=proto, port=port))
else:
run('/usr/sbin/conntrack -D -p {proto} --sport={port}'.format(src=ip, proto=proto, port=port))
def restart_openvpn():
conn = openvpn_local_sockets()
print(run('/etc/init.d/openvpn stop'))
@@ -169,10 +197,14 @@ def restart_openvpn():
print(run('/etc/init.d/openvpn start'))
# ---------------------------------------------------------------------------
# Provider
# ---------------------------------------------------------------------------
class Provider(object):
def __init__(self,provider_name,device=None,gateway=None,target_ip=None,max_rtt=2000.0,max_loss=30,ping_count=10,ping_interval=0.5,timeout=1.5,led=None):
"""Parameters of an Internet provider as defined in Shorewall and availability limits
"""
def __init__(self, provider_name, device=None, gateway=None, target_ip=None,
max_rtt=2000.0, max_loss=30, ping_count=10, ping_interval=0.5,
timeout=1.5, led=None):
self.target_ip = target_ip
self.provider_name = provider_name
self.device = device
@@ -199,6 +231,8 @@ class Provider(object):
self.last_loss = None
self._available = None
self._previous_available = None # for transition detection
self._state_since = None # datetime of last state change
self._link_states = []
self._link_status = 'UNKNOWN'
@@ -209,12 +243,35 @@ class Provider(object):
self.dry_run = False
def record(self, conn):
"""Insert current state into the events table. Marks up<->down transitions."""
transition = int(
self._previous_available != self._available
and self._previous_available is not None
)
if transition:
self._state_since = datetime.datetime.now()
logger.info('Transition detected for {}: {} -> {}'.format(
self.provider_name, self._previous_available, self._available))
conn.execute(
'''INSERT INTO events (ts, provider, available, rtt, loss, status, transition)
VALUES (?, ?, ?, ?, ?, ?, ?)''',
(
datetime.datetime.now().isoformat(),
self.provider_name,
int(self._available) if self._available is not None else None,
self.last_rtt,
self.last_loss,
self.status,
transition,
)
)
self._previous_available = self._available
def used_by_openvpn(self, proto='udp', port=1194):
(retcode,output) = run('conntrack -L -p {proto} --dport {port} -o extended | grep "={src}"'.format(proto=proto,src=self.last_ip,port=port))
"""
conntrack v1.2.1 (conntrack-tools): 1 flow entries have been shown.
ipv4 2 udp 17 178 src=192.168.149.184 dst=80.13.55.10 sport=1194 dport=1194 src=80.13.55.10 dst=192.168.149.184 sport=1194 dport=1194 [ASSURED] mark=1 use=1
"""
(retcode, output) = run('conntrack -L -p {proto} --dport {port} -o extended | grep "={src}"'.format(
proto=proto, src=self.last_ip, port=port))
conn = output.splitlines()
for c in conn:
if "={src} ".format(src=self.last_ip) in c.decode('utf-8'):
@@ -240,14 +297,6 @@ ipv4 2 udp 17 178 src=192.168.149.184 dst=80.13.55.10 sport=1194 dport=1194 src=
@property
def device_up(self):
(retcode, output) = run('ip link show dev {device}'.format(device=self.device))
"""
4: eth2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast master br1 state DOWN mode DEFAULT qlen 1000
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN mode DEFAULT qlen 1000
4: eth2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast master br1 state DOWN mode DEFAULT qlen 1000
10: ppp3g: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN mode DEFAULT qlen 3
6: br1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT
"""
LINK = re.compile(r':\s+<(?P<link_states>.+)>.* state (?P<link_status>.+?)\s')
link = LINK.search(output.decode('utf-8'))
if link:
@@ -258,18 +307,21 @@ ipv4 2 udp 17 178 src=192.168.149.184 dst=80.13.55.10 sport=1194 dport=1194 src=
return None
def check_test_route(self):
"""Test if there is a route to target_ip through the gateway or interface, and add it if not present"""
if self.target_ip:
(retcode, route) = run('/sbin/ip route show {target_ip}'.format(target_ip=self.target_ip))
if self.gateway:
if not "{target_ip} via {gateway}".format(target_ip=self.target_ip, gateway=self.gateway) in route.decode('utf-8'):
logger.debug(run('/sbin/ip route del {target_ip}'.format(target_ip=self.target_ip), dry_run=self.dry_run)[1])
logger.warning('No route for {target_ip} via {gateway}, adding one'.format(target_ip=self.target_ip,gateway=self.gateway))
logger.debug(run('/sbin/ip route add {target_ip} via {gateway}'.format(target_ip=self.target_ip,gateway=self.gateway),dry_run=self.dry_run)[1])
logger.warning('No route for {target_ip} via {gateway}, adding one'.format(
target_ip=self.target_ip, gateway=self.gateway))
logger.debug(run('/sbin/ip route add {target_ip} via {gateway}'.format(
target_ip=self.target_ip, gateway=self.gateway), dry_run=self.dry_run)[1])
elif self.device:
if not " {} ".format(self.device) in route.decode('utf-8'):
logger.warning('No route for {target_ip} through {device}, adding one'.format(target_ip=self.target_ip,device=self.device))
logger.debug(run('/sbin/ip route add {target_ip} dev {device}'.format(target_ip=self.target_ip,device=self.device),dry_run=self.dry_run)[1])
logger.warning('No route for {target_ip} through {device}, adding one'.format(
target_ip=self.target_ip, device=self.device))
logger.debug(run('/sbin/ip route add {target_ip} dev {device}'.format(
target_ip=self.target_ip, device=self.device), dry_run=self.dry_run)[1])
else:
logger.critical('No gateway for {target_ip}'.format(target_ip=self.target_ip))
@@ -286,9 +338,6 @@ ipv4 2 udp 17 178 src=192.168.149.184 dst=80.13.55.10 sport=1194 dport=1194 src=
return self.gateway_alive
def check_available(self):
"""ping the target and change available property based on max_rtt and max_loss
available == True if actual rtt and loss are below the max_rtt and max_loss
"""
self._available = None
self.last_check_time = datetime.datetime.now()
if self.device_up:
@@ -339,7 +388,6 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
return self._available
def check_local_ip(self):
"""Get local ip of device, set ip, device_mac and device_type"""
(retcode, output) = run('ip addr show dev {device}'.format(device=self.device))
IPV4ADDR = re.compile(r'\sinet\s+(?P<ipv4>\d+.\d+.\d+.\d+)[/\s]')
MACADDR = re.compile(r'link/(?P<type>\S+)(\s(?P<mac>\S+))?')
@@ -360,18 +408,12 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
@property
def gateway(self):
if self._gateway:
#ppp, shorewall notation for no gateway
if self._gateway == '-':
return None
else:
return self._gateway
else:
#from dhcp
(retcode, output) = run('ip route list table {}'.format(self.provider_name))
"""root@htouv:~# ip route list dev eth1
88.163.76.0/24 proto kernel scope link src 88.163.76.120
88.163.76.254 scope link src 88.163.76.120
"""
GW = re.compile(r'default via (?P<gateway>\d+.\d+.\d+.\d+)\s+')
gw = GW.search(str(output))
if gw:
@@ -400,8 +442,6 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
return self.last_enabled
def led_off(self):
# led_path = '/sys/class/leds/alix:{}'.format(self.led)
print('led off')
led_path = r'/sys/class/leds/apu:green:{}'.format(self.led)
if os.path.isdir(led_path):
with open(os.path.join(led_path, 'brightness'), 'wb') as f:
@@ -410,7 +450,6 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
f.write(bytes('none', encoding='utf-8'))
def led_on(self):
# led_path = '/sys/class/leds/alix:{}'.format(self.led)
led_path = r'/sys/class/leds/apu:green:{}'.format(self.led)
if os.path.isdir(led_path):
with open(os.path.join(led_path, 'trigger'), 'wb') as f:
@@ -419,8 +458,6 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
f.write(bytes('1', encoding='utf-8'))
def led_blink(self):
# led_path = '/sys/class/leds/alix:{}'.format(self.led)
print('led blink')
led_path = r'/sys/class/leds/apu:green:{}'.format(self.led)
if os.path.isdir(led_path):
with open(os.path.join(led_path, 'brightness'), 'wb') as f:
@@ -429,14 +466,9 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
f.write(bytes('heartbeat', encoding='utf-8'))
def update_leds(self):
""""""
# /sys/class/leds/alix\:1/trigger
#none backlight default-on [heartbeat] timer
if self.enabled:
if self._available:
self.led_on()
elif self.device_up:
self.led_off()
else:
self.led_off()
else:
@@ -450,7 +482,6 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
except Exception as e:
logger.info('Retrying to disable/enable provider because %s' % e)
print(run('/var/lib/shorewall/firewall restart', dry_run=self.dry_run))
# here check the connectivity.... else rollback
self.update_leds()
print('Routes after enabling provider %s\n%s' % (self.provider_name, run('/sbin/shorewall show routing')))
else:
@@ -460,19 +491,15 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
if self.enabled:
openvpn = self.used_by_openvpn()
logger.debug('Disable {}'.format(self.provider_name))
# restart openvpn if it was running on this provider
if openvpn:
logger.info('openvpn was running here, stopping openvpn')
print(run('/etc/init.d/openvpn stop', dry_run=self.dry_run))
print(run('/var/lib/shorewall/firewall disable {}'.format(self.provider_name), dry_run=self.dry_run))
# remove connections
if self.last_ip:
logger.info('removing conntrack entries')
logger.info(run('/usr/sbin/conntrack -D -s {src}'.format(src=self.last_ip), dry_run=self.dry_run)[1])
logger.info(run('/usr/sbin/conntrack -D -q {src}'.format(src=self.last_ip), dry_run=self.dry_run)[1])
# be sure there is no default gw in main table so that fallback provider can be reached
self.remove_default_gw()
# restart openvpn if it was running on this provider
if openvpn:
logger.info('openvpn was running here, restarting openvpn')
print(run('/etc/init.d/openvpn start', dry_run=self.dry_run))
@@ -480,7 +507,6 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
print('Routes after provider %s disabling\n%s' % (self.provider_name, run('/sbin/shorewall show routing')))
def remove_default_gw(self):
"""Remove default route which could have been added in main routing table and will prevent fallback interface from taking over"""
(retcode, routes) = run('ip route list table main dev {}'.format(self.device))
if retcode == 0:
if 'default ' in str(routes):
@@ -533,26 +559,30 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
gateway_mac=self.gateway_mac,
gateway_rtt=self.gateway_rtt,
enabled=self.last_enabled,
state_since=self._state_since,
)
# ---------------------------------------------------------------------------
# Config / pid helpers
# ---------------------------------------------------------------------------
def read_config(filename, providers):
cp = RawConfigParser()
cp.read(filename)
while providers:
providers.pop()
for provider_name in cp.sections():
provider = Provider(provider_name)
provider.read_config(cp)
providers.append(provider)
def is_pid_running(pidfile):
"""return pid if pid in pidfile is a running process, remove pidfile if pid is no more running"""
if os.path.isfile(pidfile):
with open(pidfile, 'rb') as f:
pid = f.read().strip()
if pid and os.path.isdir("/proc/{}".format(pid)):
if pid and os.path.isdir("/proc/{}".format(pid.decode())):
return int(pid)
else:
os.unlink(pidfile)
@@ -565,19 +595,22 @@ def write_pidfile(pidfile,pid=None):
if pid is None:
pid = os.getpid()
oldpid = is_pid_running(pidfile)
if oldpid:
# if oldpid <> pid:
if oldpid != pid:
if oldpid and oldpid != pid:
raise Exception('There is already a running process {} for the pid file {}'.format(oldpid, pidfile))
os.makedirs(os.path.dirname(pidfile), exist_ok=True)
with open(pidfile, "wb") as f:
print(pid)
f.write(bytes(pid))
f.write(bytes(str(pid), 'utf-8'))
def remove_pidfile(pidfile):
if os.path.isfile(pidfile):
os.unlink(pidfile)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == '__main__':
(options, args) = parser.parse_args()
@@ -592,7 +625,7 @@ if __name__ == '__main__':
verbose = options.verbose
loglevel = options.loglevel
monitor_pid_file = '/var/run/check-providers.pid'
monitor_pid_file = MONITOR_PID_FILE
current_pid = os.getpid()
# setup Logger
@@ -605,21 +638,16 @@ if __name__ == '__main__':
hdlr = logging.StreamHandler()
logger.addHandler(hdlr)
# set loglevel
if loglevel in ('debug', 'warning', 'info', 'error', 'critical'):
numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError('Invalid log level: %s' % loglevel)
logger.setLevel(numeric_level)
# Config file
if not os.path.isfile(config_file):
logger.error("Error : could not find file : " + config_file + ", please check the path")
logger.debug("Using " + config_file + " config file")
#adsl = Provider('ADSL',device='eth0',target_ip='185.16.48.54',gateway='192.168.149.254')
#gsm = Provider('GSM',device='ppp3g',target_ip='185.16.48.55',max_loss=40,max_rtt=2000,ping_count=20)
providers = []
read_config(config_file, providers)
@@ -632,10 +660,13 @@ if __name__ == '__main__':
for provider in providers:
provider.dry_run = options.dry_run
# -----------------------------------------------------------------------
# Actions
# -----------------------------------------------------------------------
if action == 'stop':
monitor_pid = is_pid_running(monitor_pid_file)
if monitor_pid:
# wakeup current monitor...
logger.info('Sending a TERM signal to running monitor process {}'.format(monitor_pid))
os.kill(monitor_pid, signal.SIGTERM)
sys.exit(0)
@@ -643,10 +674,9 @@ if __name__ == '__main__':
logger.warning('No running monitoring found')
sys.exit(0)
if action == 'trigger':
elif action == 'trigger':
monitor_pid = is_pid_running(monitor_pid_file)
if monitor_pid:
# wakeup current monitor...
logger.info('Sending a wakeup signal to running monitor process {}'.format(monitor_pid))
os.kill(monitor_pid, signal.SIGHUP)
sys.exit(0)
@@ -654,41 +684,54 @@ if __name__ == '__main__':
logger.critical('No running monitoring found')
sys.exit(1)
if action == 'monitor':
elif action == 'status':
try:
with open(STATE_FILE) as f:
print(f.read())
except FileNotFoundError:
print(jsondumps({'error': 'No state file found, is monitor running?'}))
sys.exit(0)
elif action == 'monitor':
monitor_pid = is_pid_running(monitor_pid_file)
if monitor_pid:
# wakeup current monitor...
logger.info('Sending a wakeup signal to running monitor process {}'.format(monitor_pid))
os.kill(monitor_pid, signal.SIGHUP)
sys.exit(0)
else:
init_db()
try:
write_pidfile(monitor_pid_file)
cycle_count = 0
def handler(signum, frame):
global providers
logger.info('Wake up by signal {}'.format(signum))
if signum == signal.SIGHUP:
logger.info(jsondumps(providers, indent=True))
elif signum == signal.SIGUSR1:
write_state_file(providers)
logger.info('State file updated on SIGUSR1')
elif signum == signal.SIGTERM:
logger.info('Received kill, closing')
remove_pidfile(monitor_pid_file)
sys.exit(0)
# Set the signal handler and a alarm
signal.signal(signal.SIGALRM, handler)
signal.signal(signal.SIGHUP, handler)
signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGUSR1, handler)
while True:
try:
logger.info('Checking providers {}:'.format(','.join([provider.provider_name for provider in providers])))
cycle_count += 1
logger.info('Checking providers {}:'.format(
','.join([provider.provider_name for provider in providers])))
current_ok = [provider for provider in providers if provider.check_available()]
# list of providers which are used by openvpn
openvpn_prov = [provider for provider in providers if provider.used_by_openvpn()]
shorewall_restart_needed = False
for provider in providers:
# we will check if a workable provider needs to be enabled by shorewall
if provider._available:
if not provider.enabled:
logger.warning("Enabling the available provider {}".format(provider.provider_name))
@@ -696,18 +739,8 @@ if __name__ == '__main__':
run('/usr/sbin/conntrack -F')
if provider.openvpn_master:
restart_openvpn()
# todo : check balance routing table. If an interface involved in default route is removed (ppp or tun)
# the entire default route entry is removed by the kernel.
# so if we can't find a route which refer to it in balance table, trigger a restart of shorewall to cleanup the situation...
if not shorewall_restart_needed and not provider.fallback:
(retcode, output) = run('ip route show table balance')
"""
default
nexthop via 185.16.51.9 realm 3 dev eth1 weight 1
nexthop dev tun2 weight 1
"""
balance = str(output).splitlines()
in_balance = False
for l in balance:
@@ -716,30 +749,37 @@ if __name__ == '__main__':
break
if not in_balance:
shorewall_restart_needed = True
logger.critical("Shorewall restart needed because provider {} is not in default balance route ".format(provider.provider_name))
logger.critical("Shorewall restart needed because provider {} is not in default balance route".format(
provider.provider_name))
run('/usr/sbin/shorewall restart && /usr/sbin/conntrack -F')
else:
if provider.enabled:
if current_ok and not provider.fallback:
logger.critical("Disabling the provider {} because {}".format(provider.provider_name,provider.status))
logger.critical("Disabling the provider {} because {}".format(
provider.provider_name, provider.status))
provider.disable()
else:
if not current_ok:
logger.critical("About to disable provider {} but will not because there are no other one".format(provider.provider_name))
logger.critical("About to disable provider {} but will not because there are no other one".format(
provider.provider_name))
else:
logger.critical("Not disabling fallback provider {}".format(provider.provider_name))
logger.info(' {}'.format(provider))
# Persist state and history
write_state_file(providers)
record_providers(providers)
# Purge old events once every 100 cycles (~every 100 min with default interval)
if cycle_count % 100 == 0:
purge_old_events(days=30)
signal.alarm(options.check_interval)
signal.pause()
#time.sleep(options.check_interval)
except Exception as e:
logger.critical(e)
#raise
finally:
remove_pidfile(monitor_pid_file)
@@ -754,6 +794,7 @@ if __name__ == '__main__':
print(provider)
if provider.used_by_openvpn():
print("This provider is used by Openvpn")
elif action == 'check-json':
result = []
if len(args) >= 2:
@@ -764,3 +805,4 @@ if __name__ == '__main__':
provider.check_available()
result.append(provider.as_dict())
print(jsondumps(result, indent=True))