[IMP] Support sqlite db to store status and history

This commit is contained in:
2026-04-01 15:25:06 +02:00
parent 9c71e8d493
commit 425080dfec
+313 -271
View File
@@ -18,14 +18,12 @@ import logging
import re import re
import time import time
import datetime import datetime
import json import json
import signal import signal
import sqlite3
from iniparse import RawConfigParser from iniparse import RawConfigParser
from optparse import OptionParser from optparse import OptionParser
#from distutils.spawn import find_executable
usage="""\ usage="""\
%prog -c configfile action %prog -c configfile action
@@ -37,9 +35,10 @@ action is either :
monitor : monitor in background all providers and enable/disable them monitor : monitor in background all providers and enable/disable them
check [all,<provider>] : check all or one provider and display reachability check [all,<provider>] : check all or one provider and display reachability
check-json [all,<provider>] : check providers and output state as json data check-json [all,<provider>] : check providers and output state as json data
status : display current state from state file
""" """
version = "0.0.1" version = "0.0.2"
# Command-line parser; `usage` and `version` are the module-level strings defined above.
parser = OptionParser(usage=usage, version="%prog " + version)
# The help text previously read "Config file full path", a copy-paste from the
# config-file option.  NOTE(review): the unit is not visible here, presumably
# seconds; confirm against the monitor loop.
parser.add_option("-i", "--check-interval", dest="check_interval", type=int, default=60,
                  help="Interval between two rounds of provider checks (default: %default)")
@@ -53,113 +52,142 @@ parser.add_option("-l","--loglevel", dest="loglevel", default='info', type='choi
REPORT = re.compile(r'\n(?P<transmitted>\d+)\s+packets transmitted,\s+(?P<received>\d+) received,\s+(?P<loss>\d+)%\s+packet loss') REPORT = re.compile(r'\n(?P<transmitted>\d+)\s+packets transmitted,\s+(?P<received>\d+) received,\s+(?P<loss>\d+)%\s+packet loss')
RTT = re.compile(r'rtt min/avg/max/mdev = (?P<min>[0-9.]+)/(?P<avg>[0-9.]+)/(?P<max>[0-9.]+)/(?P<mdev>[0-9.]+) ms') RTT = re.compile(r'rtt min/avg/max/mdev = (?P<min>[0-9.]+)/(?P<avg>[0-9.]+)/(?P<max>[0-9.]+)/(?P<mdev>[0-9.]+) ms')
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
BASE_DIR = '/opt/check_providers'
DB_PATH = os.path.join(BASE_DIR, 'check-providers.db')
STATE_FILE = os.path.join(BASE_DIR, 'check-providers-state.json')
MONITOR_PID_FILE = os.path.join(BASE_DIR, 'check-providers.pid')
def run(cmd,dry_run=False): # ---------------------------------------------------------------------------
# Database
# ---------------------------------------------------------------------------
def init_db(db_path=None):
    """Create the SQLite database, the ``events`` table and its indexes.

    Every statement uses ``IF NOT EXISTS``, so calling this against an
    already initialised database changes nothing.

    Args:
        db_path: path of the database file.  Defaults to the module-level
            ``DB_PATH``.  The parent directory is created when missing.
    """
    if db_path is None:
        db_path = DB_PATH
    # An empty dirname means "current directory": there is nothing to create
    # and os.makedirs('') would raise.
    db_dir = os.path.dirname(db_path)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success and rolls back on error, but it does
        # NOT close the connection, hence the surrounding try/finally.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS events (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    ts TEXT NOT NULL,
                    provider TEXT NOT NULL,
                    available INTEGER,
                    rtt REAL,
                    loss INTEGER,
                    status TEXT,
                    transition INTEGER DEFAULT 0
                )
            ''')
            conn.execute('CREATE INDEX IF NOT EXISTS idx_events_provider ON events(provider)')
            conn.execute('CREATE INDEX IF NOT EXISTS idx_events_ts ON events(ts)')
    finally:
        conn.close()
def purge_old_events(days=30, db_path=None):
    """Remove events older than `days` days.

    Args:
        days: age limit in days; rows with an older ``ts`` are deleted.
        db_path: path of the database file.  Defaults to the module-level
            ``DB_PATH``.
    """
    if db_path is None:
        db_path = DB_PATH
    # Events are stamped with datetime.datetime.now().isoformat(), i.e. local
    # time with a 'T' separator.  SQLite's datetime('now', ...) yields UTC with
    # a space separator, so comparing the two as text is skewed.  Build the
    # cutoff in the same representation the rows use instead.
    cutoff = (datetime.datetime.now() - datetime.timedelta(days=days)).isoformat()
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits/rolls back only; the connection is closed below.
        with conn:
            # Bound parameter rather than str.format-built SQL.
            conn.execute('DELETE FROM events WHERE ts < ?', (cutoff,))
    finally:
        conn.close()
def write_state_file(providers, state_file=None):
    """Write the current state of all providers to the JSON state file.

    The JSON is written to ``<state_file>.tmp`` and then moved over the
    target with os.replace, so readers never see a half-written file.

    Args:
        providers: iterable of objects exposing ``as_dict()``.
        state_file: destination path.  Defaults to the module-level
            ``STATE_FILE``.
    """
    if state_file is None:
        state_file = STATE_FILE
    # Serialise before touching the disk: if as_dict()/jsondumps() fails, no
    # truncated temp file is left behind.
    payload = jsondumps([p.as_dict() for p in providers], indent=True)
    tmp = state_file + '.tmp'
    try:
        with open(tmp, 'w') as f:
            f.write(payload)
        os.replace(tmp, state_file)  # atomic replace
    except Exception:
        # Do not leave a stale temp file next to the state file.
        if os.path.exists(tmp):
            os.unlink(tmp)
        raise
def record_providers(providers, db_path=None):
    """Insert one row per provider into the events table.

    All rows are written in a single transaction: either every provider is
    recorded or, if one ``record()`` call raises, none is.

    Args:
        providers: iterable of objects exposing ``record(conn)``.
        db_path: path of the database file.  Defaults to the module-level
            ``DB_PATH``.
    """
    if db_path is None:
        db_path = DB_PATH
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success / rolls back on error; it does not
        # close the connection, which is done in the finally clause.
        with conn:
            for provider in providers:
                provider.record(conn)
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def run(cmd, dry_run=False):
try: try:
logger.debug(' running {}'.format(cmd)) logger.debug(' running {}'.format(cmd))
if not dry_run: if not dry_run:
p = subprocess.check_output(cmd,shell=True,stderr=subprocess.STDOUT) p = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
logger.debug(' output : {}'.format(p)) logger.debug(' output : {}'.format(p))
return (0,p) return (0, p)
else: else:
print("DRYRUN : {}".format(cmd)) print("DRYRUN : {}".format(cmd))
return(0,"#### DRYRUN ### no output for {}".format(cmd)) return (0, "#### DRYRUN ### no output for {}".format(cmd))
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
return (e.returncode,e.output) return (e.returncode, e.output)
def default_json(o): def default_json(o):
if hasattr(o,'as_dict'): if hasattr(o, 'as_dict'):
return o.as_dict() return o.as_dict()
elif hasattr(o,'as_json'): elif hasattr(o, 'as_json'):
return o.as_json() return o.as_json()
elif isinstance(o,datetime.datetime): elif isinstance(o, datetime.datetime):
return o.isoformat() return o.isoformat()
else: else:
return u"{}".format(o) return u"{}".format(o)
def jsondumps(o,**kwargs):
"""extended json dump of o"""
return json.dumps(o,default=default_json,**kwargs)
def arping(device,target_ip,ping_count=3): def jsondumps(o, **kwargs):
# arping """Extended json dump of o."""
#root@gw-pironniere:/opt/check-providers# arping -i eth0 -c4 192.168.149.254 return json.dumps(o, default=default_json, **kwargs)
"""\
ARPING 192.168.149.254
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=0 time=229.674 usec
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=1 time=258.173 usec
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=2 time=251.468 usec
60 bytes from 00:0a:fa:24:18:f7 (192.168.149.254): index=3 time=252.305 usec
--- 192.168.149.254 statistics ---
4 packets transmitted, 4 packets received, 0% unanswered (0 extra) def arping(device, target_ip, ping_count=3):
"""
# iputils-arping
#root@gw-pironniere:/opt/check-providers# arping -c2 192.168.149.254
"""\
ARPING 192.168.149.254 from 192.168.149.184 eth0
Unicast reply from 192.168.149.254 [00:0A:FA:24:18:F7] 0.886ms
Unicast reply from 192.168.149.254 [00:0A:FA:24:18:F7] 0.777ms
Sent 2 probes (1 broadcast(s))
"""
ARPING1 = re.compile(r'bytes from (?P<mac>\S+).*time=(?P<rtt>[0-9.]*) (?P<unit>.*)') ARPING1 = re.compile(r'bytes from (?P<mac>\S+).*time=(?P<rtt>[0-9.]*) (?P<unit>.*)')
ARPING2 = re.compile(r'reply from.*\[(?P<mac>\S+)\]\s+(?P<rtt>[0-9.]*)(?P<unit>.*)') ARPING2 = re.compile(r'reply from.*\[(?P<mac>\S+)\]\s+(?P<rtt>[0-9.]*)(?P<unit>.*)')
# ARPING_PATH = find_executable('arping')
ARPING_PATH = "/usr/sbin/arping" ARPING_PATH = "/usr/sbin/arping"
if ARPING_PATH == None: if ARPING_PATH is None:
raise Exception('No arping command found') raise Exception('No arping command found')
elif "/usr/bin/arping" in ARPING_PATH: elif "/usr/bin/arping" in ARPING_PATH:
(returncode,output) = run('arping -c{ping_count} -I{device} {target_ip}'.format( (returncode, output) = run('arping -c{ping_count} -I{device} {target_ip}'.format(
ping_count = ping_count, ping_count=ping_count, device=device, target_ip=target_ip))
device = device,
target_ip = target_ip,
))
packets = [p.groupdict() for p in ARPING2.finditer(output.decode('utf-8'))] packets = [p.groupdict() for p in ARPING2.finditer(output.decode('utf-8'))]
elif "/usr/sbin/arping" in ARPING_PATH: elif "/usr/sbin/arping" in ARPING_PATH:
(returncode,output) = run('arping -c{ping_count} -i{device} {target_ip}'.format( (returncode, output) = run('arping -c{ping_count} -i{device} {target_ip}'.format(
ping_count = ping_count, ping_count=ping_count, device=device, target_ip=target_ip))
device = device,
target_ip = target_ip,
))
packets = [p.groupdict() for p in ARPING1.finditer(output.decode('utf-8'))] packets = [p.groupdict() for p in ARPING1.finditer(output.decode('utf-8'))]
result = {} result = {}
if packets: if packets:
result['mac'] = packets[-1]['mac'] result['mac'] = packets[-1]['mac']
result['rtt'] = packets[-1]['rtt']+packets[-1]['unit'] result['rtt'] = packets[-1]['rtt'] + packets[-1]['unit']
result['alive'] = len(packets)>0 result['alive'] = len(packets) > 0
else: else:
result['mac'] = None result['mac'] = None
result['rtt'] = None result['rtt'] = None
result['alive'] = False result['alive'] = False
return result return result
def openvpn_local_sockets():
    """Return the local sockets on which openvpn is listening.

    Parses ``netstat -lupnw`` lines such as::

        udp  0  0 192.168.1.254:1194  0.0.0.0:*  16919/openvpn

    Returns:
        list of ``(proto, local_ip, local_port)`` tuples of str; empty when
        no matching line is found.
    """
    (retcode, output) = run("/bin/netstat -lupnw | grep -E '(udp|tcp) .*/openvpn'")
    # run() returns the raw bytes of subprocess.check_output; splitting bytes
    # with a str separator (':') raises TypeError, so decode first.
    if isinstance(output, bytes):
        output = output.decode('utf-8', 'replace')
    result = []
    for line in output.splitlines():
        args = line.split()
        # Skip anything that does not look like a netstat socket line.
        if len(args) < 4 or ':' not in args[3]:
            continue
        proto = args[0]
        # rsplit keeps IPv6-style addresses (which contain ':') intact.
        (local_ip, local_port) = args[3].rsplit(':', 1)
        result.append((proto, local_ip, local_port))
    return result
def delete_conntrack(conn): def delete_conntrack(conn):
"""Remove conntrack entries matching the OpenVPN listening processes""" for (proto, ip, port) in conn:
for (proto,ip,port) in conn:
if ip != '0.0.0.0': if ip != '0.0.0.0':
run('/usr/sbin/conntrack -D -p {proto} -s {src} --sport={port}'.format(src=ip,proto=proto,port=port)) run('/usr/sbin/conntrack -D -p {proto} -s {src} --sport={port}'.format(src=ip, proto=proto, port=port))
else: else:
run('/usr/sbin/conntrack -D -p {proto} --sport={port}'.format(src=ip,proto=proto,port=port)) run('/usr/sbin/conntrack -D -p {proto} --sport={port}'.format(src=ip, proto=proto, port=port))
def restart_openvpn(): def restart_openvpn():
conn = openvpn_local_sockets() conn = openvpn_local_sockets()
@@ -169,25 +197,29 @@ def restart_openvpn():
print(run('/etc/init.d/openvpn start')) print(run('/etc/init.d/openvpn start'))
class Provider(object): # ---------------------------------------------------------------------------
def __init__(self,provider_name,device=None,gateway=None,target_ip=None,max_rtt=2000.0,max_loss=30,ping_count=10,ping_interval=0.5,timeout=1.5,led=None): # Provider
"""Parameters of an Internet provider as defined in Shorewall and availability limits # ---------------------------------------------------------------------------
"""
self.target_ip=target_ip
self.provider_name=provider_name
self.device=device
self.device_type=None
self.device_mac=None
self.last_ip=None
self._gateway=gateway class Provider(object):
def __init__(self, provider_name, device=None, gateway=None, target_ip=None,
max_rtt=2000.0, max_loss=30, ping_count=10, ping_interval=0.5,
timeout=1.5, led=None):
self.target_ip = target_ip
self.provider_name = provider_name
self.device = device
self.device_type = None
self.device_mac = None
self.last_ip = None
self._gateway = gateway
self.gateway_alive = None self.gateway_alive = None
self.gateway_rtt = None self.gateway_rtt = None
self.gateway_mac = None self.gateway_mac = None
self.max_rtt=max_rtt self.max_rtt = max_rtt
self.max_loss=max_loss self.max_loss = max_loss
self.ping_count = ping_count self.ping_count = ping_count
self.ping_interval = ping_interval self.ping_interval = ping_interval
self.timeout = timeout self.timeout = timeout
@@ -199,6 +231,8 @@ class Provider(object):
self.last_loss = None self.last_loss = None
self._available = None self._available = None
self._previous_available = None # for transition detection
self._state_since = None # datetime of last state change
self._link_states = [] self._link_states = []
self._link_status = 'UNKNOWN' self._link_status = 'UNKNOWN'
@@ -209,45 +243,60 @@ class Provider(object):
self.dry_run = False self.dry_run = False
def used_by_openvpn(self,proto='udp',port=1194): def record(self, conn):
(retcode,output) = run('conntrack -L -p {proto} --dport {port} -o extended | grep "={src}"'.format(proto=proto,src=self.last_ip,port=port)) """Insert current state into the events table. Marks up<->down transitions."""
""" transition = int(
conntrack v1.2.1 (conntrack-tools): 1 flow entries have been shown. self._previous_available != self._available
ipv4 2 udp 17 178 src=192.168.149.184 dst=80.13.55.10 sport=1194 dport=1194 src=80.13.55.10 dst=192.168.149.184 sport=1194 dport=1194 [ASSURED] mark=1 use=1 and self._previous_available is not None
""" )
if transition:
self._state_since = datetime.datetime.now()
logger.info('Transition detected for {}: {} -> {}'.format(
self.provider_name, self._previous_available, self._available))
conn.execute(
'''INSERT INTO events (ts, provider, available, rtt, loss, status, transition)
VALUES (?, ?, ?, ?, ?, ?, ?)''',
(
datetime.datetime.now().isoformat(),
self.provider_name,
int(self._available) if self._available is not None else None,
self.last_rtt,
self.last_loss,
self.status,
transition,
)
)
self._previous_available = self._available
def used_by_openvpn(self, proto='udp', port=1194):
(retcode, output) = run('conntrack -L -p {proto} --dport {port} -o extended | grep "={src}"'.format(
proto=proto, src=self.last_ip, port=port))
conn = output.splitlines() conn = output.splitlines()
for c in conn: for c in conn:
if "={src} ".format(src=self.last_ip) in c.decode('utf-8'): if "={src} ".format(src=self.last_ip) in c.decode('utf-8'):
return True return True
return False return False
def read_config(self,config_file): def read_config(self, config_file):
for attrib in ['target_ip','device','gateway']: for attrib in ['target_ip', 'device', 'gateway']:
if config_file.has_option(self.provider_name,attrib): if config_file.has_option(self.provider_name, attrib):
if attrib == 'gateway': if attrib == 'gateway':
setattr(self,'_gateway',config_file.get(self.provider_name,attrib)) setattr(self, '_gateway', config_file.get(self.provider_name, attrib))
else: else:
setattr(self,attrib,config_file.get(self.provider_name,attrib)) setattr(self, attrib, config_file.get(self.provider_name, attrib))
for attrib in ['max_rtt','timeout','ping_interval']: for attrib in ['max_rtt', 'timeout', 'ping_interval']:
if config_file.has_option(self.provider_name,attrib): if config_file.has_option(self.provider_name, attrib):
setattr(self,attrib,config_file.getfloat(self.provider_name,attrib)) setattr(self, attrib, config_file.getfloat(self.provider_name, attrib))
for attrib in ['max_loss','ping_count','led','openvpn_master','fallback']: for attrib in ['max_loss', 'ping_count', 'led', 'openvpn_master', 'fallback']:
if config_file.has_option(self.provider_name,attrib): if config_file.has_option(self.provider_name, attrib):
setattr(self,attrib,config_file.getint(self.provider_name,attrib)) setattr(self, attrib, config_file.getint(self.provider_name, attrib))
@property @property
def device_up(self): def device_up(self):
(retcode,output) = run('ip link show dev {device}'.format(device=self.device)) (retcode, output) = run('ip link show dev {device}'.format(device=self.device))
"""
4: eth2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast master br1 state DOWN mode DEFAULT qlen 1000
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN mode DEFAULT qlen 1000
4: eth2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast master br1 state DOWN mode DEFAULT qlen 1000
10: ppp3g: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN mode DEFAULT qlen 3
6: br1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT
"""
LINK = re.compile(r':\s+<(?P<link_states>.+)>.* state (?P<link_status>.+?)\s') LINK = re.compile(r':\s+<(?P<link_states>.+)>.* state (?P<link_status>.+?)\s')
link = LINK.search(output.decode('utf-8')) link = LINK.search(output.decode('utf-8'))
if link: if link:
@@ -258,24 +307,27 @@ ipv4 2 udp 17 178 src=192.168.149.184 dst=80.13.55.10 sport=1194 dport=1194 src=
return None return None
def check_test_route(self): def check_test_route(self):
"""Test if there is a route to target_ip through the gateway or interface, and add it if not present"""
if self.target_ip: if self.target_ip:
(retcode,route) = run('/sbin/ip route show {target_ip}'.format(target_ip=self.target_ip)) (retcode, route) = run('/sbin/ip route show {target_ip}'.format(target_ip=self.target_ip))
if self.gateway: if self.gateway:
if not "{target_ip} via {gateway}".format(target_ip=self.target_ip,gateway=self.gateway) in route.decode('utf-8'): if not "{target_ip} via {gateway}".format(target_ip=self.target_ip, gateway=self.gateway) in route.decode('utf-8'):
logger.debug(run('/sbin/ip route del {target_ip}'.format(target_ip=self.target_ip),dry_run=self.dry_run)[1]) logger.debug(run('/sbin/ip route del {target_ip}'.format(target_ip=self.target_ip), dry_run=self.dry_run)[1])
logger.warning('No route for {target_ip} via {gateway}, adding one'.format(target_ip=self.target_ip,gateway=self.gateway)) logger.warning('No route for {target_ip} via {gateway}, adding one'.format(
logger.debug(run('/sbin/ip route add {target_ip} via {gateway}'.format(target_ip=self.target_ip,gateway=self.gateway),dry_run=self.dry_run)[1]) target_ip=self.target_ip, gateway=self.gateway))
logger.debug(run('/sbin/ip route add {target_ip} via {gateway}'.format(
target_ip=self.target_ip, gateway=self.gateway), dry_run=self.dry_run)[1])
elif self.device: elif self.device:
if not " {} ".format(self.device) in route.decode('utf-8'): if not " {} ".format(self.device) in route.decode('utf-8'):
logger.warning('No route for {target_ip} through {device}, adding one'.format(target_ip=self.target_ip,device=self.device)) logger.warning('No route for {target_ip} through {device}, adding one'.format(
logger.debug(run('/sbin/ip route add {target_ip} dev {device}'.format(target_ip=self.target_ip,device=self.device),dry_run=self.dry_run)[1]) target_ip=self.target_ip, device=self.device))
logger.debug(run('/sbin/ip route add {target_ip} dev {device}'.format(
target_ip=self.target_ip, device=self.device), dry_run=self.dry_run)[1])
else: else:
logger.critical('No gateway for {target_ip}'.format(target_ip=self.target_ip)) logger.critical('No gateway for {target_ip}'.format(target_ip=self.target_ip))
def check_gateway(self): def check_gateway(self):
if self.gateway: if self.gateway:
result = arping(device=self.device,target_ip=self.gateway) result = arping(device=self.device, target_ip=self.gateway)
self.gateway_mac = result['mac'] self.gateway_mac = result['mac']
self.gateway_rtt = result['rtt'] self.gateway_rtt = result['rtt']
self.gateway_alive = result['alive'] self.gateway_alive = result['alive']
@@ -286,9 +338,6 @@ ipv4 2 udp 17 178 src=192.168.149.184 dst=80.13.55.10 sport=1194 dport=1194 src=
return self.gateway_alive return self.gateway_alive
def check_available(self): def check_available(self):
"""ping the target and change available property based on max_rtt and max_loss
available == True if actual rtt and loss are below the max_rtt and max_loss
"""
self._available = None self._available = None
self.last_check_time = datetime.datetime.now() self.last_check_time = datetime.datetime.now()
if self.device_up: if self.device_up:
@@ -299,11 +348,11 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
ping_ip = self.target_ip ping_ip = self.target_ip
if ping_ip: if ping_ip:
self.check_test_route() self.check_test_route()
(returncode,output) = run('/bin/ping -q -n -c{ping_count:n} -W{timeout:n} -i{ping_interval} -I{device} {target_ip}'.format( (returncode, output) = run('/bin/ping -q -n -c{ping_count:n} -W{timeout:n} -i{ping_interval} -I{device} {target_ip}'.format(
ping_count = self.ping_count, ping_count=self.ping_count,
timeout = self.timeout, timeout=self.timeout,
device = self.device, device=self.device,
target_ip = ping_ip, target_ip=ping_ip,
ping_interval=self.ping_interval, ping_interval=self.ping_interval,
)) ))
if returncode == 0: if returncode == 0:
@@ -318,15 +367,15 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
else: else:
self.last_rtt = None self.last_rtt = None
self._available = report and rtt and\ self._available = report and rtt and \
self.last_loss<=self.max_loss and\ self.last_loss <= self.max_loss and \
self.last_rtt<=self.max_rtt self.last_rtt <= self.max_rtt
if self._available: if self._available:
self.status='OK' self.status = 'OK'
elif self.last_loss>self.max_loss: elif self.last_loss > self.max_loss:
self.status='Too much loss {}%'.format(self.last_loss) self.status = 'Too much loss {}%'.format(self.last_loss)
elif self.last_rtt>self.max_rtt: elif self.last_rtt > self.max_rtt:
self.status='Too long RTT {}ms'.format(self.last_rtt) self.status = 'Too long RTT {}ms'.format(self.last_rtt)
else: else:
self.status = 'ping test failed : {}'.format(output.decode('utf-8')) self.status = 'ping test failed : {}'.format(output.decode('utf-8'))
else: else:
@@ -339,8 +388,7 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
return self._available return self._available
def check_local_ip(self): def check_local_ip(self):
"""Get local ip of device, set ip, device_mac and device_type""" (retcode, output) = run('ip addr show dev {device}'.format(device=self.device))
(retcode,output) = run('ip addr show dev {device}'.format(device=self.device))
IPV4ADDR = re.compile(r'\sinet\s+(?P<ipv4>\d+.\d+.\d+.\d+)[/\s]') IPV4ADDR = re.compile(r'\sinet\s+(?P<ipv4>\d+.\d+.\d+.\d+)[/\s]')
MACADDR = re.compile(r'link/(?P<type>\S+)(\s(?P<mac>\S+))?') MACADDR = re.compile(r'link/(?P<type>\S+)(\s(?P<mac>\S+))?')
ipaddr = IPV4ADDR.search(output.decode('utf-8')) ipaddr = IPV4ADDR.search(output.decode('utf-8'))
@@ -360,18 +408,12 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
@property @property
def gateway(self): def gateway(self):
if self._gateway: if self._gateway:
#ppp, shorewall notation for no gateway
if self._gateway == '-': if self._gateway == '-':
return None return None
else: else:
return self._gateway return self._gateway
else: else:
#from dhcp (retcode, output) = run('ip route list table {}'.format(self.provider_name))
(retcode,output) = run('ip route list table {}'.format(self.provider_name))
"""root@htouv:~# ip route list dev eth1
88.163.76.0/24 proto kernel scope link src 88.163.76.120
88.163.76.254 scope link src 88.163.76.120
"""
GW = re.compile(r'default via (?P<gateway>\d+.\d+.\d+.\d+)\s+') GW = re.compile(r'default via (?P<gateway>\d+.\d+.\d+.\d+)\s+')
gw = GW.search(str(output)) gw = GW.search(str(output))
if gw: if gw:
@@ -382,16 +424,16 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
return None return None
@gateway.setter @gateway.setter
def gateway_set(self,value): def gateway_set(self, value):
self._gateway = value self._gateway = value
@property @property
def enabled(self): def enabled(self):
try: try:
(retcode,routes) = run('ip route list table {}'.format(self.provider_name)) (retcode, routes) = run('ip route list table {}'.format(self.provider_name))
if retcode == 0: if retcode == 0:
routes = str(routes).splitlines() routes = str(routes).splitlines()
self.last_enabled = len(routes)>0 self.last_enabled = len(routes) > 0
else: else:
self.last_enabled = False self.last_enabled = False
return self.last_enabled return self.last_enabled
@@ -400,43 +442,33 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
return self.last_enabled return self.last_enabled
def led_off(self): def led_off(self):
# led_path = '/sys/class/leds/alix:{}'.format(self.led)
print('led off')
led_path = r'/sys/class/leds/apu:green:{}'.format(self.led) led_path = r'/sys/class/leds/apu:green:{}'.format(self.led)
if os.path.isdir(led_path): if os.path.isdir(led_path):
with open(os.path.join(led_path,'brightness'),'wb') as f: with open(os.path.join(led_path, 'brightness'), 'wb') as f:
f.write(bytes('0',encoding='utf-8')) f.write(bytes('0', encoding='utf-8'))
with open(os.path.join(led_path,'trigger'),'wb') as f: with open(os.path.join(led_path, 'trigger'), 'wb') as f:
f.write(bytes('none',encoding='utf-8')) f.write(bytes('none', encoding='utf-8'))
def led_on(self): def led_on(self):
# led_path = '/sys/class/leds/alix:{}'.format(self.led)
led_path = r'/sys/class/leds/apu:green:{}'.format(self.led) led_path = r'/sys/class/leds/apu:green:{}'.format(self.led)
if os.path.isdir(led_path): if os.path.isdir(led_path):
with open(os.path.join(led_path,'trigger'),'wb') as f: with open(os.path.join(led_path, 'trigger'), 'wb') as f:
f.write(bytes('none',encoding='utf-8')) f.write(bytes('none', encoding='utf-8'))
with open(os.path.join(led_path,'brightness'),'wb') as f: with open(os.path.join(led_path, 'brightness'), 'wb') as f:
f.write(bytes('1', encoding='utf-8')) f.write(bytes('1', encoding='utf-8'))
def led_blink(self): def led_blink(self):
# led_path = '/sys/class/leds/alix:{}'.format(self.led)
print('led blink')
led_path = r'/sys/class/leds/apu:green:{}'.format(self.led) led_path = r'/sys/class/leds/apu:green:{}'.format(self.led)
if os.path.isdir(led_path): if os.path.isdir(led_path):
with open(os.path.join(led_path,'brightness'),'wb') as f: with open(os.path.join(led_path, 'brightness'), 'wb') as f:
f.write(bytes('1',encoding='utf-8')) f.write(bytes('1', encoding='utf-8'))
with open(os.path.join(led_path,'trigger'),'wb') as f: with open(os.path.join(led_path, 'trigger'), 'wb') as f:
f.write(bytes('heartbeat',encoding='utf-8')) f.write(bytes('heartbeat', encoding='utf-8'))
def update_leds(self): def update_leds(self):
""""""
# /sys/class/leds/alix\:1/trigger
#none backlight default-on [heartbeat] timer
if self.enabled: if self.enabled:
if self._available: if self._available:
self.led_on() self.led_on()
elif self.device_up:
self.led_off()
else: else:
self.led_off() self.led_off()
else: else:
@@ -446,13 +478,12 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
if not self.enabled: if not self.enabled:
logger.debug('Enable {}'.format(self.provider_name)) logger.debug('Enable {}'.format(self.provider_name))
try: try:
print(run('/var/lib/shorewall/firewall enable {}'.format(self.provider_name),dry_run=self.dry_run)) print(run('/var/lib/shorewall/firewall enable {}'.format(self.provider_name), dry_run=self.dry_run))
except Exception as e: except Exception as e:
logger.info('Retrying to disable/enable provider because %s'% e) logger.info('Retrying to disable/enable provider because %s' % e)
print(run('/var/lib/shorewall/firewall restart',dry_run=self.dry_run)) print(run('/var/lib/shorewall/firewall restart', dry_run=self.dry_run))
# here check the connectivity.... else rollback
self.update_leds() self.update_leds()
print('Routes after enabling provider %s\n%s'%(self.provider_name,run('/sbin/shorewall show routing'))) print('Routes after enabling provider %s\n%s' % (self.provider_name, run('/sbin/shorewall show routing')))
else: else:
logger.debug('{} already enabled'.format(self.device)) logger.debug('{} already enabled'.format(self.device))
@@ -460,31 +491,26 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
if self.enabled: if self.enabled:
openvpn = self.used_by_openvpn() openvpn = self.used_by_openvpn()
logger.debug('Disable {}'.format(self.provider_name)) logger.debug('Disable {}'.format(self.provider_name))
# restart openvpn if it was running on this provider
if openvpn: if openvpn:
logger.info('openvpn was running here, stopping openvpn') logger.info('openvpn was running here, stopping openvpn')
print(run('/etc/init.d/openvpn stop',dry_run=self.dry_run)) print(run('/etc/init.d/openvpn stop', dry_run=self.dry_run))
print(run('/var/lib/shorewall/firewall disable {}'.format(self.provider_name),dry_run=self.dry_run)) print(run('/var/lib/shorewall/firewall disable {}'.format(self.provider_name), dry_run=self.dry_run))
# remove connections
if self.last_ip: if self.last_ip:
logger.info('removing conntrack entries') logger.info('removing conntrack entries')
logger.info(run('/usr/sbin/conntrack -D -s {src}'.format(src=self.last_ip),dry_run=self.dry_run)[1]) logger.info(run('/usr/sbin/conntrack -D -s {src}'.format(src=self.last_ip), dry_run=self.dry_run)[1])
logger.info(run('/usr/sbin/conntrack -D -q {src}'.format(src=self.last_ip),dry_run=self.dry_run)[1]) logger.info(run('/usr/sbin/conntrack -D -q {src}'.format(src=self.last_ip), dry_run=self.dry_run)[1])
# be sure there is no default gw in main table so that fallback provider can be reached
self.remove_default_gw() self.remove_default_gw()
# restart openvpn if it was running on this provider
if openvpn: if openvpn:
logger.info('openvpn was running here, restarting openvpn') logger.info('openvpn was running here, restarting openvpn')
print(run('/etc/init.d/openvpn start',dry_run=self.dry_run)) print(run('/etc/init.d/openvpn start', dry_run=self.dry_run))
self.update_leds() self.update_leds()
print('Routes after provider %s disabling\n%s'%(self.provider_name,run('/sbin/shorewall show routing'))) print('Routes after provider %s disabling\n%s' % (self.provider_name, run('/sbin/shorewall show routing')))
def remove_default_gw(self): def remove_default_gw(self):
"""Remove default route which could have been added in main routing table and will prevent fallback interface from taking over""" (retcode, routes) = run('ip route list table main dev {}'.format(self.device))
(retcode,routes) = run('ip route list table main dev {}'.format(self.device))
if retcode == 0: if retcode == 0:
if 'default ' in str(routes): if 'default ' in str(routes):
print(run('ip route del default table main dev {}'.format(self.device),dry_run=self.dry_run)) print(run('ip route del default table main dev {}'.format(self.device), dry_run=self.dry_run))
def __str__(self): def __str__(self):
def get_available(en): def get_available(en):
@@ -500,59 +526,63 @@ available == True if actual rtt and loss are below the max_rtt and max_loss
provider=self.provider_name, provider=self.provider_name,
device=self.device, device=self.device,
target_ip=self.target_ip, target_ip=self.target_ip,
loss = self.last_loss, loss=self.last_loss,
rtt = self.last_rtt, rtt=self.last_rtt,
local_ip = self.last_ip, local_ip=self.last_ip,
gw = self.gateway or "-", gw=self.gateway or "-",
status = self.status, status=self.status,
) )
def as_dict(self): def as_dict(self):
return dict( return dict(
target_ip = self.target_ip, target_ip=self.target_ip,
provider_name = self.provider_name, provider_name=self.provider_name,
device = self.device, device=self.device,
gateway = self._gateway, gateway=self._gateway,
max_rtt = self.max_rtt, max_rtt=self.max_rtt,
max_loss = self.max_loss, max_loss=self.max_loss,
ping_count = self.ping_count, ping_count=self.ping_count,
ping_interval = self.ping_interval, ping_interval=self.ping_interval,
ping_timeout = self.timeout, ping_timeout=self.timeout,
last_rtt = self.last_rtt, last_rtt=self.last_rtt,
last_loss = self.last_loss, last_loss=self.last_loss,
available = self._available, available=self._available,
link_states = self._link_states, link_states=self._link_states,
link_status = self._link_status, link_status=self._link_status,
led = self.led, led=self.led,
status = self.status, status=self.status,
last_check_time = self.last_check_time, last_check_time=self.last_check_time,
last_ip = self.last_ip, last_ip=self.last_ip,
device_mac = self.device_mac, device_mac=self.device_mac,
device_type = self.device_type, device_type=self.device_type,
gateway_alive = self.gateway_alive, gateway_alive=self.gateway_alive,
gateway_mac = self.gateway_mac, gateway_mac=self.gateway_mac,
gateway_rtt = self.gateway_rtt, gateway_rtt=self.gateway_rtt,
enabled = self.last_enabled, enabled=self.last_enabled,
state_since=self._state_since,
) )
# ---------------------------------------------------------------------------
# Config / pid helpers
# ---------------------------------------------------------------------------
def read_config(filename, providers):
    """Reload ``providers`` in place from the ini file ``filename``.

    The list is emptied and then refilled with one ``Provider`` per section
    of the file, each one initialised through ``Provider.read_config``.

    Args:
        filename: path of the ini configuration file.
        providers: list to refill; it is mutated, never rebound, so a
            caller holding a reference to it sees the new content.

    Returns:
        None.
    """
    cp = RawConfigParser()
    cp.read(filename)
    # Drop the previous content while keeping the caller's list object.
    providers.clear()
    for provider_name in cp.sections():
        provider = Provider(provider_name)
        provider.read_config(cp)
        providers.append(provider)
def is_pid_running(pidfile):
    """Return the pid stored in ``pidfile`` if that process is running.

    A process counts as running when ``/proc/<pid>`` is a directory. A pid
    file that is empty, is not a plain decimal number, or names a process
    without a ``/proc`` entry is considered stale and is deleted.

    Args:
        pidfile: path of the pid file to inspect.

    Returns:
        int | None: the running pid, or ``None`` when the file is missing
        or stale.
    """
    if not os.path.isfile(pidfile):
        return None
    with open(pidfile, 'rb') as f:
        raw = f.read().strip()
    # Accept only decimal digits: other text (e.g. b"self") could match a
    # real /proc entry and then make int() raise ValueError.
    if raw.isdigit():
        pid = int(raw)
        if os.path.isdir("/proc/{}".format(pid)):
            return pid
    # Stale or unreadable content: clean up so the next start is not blocked.
    os.unlink(pidfile)
    return None
def write_pidfile(pidfile, pid=None):
    """Record ``pid`` in ``pidfile``, refusing to overwrite a live owner.

    Args:
        pidfile: path of the pid file to write.
        pid: pid to record; defaults to the current process (``os.getpid()``).

    Raises:
        Exception: if ``pidfile`` already names a different running process.
    """
    if pid is None:
        pid = os.getpid()
    oldpid = is_pid_running(pidfile)
    if oldpid and oldpid != pid:
        raise Exception('There is already a running process {} for the pid file {}'.format(oldpid, pidfile))
    # A bare file name has an empty dirname and os.makedirs('') raises
    # FileNotFoundError, so only create the parent when there is one.
    piddir = os.path.dirname(pidfile)
    if piddir:
        os.makedirs(piddir, exist_ok=True)
    with open(pidfile, "wb") as f:
        # Stored as the decimal text of the pid, not as raw bytes(pid).
        f.write(bytes(str(pid), 'utf-8'))
def remove_pidfile(pidfile):
    """Delete ``pidfile`` if it exists as a regular file; otherwise do nothing.

    Args:
        pidfile: path of the pid file to remove.

    Returns:
        None.
    """
    if os.path.isfile(pidfile):
        try:
            os.unlink(pidfile)
        except FileNotFoundError:
            # Removed by someone else between the check and the unlink
            # (the SIGTERM handler and the monitor's ``finally`` both call
            # this); the goal is already reached.
            pass
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == '__main__': if __name__ == '__main__':
(options,args)=parser.parse_args() (options, args) = parser.parse_args()
if len(args) < 1: if len(args) < 1:
print("ERROR : You must provide one action to perform") print("ERROR : You must provide one action to perform")
@@ -587,41 +620,36 @@ if __name__ == '__main__':
sys.exit(2) sys.exit(2)
action = args[0] action = args[0]
config_file =options.config config_file = options.config
dry_run = options.dry_run dry_run = options.dry_run
verbose = options.verbose verbose = options.verbose
loglevel = options.loglevel loglevel = options.loglevel
monitor_pid_file = '/var/run/check-providers.pid' monitor_pid_file = MONITOR_PID_FILE
current_pid = os.getpid() current_pid = os.getpid()
# setup Logger # setup Logger
logger = logging.getLogger() logger = logging.getLogger()
if options.logfile: if options.logfile:
hdlr = logging.FileHandler(filename=options.logfile,encoding='utf8') hdlr = logging.FileHandler(filename=options.logfile, encoding='utf8')
hdlr.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) hdlr.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
logger.addHandler(hdlr) logger.addHandler(hdlr)
else: else:
hdlr = logging.StreamHandler() hdlr = logging.StreamHandler()
logger.addHandler(hdlr) logger.addHandler(hdlr)
# set loglevel if loglevel in ('debug', 'warning', 'info', 'error', 'critical'):
if loglevel in ('debug','warning','info','error','critical'):
numeric_level = getattr(logging, loglevel.upper(), None) numeric_level = getattr(logging, loglevel.upper(), None)
if not isinstance(numeric_level, int): if not isinstance(numeric_level, int):
raise ValueError('Invalid log level: %s' % loglevel) raise ValueError('Invalid log level: %s' % loglevel)
logger.setLevel(numeric_level) logger.setLevel(numeric_level)
# Config file
if not os.path.isfile(config_file): if not os.path.isfile(config_file):
logger.error("Error : could not find file : " + config_file + ", please check the path") logger.error("Error : could not find file : " + config_file + ", please check the path")
logger.debug("Using " + config_file + " config file") logger.debug("Using " + config_file + " config file")
#adsl = Provider('ADSL',device='eth0',target_ip='185.16.48.54',gateway='192.168.149.254')
#gsm = Provider('GSM',device='ppp3g',target_ip='185.16.48.55',max_loss=40,max_rtt=2000,ping_count=20)
providers = [] providers = []
read_config(config_file,providers) read_config(config_file, providers)
if options.ping_count: if options.ping_count:
for provider in providers: for provider in providers:
@@ -632,63 +660,78 @@ if __name__ == '__main__':
for provider in providers: for provider in providers:
provider.dry_run = options.dry_run provider.dry_run = options.dry_run
# -----------------------------------------------------------------------
# Actions
# -----------------------------------------------------------------------
if action == 'stop': if action == 'stop':
monitor_pid = is_pid_running(monitor_pid_file) monitor_pid = is_pid_running(monitor_pid_file)
if monitor_pid: if monitor_pid:
# wakeup current monitor...
logger.info('Sending a TERM signal to running monitor process {}'.format(monitor_pid)) logger.info('Sending a TERM signal to running monitor process {}'.format(monitor_pid))
os.kill(monitor_pid,signal.SIGTERM) os.kill(monitor_pid, signal.SIGTERM)
sys.exit(0) sys.exit(0)
else: else:
logger.warning('No running monitoring found') logger.warning('No running monitoring found')
sys.exit(0) sys.exit(0)
if action == 'trigger': elif action == 'trigger':
monitor_pid = is_pid_running(monitor_pid_file) monitor_pid = is_pid_running(monitor_pid_file)
if monitor_pid: if monitor_pid:
# wakeup current monitor...
logger.info('Sending a wakeup signal to running monitor process {}'.format(monitor_pid)) logger.info('Sending a wakeup signal to running monitor process {}'.format(monitor_pid))
os.kill(monitor_pid,signal.SIGHUP) os.kill(monitor_pid, signal.SIGHUP)
sys.exit(0) sys.exit(0)
else: else:
logger.critical('No running monitoring found') logger.critical('No running monitoring found')
sys.exit(1) sys.exit(1)
if action == 'monitor': elif action == 'status':
try:
with open(STATE_FILE) as f:
print(f.read())
except FileNotFoundError:
print(jsondumps({'error': 'No state file found, is monitor running?'}))
sys.exit(0)
elif action == 'monitor':
monitor_pid = is_pid_running(monitor_pid_file) monitor_pid = is_pid_running(monitor_pid_file)
if monitor_pid: if monitor_pid:
# wakeup current monitor...
logger.info('Sending a wakeup signal to running monitor process {}'.format(monitor_pid)) logger.info('Sending a wakeup signal to running monitor process {}'.format(monitor_pid))
os.kill(monitor_pid,signal.SIGHUP) os.kill(monitor_pid, signal.SIGHUP)
sys.exit(0) sys.exit(0)
else: else:
init_db()
try: try:
write_pidfile(monitor_pid_file) write_pidfile(monitor_pid_file)
cycle_count = 0
def handler(signum,frame): def handler(signum, frame):
global providers global providers
logger.info('Wake up by signal {}'.format(signum)) logger.info('Wake up by signal {}'.format(signum))
if signum == signal.SIGHUP: if signum == signal.SIGHUP:
logger.info(jsondumps(providers,indent=True)) logger.info(jsondumps(providers, indent=True))
elif signum == signal.SIGUSR1:
write_state_file(providers)
logger.info('State file updated on SIGUSR1')
elif signum == signal.SIGTERM: elif signum == signal.SIGTERM:
logger.info('Received kill, closing') logger.info('Received kill, closing')
remove_pidfile(monitor_pid_file) remove_pidfile(monitor_pid_file)
sys.exit(0) sys.exit(0)
# Set the signal handler and a alarm
signal.signal(signal.SIGALRM, handler) signal.signal(signal.SIGALRM, handler)
signal.signal(signal.SIGHUP, handler) signal.signal(signal.SIGHUP, handler)
signal.signal(signal.SIGTERM, handler) signal.signal(signal.SIGTERM, handler)
signal.signal(signal.SIGUSR1, handler)
while True: while True:
try: try:
logger.info('Checking providers {}:'.format(','.join([provider.provider_name for provider in providers]))) cycle_count += 1
current_ok = [ provider for provider in providers if provider.check_available() ] logger.info('Checking providers {}:'.format(
# list of providers which are used by openvpn ','.join([provider.provider_name for provider in providers])))
openvpn_prov = [ provider for provider in providers if provider.used_by_openvpn() ] current_ok = [provider for provider in providers if provider.check_available()]
openvpn_prov = [provider for provider in providers if provider.used_by_openvpn()]
shorewall_restart_needed = False shorewall_restart_needed = False
for provider in providers: for provider in providers:
# we will check if a workable provider needs to be enabled by shorewall
if provider._available: if provider._available:
if not provider.enabled: if not provider.enabled:
logger.warning("Enabling the available provider {}".format(provider.provider_name)) logger.warning("Enabling the available provider {}".format(provider.provider_name))
@@ -696,56 +739,53 @@ if __name__ == '__main__':
run('/usr/sbin/conntrack -F') run('/usr/sbin/conntrack -F')
if provider.openvpn_master: if provider.openvpn_master:
restart_openvpn() restart_openvpn()
# todo : check balance routing table. If an interface involved in default route is removed (ppp or tun)
# the entire default route entry is removed by the kernel.
# so if we can't find a route which refer to it in balance table, trigger a restart of shorewall to cleanup the situation...
if not shorewall_restart_needed and not provider.fallback: if not shorewall_restart_needed and not provider.fallback:
(retcode,output) = run('ip route show table balance') (retcode, output) = run('ip route show table balance')
"""
default
nexthop via 185.16.51.9 realm 3 dev eth1 weight 1
nexthop dev tun2 weight 1
"""
balance = str(output).splitlines() balance = str(output).splitlines()
in_balance = False in_balance = False
for l in balance: for l in balance:
if provider.gateway in l.split(' ') or provider.device in l.split(' '): if provider.gateway in l.split(' ') or provider.device in l.split(' '):
in_balance= True in_balance = True
break break
if not in_balance: if not in_balance:
shorewall_restart_needed = True shorewall_restart_needed = True
logger.critical("Shorewall restart needed because provider {} is not in default balance route ".format(provider.provider_name)) logger.critical("Shorewall restart needed because provider {} is not in default balance route".format(
provider.provider_name))
run('/usr/sbin/shorewall restart && /usr/sbin/conntrack -F') run('/usr/sbin/shorewall restart && /usr/sbin/conntrack -F')
else: else:
if provider.enabled: if provider.enabled:
if current_ok and not provider.fallback: if current_ok and not provider.fallback:
logger.critical("Disabling the provider {} because {}".format(provider.provider_name,provider.status)) logger.critical("Disabling the provider {} because {}".format(
provider.provider_name, provider.status))
provider.disable() provider.disable()
else: else:
if not current_ok: if not current_ok:
logger.critical("About to disable provider {} but will not because there are no other one".format(provider.provider_name)) logger.critical("About to disable provider {} but will not because there are no other one".format(
provider.provider_name))
else: else:
logger.critical("Not disabling fallback provider {}".format(provider.provider_name)) logger.critical("Not disabling fallback provider {}".format(provider.provider_name))
logger.info(' {}'.format(provider)) logger.info(' {}'.format(provider))
# Persist state and history
write_state_file(providers)
record_providers(providers)
# Purge old events once every 100 cycles (~every 100 min with default interval)
if cycle_count % 100 == 0:
purge_old_events(days=30)
signal.alarm(options.check_interval) signal.alarm(options.check_interval)
signal.pause() signal.pause()
#time.sleep(options.check_interval)
except Exception as e: except Exception as e:
logger.critical(e) logger.critical(e)
#raise
finally: finally:
remove_pidfile(monitor_pid_file) remove_pidfile(monitor_pid_file)
elif action == 'check': elif action == 'check':
if len(args) >= 2: if len(args) >= 2:
selproviders = [ provider for provider in providers if provider.provider_name in args[1:]] selproviders = [provider for provider in providers if provider.provider_name in args[1:]]
else: else:
selproviders = providers selproviders = providers
for provider in selproviders: for provider in selproviders:
@@ -754,13 +794,15 @@ if __name__ == '__main__':
print(provider) print(provider)
if provider.used_by_openvpn(): if provider.used_by_openvpn():
print("This provider is used by Openvpn") print("This provider is used by Openvpn")
elif action == 'check-json': elif action == 'check-json':
result = [] result = []
if len(args) >= 2: if len(args) >= 2:
selproviders = [ provider for provider in providers if provider.provider_name in args[1:]] selproviders = [provider for provider in providers if provider.provider_name in args[1:]]
else: else:
selproviders = providers selproviders = providers
for provider in selproviders: for provider in selproviders:
provider.check_available() provider.check_available()
result.append(provider.as_dict()) result.append(provider.as_dict())
print(jsondumps(result,indent=True)) print(jsondumps(result, indent=True))