discourse/script/cache_critical_dns
Sam 0739c3b1d1 DEV: this introduces a script capable of caching critical DNS locally
This is useful for cases where you want to add resiliency to DNS lookups
for redis and postgres, so they will continue to work even if there is
a DNS outage
2018-11-22 18:46:59 +11:00

181 lines
3.6 KiB
Ruby
Executable File

#!/usr/bin/env ruby
require 'ipaddr'
require 'resolv'
require 'socket'
require 'time'
require 'timeout'
# Hosts file that is read and rewritten with the cached addresses.
HOSTS_PATH = "/etc/hosts".freeze

# Names of the environment variables holding the hosts whose DNS results
# are cached. Frozen so the list cannot be mutated by accident at runtime.
CRITICAL_HOST_ENV_VARS = %w[
  DISCOURSE_DB_HOST
  DISCOURSE_DB_BACKUP_HOST
  DISCOURSE_REDIS_HOST
  DISCOURSE_REDIS_SLAVE_HOST
].freeze
# Writes +msg+ to STDERR, prefixed with the current time in ISO 8601 format.
def log(msg)
  timestamp = Time.now.iso8601
  STDERR.puts("#{timestamp}: #{msg}")
end
# Reports an error message. Currently a plain delegate to +log+, so errors
# go to the same stream and use the same format as regular log lines.
def error(msg)
  log msg
end
# Returns new hosts-file content in which the entries for +name+ are replaced.
#
# hosts - String, current content of the hosts file.
# name  - String, hostname whose entries are replaced.
# ips   - Array of address strings to emit for +name+.
#
# Every line is stripped of surrounding whitespace. Non-comment lines whose
# second whitespace-separated field equals +name+ are dropped; comment lines
# (starting with '#') are always kept. One auto-generated line per address is
# appended at the end. Every emitted line is newline-terminated so that text
# appended to the file later does not get glued onto the trailing
# "# AUTO GENERATED" comment of the last entry.
def swap_address(hosts, name, ips)
  kept = hosts.split("\n").map(&:strip).reject do |line|
    next false if line.start_with?('#')

    # Only the first name after the address is compared.
    _, hostname = line.split(/\s+/)
    hostname == name
  end

  stamp = Time.now.iso8601
  generated = ips.map { |ip| "#{ip} #{name} # AUTO GENERATED: #{stamp}" }

  (kept + generated).map { |line| "#{line}\n" }.join
end
# Resolves the host stored in the environment variable +name+.
#
# dns  - resolver object responding to +getresources(host, type)+.
# name - String, name of the environment variable holding the hostname.
#
# Returns an Array of address strings: all A records first, then all AAAA
# records.
def hosts_entries(dns, name)
  host = ENV[name]
  record_types = [Resolv::DNS::Resource::IN::A, Resolv::DNS::Resource::IN::AAAA]

  records = record_types.flat_map { |type| dns.getresources(host, type) }
  records.map { |record| record.address.to_s }
end
# Sends one counter sample to the metrics collector listening on
# localhost:9405 by POSTing a JSON document to /send-metrics over a raw
# TCP socket.
#
# name        - metric name.
# description - metric description.
# labels      - Hash of label name => value, or nil for no labels.
# value       - numeric value, interpolated unquoted into the JSON.
#
# Never raises StandardError: any failure is reported through +error+.
# The socket is always closed, including when the request fails part-way,
# and an empty response is reported as a failed report rather than
# blowing up on a nil status line.
def send_counter(name, description, labels, value)
  host = "localhost"
  port = 9405

  # NOTE: values are interpolated without JSON escaping, so they must not
  # contain quotes or backslashes.
  labels_json = (labels || {}).map { |k, v| "\"#{k}\": \"#{v}\"" }.join(",")

  json = <<~JSON
    {
      "_type": "Custom",
      "type": "Counter",
      "name": "#{name}",
      "description": "#{description}",
      "labels": { #{labels_json} },
      "value": #{value}
    }
  JSON

  payload = +"POST /send-metrics HTTP/1.1\r\n"
  payload << "Host: #{host}\r\n"
  payload << "Connection: Close\r\n"
  payload << "Content-Type: application/json\r\n"
  payload << "Content-Length: #{json.bytesize}\r\n"
  payload << "\r\n"
  payload << json

  socket = TCPSocket.new(host, port)
  begin
    socket.write(payload)
    socket.flush
    # "Connection: Close" makes the server close after responding, so a
    # plain read-until-EOF returns the complete response.
    result = socket.read
  ensure
    socket.close
  end

  status_line = result.to_s.lines.first.to_s.strip
  error("Failed to report metric #{result}") if status_line != "HTTP/1.1 200 OK"
rescue => e
  error("Failed to send metric to Prometheus #{e}")
end
# Increments the success counter by one, without labels.
def report_success
  send_counter(
    'critical_dns_successes_total',
    'critical DNS resolution success',
    nil,
    1
  )
end
# Sends one failure counter sample per entry of +errors+.
#
# errors - Hash of host => failure count. A nil (or false) host is sent
#          without labels; any other host is sent as the +host+ label.
def report_failure(errors)
  errors.each do |host, count|
    labels = host ? { host: host } : nil
    send_counter(
      'critical_dns_failures_total',
      'critical DNS resolution failures',
      labels,
      count
    )
  end
end
# Names of the critical environment variables that are set to something
# other than a literal IP address, i.e. the ones that need DNS resolution.
# Unset or empty variables are skipped, as are values IPAddr can parse.
# IPAddr comes from the 'ipaddr' standard library, which must be required
# at the top of the script for this block to run.
@vars = CRITICAL_HOST_ENV_VARS.select do |name|
  host = ENV[name]
  next false if host.nil? || host.empty?

  begin
    IPAddr.new(host)
    false # already an IP address: nothing to resolve
  rescue IPAddr::InvalidAddressError, IPAddr::AddressFamilyError
    true
  end
end
# Runs one refresh pass:
#
# 1. Resolves the host behind every environment variable listed in @vars
#    (2 second DNS timeout).
# 2. For each host that resolved, compares the addresses currently in
#    HOSTS_PATH with the fresh ones and rewrites the file if any differ.
# 3. Reports the outcome: report_success when nothing failed, otherwise
#    report_failure with a host => failure count hash (a nil key marks a
#    failure of the pass as a whole).
#
# A host that fails to resolve is logged and counted exactly once, and does
# not prevent the remaining hosts from being processed.
#
# NOTE: defined at top level, this method shadows Kernel#loop for the whole
# process. The name is kept because the script's main driver calls it.
def loop
  errors = Hash.new(0)

  Resolv::DNS.open do |dns|
    dns.timeouts = 2

    resolved = {}

    @vars.each do |var|
      host = ENV[var]

      begin
        entries = hosts_entries(dns, var)
      rescue => e
        error("Failed to resolve DNS for #{var} - #{e}")
        errors[host] += 1
        next
      end

      if entries && !entries.empty?
        resolved[host] = entries
      else
        error("Failed to find any DNS entry for #{var} : #{host}")
        errors[host] += 1
      end
    end

    hosts_content = File.read(HOSTS_PATH)
    hosts = Resolv::Hosts.new(HOSTS_PATH)

    changed = false
    resolved.each do |name, ips|
      # Order-insensitive comparison of what the hosts file has vs. DNS.
      next if hosts.getaddresses(name).map(&:to_s).sort == ips.sort

      log("IP addresses for #{name} changed to #{ips}")
      hosts_content = swap_address(hosts_content, name, ips)
      changed = true
    end

    File.write(HOSTS_PATH, hosts_content) if changed
  end
rescue => e
  error("Failed to access DNS - #{e}")
  errors[nil] = 1
ensure
  if errors.empty?
    report_success
  else
    report_failure(errors)
  end
end
# Main driver: run one refresh pass, wait 30 seconds, repeat forever.
# `loop` below is the method defined in this script (it takes no block),
# not Kernel#loop, so this must stay a `while` rather than `loop do`.
while true
loop
sleep 30
end