From 2389186155c23faca47c36cef8a6c3967596a472 Mon Sep 17 00:00:00 2001 From: Michael Fitz-Payne Date: Thu, 16 Nov 2023 08:58:07 +1000 Subject: [PATCH] DEV(cache_critical_dns): sort resolved SRV targets by priority The priority field in an SRV RR indicates a preferential order at which the underlying targets should be utilised. We need to prefer healthy services in order of priority, where 0 is highest. Prior to this commit, we relied on whatever order the dnsclient.getresources method returned. As it turns out, this assumption is incorrect. The order returned is likely whatever order the system resolver received DNS responses in, which may not be ordered according to the spec. This introduces a ResolvedAddress type which holds the priority value for SRV targets, or a stand-in priority of zero for A/AAAA RRs. This type is used as a return value from the underlying name resolution routines in Name and SRVName. In this manner, all ordering by priority and resolved time can be performed directly within the ResolverCache class and calling code can continue to be none-the-wiser. Before sorting, we still ensure that we only consider targets with a priority within the given threshold as previously implemented. See t/115911. --- script/cache_critical_dns | 71 +++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/script/cache_critical_dns b/script/cache_critical_dns index 803b0216e6f..51886aa7b83 100755 --- a/script/cache_critical_dns +++ b/script/cache_critical_dns @@ -140,6 +140,12 @@ module DNSClient end end +ResolvedAddress = Struct.new(:address, :priority) do |s| + def initialize(address:, priority: 0) + super(address, priority) + end +end + class Name include DNSClient @@ -147,12 +153,18 @@ class Name @name = hostname end + # Resolves A and AAAA DNS RRs for @name. + # Returns an array of the type ResolvedAddress. def resolve dns_client_with_timeout do |dns_client| - [].tap do |addresses| - addresses.concat(dns_client.getresources(@name, Resolv::DNS::Resource::IN::A).map(&:address)) - addresses.concat(dns_client.getresources(@name, Resolv::DNS::Resource::IN::AAAA).map(&:address)) - end.map(&:to_s) + [].tap do |results| + dns_client.getresources(@name, Resolv::DNS::Resource::IN::A).map(&:address).each do |a| + results << ResolvedAddress.new(address: a.to_s) + end + dns_client.getresources(@name, Resolv::DNS::Resource::IN::AAAA).map(&:address).each do |aaaa| + results << ResolvedAddress.new(address: aaaa.to_s) + end + end end end end @@ -165,18 +177,28 @@ class SRVName @prio_filter = prio_filter end + # Starts by resolving the SRV RRs for @name. + # For all returned targets, resolves the underlying A and AAAA RRs. + # Returns an array of the type ResolvedAddress. def resolve dns_client_with_timeout do |dns_client| - [].tap do |addresses| + [].tap do |results| targets = dns_client.getresources(@name, Resolv::DNS::Resource::IN::SRV) targets.delete_if { |t| !@prio_filter.within_threshold?(t.priority) } - addresses.concat(targets.map { |t| Name.new(t.target.to_s).resolve }.flatten) + targets.each do |t| + begin + Name.new(t.target.to_s).resolve.each do |ra| + results << ResolvedAddress.new(address: ra.address, priority: t.priority) + end + rescue Resolv::ResolvError, Resolv::ResolvTimeout + end + end end end end end -CacheMeta = Struct.new(:first_seen, :last_seen) +CacheMeta = Struct.new(:first_seen, :last_seen, :priority) class EmptyCacheError < StandardError; end @@ -189,30 +211,45 @@ class ResolverCache @cached = {} end - # Returns a list of resolved addresses ordered by first seen time. Most - # recently seen address is first. - # If an exception occurs during DNS resolution we return whatever addresses - # are cached. + # Resolve @name and return an array of resolved addresses. + # The order of returned addresses is as follows: + # Addresses are ordered by priority and time they were first seen. + # Addresses with lower numeric priority are always returned first. + # Ties between addresses of equal priority are broken by ordering the + # return value by the time an address was first observed by the cache. + # Newer addresses, that appeared later in time, are returned ahead of older + # addresses. + # # Addresses last seen more than 30 minutes ago are evicted from the cache. + # Upon any exception during DNS resolution, cached addresses are returned. # Raises EmptyCacheError if the cache is empty after DNS resolution and cache - # eviction is performed. + # eviction. def resolve + now = Time.now.utc begin - @name.resolve.each do |address| + @name.resolve.each do |ra| + address = ra.address if @cached[address] - @cached[address].last_seen = Time.now.utc + @cached[address].last_seen = now + @cached[address].priority = ra.priority else - @cached[address] = CacheMeta.new(Time.now.utc, Time.now.utc) + @cached[address] = CacheMeta.new(now, now, ra.priority) end end rescue Resolv::ResolvError, Resolv::ResolvTimeout end - @cached = @cached.delete_if { |_, meta| Time.now.utc - 30 * 60 > meta.last_seen } + @cached = @cached.delete_if { |_, meta| now - 30 * 60 > meta.last_seen } if @cached.empty? raise EmptyCacheError, "DNS resolver found no usable addresses" end - @cached.sort_by { |_, meta| meta.first_seen }.reverse.map(&:first) + + @cached.sort_by do |_, meta| + [ + meta.priority, + now - meta.first_seen, + ] + end.map(&:first).uniq end end