From 8e10878e1a37a608b0e1705bf9d0a06a295b8a63 Mon Sep 17 00:00:00 2001 From: Alan Guo Xiang Tan Date: Fri, 5 Jul 2024 09:41:52 +0800 Subject: [PATCH] DEV: Redo `DiscourseLogstashLogger` to not rely on `logstash-logger` (#27663) This commit rewrites `DiscourseLogstashLogger` to not be an instance of `LogstashLogger`. The reason we don't want it to be an instance of `LogstashLogger` is because we want the new logger to be chained to Logster's logger which can then pass down useful information like the request's env and error backtraces which Logster has already gathered. Note that this commit does not bother to maintain backwards compatibility and drops the `LOGSTASH_URI` and `UNICORN_LOGSTASH_URI` ENV variables which were previously used to configure the destination in which `logstash-logger` would send the logs to. Instead, we introduce the `ENABLE_LOGSTASH_LOGGER` ENV variable to replace both ENV and remove the need for the log paths to be specified. Note that the previous feature was considered experimental as stated in d888d3c54c4744d52b9d21d3728f5d6dbc132cde and the new feature should be considered experimental as well. The code may be moved into a plugin in the future. --- Gemfile | 1 - Gemfile.lock | 5 +- config/environments/development.rb | 3 +- config/initializers/101-lograge.rb | 23 +++- config/unicorn.conf.rb | 30 +++-- lib/discourse.rb | 30 +---- lib/discourse_logstash_logger.rb | 112 +++++++++++++++-- lib/git_utils.rb | 33 +++++ lib/version.rb | 5 +- spec/lib/discourse_logstash_logger_spec.rb | 137 +++++++++++++++++++++ 10 files changed, 314 insertions(+), 65 deletions(-) create mode 100644 lib/git_utils.rb create mode 100644 spec/lib/discourse_logstash_logger_spec.rb diff --git a/Gemfile b/Gemfile index ad2429502ff..04df2b0b7ca 100644 --- a/Gemfile +++ b/Gemfile @@ -212,7 +212,6 @@ gem "cppjieba_rb", require: false gem "lograge", require: false gem "logstash-event", require: false -gem "logstash-logger", require: false gem "logster" # A fork of sassc with dart-sass support diff --git a/Gemfile.lock b/Gemfile.lock index a36440accc7..325957f4e9c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -218,9 +218,7 @@ GEM railties (>= 4) request_store (~> 1.0) logstash-event (1.2.02) - logstash-logger (0.26.1) - logstash-event (~> 1.2) - logster (2.19.1) + logster (2.20.0) loofah (2.22.0) crass (~> 1.0.2) nokogiri (>= 1.12.0) @@ -640,7 +638,6 @@ DEPENDENCIES listen lograge logstash-event - logstash-logger logster loofah lru_redux diff --git a/config/environments/development.rb b/config/environments/development.rb index 6609ecf373b..defd9ce4d0e 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -44,6 +44,7 @@ Discourse::Application.configure do config.log_level = ENV["DISCOURSE_DEV_LOG_LEVEL"] if ENV["DISCOURSE_DEV_LOG_LEVEL"] + config.active_record.logger = nil if ENV["RAILS_DISABLE_ACTIVERECORD_LOGS"] == "1" config.active_record.verbose_query_logs = true if ENV["RAILS_VERBOSE_QUERY_LOGS"] == "1" if defined?(BetterErrors) @@ -90,8 +91,6 @@ Discourse::Application.configure do end end - ActiveRecord::Base.logger = nil if ENV["RAILS_DISABLE_ACTIVERECORD_LOGS"] == "1" - if ENV["BULLET"] Bullet.enable = true Bullet.rails_logger = true diff --git a/config/initializers/101-lograge.rb b/config/initializers/101-lograge.rb index 5eaedb5b207..3b992816314 100644 --- a/config/initializers/101-lograge.rb +++ b/config/initializers/101-lograge.rb @@ -102,18 +102,29 @@ Rails.application.config.to_prepare do end end - if ENV["LOGSTASH_URI"] + if ENV["ENABLE_LOGSTASH_LOGGER"] == "1" config.lograge.formatter = Lograge::Formatters::Logstash.new require "discourse_logstash_logger" config.lograge.logger = - DiscourseLogstashLogger.logger(uri: ENV["LOGSTASH_URI"], type: :rails) + DiscourseLogstashLogger.logger( + logdev: Rails.root.join("log", "#{Rails.env}.log"), + type: :rails, + customize_event: + lambda do |event| + event["database"] = RailsMultisite::ConnectionManagement.current_db + end, + ) - # Remove ActiveSupport::Logger from the chain and replace with Lograge's - # logger - Rails.logger.stop_broadcasting_to(Rails.logger.broadcasts.first) - Rails.logger.broadcast_to(config.lograge.logger) + # Stop broadcasting to Rails' default logger + Rails.logger.stop_broadcasting_to( + Rails.logger.broadcasts.find { |logger| logger.is_a?(ActiveSupport::Logger) }, + ) + + Logster.logger.subscribe do |severity, message, progname, opts, &block| + config.lograge.logger.add_with_opts(severity, message, progname, opts, &block) + end end end end diff --git a/config/unicorn.conf.rb b/config/unicorn.conf.rb index 5d377cfc31e..a52082659d7 100644 --- a/config/unicorn.conf.rb +++ b/config/unicorn.conf.rb @@ -1,13 +1,6 @@ # frozen_string_literal: true # See http://unicorn.bogomips.org/Unicorn/Configurator.html - -if (ENV["LOGSTASH_UNICORN_URI"] || "").length > 0 - require_relative "../lib/discourse_logstash_logger" - require_relative "../lib/unicorn_logstash_patch" - logger DiscourseLogstashLogger.logger(uri: ENV["LOGSTASH_UNICORN_URI"], type: :unicorn) -end - discourse_path = File.expand_path(File.expand_path(File.dirname(__FILE__)) + "/../") # tune down if not enough ram @@ -25,18 +18,31 @@ FileUtils.mkdir_p("#{discourse_path}/tmp/pids") if !File.exist?("#{discourse_pat # feel free to point this anywhere accessible on the filesystem pid(ENV["UNICORN_PID_PATH"] || "#{discourse_path}/tmp/pids/unicorn.pid") -if ENV["RAILS_ENV"] != "production" - logger Logger.new(STDOUT) - # we want a longer timeout in dev cause first request can be really slow - timeout(ENV["UNICORN_TIMEOUT"] && ENV["UNICORN_TIMEOUT"].to_i || 60) -else +if ENV["RAILS_ENV"] == "production" # By default, the Unicorn logger will write to stderr. # Additionally, some applications/frameworks log to stderr or stdout, # so prevent them from going to /dev/null when daemonized here: stderr_path "#{discourse_path}/log/unicorn.stderr.log" stdout_path "#{discourse_path}/log/unicorn.stdout.log" + # nuke workers after 30 seconds instead of 60 seconds (the default) timeout 30 +else + # we want a longer timeout in dev cause first request can be really slow + timeout(ENV["UNICORN_TIMEOUT"] && ENV["UNICORN_TIMEOUT"].to_i || 60) +end + +enable_logstash_logger = ENV["ENABLE_LOGSTASH_LOGGER"] == "1" + +if enable_logstash_logger + require_relative "../lib/discourse_logstash_logger" + require_relative "../lib/unicorn_logstash_patch" + logger DiscourseLogstashLogger.logger( + logdev: "#{discourse_path}/log/unicorn.stderr.log", + type: :unicorn, + ) +else + logger Logger.new(STDOUT) end # important for Ruby 2.0 diff --git a/lib/discourse.rb b/lib/discourse.rb index 4957b4c174c..774e065b298 100644 --- a/lib/discourse.rb +++ b/lib/discourse.rb @@ -4,6 +4,7 @@ require "cache" require "open3" require "plugin/instance" require "version" +require "git_utils" module Discourse DB_POST_MIGRATE_PATH ||= "db/post_migrate" @@ -829,42 +830,23 @@ module Discourse end def self.git_version - @git_version ||= - begin - git_cmd = "git rev-parse HEAD" - self.try_git(git_cmd, Discourse::VERSION::STRING) - end + @git_version ||= GitUtils.git_version end def self.git_branch - @git_branch ||= - self.try_git("git branch --show-current", nil) || - self.try_git("git config user.discourse-version", "unknown") + @git_branch ||= GitUtils.git_branch end def self.full_version - @full_version ||= - begin - git_cmd = 'git describe --dirty --match "v[0-9]*" 2> /dev/null' - self.try_git(git_cmd, "unknown") - end + @full_version ||= GitUtils.full_version end def self.last_commit_date - @last_commit_date ||= - begin - git_cmd = 'git log -1 --format="%ct"' - seconds = self.try_git(git_cmd, nil) - seconds.nil? ? nil : DateTime.strptime(seconds, "%s") - end + @last_commit_date ||= GitUtils.last_commit_date end def self.try_git(git_cmd, default_value) - begin - `#{git_cmd}`.strip - rescue StandardError - default_value - end.presence || default_value + GitUtils.try_git(git_cmd, default_value) end # Either returns the site_contact_username user or the first admin. diff --git a/lib/discourse_logstash_logger.rb b/lib/discourse_logstash_logger.rb index 96f2711fd68..df435d3799b 100644 --- a/lib/discourse_logstash_logger.rb +++ b/lib/discourse_logstash_logger.rb @@ -1,18 +1,104 @@ # frozen_string_literal: true -require "logstash-logger" +require "json" +require "socket" +require_relative "git_utils" -class DiscourseLogstashLogger - def self.logger(uri:, type:) - LogStashLogger.new( - uri: uri, - sync: true, - customize_event: ->(event) do - event["severity_name"] = event["severity"] - event["severity"] = Object.const_get("Logger::Severity::#{event["severity"]}") - event["type"] = type - event["pid"] = Process.pid - end, - ) +class DiscourseLogstashLogger < Logger + PROCESS_PID = Process.pid + HOST = Socket.gethostname + GIT_VERSION = GitUtils.git_version + + attr_accessor :customize_event, :type + + # Creates a new logger instance. + # + # @param logdev [String, IO, nil] The log device. This can be one of: + # - A string filepath: entries are written to the file at that path. If the file exists, new entries are appended. + # - An IO stream (typically +$stdout+, +$stderr+, or an open file): entries are written to the given stream. + # - nil or File::NULL: no entries are written. + # @param type [String] The type of log messages. This will add a `type` field to all log messages. + # @param customize_event [Proc, nil] A proc that customizes the log event before it is written to the log device. + # The proc is called with a hash of log event data and can be modified in place. + # + # @return [Logger] A new logger instance with the specified log device and type. + def self.logger(logdev:, type:, customize_event: nil) + logger = self.new(logdev) + logger.type = type + logger.customize_event = customize_event if customize_event + logger + end + + # :nodoc: + def add(*args, &block) + add_with_opts(*args, &block) + end + + ALLOWED_HEADERS_FROM_ENV = %w[ + REQUEST_URI + REQUEST_METHOD + HTTP_HOST + HTTP_USER_AGENT + HTTP_ACCEPT + HTTP_REFERER + HTTP_X_FORWARDED_FOR + HTTP_X_REAL_IP + ] + + # :nodoc: + def add_with_opts(severity, message = nil, progname = nil, opts = {}, &block) + return true if @logdev.nil? || severity < @level + + progname = @progname if progname.nil? + + if message.nil? + if block_given? + message = yield + else + message = progname + progname = @progname + end + end + + event = { + "message" => message, + "severity" => severity, + "severity_name" => Logger::SEV_LABEL[severity], + "pid" => PROCESS_PID, + "type" => @type.to_s, + "host" => HOST, + "git_version" => GitUtils.git_version, + } + + # Only log backtrace and env for Logger::WARN and above. + # Backtrace is just noise for anything below that. + if severity >= Logger::WARN + if (backtrace = opts&.dig(:backtrace)).present? + event["backtrace"] = backtrace + end + + if (env = opts&.dig(:env)).present? + ALLOWED_HEADERS_FROM_ENV.each do |header| + event["request.headers.#{header.downcase}"] = opts[:env][header] + end + end + end + + if message.start_with?("{") && message.end_with?("}") + begin + parsed = JSON.parse(message) + event["message"] = parsed.delete("message") if parsed["message"] + event.merge!(parsed) + event + rescue JSON::ParserError + # Do nothing + end + end + + @customize_event.call(event) if @customize_event + + @logdev.write("#{event.to_json}\n") + rescue Exception => e + STDERR.puts "Error logging message in DiscourseLogstashLogger: #{message} (#{e.message})\n#{e.backtrace.join("\n")}" end end diff --git a/lib/git_utils.rb b/lib/git_utils.rb new file mode 100644 index 00000000000..f7080cca650 --- /dev/null +++ b/lib/git_utils.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +class GitUtils + def self.git_version + self.try_git("git rev-parse HEAD", "unknown") + end + + def self.git_branch + self.try_git("git branch --show-current", nil) || + self.try_git("git config user.discourse-version", "unknown") + end + + def self.full_version + self.try_git('git describe --dirty --match "v[0-9]*" 2> /dev/null', "unknown") + end + + def self.last_commit_date + git_cmd = 'git log -1 --format="%ct"' + seconds = self.try_git(git_cmd, nil) + seconds.nil? ? nil : DateTime.strptime(seconds, "%s") + end + + def self.try_git(git_cmd, default_value) + value = + begin + `#{git_cmd}`.strip + rescue StandardError + default_value + end + + (value.present? ? value : nil) || default_value + end +end diff --git a/lib/version.rb b/lib/version.rb index adfc4554b93..c8dbda13761 100644 --- a/lib/version.rb +++ b/lib/version.rb @@ -3,7 +3,6 @@ module Discourse VERSION_REGEXP ||= /\A\d+\.\d+\.\d+(\.beta\d+)?\z/ VERSION_COMPATIBILITY_FILENAME ||= ".discourse-compatibility" - # work around reloader unless defined?(::Discourse::VERSION) module VERSION #:nodoc: @@ -16,8 +15,8 @@ module Discourse MAJOR = PARTS[0].to_i MINOR = PARTS[1].to_i TINY = PARTS[2].to_i - PRE = PARTS[3]&.split("-", 2)&.first - DEV = PARTS[3]&.split("-", 2)&.second + PRE = PARTS[3]&.split("-", 2)&.[](0) + DEV = PARTS[3]&.split("-", 2)&.[](1) end end diff --git a/spec/lib/discourse_logstash_logger_spec.rb b/spec/lib/discourse_logstash_logger_spec.rb new file mode 100644 index 00000000000..f0bbb2ae668 --- /dev/null +++ b/spec/lib/discourse_logstash_logger_spec.rb @@ -0,0 +1,137 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseLogstashLogger do + let(:lograge_logstash_formatter_formatted_message) do + "{\"method\":\"GET\",\"path\":\"/\",\"format\":\"html\",\"controller\":\"ListController\",\"action\":\"latest\",\"status\":200,\"allocations\":242307,\"duration\":178.2,\"view\":78.36,\"db\":0.0,\"params\":\"\",\"ip\":\"127.0.0.1\",\"username\":null,\"@timestamp\":\"2024-07-01T07:51:11.283Z\",\"@version\":\"1\",\"message\":\"[200] GET / (ListController#latest)\"}" + end + + let(:output) { StringIO.new } + let(:logger) { described_class.logger(logdev: output, type: "test") } + + describe "#add" do + it "logs a JSON string with the right fields" do + logger.add(Logger::INFO, lograge_logstash_formatter_formatted_message) + output.rewind + + expect(output.read.chomp).to eq( + { + "message" => "[200] GET / (ListController#latest)", + "severity" => 1, + "severity_name" => "INFO", + "pid" => described_class::PROCESS_PID, + "type" => "test", + "host" => described_class::HOST, + "git_version" => described_class::GIT_VERSION, + "method" => "GET", + "path" => "/", + "format" => "html", + "controller" => "ListController", + "action" => "latest", + "status" => 200, + "allocations" => 242_307, + "duration" => 178.2, + "view" => 78.36, + "db" => 0.0, + "params" => "", + "ip" => "127.0.0.1", + "username" => nil, + "@timestamp" => "2024-07-01T07:51:11.283Z", + "@version" => "1", + }.to_json, + ) + end + + it "logs a JSON string with the right fields when `customize_event` attribute is set" do + logger = + described_class.logger( + logdev: output, + type: "test", + customize_event: ->(event) { event["custom"] = "custom" }, + ) + + logger.add(Logger::INFO, lograge_logstash_formatter_formatted_message) + output.rewind + parsed = JSON.parse(output.read.chomp) + + expect(parsed["custom"]).to eq("custom") + end + + it "does not log a JSON string with the `backtrace` field when severity is less than `Logger::WARN`" do + logger.add( + Logger::INFO, + lograge_logstash_formatter_formatted_message, + nil, + backtrace: "backtrace", + ) + + output.rewind + parsed = JSON.parse(output.read.chomp) + + expect(parsed).not_to have_key("backtrace") + end + + it "logs a JSON string with the `backtrace` field when severity is at least `Logger::WARN`" do + logger.add( + Logger::ERROR, + lograge_logstash_formatter_formatted_message, + nil, + backtrace: "backtrace", + ) + + output.rewind + parsed = JSON.parse(output.read.chomp) + + expect(parsed["backtrace"]).to eq("backtrace") + end + + described_class::ALLOWED_HEADERS_FROM_ENV.each do |header| + it "does not include `#{header}` from `env` keyword argument in the logged JSON string when severity is less than `Logger::WARN`" do + logger.add( + Logger::INFO, + lograge_logstash_formatter_formatted_message, + nil, + env: { + header => "header", + }, + ) + + output.rewind + parsed = JSON.parse(output.read.chomp) + + expect(parsed).not_to have_key("request.headers.#{header.downcase}") + end + + it "includes `#{header}` from `env` keyword argument in the logged JSON string when severity is at least `Logger::WARN`" do + logger.add( + Logger::ERROR, + lograge_logstash_formatter_formatted_message, + nil, + env: { + header => "header", + }, + ) + + output.rewind + parsed = JSON.parse(output.read.chomp) + + expect(parsed["request.headers.#{header.downcase}"]).to eq("header") + end + end + + it "does not include keys from `env` keyword argument in the logged JSOn string which are not in the allow list" do + logger.add( + Logger::ERROR, + lograge_logstash_formatter_formatted_message, + nil, + env: { + "SOME_RANDOM_HEADER" => "header", + }, + ) + + output.rewind + parsed = JSON.parse(output.read.chomp) + + expect(parsed).not_to have_key("request.headers.some_random_header") + end + end +end