From 0604dc7d3eedc9da87311b2d108ad7e9733a89dd Mon Sep 17 00:00:00 2001
From: Rafael dos Santos Silva
Date: Thu, 19 Oct 2023 14:37:37 -0300
Subject: [PATCH] FIX: Remove invalid chars from feed XMLs (#24001)

* FIX: Remove invalid chars from feed XMLs

See https://meta.discourse.org/t/rss-subscription-broken-by-post-content/282415?u=falco

* Adjust filter condition
---
Review notes (text in this section is ignored by `git am`):
- clean_xml assigns the cleaned body back instead of mutating it in place.
- The spec spells the invalid character as an explicit "\u0008" escape.
- Doc comments added; hunk headers and diffstat recomputed accordingly.

 app/controllers/application_controller.rb | 14 ++++++++++++++
 lib/xml_cleaner.rb                        |  7 +++++++
 spec/requests/topics_controller_spec.rb   | 11 +++++++++++
 3 files changed, 32 insertions(+)
 create mode 100644 lib/xml_cleaner.rb

diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb
index 92bdf13da01..398fad9d448 100644
--- a/app/controllers/application_controller.rb
+++ b/app/controllers/application_controller.rb
@@ -52,6 +52,7 @@ class ApplicationController < ActionController::Base
                if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt }
   after_action :add_noindex_header_to_non_canonical, if: :spa_boot_request?
   after_action :set_cross_origin_opener_policy_header, if: :spa_boot_request?
+  after_action :clean_xml, if: :is_feed_response?
   around_action :link_preload, if: -> { spa_boot_request? && GlobalSetting.preload_link_header }
 
   HONEYPOT_KEY ||= "HONEYPOT_KEY"
@@ -968,6 +969,12 @@ class ApplicationController < ActionController::Base
     request.format.atom? || request.format.rss?
   end
 
+  # Truthy for GET requests whose response Content-Type mentions "rss" or "atom".
+  # Safe navigation yields nil (falsy) when there is no response or no Content-Type.
+  def is_feed_response?
+    request.get? && response&.content_type&.match?(/rss|atom/)
+  end
+
   def add_noindex_header
     if request.get? && !response.headers["X-Robots-Tag"]
       if SiteSetting.allow_index_in_robots_txt
@@ -1120,4 +1127,11 @@ class ApplicationController < ActionController::Base
       default
     end
   end
+
+  # Removes every character matched by XmlCleaner::INVALID_CHARACTERS from the
+  # rendered feed. The cleaned string is assigned back so the result does not
+  # depend on `response.body` returning the very String that will be sent.
+  def clean_xml
+    response.body = response.body.gsub(XmlCleaner::INVALID_CHARACTERS, "")
+  end
 end
diff --git a/lib/xml_cleaner.rb b/lib/xml_cleaner.rb
new file mode 100644
index 00000000000..39a1fb2d7bf
--- /dev/null
+++ b/lib/xml_cleaner.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+module XmlCleaner
+  # Matches any character outside the XML 1.0 `Char` production:
+  # #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+  INVALID_CHARACTERS = /[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/u
+end
diff --git a/spec/requests/topics_controller_spec.rb b/spec/requests/topics_controller_spec.rb
index 1ad6bf728f7..fc8c91c4c4f 100644
--- a/spec/requests/topics_controller_spec.rb
+++ b/spec/requests/topics_controller_spec.rb
@@ -3402,6 +3402,17 @@ RSpec.describe TopicsController do
       expect(response.headers["X-Robots-Tag"]).to eq("noindex, nofollow")
     end
 
+    it "removes invalid characters from the feed" do
+      # U+0008 (backspace) is not a legal XML 1.0 character. It is written as an
+      # escape so the test does not rely on a raw control byte in the source.
+      topic.title = "This is a big topic title with a \u0008"
+      topic.save!
+
+      get "/t/foo/#{topic.id}.rss"
+      expect(response.status).to eq(200)
+      expect(response.body).to_not include("\u0008")
+    end
+
     it "renders rss of the topic correctly with subfolder" do
       set_subfolder "/forum"
       get "/t/foo/#{topic.id}.rss"