FIX: Remove invalid chars from feed XMLs (#24001)

* FIX: Remove invalid chars from feed XMLs

See https://meta.discourse.org/t/rss-subscription-broken-by-post-content/282415?u=falco

* Adjust filter condition
This commit is contained in:
Rafael dos Santos Silva 2023-10-19 14:37:37 -03:00 committed by GitHub
parent ad433daf3a
commit 0604dc7d3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 23 additions and 0 deletions

View File

@ -52,6 +52,7 @@ class ApplicationController < ActionController::Base
if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt } if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt }
after_action :add_noindex_header_to_non_canonical, if: :spa_boot_request? after_action :add_noindex_header_to_non_canonical, if: :spa_boot_request?
after_action :set_cross_origin_opener_policy_header, if: :spa_boot_request? after_action :set_cross_origin_opener_policy_header, if: :spa_boot_request?
after_action :clean_xml, if: :is_feed_response?
around_action :link_preload, if: -> { spa_boot_request? && GlobalSetting.preload_link_header } around_action :link_preload, if: -> { spa_boot_request? && GlobalSetting.preload_link_header }
HONEYPOT_KEY ||= "HONEYPOT_KEY" HONEYPOT_KEY ||= "HONEYPOT_KEY"
@ -968,6 +969,10 @@ class ApplicationController < ActionController::Base
request.format.atom? || request.format.rss? request.format.atom? || request.format.rss?
end end
# Tells whether the current exchange is a GET whose response content type
# mentions "rss" or "atom".
#
# Returns false for non-GET requests; otherwise the result of matching the
# response content type (nil when no response or content type is available).
def is_feed_response?
  return false unless request.get?

  content_type = response&.content_type
  content_type&.match?(/(rss|atom)/)
end
def add_noindex_header def add_noindex_header
if request.get? && !response.headers["X-Robots-Tag"] if request.get? && !response.headers["X-Robots-Tag"]
if SiteSetting.allow_index_in_robots_txt if SiteSetting.allow_index_in_robots_txt
@ -1120,4 +1125,8 @@ class ApplicationController < ActionController::Base
default default
end end
end end
# Strips every character matched by XmlCleaner::INVALID_CHARACTERS from the
# rendered response body.
#
# The cleaned copy is assigned back through `response.body=` instead of
# mutating the String returned by `response.body` in place. In-place mutation
# only reaches the client when the response object keeps and later reuses that
# exact String instance, and it raises FrozenError if the body is frozen.
def clean_xml
  response.body = response.body.gsub(XmlCleaner::INVALID_CHARACTERS, "")
end
end end

5
lib/xml_cleaner.rb Normal file
View File

@ -0,0 +1,5 @@
# frozen_string_literal: true
module XmlCleaner
  # Matches any single character that is NOT one of:
  #   - tab (0x09), line feed (0x0A), carriage return (0x0D)
  #   - U+0020..U+D7FF
  #   - U+E000..U+FFFD
  #   - U+10000..U+10FFFF
  # These allowed ranges are the `Char` production of XML 1.0, so a match is a
  # character that cannot legally appear in an XML document (e.g. most C0
  # control characters, U+FFFE and U+FFFF).
  INVALID_CHARACTERS = /[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/u
end

View File

@ -3402,6 +3402,15 @@ RSpec.describe TopicsController do
expect(response.headers["X-Robots-Tag"]).to eq("noindex, nofollow") expect(response.headers["X-Robots-Tag"]).to eq("noindex, nofollow")
end end
it "removes invalid characters from the feed" do
  # U+0008 (backspace) falls outside the ranges XmlCleaner::INVALID_CHARACTERS
  # allows. It is written as an explicit escape so the character cannot be
  # silently dropped by editors, diff viewers or copy/paste; an empty literal
  # here would make `to_not include("")` fail for every response body.
  invalid_char = "\u0008"

  topic.title = "This is a big topic title with a #{invalid_char}"
  topic.save!

  get "/t/foo/#{topic.id}.rss"

  expect(response.status).to eq(200)
  expect(response.body).to_not include(invalid_char)
end
it "renders rss of the topic correctly with subfolder" do it "renders rss of the topic correctly with subfolder" do
set_subfolder "/forum" set_subfolder "/forum"
get "/t/foo/#{topic.id}.rss" get "/t/foo/#{topic.id}.rss"