FIX: bots could generate errors when slug generation method is encoded (#17224)

* FIX: bots could generate errors when slug generation method is encoded

When the slug generation method is set to encoded (a non-default setting), bots
could cause errors in the logs by requesting URLs containing special characters.

For example, a URL containing ó can be requested as a valid ASCII-8BIT string,
which later results in encoding issues when it is joined with UTF-8 strings.

The fix here ensures we force the encoding correctly for these outlier cases.

Browsers tend to always encode these characters, hence we did not notice this.


Co-authored-by: Jarek Radosz <jradosz@gmail.com>
This commit is contained in:
Sam 2022-07-29 16:27:52 +10:00 committed by GitHub
parent f590b62a31
commit d716e32a32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 6 deletions

View File

@ -264,6 +264,13 @@ module ApplicationHelper
opts ||= {}
opts[:url] ||= "#{Discourse.base_url_no_prefix}#{request.fullpath}"
# if slug generation method is encoded, non encoded urls can sneak in
# via bots
url = opts[:url]
if url.encoding.name != "UTF-8" || !url.valid_encoding?
opts[:url] = url.dup.force_encoding("UTF-8").scrub!
end
if opts[:image].blank?
twitter_summary_large_image_url = SiteSetting.site_twitter_summary_large_image_url
@ -279,12 +286,12 @@ module ApplicationHelper
opts[:twitter_summary_large_image] =
get_absolute_image_url(opts[:twitter_summary_large_image]) if opts[:twitter_summary_large_image].present?
# Add opengraph & twitter tags
result = []
result << tag(:meta, property: 'og:site_name', content: SiteSetting.title)
result << tag(:meta, property: 'og:type', content: 'website')
result = generate_twitter_card_metadata(opts, result)
generate_twitter_card_metadata(result, opts)
result << tag(:meta, property: "og:image", content: opts[:image]) if opts[:image].present?
[:url, :title, :description].each do |property|
@ -313,7 +320,7 @@ module ApplicationHelper
result.join("\n")
end
def generate_twitter_card_metadata(opts, result)
private def generate_twitter_card_metadata(result, opts)
img_url = opts[:twitter_summary_large_image].present? ? \
opts[:twitter_summary_large_image] :
opts[:image]
@ -332,8 +339,6 @@ module ApplicationHelper
else
result << tag(:meta, name: 'twitter:card', content: "summary")
end
result
end
def render_sitelinks_search_tag

View File

@ -161,7 +161,7 @@ RSpec.describe ApplicationHelper do
context "when dark theme is present" do
before do
dark_theme = Theme.create(
_dark_theme = Theme.create(
name: "Dark",
user_id: -1,
color_scheme_id: ColorScheme.find_by(base_scheme_id: "Dark").id
@ -418,6 +418,15 @@ RSpec.describe ApplicationHelper do
end
describe 'crawlable_meta_data' do
# Regression example: passes crawlable_meta_data a URL whose non-ASCII bytes
# are tagged as ASCII-8BIT rather than UTF-8. The string is frozen, so it
# cannot have its encoding changed in place.
it 'Supports ASCII URLs with odd chars' do
result = helper.crawlable_meta_data(
url: (+"http://localhost/ión").force_encoding("ASCII-8BIT").freeze
)
# The generated output is expected to contain the original characters.
expect(result).to include("ión")
end
context "opengraph image" do
it 'returns the correct image' do
SiteSetting.opengraph_image = Fabricate(:upload,