discourse/lib/slug.rb
Rafael dos Santos Silva 2304dcf993 FIX: Do not truncate encoded slugs
Trying to truncate encoded slugs will mean that we have to keep the URL
valid, which can be tricky as you have to be aware of multibyte
characters.

Since the title length already has an upper bound, the slug cannot grow
beyond title*6 in the worst case, and the slug column in the topic
table can store that just fine.

Added a test to ensure that a generated slug is a valid URL too, so we
don't introduce regressions in the future.
2019-10-17 13:38:31 -03:00

66 lines
1.9 KiB
Ruby

# encoding: utf-8
# frozen_string_literal: true
# Builds URL slugs from arbitrary strings (for example topic titles).
#
# The generator used by {Slug.for} is chosen by
# +SiteSetting.slug_generation_method+: +ascii+, +encoded+ or +none+.
module Slug
  # Characters removed by the encoded generator before percent-encoding.
  CHAR_FILTER_REGEXP = /[:\/\?#\[\]@!\$&'\(\)\*\+,;=_\.~%\\`^\s|\{\}"<>]+/ # :/?#[]@!$&'()*+,;=_.~%\`^|{}"<>

  # Default upper bound (in characters) for a generated slug.
  MAX_LENGTH = 255

  # Generates a slug for +string+ using the configured generation method.
  #
  # @param string [String, nil] source text; nil is treated as an empty string
  # @param default [String] returned when the generated slug is blank
  # @param max_length [Integer] truncation limit; ignored for the encoded method
  # @return [String] the slug, or +default+ when nothing usable is left
  def self.for(string, default = 'topic', max_length = MAX_LENGTH)
    string = string.to_s.gsub(/:([\w\-+]+(?::t\d)?):/, '') # strip emoji strings

    # Normalize the setting once so String and Symbol values behave the same
    # for both the generator choice and the truncation override.
    method = (SiteSetting.slug_generation_method || :ascii).to_sym
    max_length = 9999 if method == :encoded # do not truncate encoded slugs

    slug =
      case method
      when :ascii then ascii_generator(string)
      when :encoded then encoded_generator(string)
      when :none then none_generator(string)
      end

    # Reject slugs that only contain numbers, because they would be indistinguishable from id's.
    # An unrecognized method leaves slug nil, which is rejected here as well.
    slug = '' unless slug =~ /[^\d]/

    slug = prettify_slug(slug, max_length: max_length)
    slug.blank? ? default : slug
  end

  # Sanitizes +string+ with the encoded generator regardless of the site
  # setting, then prettifies and truncates the result.
  #
  # @param string [String, nil] source text; nil is treated as an empty string
  # @param downcase [Boolean] whether to lowercase before encoding
  # @param max_length [Integer] truncation limit
  # @return [String] the sanitized slug (may be empty)
  def self.sanitize(string, downcase: false, max_length: MAX_LENGTH)
    slug = encoded_generator(string, downcase: downcase)
    prettify_slug(slug, max_length: max_length)
  end

  # NOTE(review): a bare `private` does not apply to `def self.` methods, so
  # the helpers below remain publicly callable. They are left that way here
  # because callers outside this file may rely on them -- confirm before
  # tightening visibility with `private_class_method`.
  private

  # Replaces underscores with dashes, truncates to +max_length+, collapses
  # runs of dashes and strips leading/trailing dashes.
  #
  # When truncation actually shortens the slug, a percent-escape that was cut
  # in half (and any orphaned bytes of a multibyte character) is dropped so
  # the result stays a valid URL component.
  def self.prettify_slug(slug, max_length:)
    dashed = slug.tr("_", "-")
    shortened = dashed.truncate(max_length, omission: '')
    shortened = drop_partial_escape(shortened) if shortened.length < dashed.length

    shortened
      .squeeze('-') # squeeze continuous dashes to prettify slug
      .gsub(/\A-+|-+\z/, '') # remove possible trailing and preceding dashes
  end

  # Transliterates to ASCII via +parameterize+ under the site's default
  # locale, after removing apostrophes.
  def self.ascii_generator(string)
    I18n.with_locale(SiteSetting.default_locale) do
      string.to_s.tr("'", "").parameterize
    end
  end

  # This generator will sanitize almost all special characters,
  # including reserved characters from RFC3986, and percent-encode the rest.
  # See also URI::REGEXP::PATTERN.
  def self.encoded_generator(string, downcase: true)
    cleaned = string.to_s.strip
      .gsub(/\s+/, '-')
      .gsub(CHAR_FILTER_REGEXP, '')
    cleaned = cleaned.downcase if downcase
    CGI.escape(cleaned)
  end

  # The +none+ method never produces a slug.
  def self.none_generator(_string)
    ''
  end

  # Removes the damage truncation can do to a percent-encoded string: a
  # dangling "%" / "%X" at the end, then trailing "%XX" escapes that no longer
  # form a complete UTF-8 character.
  def self.drop_partial_escape(slug)
    trimmed = slug.sub(/%\h?\z/, '')
    candidate = trimmed
    # A UTF-8 character is at most 4 bytes, so at most 3 orphaned bytes can remain.
    3.times do
      break if utf8_when_unescaped?(candidate)
      candidate = candidate.sub(/%\h\h\z/, '')
    end
    # If dropping escapes did not help, the input was already malformed; keep it.
    utf8_when_unescaped?(candidate) ? candidate : trimmed
  end

  # True when percent-decoding +slug+ yields valid UTF-8.
  def self.utf8_when_unescaped?(slug)
    CGI.unescape(slug).dup.force_encoding(Encoding::UTF_8).valid_encoding?
  end

  private_class_method :drop_partial_escape, :utf8_when_unescaped?
end