mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 22:26:26 +08:00
2304dcf993
Trying to truncate encoded slugs means that we have to keep the URL valid, which can be tricky because you have to be aware of multibyte characters. Since we already have an upper bound on the title length, the slug won't grow beyond title*6 in the worst case. The slug column in the topic table can store that just fine. Added a test to ensure that a generated slug is a valid URL too, so we don't introduce regressions in the future.
66 lines
1.9 KiB
Ruby
66 lines
1.9 KiB
Ruby
# encoding: utf-8
|
|
# frozen_string_literal: true
|
|
|
|
# Generates URL slugs from free-form strings such as topic titles.
#
# The strategy used by Slug.for is selected by
# SiteSetting.slug_generation_method:
#   ascii   - delegates to String#parameterize
#   encoded - keeps the characters of the input and percent-encodes them
#   none    - always yields an empty slug, so the caller's default is used
module Slug
  # Runs of these characters (and of any whitespace) are removed by
  # .encoded_generator before the string is percent-encoded.
  CHAR_FILTER_REGEXP = /[:\/\?#\[\]@!\$&'\(\)\*\+,;=_\.~%\\`^\s|\{\}"<>]+/ # :/?#[]@!$&'()*+,;=_.~%\`^|{}"<>

  # Default maximum slug length used by .for and .sanitize.
  MAX_LENGTH = 255

  # Builds a slug for +string+ using the site's configured generation method.
  #
  # @param string [String, nil] source text; nil is treated as an empty string
  # @param default [String] value returned when no usable slug can be produced
  # @param max_length [Integer] maximum slug length; ignored for the
  #   "encoded" method, whose slugs are not truncated
  # @return [String] the slug, or +default+ when the slug would be blank
  def self.for(string, default = 'topic', max_length = MAX_LENGTH)
    # to_s keeps a nil input from raising inside the generators below.
    string = string.to_s.gsub(/:([\w\-+]+(?::t\d)?):/, '') # strip emoji strings

    # Normalise the setting once so the truncation rule and the generator
    # dispatch can never disagree (String vs Symbol setting values).
    generation_method = (SiteSetting.slug_generation_method || :ascii).to_sym
    max_length = 9999 if generation_method == :encoded # do not truncate encoded slugs

    slug =
      case generation_method
      when :ascii then ascii_generator(string)
      when :encoded then encoded_generator(string)
      when :none then none_generator(string)
      end

    # Reject slugs that only contain numbers, because they would be indistinguishable from id's.
    # An unrecognised generation method leaves slug nil, which is rejected the same way.
    slug = '' unless slug.to_s.match?(/\D/)

    slug = prettify_slug(slug, max_length: max_length)
    slug.blank? ? default : slug
  end

  # Percent-encodes +string+ into a slug regardless of the site setting.
  #
  # NOTE(review): truncation is applied to the already percent-encoded
  # string, so a small +max_length+ can cut through a %XX escape.
  #
  # @param string [String, nil] source text; nil is treated as an empty string
  # @param downcase [Boolean] whether to lowercase the text before encoding
  # @param max_length [Integer] maximum length of the returned slug
  # @return [String] the sanitized slug (may be empty)
  def self.sanitize(string, downcase: false, max_length: MAX_LENGTH)
    slug = encoded_generator(string.to_s, downcase: downcase)
    prettify_slug(slug, max_length: max_length)
  end

  # The methods below are internal building blocks. A bare `private` has no
  # effect on `def self.` methods, so they are callable from outside the
  # module; they are left public so that existing callers keep working.

  # Tidies a generated slug: underscores become dashes, the result is cut to
  # +max_length+ characters, runs of dashes are collapsed and leading or
  # trailing dashes are removed.
  #
  # @param slug [String]
  # @param max_length [Integer]
  # @return [String]
  def self.prettify_slug(slug, max_length:)
    slug
      .tr('_', '-')
      .truncate(max_length, omission: '')
      .squeeze('-') # squeeze continuous dashes to prettify slug
      .gsub(/\A-+|-+\z/, '') # remove possible trailing and preceding dashes
  end

  # Drops apostrophes and parameterizes +string+ while I18n is switched to
  # SiteSetting.default_locale (presumably so that transliteration follows
  # the site's default language - confirm against I18n configuration).
  #
  # @param string [String]
  # @return [String]
  def self.ascii_generator(string)
    I18n.with_locale(SiteSetting.default_locale) do
      string.tr("'", '').parameterize
    end
  end

  # This generator will sanitize almost all special characters,
  # including reserved characters from RFC3986.
  # See also URI::REGEXP::PATTERN.
  #
  # Whitespace runs become single dashes, characters matched by
  # CHAR_FILTER_REGEXP are removed and the remainder is CGI-escaped.
  #
  # @param string [String]
  # @param downcase [Boolean] whether to lowercase the text before encoding
  # @return [String]
  def self.encoded_generator(string, downcase: true)
    cleaned = string.strip.gsub(/\s+/, '-').gsub(CHAR_FILTER_REGEXP, '')
    cleaned = cleaned.downcase if downcase

    CGI.escape(cleaned)
  end

  # Generator for the "none" method: always returns an empty slug.
  #
  # @param string [String] ignored
  # @return [String] an empty string
  def self.none_generator(string)
    ''
  end
end
|