discourse/lib/text_cleaner.rb
Josh Soref 59097b207f
DEV: Correct typos and spelling mistakes (#12812)
Over the years we accrued many spelling mistakes in the code base. 

This PR attempts to fix spelling mistakes and typos in all areas of the code that are extremely safe to change 

- comments
- test descriptions
- other low risk areas
2021-05-21 11:43:47 +10:00

70 lines
2.5 KiB
Ruby

# frozen_string_literal: true
#
# Clean up a text
#
# We use ActiveSupport's mb_chars here so that non-ASCII text is downcased correctly
require 'active_support/core_ext/string/multibyte'
# Tidies free-form text such as topic titles: drops invalid bytes, collapses
# repeated punctuation, fixes casing and normalizes whitespace. Every step
# except byte scrubbing and whitespace normalization is opt-in through the
# options hash passed to {TextCleaner.clean}.
class TextCleaner
  # Unicode space characters that are rewritten to a plain ASCII space.
  # Written as a literal on purpose: the second argument of Regexp.new is an
  # options flag (not an encoding), so no flag string is passed here.
  WHITESPACES_REGEXP = /[\u00A0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000]/.freeze

  # Builds the cleaning options used for titles from the current site settings.
  #
  # @return [Hash{Symbol => Object}] options understood by {TextCleaner.clean}
  def self.title_options
    # cf. http://meta.discourse.org/t/should-we-have-auto-replace-rules-in-titles/5687
    prettify = SiteSetting.title_prettify

    {
      deduplicate_exclamation_marks: prettify,
      deduplicate_question_marks: prettify,
      replace_all_upper_case: prettify && !SiteSetting.allow_uppercase_posts,
      capitalize_first_letter: prettify,
      remove_all_periods_from_the_end: prettify,
      remove_extraneous_space: prettify && SiteSetting.title_remove_extraneous_space,
      fixes_interior_spaces: true,
      strip_whitespaces: true,
      strip_zero_width_spaces: true
    }
  end

  # Cleans a title using the options returned by {TextCleaner.title_options}.
  #
  # @param title [String] the raw title
  # @return [String] the cleaned title
  def self.clean_title(title)
    clean(title, title_options)
  end

  # Returns a cleaned copy of +text+; the argument itself is never mutated.
  #
  # Invalid byte sequences are always removed and the Unicode spaces listed in
  # WHITESPACES_REGEXP are always replaced by a plain space. The remaining
  # steps run only when the matching key in +opts+ is truthy:
  # :deduplicate_exclamation_marks, :deduplicate_question_marks,
  # :replace_all_upper_case, :capitalize_first_letter,
  # :remove_all_periods_from_the_end, :remove_extraneous_space,
  # :strip_zero_width_spaces, :fixes_interior_spaces, :strip_whitespaces.
  #
  # @param text [String] the text to clean
  # @param opts [Hash{Symbol => Object}] which optional steps to apply
  # @return [String] the cleaned text
  def self.clean(text, opts = {})
    text = text.dup

    # Remove invalid byte sequences
    text.scrub!("")

    # Replace !!!!! with a single !
    text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks]

    # Replace ????? with a single ?
    text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]

    # Replace all-caps text with regular case letters
    text = text.mb_chars.downcase.to_s if opts[:replace_all_upper_case] && (text == text.mb_chars.upcase)

    # Capitalize first letter, but only when entire first word is lowercase.
    # Note: split(' ', 2) skips leading whitespace and splits on any whitespace
    # run, so the rebuilt text joins the first word and the rest with one space.
    first, rest = text.split(' ', 2)
    if first && opts[:capitalize_first_letter] && first == first.mb_chars.downcase
      text = +"#{first.mb_chars.capitalize}#{rest ? ' ' + rest : ''}"
    end

    # Remove unnecessary periods at the end
    text.sub!(/([^.])\.+(\s*)\z/, '\1\2') if opts[:remove_all_periods_from_the_end]

    # Remove extraneous space before the end punctuation
    text.sub!(/\s+([!?]\s*)\z/, '\1') if opts[:remove_extraneous_space]

    # The whitespace passes run in this order so that each one sees the output
    # of the previous one: normalizing or deleting characters can create new
    # runs of spaces or expose spaces at the edges, so collapsing and stripping
    # must come last.

    # Normalize whitespaces
    text = normalize_whitespaces(text)

    # Strip zero width spaces
    text.delete!("\u200b") if opts[:strip_zero_width_spaces]

    # Fixes interior spaces
    text.squeeze!(' ') if opts[:fixes_interior_spaces]

    # Strip whitespaces
    text.strip! if opts[:strip_whitespaces]

    text
  end

  # Replaces every character matched by WHITESPACES_REGEXP with a plain space.
  #
  # @param text [String, nil] the text to normalize
  # @return [String, nil] a new string, or nil when +text+ is nil
  def self.normalize_whitespaces(text)
    text&.gsub(WHITESPACES_REGEXP, ' ')
  end
end