mirror of
https://github.com/discourse/discourse.git
synced 2025-01-19 13:53:16 +08:00
a48f7ba61c
It used to simply say "title is invalid" without giving any hint what the problem could be. This commit adds different errors messages for all caps titles, low entropy titles or titles with very long words.
91 lines
2.8 KiB
Ruby
91 lines
2.8 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Whe use ActiveSupport mb_chars from here to properly support non ascii downcase
|
|
# TODO remove when ruby 2.4 lands
|
|
require 'active_support/core_ext/string/multibyte'
|
|
|
|
#
|
|
# Given a string, tell us whether or not is acceptable.
|
|
#
|
|
class TextSentinel
|
|
|
|
attr_accessor :text
|
|
|
|
ENTROPY_SCALE ||= 0.7
|
|
|
|
def initialize(text, opts = nil)
|
|
@opts = opts || {}
|
|
@text = text.to_s.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
|
end
|
|
|
|
def self.body_sentinel(text, opts = {})
|
|
entropy = SiteSetting.body_min_entropy
|
|
if opts[:private_message]
|
|
scale_entropy = SiteSetting.min_personal_message_post_length.to_f / SiteSetting.min_post_length.to_f
|
|
entropy = (entropy * scale_entropy).to_i
|
|
entropy = (SiteSetting.min_personal_message_post_length.to_f * ENTROPY_SCALE).to_i if entropy > SiteSetting.min_personal_message_post_length
|
|
else
|
|
entropy = (SiteSetting.min_post_length.to_f * ENTROPY_SCALE).to_i if entropy > SiteSetting.min_post_length
|
|
end
|
|
TextSentinel.new(text, min_entropy: entropy)
|
|
end
|
|
|
|
def self.title_sentinel(text)
|
|
entropy = if SiteSetting.min_topic_title_length > SiteSetting.title_min_entropy
|
|
SiteSetting.title_min_entropy
|
|
else
|
|
(SiteSetting.min_topic_title_length.to_f * ENTROPY_SCALE).to_i
|
|
end
|
|
TextSentinel.new(text, min_entropy: entropy, max_word_length: SiteSetting.title_max_word_length)
|
|
end
|
|
|
|
# Entropy is a number of how many unique characters the string needs.
|
|
# Non-ASCII characters are weighted heavier since they contain more "information"
|
|
def entropy
|
|
chars = @text.to_s.strip.split('')
|
|
@entropy ||= chars.pack('M*' * chars.size).gsub("\n", '').split('=').uniq.size
|
|
end
|
|
|
|
def valid?
|
|
@text.present? &&
|
|
seems_meaningful? &&
|
|
seems_pronounceable? &&
|
|
seems_unpretentious? &&
|
|
seems_quiet?
|
|
end
|
|
|
|
def seems_meaningful?
|
|
# Minimum entropy if entropy check required
|
|
@opts[:min_entropy].blank? || (entropy >= @opts[:min_entropy])
|
|
end
|
|
|
|
def seems_pronounceable?
|
|
# At least some non-symbol characters
|
|
# (We don't have a comprehensive list of symbols, but this will eliminate some noise)
|
|
@text.gsub(symbols_regex, '').size > 0
|
|
end
|
|
|
|
def seems_unpretentious?
|
|
return true if skipped_locale.include?(SiteSetting.default_locale)
|
|
# Don't allow super long words if there is a word length maximum
|
|
@opts[:max_word_length].blank? || @text.split(/\s|\/|-|\.|:/).map(&:size).max <= @opts[:max_word_length]
|
|
end
|
|
|
|
def seems_quiet?
|
|
return true if skipped_locale.include?(SiteSetting.default_locale)
|
|
# We don't allow all upper case content
|
|
SiteSetting.allow_uppercase_posts || @text == @text.mb_chars.downcase.to_s || @text != @text.mb_chars.upcase.to_s
|
|
end
|
|
|
|
private
|
|
|
|
def symbols_regex
|
|
/[\ -\/\[-\`\:-\@\{-\~]/m
|
|
end
|
|
|
|
def skipped_locale
|
|
%w(zh_CN zh_TW ko ja).freeze
|
|
end
|
|
|
|
end
|