FIX: Don't inject extra terms for version lexeme.

This commit is contained in:
Guo Xiang Tan 2020-07-27 14:46:44 +08:00
parent ce53180c3b
commit b70f1084f7
No known key found for this signature in database
GPG Key ID: FBD110179AAC1F20
2 changed files with 18 additions and 7 deletions

View File

@ -59,13 +59,15 @@ class SearchIndexer
tsvector.scan(/'(([a-zA-Z0-9]+\.)+[a-zA-Z0-9]+)'\:([\w+,]+)/).reduce(additional_lexemes) do |array, (lexeme, _, positions)|
count = 0
loop do
count += 1
break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots
term, _, remaining = lexeme.partition(".")
break if remaining.blank?
array << "'#{term}':#{positions} '#{remaining}':#{positions}"
lexeme = remaining
if lexeme !~ /^(\d+\.)?(\d+\.)?(\*|\d+)$/
loop do
count += 1
break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots
term, _, remaining = lexeme.partition(".")
break if remaining.blank?
array << "'#{term}':#{positions} '#{remaining}':#{positions}"
lexeme = remaining
end
end
array

View File

@ -141,6 +141,15 @@ describe SearchIndexer do
)
end
# A version-like raw body ("1.2.2") is expected to show up in the indexed
# search data as the single lexeme '1.2.2', with no additional dot-split
# terms alongside the title and category lexemes.
it 'should not tokenize versions' do
  post.topic.update!(title: "this is a title that I am testing")
  post.update!(raw: '1.2.2')

  indexed_data = post.post_search_data.search_data

  expect(indexed_data).to eq("'1.2.2':10 'test':8A 'titl':4A 'uncategor':9B")
end
it 'should tokenize host of a URL and removes query string' do
category = Fabricate(:category, name: 'awesome category')
topic = Fabricate(:topic, category: category, title: 'this is a test topic')