discourse/spec/lib/search_spec.rb
Sam Saffron 862773ec83
FIX: do not remove stop words when using English locale
PG already handles English stop words, and the list in cppjieba is
bigger than the list PG uses, which in turn causes confusion because
words such as "volume" are stripped by the cppjieba stop word list.

We will follow up with another commit here to apply the Chinese
word stopwords, but for now to eliminate the confusion we are
skipping applying the stopword list when the dictionary in PG is
in English.
2020-05-18 10:54:56 +10:00

68 lines
2.4 KiB
Ruby

# frozen_string_literal: true
require 'rails_helper'
# Specs for Search: query preparation, locale -> Postgres text-search
# dictionary lookup, and blurb generation for grouped search results.
describe Search do
  describe "#prepare_data" do
    # One query mixing English words with Chinese characters, reused for both
    # expectations below.
    let(:mixed_query) { "monkey 吃香蕉 in a loud volume" }

    it "does not remove English stop words in mixed mode" do
      SiteSetting.search_tokenize_chinese_japanese_korean = true

      # Before the locale is changed, the Chinese run is expected to be split
      # into separate tokens while every English word is left in place.
      expect(Search.prepare_data(mixed_query)).to eq("monkey 吃 香蕉 in a loud volume")

      # Once the default locale is zh_CN, the expected output no longer
      # contains "in", "a" or "volume".
      SiteSetting.default_locale = 'zh_CN'
      expect(Search.prepare_data(mixed_query)).to eq("monkey 吃 香蕉 loud")
    end
  end

  describe "#ts_config" do
    it "maps locales to correct Postgres dictionaries" do
      # Called without an argument, the expected dictionary is "english".
      expect(Search.ts_config).to eq("english")

      # Explicit locale => expected dictionary, checked in the listed order.
      # "xx" is presumably a stand-in for an unmapped locale; it is expected
      # to yield "simple".
      {
        "en" => "english",
        "en_US" => "english",
        "pt_BR" => "portuguese",
        "tr" => "turkish",
        "xx" => "simple"
      }.each do |locale, dictionary|
        expect(Search.ts_config(locale)).to eq(dictionary)
      end
    end
  end

  describe "#GroupedSearchResults.blurb_for" do
    it "strips audio and video URLs from search blurb" do
      # The page URL is expected to survive untouched; the .m4a and .MOV URLs
      # are expected to be replaced by the translated audio/video labels.
      blurb = Search::GroupedSearchResults.blurb_for(<<~RAW)
        link to an external page: https://google.com/?u=bar
        link to an audio file: https://somesite.com/content/file123.m4a
        link to a video file: https://somesite.com/content/somethingelse.MOV
      RAW

      expect(blurb).to eq("link to an external page: https://google.com/?u=bar link to an audio file: #{I18n.t("search.audio")} link to a video file: #{I18n.t("search.video")}")
    end

    it "strips URLs correctly when blurb is longer than limit" do
      # The audio URL sits at the very end of a longer text; it is still
      # expected to be swapped for the translated audio label.
      blurb = Search::GroupedSearchResults.blurb_for(<<~RAW)
        Here goes a test cooked with enough characters to hit the blurb limit.
        Something is very interesting about this audio file.
        http://localhost/uploads/default/original/1X/90adc0092b30c04b761541bc0322d0dce3d896e7.m4a
      RAW

      expect(blurb).to eq("Here goes a test cooked with enough characters to hit the blurb limit. Something is very interesting about this audio file. #{I18n.t("search.audio")}")
    end

    it "does not fail on bad URLs" do
      # A malformed URL-like token is expected to pass through unchanged.
      blurb = Search::GroupedSearchResults.blurb_for(<<~RAW)
        invalid URL: http:error] should not trip up blurb generation.
      RAW

      expect(blurb).to eq("invalid URL: http:error] should not trip up blurb generation.")
    end
  end
end