mirror of
https://github.com/discourse/discourse.git
synced 2025-01-07 18:34:35 +08:00
862773ec83
PG already handles English stop words, and the list in cppjieba is bigger than the list PG uses. This causes confusion because words such as "volume" are stripped when the cppjieba stop word list is applied. We will follow up with another commit to apply the Chinese stop words, but for now, to eliminate the confusion, we skip applying the stop word list when the dictionary in PG is English.
68 lines
2.4 KiB
Ruby
68 lines
2.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require 'rails_helper'
|
|
|
|
# Specs for three Search entry points exercised below:
#   * Search.prepare_data                     - tokenizing mixed CJK/English text
#   * Search.ts_config                        - locale => Postgres dictionary name
#   * Search::GroupedSearchResults.blurb_for  - building a blurb from cooked text
describe Search do

  context "#prepare_data" do
    it "does not remove English stop words in mixed mode" do
      mixed_input = "monkey 吃香蕉 in a loud volume"

      # With only the CJK tokenization setting switched on, the expected
      # output keeps every English word and splits the Chinese segment.
      SiteSetting.search_tokenize_chinese_japanese_korean = true
      expect(Search.prepare_data(mixed_input)).to eq("monkey 吃 香蕉 in a loud volume")

      # Once the default locale is zh_CN, the expected output no longer
      # contains "in", "a" or "volume".
      SiteSetting.default_locale = 'zh_CN'
      expect(Search.prepare_data(mixed_input)).to eq("monkey 吃 香蕉 loud")
    end
  end

  context "#ts_config" do
    it "maps locales to correct Postgres dictionaries" do
      # Called without an argument, the expected dictionary is "english".
      expect(Search.ts_config).to eq("english")

      # Explicit locale => expected dictionary; "xx" is expected to map to "simple".
      {
        "en" => "english",
        "en_US" => "english",
        "pt_BR" => "portuguese",
        "tr" => "turkish",
        "xx" => "simple",
      }.each do |locale, dictionary|
        expect(Search.ts_config(locale)).to eq(dictionary)
      end
    end
  end

  context "#GroupedSearchResults.blurb_for" do
    it "strips audio and video URLs from search blurb" do
      cooked = <<~TEXT
        link to an external page: https://google.com/?u=bar

        link to an audio file: https://somesite.com/content/file123.m4a

        link to a video file: https://somesite.com/content/somethingelse.MOV
      TEXT

      blurb = Search::GroupedSearchResults.blurb_for(cooked)

      # The external page URL is expected to stay; the .m4a and .MOV URLs are
      # expected to be replaced by the translated audio/video labels.
      audio_label = I18n.t("search.audio")
      video_label = I18n.t("search.video")
      expect(blurb).to eq("link to an external page: https://google.com/?u=bar link to an audio file: #{audio_label} link to a video file: #{video_label}")
    end

    it "strips URLs correctly when blurb is longer than limit" do
      cooked = <<~TEXT
        Here goes a test cooked with enough characters to hit the blurb limit.

        Something is very interesting about this audio file.

        http://localhost/uploads/default/original/1X/90adc0092b30c04b761541bc0322d0dce3d896e7.m4a
      TEXT

      blurb = Search::GroupedSearchResults.blurb_for(cooked)

      # The trailing upload URL is expected to collapse to the audio label.
      audio_label = I18n.t("search.audio")
      expect(blurb).to eq("Here goes a test cooked with enough characters to hit the blurb limit. Something is very interesting about this audio file. #{audio_label}")
    end

    it "does not fail on bad URLs" do
      cooked = <<~TEXT
        invalid URL: http:error] should not trip up blurb generation.
      TEXT

      # The malformed URL is expected to pass through unchanged.
      expect(Search::GroupedSearchResults.blurb_for(cooked)).to eq("invalid URL: http:error] should not trip up blurb generation.")
    end
  end

end
|