2019-05-03 06:17:27 +08:00
|
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
2018-06-27 06:02:03 +08:00
|
|
|
|
require_relative 'base'
|
|
|
|
|
require 'tiny_tds'
|
|
|
|
|
|
2018-08-13 21:27:51 +08:00
|
|
|
|
# Import script for Telligent communities
#
# It's really hard to find all attachments, but the script tries to do it anyway.
#
# You can supply a JSON file if you need to map and ignore categories during the import
# by providing the path to the file in the `CATEGORY_MAPPING` environment variable.
# You can also add tags to remapped categories and remap multiple old forums into one
# category. Here's an example of such a `mapping.json` file:
#
# {
#   "ignored_forum_ids": [41, 360, 378],
#
#   "mapping": [
#     {
#       "category": ["New Category 1"],
#       "forums": [
#         { "id": 348, "tag": "some_tag" },
#         { "id": 347, "tag": "another_tag" }
#       ]
#     },
#     {
#       "category": ["New Category 2"],
#       "forums": [
#         { "id": 9 }
#       ]
#     },
#     {
#       "category": ["Nested", "Category"],
#       "forums": [
#         { "id": 322 }
#       ]
#     }
#   ]
# }
|
2018-08-13 21:27:51 +08:00
|
|
|
|
|
2018-06-27 06:02:03 +08:00
|
|
|
|
class ImportScripts::Telligent < ImportScripts::Base
|
|
|
|
|
# Number of rows fetched per SQL batch.
BATCH_SIZE ||= 1000

# Avatar URLs that point into the local Telligent filestore ("~/.../<directory>/<path>/<filename>").
LOCAL_AVATAR_REGEX ||= /\A~\/.*(?<directory>communityserver-components-(?:selectable)?avatars)\/(?<path>[^\/]+)\/(?<filename>.+)/i

# Avatar URLs hosted on a remote server.
REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i

# Patterns for attachments embedded in post bodies: HTML links, HTML images,
# "[View:...]" tags and the two BBCode forms of [img]/[url]. Each pattern exposes
# the named groups `directory`, `path` and `filename`.
ATTACHMENT_REGEXES ||= [
  /<a[^>]*\shref="[^"]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)".*?>.*?<\/a>/i,
  /<img[^>]*\ssrc="[^"]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)".*?>/i,
  /\[View:[^\]]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)(?:\:[:\d\s]*?)?\]/i,
  /\[(?<tag>img|url)\][^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)\[\/\k<tag>\]/i,
  /\[(?<tag>img|url)=[^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)\][^\[]*?\[\/\k<tag>\]/i
].freeze

# One serialized profile property descriptor: "<name>:S:<start>:<length>:".
PROPERTY_NAMES_REGEX ||= /(?<name>\w+):S:(?<start>\d+):(?<length>\d+):/

# Permalink normalizations that reduce old forum/thread URLs to "f/<id>" and "f/<id>/t/<id>".
CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1'
TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1'

# Maps the 4-hex-digit codes found in filestore names (used by #replace_codes!)
# to the characters they stand for.
UNICODE_REPLACEMENTS = {
  "5F00" => "_",
  "2800" => "(",
  "2900" => ")",
  "2D00" => "-",
  "2C00" => ",",
  "2700" => "'",
  "5B00" => "[",
  "5D00" => "]",
  "3D00" => "=",
  "2600" => "&",
  "2100" => "!",
  "2300" => "#",
  "7E00" => "~",
  "2500" => "%",
  "2E00" => ".",
  "4000" => "@",
  "2B00" => "+",
  "2400" => "$",
  "1920" => "’",
  "E900" => "é",
  "E000" => "à",
  "F300" => "ó",
  "1C20" => "“",
  "1D20" => "”",
  "B000" => "°",
  "0003" => ["0300".to_i(16)].pack("U"),
  "0103" => ["0301".to_i(16)].pack("U")
}.freeze
|
|
|
|
|
|
2018-06-27 06:02:03 +08:00
|
|
|
|
# Sets up the importer: connects to the Telligent SQL Server database and
# prepares the filestore index.
#
# Reads DB_HOST, DB_USERNAME, DB_PASSWORD and DB_NAME for the connection and
# FILE_BASE_DIR for the root directory of the exported filestore. Also enables
# tagging on the target site.
def initialize
  super()

  @client = TinyTds::Client.new(
    host: ENV["DB_HOST"],
    username: ENV["DB_USERNAME"],
    password: ENV["DB_PASSWORD"],
    database: ENV["DB_NAME"],
    timeout: 60 # the user query is very slow
  )

  @filestore_root_directory = ENV["FILE_BASE_DIR"]
  # Lowercased relative path => relative path as found on disk (filled by #index_directory).
  @files = {}

  SiteSetting.tagging_enabled = true
end
|
|
|
|
|
|
|
|
|
|
# Runs all import steps in order. Categories and users are imported before
# topics, and topics before posts, because the later steps look up the
# records created by the earlier ones.
def execute
  add_permalink_normalizations
  index_filestore

  import_categories
  import_users
  import_topics
  import_posts
  mark_topics_as_solved
end
|
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Builds the in-memory index of all files below the filestore root directory.
#
# Does nothing when no filestore directory is configured; the other
# attachment and avatar helpers treat that directory as optional too.
def index_filestore
  return if @filestore_root_directory.blank?

  puts "", "Indexing filestore..."
  index_directory(@filestore_root_directory)
end
|
|
|
|
|
|
2018-06-27 06:02:03 +08:00
|
|
|
|
# Imports every user who created at least one thread or reply.
#
# Users are fetched in batches of BATCH_SIZE ordered by UserID, continuing
# after the highest UserID of the previous batch. Batches whose users were all
# imported already are skipped. Profile data is decoded from the serialized
# property columns with #parse_properties.
def import_users
  puts "", "Importing users..."

  user_conditions = <<~SQL
    (
      EXISTS(SELECT 1
             FROM te_Forum_Threads t
             WHERE t.UserId = u.UserID) OR
      EXISTS(SELECT 1
             FROM te_Forum_ThreadReplies r
             WHERE r.UserId = u.UserID)
    )
  SQL

  last_user_id = -1
  total_count = count(<<~SQL)
    SELECT COUNT(1) AS count
    FROM cs_Users u
    WHERE #{user_conditions}
  SQL
  import_count = 0

  loop do
    rows = query(<<~SQL)
      SELECT TOP #{BATCH_SIZE}
        u.UserID, u.Email, u.UserName, u.CreateDate,
        ap.PropertyNames AP_PropertyNames, ap.PropertyValuesString AS AP_PropertyValues,
        up.PropertyNames UP_PropertyNames, up.PropertyValues AS UP_PropertyValues
      FROM cs_Users u
        LEFT OUTER JOIN aspnet_Profile ap ON ap.UserId = u.MembershipID
        LEFT OUTER JOIN cs_UserProfile up ON up.UserID = u.UserID
      WHERE u.UserID > #{last_user_id} AND #{user_conditions}
      ORDER BY UserID
    SQL

    break if rows.blank?
    last_user_id = rows[-1]["UserID"]

    # Keep the progress offset in sync even when the whole batch is skipped.
    if all_records_exist?(:users, rows.map { |row| row["UserID"] })
      import_count += rows.size
      next
    end

    create_users(rows, total: total_count, offset: import_count) do |row|
      ap_properties = parse_properties(row["AP_PropertyNames"], row["AP_PropertyValues"])
      up_properties = parse_properties(row["UP_PropertyNames"], row["UP_PropertyValues"])

      {
        id: row["UserID"],
        email: row["Email"],
        username: row["UserName"],
        name: ap_properties["commonName"],
        created_at: row["CreateDate"],
        bio_raw: html_to_markdown(ap_properties["bio"]),
        location: ap_properties["location"],
        website: ap_properties["webAddress"],
        post_create_action: proc do |user|
          import_avatar(user, up_properties["avatarUrl"])
          suspend_user(user, up_properties["BannedUntil"], up_properties["UserBanReason"])
        end
      }
    end

    import_count += rows.size
  end
end
|
|
|
|
|
|
|
|
|
|
# TODO move into base importer (create_user) and use consistent error handling
#
# Imports the avatar of a user.
#
# Local avatars (matching LOCAL_AVATAR_REGEX) are looked up in the filestore;
# the directory name has "-" replaced by "." and the path segment is split on
# "-" into nested directories. Remote avatars (http/https) are downloaded.
# Nothing happens without a filestore directory, without a URL, or when the
# URL contains "anonymous".
#
# @param user the user record that was just created
# @param avatar_url [String, nil] value of the "avatarUrl" profile property
def import_avatar(user, avatar_url)
  return if @filestore_root_directory.blank? || avatar_url.blank? || avatar_url.include?("anonymous")

  if match_data = avatar_url.match(LOCAL_AVATAR_REGEX)
    avatar_path = File.join(@filestore_root_directory,
                            match_data[:directory].gsub("-", "."),
                            match_data[:path].split("-"),
                            match_data[:filename])

    if File.file?(avatar_path)
      @uploader.create_avatar(user, avatar_path)
    else
      STDERR.puts "Could not find avatar: #{avatar_path}"
    end
  elsif avatar_url.match?(REMOTE_AVATAR_REGEX)
    # Remote downloads are best effort: a failure must not abort the import,
    # but it is reported instead of being swallowed silently.
    begin
      UserAvatar.import_url_for_user(avatar_url, user)
    rescue StandardError => e
      STDERR.puts "Could not import remote avatar #{avatar_url}: #{e.message}"
    end
  end
end
|
|
|
|
|
|
|
|
|
|
# Suspends a user whose Telligent ban has not expired yet.
#
# @param user the imported user record
# @param banned_until [String, nil] end of the ban; parsed with DateTime.parse
# @param ban_reason [String, nil] reason recorded in the staff action log
def suspend_user(user, banned_until, ban_reason)
  return if banned_until.blank?

  # Parse first, then compare. A combined `x = parse(...) > now` would assign
  # the boolean result of the comparison instead of the date.
  banned_until = DateTime.parse(banned_until)
  return unless banned_until > DateTime.now

  user.suspended_till = banned_until
  user.suspended_at = DateTime.now
  user.save!

  StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
end
|
|
|
|
|
|
|
|
|
|
# Imports categories, either from the JSON mapping file named by the
# CATEGORY_MAPPING environment variable or, without it, directly from the
# Telligent groups and forums.
def import_categories
  if ENV['CATEGORY_MAPPING']
    import_mapped_forums_as_categories
  else
    import_groups_and_forums_as_categories
  end
end
|
|
|
|
|
|
|
|
|
|
# Creates categories from the JSON mapping file (see the example at the top of
# this file) and maps the old forum ids onto them.
#
# Each entry of "category" is one nesting level; only the last level receives
# the forum ids. The import id of a category is the MD5 digest of its name.
# NOTE(review): because the id depends on the name alone, equally named
# categories under different parents share one import id - confirm that this
# is acceptable for the mapping in use.
#
# Side effects: sets @ignored_forum_ids and @forum_ids_to_tags.
def import_mapped_forums_as_categories
  puts "", "Importing categories..."

  json = JSON.parse(File.read(ENV['CATEGORY_MAPPING']))

  categories = []
  @forum_ids_to_tags = {}
  @ignored_forum_ids = json["ignored_forum_ids"]

  json["mapping"].each do |m|
    parent_id = nil
    last_index = m["category"].size - 1
    forum_ids = []

    m["forums"].each do |f|
      forum_ids << f["id"]
      @forum_ids_to_tags[f["id"]] = f["tag"] if f["tag"].present?
    end

    m["category"].each_with_index do |name, index|
      id = Digest::MD5.hexdigest(name)
      categories << {
        id: id,
        name: name,
        parent_id: parent_id,
        forum_ids: index == last_index ? forum_ids : nil
      }
      parent_id = id
    end
  end

  create_categories(categories) do |c|
    if category_id = category_id_from_imported_category_id(c[:id])
      # The category exists already: only register its forums, create nothing.
      map_forum_ids(category_id, c[:forum_ids])
      nil
    else
      {
        id: c[:id],
        name: c[:name],
        parent_category_id: category_id_from_imported_category_id(c[:parent_id]),
        post_create_action: proc do |category|
          map_forum_ids(category.id, c[:forum_ids])
        end
      }
    end
  end
end
|
|
|
|
|
|
|
|
|
|
# Points the given old forum ids at a category: creates the "f/<id>" permalink
# (unless one exists) and registers the category under each forum id.
#
# @param category_id [Integer] id of the target category
# @param forum_ids [Array, nil] old forum ids; nil or empty does nothing
def map_forum_ids(category_id, forum_ids)
  return if forum_ids.blank?

  # The category is the same for every forum id, so load it once.
  category = Category.find_by_id(category_id)

  forum_ids.each do |id|
    url = "f/#{id}"
    Permalink.create(url: url, category_id: category_id) unless Permalink.exists?(url: url)
    add_category(id, category)
  end
end
|
|
|
|
|
|
|
|
|
|
# Imports Telligent groups as parent categories and forums as child categories.
#
# Only groups with more than one forum are selected as parent categories. A
# forum that is the only child of its parent category is not created as a
# category of its own; it is mapped onto the parent category instead.
def import_groups_and_forums_as_categories
  puts "", "Importing parent categories..."
  parent_categories = query(<<~SQL)
    SELECT GroupID, Name, HtmlDescription, DateCreated, SortOrder
    FROM cs_Groups g
    WHERE (SELECT COUNT(1)
           FROM te_Forum_Forums f
           WHERE f.GroupId = g.GroupID) > 1
    ORDER BY SortOrder, Name
  SQL

  create_categories(parent_categories) do |row|
    {
      id: "G#{row['GroupID']}",
      name: clean_category_name(row["Name"]),
      description: html_to_markdown(row["HtmlDescription"]),
      position: row["SortOrder"]
    }
  end

  puts "", "Importing child categories..."
  child_categories = query(<<~SQL)
    SELECT ForumId, GroupId, Name, Description, DateCreated, SortOrder
    FROM te_Forum_Forums
    ORDER BY GroupId, SortOrder, Name
  SQL

  create_categories(child_categories) do |row|
    parent_category_id = parent_category_id_for(row)

    if category_id = replace_with_category_id(child_categories, parent_category_id)
      # Only child: reuse the parent category instead of creating a new one.
      add_category(row['ForumId'], Category.find_by_id(category_id))
      url = "f/#{row['ForumId']}"
      Permalink.create(url: url, category_id: category_id) unless Permalink.exists?(url: url)
      nil
    else
      {
        id: row['ForumId'],
        parent_category_id: parent_category_id,
        name: clean_category_name(row["Name"]),
        description: html_to_markdown(row["Description"]),
        position: row["SortOrder"],
        post_create_action: proc do |category|
          url = "f/#{row['ForumId']}"
          Permalink.create(url: url, category_id: category.id) unless Permalink.exists?(url: url)
        end
      }
    end
  end
end
|
|
|
|
|
|
|
|
|
|
# Returns the id of the category imported for the row's group ("G<GroupId>"),
# or nil when the row has no "GroupId" key.
def parent_category_id_for(row)
  category_id_from_imported_category_id("G#{row['GroupId']}") if row.key?("GroupId")
end
|
|
|
|
|
|
2019-05-30 23:37:51 +08:00
|
|
|
|
# Returns parent_category_id when exactly one child row belongs to that
# parent (the child is then merged into the parent), otherwise nil.
def replace_with_category_id(child_categories, parent_category_id)
  parent_category_id if only_child?(child_categories, parent_category_id)
end
|
|
|
|
|
|
|
|
|
|
# Tells whether exactly one row of child_categories has the given parent category.
def only_child?(child_categories, parent_category_id)
  child_categories.count { |row| parent_category_id_for(row) == parent_category_id } == 1
end
|
|
|
|
|
|
|
|
|
|
# Decodes HTML entities in a category name and strips surrounding whitespace.
def clean_category_name(name)
  CGI.unescapeHTML(name).strip
end
|
|
|
|
|
|
|
|
|
|
# Imports all threads (outside of ignored forums) as topics.
#
# Threads are fetched in batches ordered by ThreadId, continuing after the
# highest ThreadId of the previous batch. The first post of a topic gets the
# import id "T<ThreadId>" (see #import_topic_id). After creation a permalink
# "f/<ForumId>/t/<ThreadId>" is added and the topic views are imported.
def import_topics
  puts "", "Importing topics..."

  last_topic_id = -1
  total_count = count("SELECT COUNT(1) AS count FROM te_Forum_Threads t WHERE #{ignored_forum_sql_condition}")

  batches do |offset|
    rows = query(<<~SQL)
      SELECT TOP #{BATCH_SIZE}
        t.ThreadId, t.ForumId, t.UserId, t.TotalViews, t.ContentID AS TopicContentId,
        t.Subject, t.Body, t.DateCreated, t.IsLocked, t.StickyDate,
        a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName, a.IsRemote
      FROM te_Forum_Threads t
        LEFT JOIN te_Attachments a
          ON (a.ApplicationId = t.ForumId AND a.ApplicationTypeId = 0 AND a.ContentId = t.ThreadId AND
              a.ApplicationContentTypeId = 0)
      WHERE t.ThreadId > #{last_topic_id} AND #{ignored_forum_sql_condition}
      ORDER BY t.ThreadId
    SQL

    break if rows.blank?
    last_topic_id = rows[-1]["ThreadId"]
    next if all_records_exist?(:post, rows.map { |row| import_topic_id(row["ThreadId"]) })

    create_posts(rows, total: total_count, offset: offset) do |row|
      # Content of unknown users is attributed to the system user.
      user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID

      post = {
        id: import_topic_id(row["ThreadId"]),
        title: CGI.unescapeHTML(row["Subject"]),
        raw: raw_with_attachment(row, user_id, :topic),
        category: category_id_from_imported_category_id(row["ForumId"]),
        user_id: user_id,
        created_at: row["DateCreated"],
        closed: row["IsLocked"],
        views: row["TotalViews"],
        post_create_action: proc do |action_post|
          topic = action_post.topic
          Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id) if topic.pinned_until
          url = "f/#{row['ForumId']}/t/#{row['ThreadId']}"
          Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url)
          import_topic_views(topic, row["TopicContentId"])
        end
      }

      # Pin only threads that are still sticky; a missing StickyDate means "not sticky".
      sticky_date = row["StickyDate"]
      if sticky_date && sticky_date > Time.now
        post[:pinned_until] = sticky_date
        post[:pinned_at] = row["DateCreated"]
      end

      post
    end
  end
end
|
|
|
|
|
|
|
|
|
|
# Returns the import id used for the first post of a thread: the thread id
# prefixed with "T", which keeps it distinct from the plain reply ids.
def import_topic_id(topic_id)
  "T#{topic_id}"
end
|
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Imports the per-user views of a topic.
#
# Reads the most recent view date per user from te_Content_Views in batches
# ordered by UserId and records a view for every user that was imported.
#
# @param topic the created topic
# @param content_id the ContentID of the thread (interpolated as a quoted SQL value)
def import_topic_views(topic, content_id)
  last_user_id = -1

  batches do |_|
    rows = query(<<~SQL)
      SELECT TOP #{BATCH_SIZE}
        UserId, MAX(CreatedUtcDate) AS ViewDate
      FROM te_Content_Views
      WHERE ContentId = '#{content_id}' AND UserId > #{last_user_id}
      GROUP BY UserId
      ORDER BY UserId
    SQL

    break if rows.blank?
    last_user_id = rows[-1]["UserId"]

    rows.each do |row|
      user_id = user_id_from_imported_user_id(row["UserId"])
      TopicViewItem.add(topic.id, "127.0.0.1", user_id, row["ViewDate"], true) if user_id
    end
  end
end
|
|
|
|
|
|
2020-03-12 08:39:16 +08:00
|
|
|
|
# Returns the SQL condition that excludes threads of ignored forums, or the
# no-op condition "1 = 1" when no forums are ignored. It expects the threads
# table to be aliased as "t". The result is memoized on first use.
def ignored_forum_sql_condition
  @ignored_forum_sql_condition ||=
    if @ignored_forum_ids.present?
      "t.ForumId NOT IN (#{@ignored_forum_ids.join(',')})"
    else
      "1 = 1"
    end
end
|
|
|
|
|
|
2018-06-27 06:02:03 +08:00
|
|
|
|
# Imports all thread replies (outside of ignored forums) as posts.
#
# Replies are fetched in batches ordered by ThreadReplyId. A reply is attached
# to its parent reply when the join finds one in the same thread, otherwise to
# the first post of the thread. Replies whose parent was not imported are
# reported and skipped. The first verified answer of a thread is flagged with
# the "is_accepted_answer" custom field.
def import_posts
  puts "", "Importing posts..."

  last_post_id = -1
  total_count = count(<<~SQL)
    SELECT COUNT(1) AS count
    FROM te_Forum_ThreadReplies tr
      JOIN te_Forum_Threads t ON (tr.ThreadId = t.ThreadId)
    WHERE #{ignored_forum_sql_condition}
  SQL

  batches do |offset|
    rows = query(<<~SQL)
      SELECT TOP #{BATCH_SIZE}
        tr.ThreadReplyId, tr.ThreadId, tr.UserId, pr.ThreadReplyId AS ParentReplyId,
        tr.Body, tr.ThreadReplyDate,
        CONVERT(BIT,
                CASE WHEN tr.AnswerVerifiedUtcDate IS NOT NULL AND NOT EXISTS(
                    SELECT 1
                    FROM te_Forum_ThreadReplies x
                    WHERE
                      x.ThreadId = tr.ThreadId AND x.ThreadReplyId < tr.ThreadReplyId AND x.AnswerVerifiedUtcDate IS NOT NULL
                  )
                  THEN 1
                ELSE 0 END) AS IsFirstVerifiedAnswer,
        a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName, a.IsRemote
      FROM te_Forum_ThreadReplies tr
        JOIN te_Forum_Threads t ON (tr.ThreadId = t.ThreadId)
        LEFT JOIN te_Forum_ThreadReplies pr ON (tr.ParentReplyId = pr.ThreadReplyId AND tr.ParentReplyId < tr.ThreadReplyId AND tr.ThreadId = pr.ThreadId)
        LEFT JOIN te_Attachments a
          ON (a.ApplicationId = t.ForumId AND a.ApplicationTypeId = 0 AND a.ContentId = tr.ThreadReplyId AND
              a.ApplicationContentTypeId = 1)
      WHERE tr.ThreadReplyId > #{last_post_id} AND #{ignored_forum_sql_condition}
      ORDER BY tr.ThreadReplyId
    SQL

    break if rows.blank?
    last_post_id = rows[-1]["ThreadReplyId"]
    next if all_records_exist?(:post, rows.map { |row| row["ThreadReplyId"] })

    create_posts(rows, total: total_count, offset: offset) do |row|
      # Without a usable parent reply the post replies to the topic's first post.
      imported_parent_id = row["ParentReplyId"]&.nonzero? ? row["ParentReplyId"] : import_topic_id(row["ThreadId"])
      parent_post = topic_lookup_from_imported_post_id(imported_parent_id)
      user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID

      if parent_post
        post = {
          id: row["ThreadReplyId"],
          raw: raw_with_attachment(row, user_id, :post),
          user_id: user_id,
          topic_id: parent_post[:topic_id],
          created_at: row["ThreadReplyDate"],
          reply_to_post_number: parent_post[:post_number]
        }

        post[:custom_fields] = { is_accepted_answer: "true" } if row["IsFirstVerifiedAnswer"]
        post
      else
        # `puts` returns nil, so the block yields nil and the row is skipped.
        puts "Failed to import post #{row['ThreadReplyId']}. Parent was not found."
      end
    end
  end
end
|
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Recursively indexes all files below root_directory.
#
# For every file the path relative to @filestore_root_directory is stored in
# @files under its lowercased form, which allows case-insensitive lookups.
#
# @param root_directory [String] directory to scan
def index_directory(root_directory)
  Dir.each_child(root_directory) do |entry_name|
    path = File.join(root_directory, entry_name)

    if File.directory?(path)
      index_directory(path)
    else
      path.delete_prefix!(@filestore_root_directory)
      path.delete_prefix!("/")
      @files[path.downcase] = path
    end
  end
end
|
|
|
|
|
|
2020-03-12 08:39:16 +08:00
|
|
|
|
# Builds the Markdown body of a topic or post, including its attachment.
#
# Embedded attachments in the body are replaced first, then the HTML is
# converted to Markdown. If the row has an attachment (FileName) that was not
# embedded already, it is uploaded and appended. For remote attachments the
# FileName value is appended as is.
#
# @param row [Hash] a row from the topic or post query
# @param user_id [Integer] owner of the created uploads
# @param type [Symbol] :topic or :post; only used for the error message
# @return [String] the Markdown body
def raw_with_attachment(row, user_id, type)
  raw, embedded_paths, upload_ids = replace_embedded_attachments(row["Body"], user_id)
  raw = html_to_markdown(raw) || ""

  filename = row["FileName"]
  return raw if @filestore_root_directory.blank? || filename.blank?

  if row["IsRemote"]
    return "#{raw}\n#{filename}"
  end

  # Attachments are stored below zero-padded id directories; the 10-digit
  # content id is split into directories of two digits each.
  path = File.join(
    "telligent.evolution.components.attachments",
    "%02d" % row["ApplicationTypeId"],
    "%02d" % row["ApplicationId"],
    "%02d" % row["ApplicationContentTypeId"],
    ("%010d" % row["ContentId"]).scan(/.{2}/)
  )
  path = fix_attachment_path(path, filename)

  if path && !embedded_paths.include?(path)
    if File.file?(path)
      upload = @uploader.create_upload(user_id, path, filename)

      # Skip uploads that were embedded in the body already.
      if upload.present? && upload.persisted? && !upload_ids.include?(upload.id)
        raw = "#{raw}\n#{@uploader.html_for_upload(upload, filename)}"
      end
    else
      id = type == :topic ? row['ThreadId'] : row['ThreadReplyId']
      STDERR.puts "Could not find file for #{type} #{id}: #{path}"
    end
  end

  raw
end
|
|
|
|
|
|
|
|
|
|
# Replaces attachments that are embedded in a post body with uploads.
#
# Every match of ATTACHMENT_REGEXES is resolved to a file in the filestore,
# uploaded and replaced by the upload's HTML. Matches whose file cannot be
# found or uploaded are left untouched.
#
# @param raw [String, nil] the original body
# @param user_id [Integer] owner of the created uploads
# @return [Array] [body, paths of the embedded files, ids of the created uploads]
def replace_embedded_attachments(raw, user_id)
  paths = []
  upload_ids = []

  # A missing body has nothing to replace (and would fail on `gsub`).
  return [raw, paths, upload_ids] if @filestore_root_directory.blank? || raw.blank?

  ATTACHMENT_REGEXES.each do |regex|
    raw = raw.gsub(regex) do
      # Capture the match right away; later regex calls overwrite Regexp.last_match.
      match_data = Regexp.last_match

      path = File.join(match_data[:directory], match_data[:path])
      fixed_path = fix_attachment_path(path, match_data[:filename])

      if fixed_path && File.file?(fixed_path)
        filename = File.basename(fixed_path)
        upload = @uploader.create_upload(user_id, fixed_path, filename)

        if upload.present? && upload.persisted?
          paths << fixed_path
          upload_ids << upload.id
          @uploader.html_for_upload(upload, filename)
        else
          # Keep the original markup; a nil block value would delete it.
          STDERR.puts "Could not upload file: #{fixed_path}"
          match_data[0]
        end
      else
        path = File.join(path, match_data[:filename])
        STDERR.puts "Could not find file: #{path}"
        match_data[0]
      end
    end
  end

  [raw, paths, upload_ids]
end
|
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Tries to find the file of an attachment in the indexed filestore.
#
# The stored directory and file names often differ from the names used in the
# database or in post bodies, so a series of increasingly lossy variants is
# tried. Each step builds on the result of the previous one, so the order of
# the steps matters. The first variant that exists on disk wins.
#
# @param base_path [String] directory of the attachment, relative to the
#   filestore root; it is not modified
# @param filename [String] name of the attachment; it is not modified
# @return [String, nil] absolute path of the file, or nil if none was found
def fix_attachment_path(base_path, filename)
  # 1. names as given (the index lookup itself is case-insensitive)
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 2. lowercased directory (non-mutating: callers keep their string)
  base_path = base_path.downcase
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 3. HTML entities in the file name decoded
  filename = CGI.unescapeHTML(filename)
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 4. dashes in the file name replaced by spaces
  filename.gsub!("-", " ")
  filename.strip!
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 5. dashes in the first directory replaced by dots
  directories = base_path.split(File::SEPARATOR)
  first_directory = directories.shift
  first_directory.gsub!("-", ".")
  base_path = File.join(first_directory, directories)
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 6. remaining directories split on "." and "-" into nested directories
  directories.map! { |d| File.join(d.split(/[\.\-]/).map(&:strip)) }
  base_path = File.join(first_directory, directories)
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 7. "+" in directory names replaced by spaces
  directories = base_path.split(File::SEPARATOR)
  directories.map! { |d| d.gsub("+", " ").strip }
  base_path = File.join(directories)
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 8. "_XXXX_" codes decoded in the file name
  replace_codes!(filename)
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 9. "_XXXX_" codes decoded in the directory as well
  replace_codes!(base_path)
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 10. trailing ":<digits>" groups removed from the file name
  filename.gsub!(/(?:\:\d+)+$/, "")
  path = find_correct_path(base_path, filename)
  return path if attachment_exists?(path)

  # 11. a single indexed file named "<path>-<width>x<height>.<extension>"
  path = File.join(base_path, filename)
  path_regex = Regexp.new("^#{Regexp.escape(path)}-\\d+x\\d+\\.\\w+$", Regexp::IGNORECASE)
  path = find_correct_path_with_regex(path_regex)
  return path if attachment_exists?(path)

  nil
end
|
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Looks up a file in the filestore index, ignoring case.
#
# @return [String, nil] the path with its on-disk spelling, prefixed with the
#   filestore root, or nil if the file is not indexed
def find_correct_path(base_path, filename)
  path = File.join(base_path, filename)
  path = @files[path.downcase]
  path ? File.join(@filestore_root_directory, path) : nil
end
|
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Looks up a file in the filestore index by matching the lowercased index
# keys against a regular expression.
#
# @return [String, nil] the path prefixed with the filestore root if exactly
#   one key matches; nil for no match or an ambiguous match
def find_correct_path_with_regex(regex)
  keys = @files.keys.select { |key| regex.match?(key) }
  keys.size == 1 ? File.join(@filestore_root_directory, @files[keys.first]) : nil
end
|
2018-08-13 21:27:51 +08:00
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Tells whether path is present and refers to an existing regular file.
def attachment_exists?(path)
  path.present? && File.file?(path)
end
|
2018-06-27 06:02:03 +08:00
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Replaces "_XXXX_" sequences (one or more groups of four hex digits between
# underscores) in place with the characters from UNICODE_REPLACEMENTS.
#
# A sequence that contains an unknown code is left exactly as it was.
#
# @param text [String] string to modify in place
# @return [String, nil] text, or nil if nothing matched (like String#gsub!)
def replace_codes!(text)
  text.gsub!(/_(\h{4}+)_/i) do
    # Keep the match: the `scan` below overwrites Regexp.last_match.
    match = Regexp.last_match
    replacements = match[1].upcase.scan(/.{4}/).map { |code| UNICODE_REPLACEMENTS[code] }
    replacements.include?(nil) ? match[0] : replacements.join
  end
end
|
|
|
|
|
|
|
|
|
|
# Converts HTML to Markdown and cleans up some BBCode.
#
# [quote] and [/quote] tags are put on lines of their own and
# "[View:http...]" tags are reduced to their URL.
#
# @param html [String, nil]
# @return [String, nil] the Markdown; blank input is returned unchanged
def html_to_markdown(html)
  return html if html.blank?

  md = HtmlToMarkdown.new(html).to_markdown
  # opening quote tags on a line of their own
  md.gsub!(/\[quote.*?\]/, "\n" + '\0' + "\n")
  # closing quote tags that do not start a line ...
  md.gsub!(/(?<!^)\[\/quote\]/, "\n[/quote]\n")
  # ... or do not end a line
  md.gsub!(/\[\/quote\](?!$)/, "\n[/quote]\n")
  # "[View:<url>:<numbers>]" becomes "<url>"
  md.gsub!(/\[View:(http.*?)[:\d\s]*?(?:\]|\z)/i, '\1')
  md.strip!
  md
end
|
|
|
|
|
|
2020-03-23 16:13:36 +08:00
|
|
|
|
# Decodes serialized profile properties.
#
# `names` is a sequence of "<name>:S:<start>:<length>:" descriptors; each one
# refers to the substring of `values` that starts at <start> and is <length>
# characters long.
#
# @param names [String, nil] property descriptors
# @param values [String, nil] concatenated property values
# @return [Hash{String => String}] property name => value; empty when either
#   argument is blank
def parse_properties(names, values)
  properties = {}
  return properties if names.blank? || values.blank?

  names.scan(PROPERTY_NAMES_REGEX) do |name, start, length|
    # Slice by start and length. An inclusive range would turn a zero-length
    # property at offset 0 into `0..-1`, i.e. the whole string.
    properties[name] = values[start.to_i, length.to_i]
  end

  properties
end
|
|
|
|
|
|
2020-03-15 05:10:19 +08:00
|
|
|
|
# Marks topics as solved: every post flagged with the "is_accepted_answer"
# custom field becomes the "accepted_answer_post_id" of its topic.
#
# Topics that already have an accepted answer are skipped, so running the
# import again does not insert duplicate rows.
def mark_topics_as_solved
  puts "", "Marking topics as solved..."

  DB.exec <<~SQL
    INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
    SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
    FROM post_custom_fields pcf
      JOIN posts p ON p.id = pcf.post_id
    WHERE pcf.name = 'is_accepted_answer' AND pcf.value = 'true'
      AND NOT EXISTS (
        SELECT 1
        FROM topic_custom_fields x
        WHERE x.topic_id = p.topic_id AND x.name = 'accepted_answer_post_id'
      )
  SQL
end
|
|
|
|
|
|
2018-06-27 06:02:03 +08:00
|
|
|
|
# Adds the permalink normalizations for old forum and thread URLs to the
# "permalink_normalizations" site setting (a "|"-separated list), keeping the
# normalizations that exist already.
def add_permalink_normalizations
  normalizations = SiteSetting.permalink_normalizations
  normalizations = normalizations.blank? ? [] : normalizations.split('|')

  add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION)
  add_normalization(normalizations, TOPIC_LINK_NORMALIZATION)

  SiteSetting.permalink_normalizations = normalizations.join('|')
end
|
|
|
|
|
|
|
|
|
|
# Appends normalization to the normalizations array unless it is included already.
def add_normalization(normalizations, normalization)
  normalizations << normalization unless normalizations.include?(normalization)
end
|
|
|
|
|
|
|
|
|
|
# Calls the inherited `batches` with this script's BATCH_SIZE; the given
# block is passed on implicitly by `super`.
def batches
  super(BATCH_SIZE)
end
|
|
|
|
|
|
|
|
|
|
# Executes a SQL statement on the Telligent database and returns all rows as an array.
def query(sql)
  @client.execute(sql).to_a
end
|
|
|
|
|
|
|
|
|
|
# Executes a counting SQL statement and returns the "count" column of its first row.
def count(sql)
  query(sql).first["count"]
end
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
# Start the import when this script is run. `perform` is not defined in this
# file; presumably it is inherited from ImportScripts::Base - TODO confirm.
ImportScripts::Telligent.new.perform
|