require_relative 'base'
require 'tiny_tds'

# Importer for Telligent Community forums (MS SQL Server) into Discourse.
#
# Required environment variables:
#   DB_HOST, DB_USERNAME, DB_PASSWORD, DB_NAME -- Telligent SQL Server connection
#   FILE_BASE_DIR -- local directory containing the exported avatar/attachment files
class ImportScripts::Telligent < ImportScripts::Base
  BATCH_SIZE ||= 1000

  # Matches locally stored avatar paths of the form
  #   ~/.../communityserver-components-avatars/<path>/<filename>
  # NOTE(review): the original source lost the named-group syntax
  # ("(?communityserver..." is not valid Ruby regex); the group names
  # <directory>, <path> and <filename> were reconstructed from their use
  # in #import_avatar.
  LOCAL_AVATAR_REGEX ||= /\A~\/.*(?<directory>communityserver-components-(?:selectable)?avatars)\/(?<path>[^\/]+)\/(?<filename>.+)/i

  REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i

  # Matches <a> tags pointing at Telligent's centralized file storage (CFS),
  # e.g. href="/cfs-file/__key/<directory>/<path>/<filename>".
  # NOTE(review): the original pattern was corrupted (only ".*?</a>" survived);
  # this is a reconstruction based on the named captures consumed in
  # #replace_embedded_attachments -- verify against real post HTML before a
  # production import.
  EMBEDDED_ATTACHMENT_REGEX ||= /<a[^>]*\shref="[^"]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/"]+)\/(?<path>[^\/"]+)\/(?<filename>[^"]+)"[^>]*>.*?<\/a>/im

  # Permalink normalization rules (regex/replacement pairs) that strip
  # everything before the "f/<id>" and "f/<id>/t/<id>" URL fragments.
  CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1'
  TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1'

  def initialize
    super()

    @client = TinyTds::Client.new(
      host: ENV["DB_HOST"],
      username: ENV["DB_USERNAME"],
      password: ENV["DB_PASSWORD"],
      database: ENV["DB_NAME"]
    )
  end

  def execute
    add_permalink_normalizations
    import_users
    import_categories
    import_topics
    import_posts
    mark_topics_as_solved
  end

  # Imports users that authored at least one thread or reply. User properties
  # (avatar, bio, ban info, ...) are stored as name/value rows and are PIVOTed
  # into columns so each user arrives as a single row.
  def import_users
    puts "", "Importing users..."

    user_conditions = <<~SQL
      (
        EXISTS(SELECT 1
               FROM te_Forum_Threads t
               WHERE t.UserId = u.UserID) OR
        EXISTS(SELECT 1
               FROM te_Forum_ThreadReplies r
               WHERE r.UserId = u.UserID)
      )
    SQL

    last_user_id = -1
    total_count = count(<<~SQL)
      SELECT COUNT(1) AS count
      FROM temp_User u
      WHERE #{user_conditions}
    SQL

    batches do |offset|
      rows = query(<<~SQL)
        SELECT TOP #{BATCH_SIZE} *
        FROM
        (
            SELECT
              u.UserID,
              u.Email,
              u.UserName,
              u.CommonName,
              u.CreateDate,
              p.PropertyName,
              p.PropertyValue
            FROM temp_User u
              LEFT OUTER JOIN temp_UserProperties p ON (u.UserID = p.UserID)
            WHERE u.UserID > #{last_user_id} AND #{user_conditions}
        ) x
        PIVOT (
            MAX(PropertyValue)
            FOR PropertyName
            IN (avatarUrl, bio, Location, webAddress, BannedUntil, UserBanReason)
        ) y
        ORDER BY UserID
      SQL

      break if rows.blank?
      last_user_id = rows[-1]["UserID"]
      next if all_records_exist?(:users, rows.map { |row| row["UserID"] })

      create_users(rows, total: total_count, offset: offset) do |row|
        {
          id: row["UserID"],
          email: row["Email"],
          username: row["UserName"],
          name: row["CommonName"],
          created_at: row["CreateDate"],
          bio_raw: html_to_markdown(row["bio"]),
          location: row["Location"],
          website: row["webAddress"],
          post_create_action: proc do |user|
            import_avatar(user, row["avatarUrl"])
            suspend_user(user, row["BannedUntil"], row["UserBanReason"])
          end
        }
      end
    end
  end

  # TODO move into base importer (create_user) and use consistent error handling
  #
  # Uploads a locally stored avatar or imports a remote one. Avatars named
  # "anonymous" are the Telligent placeholder and are skipped.
  def import_avatar(user, avatar_url)
    return if avatar_url.blank? || avatar_url.include?("anonymous")

    if (match_data = avatar_url.match(LOCAL_AVATAR_REGEX))
      # Telligent stores directories dot-separated and paths hyphen-separated
      # on disk; rebuild the real filesystem path from the URL components.
      avatar_path = File.join(ENV["FILE_BASE_DIR"],
                              match_data[:directory].gsub("-", "."),
                              match_data[:path].split("-"),
                              match_data[:filename])

      if File.exist?(avatar_path)
        @uploader.create_avatar(user, avatar_path)
      else
        STDERR.puts "Could not find avatar: #{avatar_path}"
      end
    elsif avatar_url.match?(REMOTE_AVATAR_REGEX)
      # Best effort: a dead remote URL must not abort the import.
      UserAvatar.import_url_for_user(avatar_url, user) rescue nil
    end
  end

  # Suspends the user when the Telligent ban (+banned_until+ is a string
  # timestamp from the pivoted user properties) is still active, and logs
  # the staff action.
  def suspend_user(user, banned_until, ban_reason)
    return if banned_until.blank?

    # BUGFIX: the original `if banned_until = DateTime.parse(...) > DateTime.now`
    # assigned the *boolean* comparison result to banned_until (precedence),
    # so suspended_till was set to `true` instead of a date.
    banned_until = DateTime.parse(banned_until)

    if banned_until > DateTime.now
      user.suspended_till = banned_until
      user.suspended_at = DateTime.now
      user.save!

      StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
    end
  end

  # Imports Telligent groups as parent categories and forums as child
  # categories. A few special forums are re-homed under manually created
  # parents (see #parent_category_id_for), and forums that map onto an
  # existing category only get a permalink instead of a new category.
  def import_categories
    @new_parent_categories = {}
    @new_parent_categories[:archives] = create_category({ name: "Archives" }, nil)
    @new_parent_categories[:spotlight] = create_category({ name: "Spotlight" }, nil)
    @new_parent_categories[:optimizer] = create_category({ name: "SQL Optimizer" }, nil)

    puts "", "Importing parent categories..."
    # Only groups that contain more than one forum become parent categories;
    # single-forum groups are flattened by #replace_with_category_id.
    parent_categories = query(<<~SQL)
      SELECT
        GroupID,
        Name, HtmlDescription,
        DateCreated, SortOrder
      FROM cs_Groups g
      WHERE (SELECT COUNT(1)
             FROM te_Forum_Forums f
             WHERE f.GroupId = g.GroupID) > 1
      ORDER BY SortOrder, Name
    SQL

    create_categories(parent_categories) do |row|
      {
        id: "G#{row['GroupID']}",
        name: clean_category_name(row["Name"]),
        description: html_to_markdown(row["HtmlDescription"]),
        position: row["SortOrder"]
      }
    end

    puts "", "Importing child categories..."
    child_categories = query(<<~SQL)
      SELECT
        ForumId, GroupId,
        Name, Description,
        DateCreated, SortOrder
      FROM te_Forum_Forums
      ORDER BY GroupId, SortOrder, Name
    SQL

    create_categories(child_categories) do |row|
      parent_category_id = parent_category_id_for(row)

      if (category_id = replace_with_category_id(row, child_categories, parent_category_id))
        # The forum merges into an existing category: register the mapping
        # and a permalink, but do not create a new category (return nil).
        add_category(row['ForumId'], Category.find_by_id(category_id))
        Permalink.create(url: "f/#{row['ForumId']}", category_id: category_id)
        nil
      else
        {
          id: row['ForumId'],
          parent_category_id: parent_category_id,
          name: clean_category_name(row["Name"]),
          description: html_to_markdown(row["Description"]),
          position: row["SortOrder"]
        }
      end
    end
  end

  # Chooses the Discourse parent category for a forum row: special forums go
  # under the manually created parents, everything else under its imported
  # Telligent group (or nil for top-level).
  def parent_category_id_for(row)
    name = row["Name"].downcase

    if name.include?("beta")
      @new_parent_categories[:archives].id
    elsif name.include?("spotlight")
      @new_parent_categories[:spotlight].id
    elsif name.include?("optimizer")
      @new_parent_categories[:optimizer].id
    elsif row.key?("GroupId")
      category_id_from_imported_category_id("G#{row['GroupId']}")
    else
      nil
    end
  end

  # Returns the id of an existing category this forum should be merged into,
  # or nil when the forum deserves its own category.
  def replace_with_category_id(row, child_categories, parent_category_id)
    name = row["Name"].downcase

    if name.include?("data modeler") || name.include?("benchmark")
      category_id_from_imported_category_id("G#{row['GroupId']}")
    elsif only_child?(child_categories, parent_category_id)
      # A lone forum in a group collapses into its parent category.
      parent_category_id
    end
  end

  # True when exactly one forum resolves to +parent_category_id+.
  def only_child?(child_categories, parent_category_id)
    count = 0

    child_categories.each do |row|
      count += 1 if parent_category_id_for(row) == parent_category_id
    end

    count == 1
  end

  # Unescapes HTML entities and strips the redundant "Forum" suffix from
  # Telligent forum names.
  def clean_category_name(name)
    CGI.unescapeHTML(name)
      .sub(/(?:\- )?Forum/i, "")
      .strip
  end

  # Imports threads as topics (with their first-post attachment, pinning and
  # permalinks). Topic import ids are prefixed with "T" (see #import_topic_id)
  # so they cannot collide with reply ids in the shared posts table.
  def import_topics
    puts "", "Importing topics..."

    last_topic_id = -1
    total_count = count("SELECT COUNT(1) AS count FROM te_Forum_Threads")

    batches do |offset|
      rows = query(<<~SQL)
        SELECT TOP #{BATCH_SIZE}
          t.ThreadId, t.ForumId, t.UserId,
          t.Subject, t.Body, t.DateCreated, t.IsLocked, t.StickyDate,
          a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName
        FROM te_Forum_Threads t
          LEFT JOIN te_Attachments a
            ON (a.ApplicationId = t.ForumId AND a.ApplicationTypeId = 0 AND a.ContentId = t.ThreadId AND
                a.ApplicationContentTypeId = 0)
        WHERE t.ThreadId > #{last_topic_id}
        ORDER BY t.ThreadId
      SQL

      break if rows.blank?
      last_topic_id = rows[-1]["ThreadId"]
      next if all_records_exist?(:post, rows.map { |row| import_topic_id(row["ThreadId"]) })

      create_posts(rows, total: total_count, offset: offset) do |row|
        user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID

        post = {
          id: import_topic_id(row["ThreadId"]),
          title: CGI.unescapeHTML(row["Subject"]),
          raw: raw_with_attachment(row, user_id),
          category: category_id_from_imported_category_id(row["ForumId"]),
          user_id: user_id,
          created_at: row["DateCreated"],
          closed: row["IsLocked"],
          post_create_action: proc do |post|
            topic = post.topic
            # Schedule automatic unpinning when the sticky period expires.
            Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id) if topic.pinned_until
            Permalink.create(url: "f/#{row['ForumId']}/t/#{row['ThreadId']}", topic_id: topic.id)
          end
        }

        # BUGFIX: guard against NULL StickyDate, which would raise
        # NoMethodError on the comparison.
        if row["StickyDate"] && row["StickyDate"] > Time.now
          post[:pinned_until] = row["StickyDate"]
          post[:pinned_at] = row["DateCreated"]
        end

        post
      end
    end
  end

  # Import-id namespace for topics ("T<ThreadId>") so topic and reply ids
  # never collide.
  def import_topic_id(topic_id)
    "T#{topic_id}"
  end

  # Imports thread replies as posts. The first verified answer of each thread
  # is tagged so #mark_topics_as_solved can flag the topic afterwards.
  def import_posts
    puts "", "Importing posts..."

    last_post_id = -1
    total_count = count("SELECT COUNT(1) AS count FROM te_Forum_ThreadReplies")

    batches do |offset|
      rows = query(<<~SQL)
        SELECT TOP #{BATCH_SIZE}
          tr.ThreadReplyId, tr.ThreadId, tr.UserId, tr.ParentReplyId,
          tr.Body, tr.ThreadReplyDate,
          CONVERT(BIT,
                  CASE WHEN tr.AnswerVerifiedUtcDate IS NOT NULL AND NOT EXISTS(
                      SELECT 1
                      FROM te_Forum_ThreadReplies x
                      WHERE
                        x.ThreadId = tr.ThreadId AND x.ThreadReplyId < tr.ThreadReplyId AND x.AnswerVerifiedUtcDate IS NOT NULL
                    )
                    THEN 1
                    ELSE 0 END) AS IsFirstVerifiedAnswer,
          a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName
        FROM te_Forum_ThreadReplies tr
          JOIN te_Forum_Threads t ON (tr.ThreadId = t.ThreadId)
          LEFT JOIN te_Attachments a
            ON (a.ApplicationId = t.ForumId AND a.ApplicationTypeId = 0 AND a.ContentId = tr.ThreadReplyId AND
                a.ApplicationContentTypeId = 1)
        WHERE tr.ThreadReplyId > #{last_post_id}
        ORDER BY tr.ThreadReplyId
      SQL

      break if rows.blank?
      last_post_id = rows[-1]["ThreadReplyId"]
      next if all_records_exist?(:post, rows.map { |row| row["ThreadReplyId"] })

      create_posts(rows, total: total_count, offset: offset) do |row|
        # A reply either answers another reply (ParentReplyId > 0) or the
        # thread's first post (mapped via the "T"-prefixed topic import id).
        imported_parent_id = row["ParentReplyId"] > 0 ? row["ParentReplyId"] : import_topic_id(row["ThreadId"])
        parent_post = topic_lookup_from_imported_post_id(imported_parent_id)
        user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID

        if parent_post
          post = {
            id: row["ThreadReplyId"],
            raw: raw_with_attachment(row, user_id),
            user_id: user_id,
            topic_id: parent_post[:topic_id],
            created_at: row["ThreadReplyDate"],
            reply_to_post_number: parent_post[:post_number]
          }

          post[:custom_fields] = { is_accepted_answer: "true" } if row["IsFirstVerifiedAnswer"]
          post
        else
          puts "Failed to import post #{row['ThreadReplyId']}. Parent was not found."
        end
      end
    end
  end

  # Converts the post body to Markdown (uploading any attachments embedded in
  # the HTML) and appends the row's separate attachment unless it was already
  # embedded in the body.
  def raw_with_attachment(row, user_id)
    raw, embedded_paths = replace_embedded_attachments(row["Body"], user_id)
    raw = html_to_markdown(raw) || ""

    filename = row["FileName"]
    return raw if filename.blank?

    # Attachments live in a directory tree derived from zero-padded ids, with
    # the 10-digit ContentId split into two-character path segments.
    path = File.join(
      ENV["FILE_BASE_DIR"],
      "telligent.evolution.components.attachments",
      "%02d" % row["ApplicationTypeId"],
      "%02d" % row["ApplicationId"],
      "%02d" % row["ApplicationContentTypeId"],
      ("%010d" % row["ContentId"]).scan(/.{2}/),
      filename
    )

    unless embedded_paths.include?(path)
      if File.exist?(path)
        upload = @uploader.create_upload(user_id, path, filename)
        raw << "\n" << @uploader.html_for_upload(upload, filename) if upload.present? && upload.persisted?
      else
        STDERR.puts "Could not find file: #{path}"
      end
    end

    raw
  end

  # Replaces CFS attachment links inside the HTML body with Discourse upload
  # markup. Returns the rewritten HTML and the list of local file paths that
  # were uploaded (so #raw_with_attachment can avoid duplicating them).
  def replace_embedded_attachments(raw, user_id)
    paths = []

    raw = raw.gsub(EMBEDDED_ATTACHMENT_REGEX) do
      match_data = Regexp.last_match
      filename = match_data[:filename]

      path = File.join(
        ENV["FILE_BASE_DIR"],
        match_data[:directory].gsub("-", "."),
        match_data[:path].split("-"),
        filename
      )

      if File.exist?(path)
        upload = @uploader.create_upload(user_id, path, filename)

        if upload.present? && upload.persisted?
          paths << path
          @uploader.html_for_upload(upload, filename)
        end
      else
        STDERR.puts "Could not find file: #{path}"
      end
    end

    [raw, paths]
  end

  # Copies the "is_accepted_answer" markers written by #import_posts into the
  # topic custom field used by the discourse-solved plugin.
  def mark_topics_as_solved
    puts "", "Marking topics as solved..."

    DB.exec <<~SQL
      INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
      SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
        FROM post_custom_fields pcf
        JOIN posts p ON p.id = pcf.post_id
       WHERE pcf.name = 'is_accepted_answer' AND pcf.value = 'true'
    SQL
  end

  def html_to_markdown(html)
    HtmlToMarkdown.new(html).to_markdown if html.present?
  end

  # Appends the category/topic permalink normalization rules to the site
  # setting (idempotent: existing rules are kept, duplicates avoided).
  def add_permalink_normalizations
    normalizations = SiteSetting.permalink_normalizations
    normalizations = normalizations.blank? ? [] : normalizations.split('|')

    add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION)
    add_normalization(normalizations, TOPIC_LINK_NORMALIZATION)

    SiteSetting.permalink_normalizations = normalizations.join('|')
  end

  def add_normalization(normalizations, normalization)
    normalizations << normalization unless normalizations.include?(normalization)
  end

  def batches
    super(BATCH_SIZE)
  end

  def query(sql)
    @client.execute(sql).to_a
  end

  def count(sql)
    query(sql).first["count"]
  end
end

ImportScripts::Telligent.new.perform