# Notes: # # Written by Sam # # Lithium are quite protective of data, there is no simple way of exporting # If you have leverage you may get a data dump, in my case it was provided in XML # format # # First step is to convert it to db format so you can import it into a DB # that was done using import_scripts/support/convert_mysql_xml_to_mysql.rb # require 'mysql2' require 'csv' require 'reverse_markdown' require File.expand_path(File.dirname(__FILE__) + "/base.rb") require 'htmlentities' # remove table conversion [:table, :td, :tr, :th, :thead, :tbody].each do |tag| ReverseMarkdown::Converters.unregister(tag) end class ImportScripts::Lithium < ImportScripts::Base BATCH_SIZE = 1000 # CHANGE THESE BEFORE RUNNING THE IMPORTER DATABASE = "wd" PASSWORD = "password" AVATAR_DIR = '/tmp/avatars' ATTACHMENT_DIR = '/tmp/attachments' UPLOAD_DIR = '/tmp/uploads' OLD_DOMAIN = 'community.wd.com' TEMP = "" USER_CUSTOM_FIELDS = [ { name: "sso_id", user: "sso_id" }, { name: "user_field_1", profile: "jobtitle" }, { name: "user_field_2", profile: "company" }, { name: "user_field_3", profile: "industry" }, ] LITHIUM_PROFILE_FIELDS = "'profile.jobtitle', 'profile.company', 'profile.industry', 'profile.location'" USERNAME_MAPPINGS = { "admins": "admin_user" }.with_indifferent_access def initialize super @old_username_to_new_usernames = {} @htmlentities = HTMLEntities.new @client = Mysql2::Client.new( host: "localhost", username: "root", password: PASSWORD, database: DATABASE ) end def execute @max_start_id = Post.maximum(:id) import_groups import_categories import_users import_user_visits import_topics import_posts import_likes import_accepted_answers import_pms close_topics create_permalinks post_process_posts end def import_groups puts "", "importing groups..." groups = mysql_query <<-SQL SELECT DISTINCT name FROM roles ORDER BY name SQL create_groups(groups) do |group| { id: group["name"], name: @htmlentities.decode(group["name"]).strip } end end def import_users puts "", "importing users" user_count = mysql_query("SELECT COUNT(*) count FROM users").first["count"] avatar_files = Dir.entries(AVATAR_DIR) duplicate_emails = mysql_query("SELECT email_lower FROM users GROUP BY email_lower HAVING COUNT(email_lower) > 1").map { |e| [e["email_lower"], 0] }.to_h batches(BATCH_SIZE) do |offset| users = mysql_query <<-SQL SELECT id, nlogin, login_canon, email, registration_time, sso_id FROM users ORDER BY id LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if users.size < 1 next if all_records_exist? :users, users.map { |u| u["id"].to_i } users = users.to_a first_id = users.first["id"] last_id = users.last["id"] profiles = mysql_query <<-SQL SELECT user_id, param, nvalue FROM user_profile WHERE nvalue IS NOT NULL AND param IN (#{LITHIUM_PROFILE_FIELDS}) AND user_id >= #{first_id} AND user_id <= #{last_id} ORDER BY user_id SQL create_users(users, total: user_count, offset: offset) do |user| user_id = user["id"] profile = profiles.select { |p| p["user_id"] == user_id } result = profile.select { |p| p["param"] == "profile.location" } location = result.count > 0 ? result.first["nvalue"] : nil username = user["login_canon"] username = USERNAME_MAPPINGS[username] if USERNAME_MAPPINGS[username].present? email = user["email"].presence || fake_email email_lower = email.downcase if duplicate_emails.key?(email_lower) duplicate_emails[email_lower] += 1 email.sub!("@", "+#{duplicate_emails[email_lower]}@") if duplicate_emails[email_lower] > 1 end { id: user_id, name: user["nlogin"], username: username, email: email, location: location, custom_fields: user_custom_fields(user, profile), # website: user["homepage"].strip, # title: @htmlentities.decode(user["usertitle"]).strip, # primary_group_id: group_id_from_imported_group_id(user["usergroupid"]), created_at: unix_time(user["registration_time"]), post_create_action: proc do |u| @old_username_to_new_usernames[user["login_canon"]] = u.username # import user avatar sso_id = u.custom_fields["sso_id"] if sso_id.present? prefix = "#{AVATAR_DIR}/#{sso_id}_" file = get_file(prefix + "actual.jpeg") file ||= get_file(prefix + "profile.jpeg") if file.present? upload = UploadCreator.new(file, file.path, type: "avatar").create_for(u.id) u.create_user_avatar unless u.user_avatar if !u.user_avatar.contains_upload?(upload.id) u.user_avatar.update_columns(custom_upload_id: upload.id) if u.uploaded_avatar_id.nil? || !u.user_avatar.contains_upload?(u.uploaded_avatar_id) u.update_columns(uploaded_avatar_id: upload.id) end end end end end } end end end def import_user_visits puts "", "importing user visits" batches(BATCH_SIZE) do |offset| visits = mysql_query <<-SQL SELECT user_id, login_time FROM user_log ORDER BY user_id LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if visits.size < 1 user_ids = visits.uniq { |v| v["user_id"] } user_ids.each do |user_id| user = UserCustomField.find_by(name: "import_id", value: user_id).try(:user) raise "User not found for id #{user_id}" if user.blank? user_visits = visits.select { |v| v["user_id"] == user_id } user_visits.each do |v| date = unix_time(v["login_time"]) user.update_visit_record!(date) end end end end def user_custom_fields(user, profile) fields = Hash.new USER_CUSTOM_FIELDS.each do |attr| name = attr[:name] if attr[:user].present? fields[name] = user[attr[:user]] elsif attr[:profile].present? && profile.count > 0 result = profile.select { |p| p["param"] == "profile.#{attr[:profile]}" } fields[name] = result.first["nvalue"] if result.count > 0 end end fields end def get_file(path) return File.open(path) if File.exist?(path) nil end def unix_time(t) Time.at(t / 1000.0) end def import_profile_picture(old_user, imported_user) query = mysql_query <<-SQL SELECT filedata, filename FROM customavatar WHERE userid = #{old_user["userid"]} ORDER BY dateline DESC LIMIT 1 SQL picture = query.first return if picture.nil? file = Tempfile.new("profile-picture") file.write(picture["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8")) file.rewind upload = UploadCreator.new(file, picture["filename"]).create_for(imported_user.id) return if !upload.persisted? imported_user.create_user_avatar imported_user.user_avatar.update(custom_upload_id: upload.id) imported_user.update(uploaded_avatar_id: upload.id) ensure file.close rescue nil file.unlind rescue nil end def import_profile_background(old_user, imported_user) query = mysql_query <<-SQL SELECT filedata, filename FROM customprofilepic WHERE userid = #{old_user["userid"]} ORDER BY dateline DESC LIMIT 1 SQL background = query.first return if background.nil? file = Tempfile.new("profile-background") file.write(background["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8")) file.rewind upload = UploadCreator.new(file, background["filename"]).create_for(imported_user.id) return if !upload.persisted? imported_user.user_profile.update(profile_background: upload.url) ensure file.close rescue nil file.unlink rescue nil end def import_categories puts "", "importing top level categories..." categories = mysql_query <<-SQL SELECT n.node_id, n.display_id, c.nvalue c_title, b.nvalue b_title, n.position, n.parent_node_id, n.type_id FROM nodes n LEFT JOIN settings c ON n.node_id = c.node_id AND c.param = 'category.title' LEFT JOIN settings b ON n.node_id = b.node_id AND b.param = 'board.title' ORDER BY n.type_id DESC, n.node_id ASC SQL categories = categories.map { |c| (c["name"] = c["c_title"] || c["b_title"] || c["display_id"]) && c } # To prevent duplicate category names categories = categories.map do |category| count = categories.to_a.count { |c| c["name"].present? && c["name"] == category["name"] } category["name"] << " (#{category["node_id"]})" if count > 1 category end parent_categories = categories.select { |c| c["parent_node_id"] <= 2 } create_categories(parent_categories) do |category| { id: category["node_id"], name: category["name"], position: category["position"], post_create_action: lambda do |record| after_category_create(record, category) end } end puts "", "importing children categories..." children_categories = categories.select { |c| c["parent_node_id"] > 2 } create_categories(children_categories) do |category| { id: category["node_id"], name: category["name"], position: category["position"], parent_category_id: category_id_from_imported_category_id(category["parent_node_id"]), post_create_action: lambda do |record| after_category_create(record, category) end } end end def after_category_create(category, params) node_id = category.custom_fields["import_id"] roles = mysql_query <<-SQL SELECT name FROM roles WHERE node_id = #{node_id} SQL if roles.count > 0 category.update(read_restricted: true) roles.each do |role| group_id = group_id_from_imported_group_id(role["name"]) if group_id.present? CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg| cg.permission_type = CategoryGroup.permission_types[:full] end else puts "", "Group not found for id '#{role["name"]}'" end end end end def staff_guardian @_staff_guardian ||= Guardian.new(Discourse.system_user) end def import_topics puts "", "importing topics..." SiteSetting.tagging_enabled = true default_max_tags_per_topic = SiteSetting.max_tags_per_topic default_max_tag_length = SiteSetting.max_tag_length SiteSetting.max_tags_per_topic = 10 SiteSetting.max_tag_length = 100 topic_count = mysql_query("SELECT COUNT(*) count FROM message2 where id = root_id").first["count"] topic_tags = mysql_query("SELECT e.target_id, GROUP_CONCAT(l.tag_text SEPARATOR ',') tags FROM tag_events_label_message e LEFT JOIN tags_label l ON e.tag_id = l.tag_id GROUP BY e.target_id") batches(BATCH_SIZE) do |offset| topics = mysql_query <<-SQL SELECT id, subject, body, deleted, user_id, post_date, views, node_id, unique_id, row_version FROM message2 WHERE id = root_id #{TEMP} ORDER BY node_id, id LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if topics.size < 1 next if all_records_exist? :posts, topics.map { |topic| "#{topic["node_id"]} #{topic["id"]}" } create_posts(topics, total: topic_count, offset: offset) do |topic| category_id = category_id_from_imported_category_id(topic["node_id"]) deleted_at = topic["deleted"] == 1 ? topic["row_version"] : nil raw = topic["body"] if category_id.present? && raw.present? { id: "#{topic["node_id"]} #{topic["id"]}", user_id: user_id_from_imported_user_id(topic["user_id"]) || Discourse::SYSTEM_USER_ID, title: @htmlentities.decode(topic["subject"]).strip[0...255], category: category_id, raw: raw, created_at: unix_time(topic["post_date"]), deleted_at: deleted_at, views: topic["views"], custom_fields: { import_unique_id: topic["unique_id"] }, import_mode: true, post_create_action: proc do |post| result = topic_tags.select { |t| t["target_id"] == topic["unique_id"] } if result.count > 0 tag_names = result.first["tags"].split(",") DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names) end end } else message = "Unknown" message = "Category '#{category_id}' not exist" if category_id.blank? message = "Topic 'body' is empty" if raw.blank? PluginStoreRow.find_or_create_by(plugin_name: "topic_import_log", key: topic["unique_id"].to_s, value: message, type_name: 'String') nil end end end SiteSetting.max_tags_per_topic = default_max_tags_per_topic SiteSetting.max_tag_length = default_max_tag_length end def import_posts post_count = mysql_query("SELECT COUNT(*) count FROM message2 WHERE id <> root_id").first["count"] puts "", "importing posts... (#{post_count})" batches(BATCH_SIZE) do |offset| posts = mysql_query <<-SQL SELECT id, body, deleted, user_id, post_date, parent_id, root_id, node_id, unique_id, row_version FROM message2 WHERE id <> root_id #{TEMP} ORDER BY node_id, root_id, id LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if posts.size < 1 next if all_records_exist? :posts, posts.map { |post| "#{post["node_id"]} #{post["root_id"]} #{post["id"]}" } create_posts(posts, total: post_count, offset: offset) do |post| raw = post["raw"] next unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}") deleted_at = topic["deleted"] == 1 ? topic["row_version"] : nil raw = post["body"] if raw.present? new_post = { id: "#{post["node_id"]} #{post["root_id"]} #{post["id"]}", user_id: user_id_from_imported_user_id(post["user_id"]) || Discourse::SYSTEM_USER_ID, topic_id: topic[:topic_id], raw: raw, created_at: unix_time(post["post_date"]), deleted_at: deleted_at, custom_fields: { import_unique_id: post["unique_id"] }, import_mode: true } if parent = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]} #{post["parent_id"]}") new_post[:reply_to_post_number] = parent[:post_number] end new_post else PluginStoreRow.find_or_create_by(plugin_name: "post_import_log", key: post["unique_id"].to_s, value: "Post 'body' is empty", type_name: 'String') nil end end end end SMILEY_SUBS = { "smileyhappy" => "smiley", "smileyindifferent" => "neutral_face", "smileymad" => "angry", "smileysad" => "cry", "smileysurprised" => "dizzy_face", "smileytongue" => "stuck_out_tongue", "smileyvery-happy" => "grin", "smileywink" => "wink", "smileyfrustrated" => "confounded", "smileyembarrassed" => "flushed", "smileylol" => "laughing", "cathappy" => "smiley_cat", "catindifferent" => "cat", "catmad" => "smirk_cat", "catsad" => "crying_cat_face", "catsurprised" => "scream_cat", "cattongue" => "stuck_out_tongue", "catvery-happy" => "smile_cat", "catwink" => "wink", "catfrustrated" => "grumpycat", "catembarrassed" => "kissing_cat", "catlol" => "joy_cat" } def import_likes puts "\nimporting likes..." sql = "select source_id user_id, target_id post_id, row_version created_at from tag_events_score_message" results = mysql_query(sql) puts "loading unique id map" existing_map = {} PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id| existing_map[import_id] = post_id end puts "loading data into temp table" PostAction.exec_sql("create temp table like_data(user_id int, post_id int, created_at timestamp without time zone)") PostAction.transaction do results.each do |result| result["user_id"] = user_id_from_imported_user_id(result["user_id"].to_s) result["post_id"] = existing_map[result["post_id"].to_s] next unless result["user_id"] && result["post_id"] PostAction.exec_sql("INSERT INTO like_data VALUES (:user_id,:post_id,:created_at)", user_id: result["user_id"], post_id: result["post_id"], created_at: result["created_at"] ) end end puts "creating missing post actions" PostAction.exec_sql <<-SQL INSERT INTO post_actions (post_id, user_id, post_action_type_id, created_at, updated_at) SELECT l.post_id, l.user_id, 2, l.created_at, l.created_at FROM like_data l LEFT JOIN post_actions a ON a.post_id = l.post_id AND l.user_id = a.user_id AND a.post_action_type_id = 2 WHERE a.id IS NULL SQL puts "creating missing user actions" UserAction.exec_sql <<-SQL INSERT INTO user_actions (user_id, action_type, target_topic_id, target_post_id, acting_user_id, created_at, updated_at) SELECT pa.user_id, 1, p.topic_id, p.id, pa.user_id, pa.created_at, pa.created_at FROM post_actions pa JOIN posts p ON p.id = pa.post_id LEFT JOIN user_actions ua ON action_type = 1 AND ua.target_post_id = pa.post_id AND ua.user_id = pa.user_id WHERE ua.id IS NULL AND pa.post_action_type_id = 2 SQL # reverse action UserAction.exec_sql <<-SQL INSERT INTO user_actions (user_id, action_type, target_topic_id, target_post_id, acting_user_id, created_at, updated_at) SELECT p.user_id, 2, p.topic_id, p.id, pa.user_id, pa.created_at, pa.created_at FROM post_actions pa JOIN posts p ON p.id = pa.post_id LEFT JOIN user_actions ua ON action_type = 2 AND ua.target_post_id = pa.post_id AND ua.acting_user_id = pa.user_id AND ua.user_id = p.user_id WHERE ua.id IS NULL AND pa.post_action_type_id = 2 SQL puts "updating like counts on posts" Post.exec_sql <<-SQL UPDATE posts SET like_count = coalesce(cnt,0) FROM ( SELECT post_id, count(*) cnt FROM post_actions WHERE post_action_type_id = 2 AND deleted_at IS NULL GROUP BY post_id ) x WHERE posts.like_count <> x.cnt AND posts.id = x.post_id SQL puts "updating like counts on topics" Post.exec_sql <<-SQL UPDATE topics SET like_count = coalesce(cnt,0) FROM ( SELECT topic_id, sum(like_count) cnt FROM posts WHERE deleted_at IS NULL GROUP BY topic_id ) x WHERE topics.like_count <> x.cnt AND topics.id = x.topic_id SQL end def import_accepted_answers puts "\nimporting accepted answers..." sql = "select unique_id post_id from message2 where (attributes & 0x4000 ) != 0 and deleted = 0;" results = mysql_query(sql) puts "loading unique id map" existing_map = {} PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id| existing_map[import_id] = post_id end puts "loading data into temp table" PostAction.exec_sql("create temp table accepted_data(post_id int primary key)") PostAction.transaction do results.each do |result| result["post_id"] = existing_map[result["post_id"].to_s] next unless result["post_id"] PostAction.exec_sql("INSERT INTO accepted_data VALUES (:post_id)", post_id: result["post_id"] ) end end puts "deleting dupe answers" PostAction.exec_sql <<-SQL DELETE FROM accepted_data WHERE post_id NOT IN ( SELECT post_id FROM ( SELECT topic_id, MIN(post_id) post_id FROM accepted_data a JOIN posts p ON p.id = a.post_id GROUP BY topic_id ) X ) SQL puts "importing accepted answers" PostAction.exec_sql <<-SQL INSERT into post_custom_fields (name, value, post_id, created_at, updated_at) SELECT 'is_accepted_answer', 'true', a.post_id, current_timestamp, current_timestamp FROM accepted_data a LEFT JOIN post_custom_fields f ON name = 'is_accepted_answer' AND f.post_id = a.post_id WHERE f.id IS NULL SQL puts "marking accepted topics" PostAction.exec_sql <<-SQL INSERT into topic_custom_fields (name, value, topic_id, created_at, updated_at) SELECT 'accepted_answer_post_id', a.post_id::varchar, p.topic_id, current_timestamp, current_timestamp FROM accepted_data a JOIN posts p ON p.id = a.post_id LEFT JOIN topic_custom_fields f ON name = 'accepted_answer_post_id' AND f.topic_id = p.topic_id WHERE f.id IS NULL SQL puts "done importing accepted answers" end def import_pms puts "", "importing pms..." puts "determining participation records" inbox = mysql_query("SELECT note_id, recipient_user_id user_id FROM tblia_notes_inbox") outbox = mysql_query("SELECT note_id, recipient_id user_id FROM tblia_notes_outbox") users = {} [inbox, outbox].each do |r| r.each do |row| ary = (users[row["note_id"]] ||= Set.new) user_id = user_id_from_imported_user_id(row["user_id"]) ary << user_id if user_id end end puts "untangling PM soup" note_to_subject = {} subject_to_first_note = {} mysql_query("SELECT note_id, subject, sender_user_id FROM tblia_notes_content order by note_id").each do |row| user_id = user_id_from_imported_user_id(row["sender_user_id"]) ary = (users[row["note_id"]] ||= Set.new) if user_id ary << user_id end note_to_subject[row["note_id"]] = row["subject"] if row["subject"] !~ /^Re: / subject_to_first_note[[row["subject"], ary]] ||= row["note_id"] end end puts "Loading user_id to username map" user_map = {} User.pluck(:id, :username).each do |id, username| user_map[id] = username end topic_count = mysql_query("SELECT COUNT(*) count FROM tblia_notes_content").first["count"] batches(BATCH_SIZE) do |offset| topics = mysql_query <<-SQL SELECT note_id, subject, body, sender_user_id, sent_time FROM tblia_notes_content ORDER BY note_id LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if topics.size < 1 next if all_records_exist? :posts, topics.map { |topic| "pm_#{topic["note_id"]}" } create_posts(topics, total: topic_count, offset: offset) do |topic| user_id = user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID participants = users[topic["note_id"]] usernames = (participants - [user_id]).map { |id| user_map[id] } subject = topic["subject"] topic_id = nil if subject =~ /^Re: / parent_id = subject_to_first_note[[subject[4..-1], participants]] if parent_id if t = topic_lookup_from_imported_post_id("pm_#{parent_id}") topic_id = t[:topic_id] end end end raw = topic["body"] if raw.present? msg = { id: "pm_#{topic["note_id"]}", user_id: user_id, raw: raw, created_at: unix_time(topic["sent_time"]), import_mode: true } unless topic_id msg[:title] = @htmlentities.decode(topic["subject"]).strip[0...255] msg[:archetype] = Archetype.private_message msg[:target_usernames] = usernames.join(',') else msg[:topic_id] = topic_id end msg else PluginStoreRow.find_or_create_by(plugin_name: "pm_import_log", key: topic["note_id"].to_s, value: "PM 'body' is empty", type_name: 'String') nil end end end end def close_topics puts "\nclosing closed topics..." sql = "select unique_id post_id from message2 where root_id = id AND (attributes & 0x0002 ) != 0;" results = mysql_query(sql) # loading post map existing_map = {} PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id| existing_map[import_id.to_i] = post_id.to_i end results.map { |r| r["post_id"] }.each_slice(500) do |ids| mapped = ids.map { |id| existing_map[id] }.compact Topic.exec_sql(" UPDATE topics SET closed = true WHERE id IN (SELECT topic_id FROM posts where id in (:ids)) ", ids: mapped) if mapped.present? end end def create_permalinks puts "Creating permalinks" SiteSetting.permalink_normalizations = '/t5\\/.*p\\/(\\d+).*//p/\\1' sql = <<-SQL INSERT INTO permalinks (url, topic_id, created_at, updated_at) SELECT '/p/' || value, p.topic_id, current_timestamp, current_timestamp FROM post_custom_fields f JOIN posts p on f.post_id = p.id AND post_number = 1 LEFT JOIN permalinks pm ON url = '/p/' || value WHERE pm.id IS NULL AND f.name = 'import_unique_id' SQL r = Permalink.exec_sql sql puts "#{r.cmd_tuples} permalinks to topics added!" sql = <<-SQL INSERT INTO permalinks (url, post_id, created_at, updated_at) SELECT '/p/' || value, p.id, current_timestamp, current_timestamp FROM post_custom_fields f JOIN posts p on f.post_id = p.id AND post_number <> 1 LEFT JOIN permalinks pm ON url = '/p/' || value WHERE pm.id IS NULL AND f.name = 'import_unique_id' SQL r = Permalink.exec_sql sql puts "#{r.cmd_tuples} permalinks to posts added!" end def find_upload(user_id, attachment_id, real_filename) filename = attachment_id.to_s.rjust(4, "0") filename = File.join(ATTACHMENT_DIR, "000#{filename[0]}/#{filename}.dat") unless File.exists?(filename) puts "Attachment file doesn't exist: #{filename}" return nil end real_filename.prepend SecureRandom.hex if real_filename[0] == '.' upload = create_upload(user_id, filename, real_filename) if upload.nil? || !upload.valid? puts "Upload not valid :(" puts upload.errors.inspect if upload return nil end return upload, real_filename end def post_process_posts puts "", "Postprocessing posts..." default_extensions = SiteSetting.authorized_extensions default_max_att_size = SiteSetting.max_attachment_size_kb SiteSetting.authorized_extensions = "*" SiteSetting.max_attachment_size_kb = 307200 current = 0 max = Post.count mysql_query("create index idxUniqueId on message2(unique_id)") rescue nil attachments = mysql_query("SELECT a.attachment_id, a.file_name, m.message_uid FROM tblia_attachment a INNER JOIN tblia_message_attachments m ON a.attachment_id = m.attachment_id") Post.where('id > ?', @max_start_id).find_each do |post| begin id = post.custom_fields["import_unique_id"] next unless id raw = mysql_query("select body from message2 where unique_id = '#{id}'").first['body'] unless raw puts "Missing raw for post: #{post.id}" next end new_raw = postprocess_post_raw(raw, post.user_id) files = attachments.select { |a| a["message_uid"].to_s == id } new_raw << html_for_attachments(post.user_id, files) unless post.raw == new_raw post.raw = new_raw post.cooked = post.cook(new_raw) cpp = CookedPostProcessor.new(post) cpp.keep_reverse_index_up_to_date post.custom_fields["import_post_process"] = true post.save end rescue PrettyText::JavaScriptError puts "GOT A JS error on post: #{post.id}" nil ensure print_status(current += 1, max) end end SiteSetting.authorized_extensions = default_extensions SiteSetting.max_attachment_size_kb = default_max_att_size end def postprocess_post_raw(raw, user_id) doc = Nokogiri::HTML.fragment(raw) doc.css("a,img,li-image").each do |l| upload_name, image, linked_upload = [nil] * 3 if l.name == "li-image" && l["id"] upload_name = l["id"] else uri = URI.parse(l["href"] || l["src"]) rescue nil uri.hostname = nil if uri && uri.hostname == OLD_DOMAIN if uri && !uri.hostname if l["href"] l["href"] = uri.path # we have an internal link, lets see if we can remap it? permalink = Permalink.find_by_url(uri.path) rescue nil if l["href"] if permalink && permalink.target_url l["href"] = permalink.target_url elsif l["href"] =~ /^\/gartner\/attachments\/gartner\/([^.]*).(\w*)/ linked_upload = "#{$1}.#{$2}" end end elsif l["src"] # we need an upload here upload_name = $1 if uri.path =~ /image-id\/([^\/]+)/ end end end if upload_name png = UPLOAD_DIR + "/" + upload_name + ".png" jpg = UPLOAD_DIR + "/" + upload_name + ".jpg" gif = UPLOAD_DIR + "/" + upload_name + ".gif" # check to see if we have it if File.exist?(png) image = png elsif File.exists?(jpg) image = jpg elsif File.exists?(gif) image = gif end if image File.open(image) do |file| upload = UploadCreator.new(file, "image." + (image.ends_with?(".png") ? "png" : "jpg")).create_for(user_id) l.name = "img" if l.name == "li-image" l["src"] = upload.url end else puts "image was missing #{l["src"]}" end elsif linked_upload segments = linked_upload.match(/\/(\d*)\/(\d)\/([^.]*).(\w*)$/) if segments.present? lithium_post_id = segments[1] attachment_number = segments[2] result = mysql_query("select a.attachment_id, f.file_name from tblia_message_attachments a INNER JOIN message2 m ON a.message_uid = m.unique_id INNER JOIN tblia_attachment f ON a.attachment_id = f.attachment_id where m.id = #{lithium_post_id} AND a.attach_num = #{attachment_number} limit 0, 1") result.each do |row| upload, filename = find_upload(user_id, row["attachment_id"], row["file_name"]) if upload.present? l["href"] = upload.url else puts "attachment was missing #{l["href"]}" end end end end end # for user mentions doc.css("li-user").each do |l| uid = l["uid"] if uid.present? user = UserCustomField.find_by(name: 'import_id', value: uid).try(:user) if user.present? username = user.username span = l.document.create_element "span" span.inner_html = "@#{username}" l.replace span end end end raw = ReverseMarkdown.convert(doc.to_s) raw.gsub!(/^\s* \s*$/, "") # ugly quotes raw.gsub!(/^>[\s\*]*$/, "") raw.gsub!(/:([a-z]+):/) do |match| ":#{SMILEY_SUBS[$1] || $1}:" end # nbsp central raw.gsub!(/([a-zA-Z0-9]) ([a-zA-Z0-9])/, "\\1 \\2") raw end def html_for_attachments(user_id, files) html = ""; files.each do |file| upload, filename = find_upload(user_id, file["attachment_id"], file["file_name"]) if upload.present? html << "\n" if html.present? html << html_for_upload(upload, filename) end end html end def fake_email SecureRandom.hex << "@domain.com" end def mysql_query(sql) @client.query(sql, cache_rows: true) end end ImportScripts::Lithium.new.perform