# frozen_string_literal: true

require "mysql2"
require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")

# Imports users, categories, topics (forum + optional blog), replies, likes,
# "solved" flags, SSO records, attachments, and permalinks from a Drupal 7
# MySQL database into Discourse.
#
# Configuration via environment:
#   DRUPAL_DB          - source database name (default "drupal")
#   DRUPAL_VID         - taxonomy vocabulary id for forum categories (default 1;
#                        note ENV values are strings, which is fine for SQL interpolation)
#   DRUPAL_IMPORT_BLOG - when set, also imports 'article' nodes into a "Blog" category
class ImportScripts::Drupal < ImportScripts::Base
  DRUPAL_DB = ENV["DRUPAL_DB"] || "drupal"
  VID = ENV["DRUPAL_VID"] || 1
  BATCH_SIZE = 1000
  # Local directory containing the files copied from Drupal's public://upload
  ATTACHMENT_DIR = "/root/files/upload"

  def initialize
    super
    @htmlentities = HTMLEntities.new
    @client =
      Mysql2::Client.new(
        host: "localhost",
        username: "root",
        #password: "password",
        database: DRUPAL_DB,
      )
  end

  # Orchestrates the full import; order matters (topics must exist before
  # replies, posts before likes/attachments, etc.).
  def execute
    import_users
    import_categories

    # "Nodes" in Drupal are divided into types. Here we import two types,
    # and will later import all the comments/replies for each node.
    # You will need to figure out what the type names are on your install and edit the queries to match.
    import_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]
    import_forum_topics

    import_replies
    import_likes
    mark_topics_as_solved
    import_sso_records
    import_attachments
    postprocess_posts
    create_permalinks
    import_gravatars
  end

  # Imports Drupal users in keyset-paginated batches (WHERE uid > last seen),
  # skipping any already imported on a previous run.
  def import_users
    puts "", "importing users"

    user_count = mysql_query("SELECT count(uid) count FROM users").first["count"]

    last_user_id = -1

    batches(BATCH_SIZE) do |offset|
      users = mysql_query(<<-SQL).to_a
        SELECT uid, name username, mail email, created
          FROM users
         WHERE uid > #{last_user_id}
         ORDER BY uid
         LIMIT #{BATCH_SIZE}
      SQL

      break if users.empty?

      last_user_id = users[-1]["uid"]
      users.reject! { |u| @lookup.user_already_imported?(u["uid"]) }

      create_users(users, total: user_count, offset: offset) do |user|
        # Fall back to a generated address when the email is missing or invalid.
        email = user["email"].presence || fake_email
        email = fake_email if !EmailAddressValidator.valid_value?(email)

        username = @htmlentities.decode(user["username"]).strip

        { id: user["uid"], name: username, email: email, created_at: Time.zone.at(user["created"]) }
      end
    end
  end

  def import_categories
    # You'll need to edit the following query for your Drupal install:
    #
    #   * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
    #   * Table name may be term_data.
    #   * May need to select a vid other than 1
    puts "", "importing categories"

    categories = mysql_query(<<-SQL).to_a
      SELECT tid, name, description
        FROM taxonomy_term_data
       WHERE vid = #{VID}
    SQL

    create_categories(categories) do |category|
      {
        id: category["tid"],
        name: @htmlentities.decode(category["name"]).strip,
        description: @htmlentities.decode(category["description"]).strip,
      }
    end
  end

  # Imports 'article' nodes as topics in a dedicated "Blog" category,
  # creating that category if needed.
  def import_blog_topics
    puts "", "importing blog topics"

    unless Category.find_by_name("Blog")
      create_category({ name: "Blog", description: "Articles from the blog" }, nil)
    end

    blogs = mysql_query(<<-SQL).to_a
      SELECT n.nid nid, n.title title, n.uid uid, n.created created,
             n.sticky sticky, f.body_value body
        FROM node n, field_data_body f
       WHERE n.type = 'article'
         AND n.nid = f.entity_id
         AND n.status = 1
    SQL

    category_id = Category.find_by_name("Blog").id

    create_posts(blogs) do |topic|
      {
        id: "nid:#{topic["nid"]}",
        user_id: user_id_from_imported_user_id(topic["uid"]) || -1,
        category: category_id,
        # NOTE(review): blog bodies are imported verbatim, unlike forum topics
        # which go through preprocess_raw — confirm whether that is intentional.
        raw: topic["body"],
        created_at: Time.zone.at(topic["created"]),
        pinned_at: topic["sticky"].to_i == 1 ? Time.zone.at(topic["created"]) : nil,
        title: topic["title"].try(:strip),
        custom_fields: { import_id: "nid:#{topic["nid"]}" },
      }
    end
  end

  # Imports 'forum' nodes as topics. A flagging row with fid = 7 marks the
  # topic as solved (recorded via the import_solved custom field, resolved
  # later by mark_topics_as_solved).
  def import_forum_topics
    puts "", "importing forum topics"

    total_count = mysql_query(<<-SQL).first["count"]
      SELECT COUNT(*) count
        FROM forum_index fi, node n
       WHERE n.type = 'forum'
         AND fi.nid = n.nid
         AND n.status = 1
    SQL

    batches(BATCH_SIZE) do |offset|
      results = mysql_query(<<-SQL).to_a
        SELECT fi.nid nid, fi.title title, fi.tid tid, n.uid uid,
               fi.created created, fi.sticky sticky, f.body_value body,
               nc.totalcount views, fl.timestamp solved
          FROM forum_index fi
          LEFT JOIN node n ON fi.nid = n.nid
          LEFT JOIN field_data_body f ON f.entity_id = n.nid
          LEFT JOIN flagging fl ON fl.entity_id = n.nid AND fl.fid = 7
          LEFT JOIN node_counter nc ON nc.nid = n.nid
         WHERE n.type = 'forum'
           AND n.status = 1
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset};
      SQL

      break if results.size < 1

      next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }

      create_posts(results, total: total_count, offset: offset) do |row|
        raw = preprocess_raw(row["body"])
        topic = {
          id: "nid:#{row["nid"]}",
          user_id: user_id_from_imported_user_id(row["uid"]) || -1,
          category: category_id_from_imported_category_id(row["tid"]),
          raw: raw,
          created_at: Time.zone.at(row["created"]),
          pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
          title: row["title"].try(:strip),
          views: row["views"],
        }
        topic[:custom_fields] = { import_solved: true } if row["solved"].present?
        topic
      end
    end
  end

  # Imports published comments on imported nodes as replies, preserving the
  # reply-to relationship via the comment's pid where possible.
  def import_replies
    puts "", "creating replies in topics"

    total_count = mysql_query(<<-SQL).first["count"]
      SELECT COUNT(*) count
        FROM comment c, node n
       WHERE n.nid = c.nid
         AND c.status = 1
         AND n.type IN ('article', 'forum')
         AND n.status = 1
    SQL

    batches(BATCH_SIZE) do |offset|
      # FIX: the node type filter used to be ('blog', 'forum'), which disagreed
      # with both the count query above and import_blog_topics (which imports
      # nodes of type 'article') — blog comments were silently skipped.
      results = mysql_query(<<-SQL).to_a
        SELECT c.cid, c.pid, c.nid, c.uid, c.created, f.comment_body_value body
          FROM comment c, field_data_comment_body f, node n
         WHERE c.cid = f.entity_id
           AND n.nid = c.nid
           AND c.status = 1
           AND n.type IN ('article', 'forum')
           AND n.status = 1
         ORDER BY c.cid ASC
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if results.size < 1

      next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }

      create_posts(results, total: total_count, offset: offset) do |row|
        topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
        if topic_mapping && topic_id = topic_mapping[:topic_id]
          raw = preprocess_raw(row["body"])
          h = {
            id: "cid:#{row["cid"]}",
            topic_id: topic_id,
            user_id: user_id_from_imported_user_id(row["uid"]) || -1,
            raw: raw,
            created_at: Time.zone.at(row["created"]),
          }
          if row["pid"]
            parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
            # Only set reply_to when the parent is not the topic's first post.
            h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > 1
          end
          h
        else
          puts "No topic found for comment #{row["cid"]}"
          nil
        end
      end
    end
  end

  # Imports likes from the flagging table: fid 5 flags reference nodes
  # (topics), fid 6 flags reference comments (replies).
  def import_likes
    puts "", "importing post likes"

    batches(BATCH_SIZE) do |offset|
      likes = mysql_query(<<-SQL).to_a
        SELECT flagging_id, fid, entity_id, uid
          FROM flagging
         WHERE fid = 5 OR fid = 6
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if likes.empty?

      likes.each do |l|
        identifier = l["fid"] == 5 ? "nid" : "cid"
        next unless user_id = user_id_from_imported_user_id(l["uid"])
        next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l["entity_id"]}")
        next unless user = User.find_by(id: user_id)
        next unless post = Post.find_by(id: post_id)
        # Best-effort: a duplicate like raises, which we deliberately ignore.
        begin
          PostActionCreator.like(user, post)
        rescue StandardError
          nil
        end
      end
    end
  end

  # Marks the last post of each topic flagged import_solved as the accepted
  # answer (discourse-solved plugin custom fields).
  def mark_topics_as_solved
    puts "", "marking topics as solved"

    solved_topics = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)

    solved_topics.each do |topic_id|
      # FIX: was Topic.find, which raises RecordNotFound instead of returning
      # nil, so the `next unless` guard could never fire.
      next unless topic = Topic.find_by(id: topic_id)
      next unless post = topic.posts.last
      post_id = post.id
      PostCustomField.create!(post_id: post_id, name: "is_accepted_answer", value: true)
      TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post_id)
    end
  end

  # Creates a SingleSignOnRecord per imported user, using the original Drupal
  # uid (stored in the import_id custom field) as the external id.
  def import_sso_records
    puts "", "importing sso records"

    start_time = Time.now
    current_count = 0
    users = UserCustomField.where(name: "import_id")

    total_count = users.count

    return if users.empty?

    users.each do |ids|
      user_id = ids.user_id
      external_id = ids.value

      # FIX: was User.find, which raises outside the rescue below when the
      # user no longer exists; find_by keeps the guard semantics.
      next unless user = User.find_by(id: user_id)

      begin
        current_count += 1
        print_status(current_count, total_count, start_time)
        SingleSignOnRecord.create!(
          user_id: user.id,
          external_id: external_id,
          external_email: user.email,
          last_payload: "",
        )
      rescue StandardError
        next
      end
    end
  end

  # Uploads each Drupal post attachment and appends its markup to the
  # corresponding Discourse post (without bumping the topic).
  def import_attachments
    puts "", "importing attachments"

    current_count = 0
    success_count = 0
    fail_count = 0

    total_count = mysql_query(<<-SQL).first["count"]
      SELECT count(field_post_attachment_fid) count
        FROM field_data_field_post_attachment
    SQL

    batches(BATCH_SIZE) do |offset|
      attachments = mysql_query(<<-SQL).to_a
        SELECT *
          FROM field_data_field_post_attachment fp
          LEFT JOIN file_managed fm ON fp.field_post_attachment_fid = fm.fid
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if attachments.size < 1

      attachments.each do |attachment|
        current_count += 1
        print_status current_count, total_count
        identifier = attachment["entity_type"] == "comment" ? "cid" : "nid"
        next unless user_id = user_id_from_imported_user_id(attachment["uid"])
        unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment["entity_id"]}")
          next
        end
        # FIX: was find (raises when missing); find_by honors the guard.
        next unless user = User.find_by(id: user_id)
        next unless post = Post.find_by(id: post_id)

        begin
          new_raw = post.raw.dup
          upload, filename = find_upload(post, attachment)
          unless upload
            fail_count += 1
            next
          end

          upload_html = html_for_upload(upload, filename)
          # Append only once; re-runs must not duplicate the attachment markup.
          new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html)

          if new_raw != post.raw
            PostRevisor.new(post).revise!(
              post.user,
              { raw: new_raw },
              bypass_bump: true,
              edit_reason: "Import attachment from Drupal",
            )
          else
            puts "", "Skipped upload: already imported"
          end

          success_count += 1
        rescue => e
          puts e
        end
      end
    end
  end

  # Creates /node/<nid> permalinks so old Drupal URLs redirect to the
  # imported topics.
  def create_permalinks
    puts "", "creating permalinks..."

    Topic.listable_topics.find_each do |topic|
      begin
        tcf = topic.custom_fields
        if tcf && tcf["import_id"]
          node_id = tcf["import_id"][/nid:(\d+)/, 1]
          slug = "/node/#{node_id}"
          Permalink.create(url: slug, topic_id: topic.id)
        end
      rescue => e
        puts e.message
        puts "Permalink creation failed for id #{topic.id}"
      end
    end
  end

  # Resolves a Drupal file_managed row to a Discourse upload.
  # Returns [upload, filename] on success, nil on any failure (missing file,
  # invalid upload); failures for missing files are appended to
  # /tmp/attachments_failed.txt.
  def find_upload(post, attachment)
    uri = attachment["uri"][%r{public://upload/(.+)}, 1]
    # FIX: guard against URIs outside public://upload — the regex lookup
    # returns nil and CGI.unescapeHTML(nil) would raise.
    if uri.nil?
      puts "Unsupported attachment uri #{attachment["uri"]}"
      return
    end
    real_filename = CGI.unescapeHTML(uri)
    file = File.join(ATTACHMENT_DIR, real_filename)

    unless File.exist?(file)
      puts "Attachment file #{attachment["filename"]} doesn't exist"

      tmpfile = "attachments_failed.txt"
      filename = File.join("/tmp/", tmpfile)
      File.open(filename, "a") { |f| f.puts attachment["filename"] }
    end

    upload = create_upload(post.user.id || -1, file, real_filename)

    if upload.nil? || upload.errors.any?
      puts "Upload not valid"
      puts upload.errors.inspect if upload
      return
    end

    [upload, real_filename]
  end

  # Converts Drupal-style BBCode quotes in a raw body to Discourse markup.
  # Mutates and returns the string; returns nil for blank input.
  def preprocess_raw(raw)
    return if raw.blank?

    # quotes on new lines
    raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote|
      quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" }
      quote.gsub!(/\n(.+?)/) { "\n> #{$1}" }
      # FIX: gsub! returns nil when nothing changed; ending the block with the
      # (mutated) string prevents the outer gsub! from deleting the quote.
      quote
    end

    # [QUOTE=]...[/QUOTE]
    raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
      username, quote = $1, $2

      "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
    end

    raw.strip!
    raw
  end

  # Rewrites old Drupal URLs inside post bodies to their new Discourse URLs.
  def postprocess_posts
    puts "", "postprocessing posts"

    current = 0
    max = Post.count

    Post.find_each do |post|
      begin
        raw = post.raw
        new_raw = raw.dup

        # replace old topic to new topic links
        new_raw.gsub!(%r{https://site.com/forum/topic/(\d+)}im) do |match|
          post_id = post_id_from_imported_post_id("nid:#{$1}")
          # FIX: a bare `next` made the block return nil, which gsub! writes
          # as "" — unresolvable links were erased. Keep the original URL.
          next match unless post_id
          topic = Post.find(post_id).topic
          "https://community.site.com/t/-/#{topic.id}"
        end

        # replace old comment to reply links
        new_raw.gsub!(%r{https://site.com/comment/(\d+)#comment-\d+}im) do |match|
          post_id = post_id_from_imported_post_id("cid:#{$1}")
          next match unless post_id
          post_ref = Post.find(post_id)
          "https://community.site.com/t/-/#{post_ref.topic_id}/#{post_ref.post_number}"
        end

        if raw != new_raw
          post.raw = new_raw
          post.save
        end
      rescue StandardError
        puts "", "Failed rewrite on post: #{post.id}"
      ensure
        print_status(current += 1, max)
      end
    end
  end

  # Fetches gravatars for all users (best-effort; failures are logged).
  def import_gravatars
    puts "", "importing gravatars"
    current = 0
    max = User.count
    User.find_each do |user|
      begin
        user.create_user_avatar(user_id: user.id) unless user.user_avatar
        user.user_avatar.update_gravatar!
      rescue StandardError
        # FIX: message was single-quoted, so #{user.id} was printed literally.
        puts "", "Failed avatar update on user #{user.id}"
      ensure
        print_status(current += 1, max)
      end
    end
  end

  # Parses a Unix-epoch string. NOTE(review): DateTime is legacy; kept for
  # interface compatibility with any external callers.
  def parse_datetime(time)
    DateTime.strptime(time, "%s")
  end

  def mysql_query(sql)
    @client.query(sql, cache_rows: true)
  end
end

ImportScripts::Drupal.new.perform if __FILE__ == $0