discourse/script/import_scripts/drupal.rb

# frozen_string_literal: true

require "mysql2"
require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")

class ImportScripts::Drupal < ImportScripts::Base
  # Name of the Drupal MySQL database to read from. Override with the
  # DRUPAL_DB environment variable; defaults to "drupal".
  DRUPAL_DB = ENV["DRUPAL_DB"] || "drupal"
  # Taxonomy vocabulary id whose terms are imported as categories. Override
  # with the DRUPAL_VID environment variable; defaults to 1. The value is
  # interpolated directly into the category query.
  VID = ENV["DRUPAL_VID"] || 1
  # Number of rows requested per query when paging through large tables.
  BATCH_SIZE = 1000
  # Local directory in which find_upload looks for attachment files.
  ATTACHMENT_DIR = "/root/files/upload"

  # Prepares the importer: runs the base-class initializer, creates the HTML
  # entity decoder used for user and category names, and opens the MySQL
  # connection to the Drupal database named by DRUPAL_DB.
  def initialize
    super

    @htmlentities = HTMLEntities.new

    # Edit these settings to match the MySQL server holding the Drupal dump.
    connection_options = {
      host: "localhost",
      username: "root",
      #password: "password",
      database: DRUPAL_DB,
    }

    @client = Mysql2::Client.new(connection_options)
  end

  # Runs every import step in order. Users and categories come first because
  # the later steps look up imported user and category ids; topics must exist
  # before replies, likes, attachments and link rewriting refer to them.
  def execute
    import_users
    import_categories

    # "Nodes" in Drupal are divided into types. Here we import two types,
    # and will later import all the comments/replies for each node.
    # You will need to figure out what the type names are on your install and
    # edit the queries to match.
    # Blog (article) nodes are only imported when DRUPAL_IMPORT_BLOG is set.
    import_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]

    import_forum_topics

    import_replies
    import_likes
    mark_topics_as_solved
    import_sso_records
    import_attachments
    postprocess_posts
    create_permalinks
    import_gravatars
  end

  # Imports the rows of the Drupal `users` table as Discourse users.
  #
  # Rows are fetched BATCH_SIZE at a time, ordered by uid, each query starting
  # after the highest uid seen so far. Users already known to the lookup are
  # skipped. Missing or invalid e-mail addresses are replaced with a generated
  # fake address, and user names are HTML-entity decoded and stripped.
  def import_users
    puts "", "importing users"

    total_users = mysql_query("SELECT count(uid) count FROM users").first["count"]
    highest_uid_seen = -1

    batches(BATCH_SIZE) do |offset|
      rows = mysql_query(<<-SQL).to_a
          SELECT uid,
                 name username,
                 mail email,
                 created
            FROM users
           WHERE uid > #{highest_uid_seen}
        ORDER BY uid
           LIMIT #{BATCH_SIZE}
      SQL

      break if rows.empty?

      # Remember where this batch ended before filtering anything out.
      highest_uid_seen = rows.last["uid"]

      pending = rows.reject { |row| @lookup.user_already_imported?(row["uid"]) }

      create_users(pending, total: total_users, offset: offset) do |row|
        email = row["email"].presence || fake_email
        email = fake_email unless EmailAddressValidator.valid_value?(email)

        {
          id: row["uid"],
          name: @htmlentities.decode(row["username"]).strip,
          email: email,
          created_at: Time.zone.at(row["created"]),
        }
      end
    end
  end

  # Imports the taxonomy terms of vocabulary VID as Discourse categories.
  # Term names and descriptions are HTML-entity decoded and stripped.
  def import_categories
    # You'll need to edit the following query for your Drupal install:
    #
    #   * Drupal allows duplicate category names, so you may need to exclude
    #     some categories or rename them here.
    #   * Table name may be term_data.
    #   * May need to select a vid other than 1

    puts "", "importing categories"

    term_rows = mysql_query(<<-SQL).to_a
        SELECT tid,
               name,
               description
          FROM taxonomy_term_data
         WHERE vid = #{VID}
    SQL

    clean = ->(text) { @htmlentities.decode(text).strip }

    create_categories(term_rows) do |term|
      { id: term["tid"], name: clean.call(term["name"]), description: clean.call(term["description"]) }
    end
  end

  # Imports published Drupal nodes of type 'article' as topics in a "Blog"
  # category, creating that category first when it does not exist yet.
  # Sticky nodes are pinned at their creation time, and every topic carries
  # its "nid:<id>" import id as a custom field.
  def import_blog_topics
    puts "", "importing blog topics"

    Category.find_by_name("Blog") ||
      create_category({ name: "Blog", description: "Articles from the blog" }, nil)

    article_rows = mysql_query(<<-SQL).to_a
      SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
             f.body_value body
        FROM node n,
             field_data_body f
       WHERE n.type = 'article'
         AND n.nid = f.entity_id
         AND n.status = 1
    SQL

    blog_category_id = Category.find_by_name("Blog").id

    create_posts(article_rows) do |article|
      nid = article["nid"]
      posted_at = Time.zone.at(article["created"])
      is_sticky = article["sticky"].to_i == 1

      {
        id: "nid:#{nid}",
        user_id: user_id_from_imported_user_id(article["uid"]) || -1,
        category: blog_category_id,
        raw: article["body"],
        created_at: posted_at,
        pinned_at: is_sticky ? posted_at : nil,
        title: article["title"].try(:strip),
        custom_fields: {
          import_id: "nid:#{nid}",
        },
      }
    end
  end

  # Imports published Drupal nodes of type 'forum' as Discourse topics.
  #
  # Rows are paged with LIMIT/OFFSET, so the query is explicitly ordered by
  # node id: without an ORDER BY the database may return rows in a different
  # order for each page, which can skip some topics and repeat others.
  #
  # Each topic gets its category from the forum term (tid), its view count
  # from node_counter, and is pinned when the node is sticky. A topic that has
  # a row in `flagging` with fid = 7 is tagged with the `import_solved` custom
  # field, which mark_topics_as_solved reads later.
  # NOTE(review): fid 7 is presumably the "solved" flag on the source site;
  # confirm the flag id for your install.
  def import_forum_topics
    puts "", "importing forum topics"

    total_count = mysql_query(<<-SQL).first["count"]
        SELECT COUNT(*) count
          FROM forum_index fi, node n
         WHERE n.type = 'forum'
           AND fi.nid = n.nid
           AND n.status = 1
    SQL

    batches(BATCH_SIZE) do |offset|
      results = mysql_query(<<-SQL).to_a
        SELECT fi.nid nid,
               fi.title title,
               fi.tid tid,
               n.uid uid,
               fi.created created,
               fi.sticky sticky,
               f.body_value body,
               nc.totalcount views,
               fl.timestamp solved
          FROM forum_index fi
     LEFT JOIN node n ON fi.nid = n.nid
     LEFT JOIN field_data_body f ON f.entity_id = n.nid
     LEFT JOIN flagging fl ON fl.entity_id = n.nid
           AND fl.fid = 7
     LEFT JOIN node_counter nc ON nc.nid = n.nid
         WHERE n.type = 'forum'
           AND n.status = 1
      ORDER BY fi.nid
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset};
      SQL

      break if results.empty?

      # Skip the whole batch when every topic in it was imported already.
      next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }

      create_posts(results, total: total_count, offset: offset) do |row|
        raw = preprocess_raw(row["body"])
        topic = {
          id: "nid:#{row["nid"]}",
          user_id: user_id_from_imported_user_id(row["uid"]) || -1,
          category: category_id_from_imported_category_id(row["tid"]),
          raw: raw,
          created_at: Time.zone.at(row["created"]),
          pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
          title: row["title"].try(:strip),
          views: row["views"],
        }
        topic[:custom_fields] = { import_solved: true } if row["solved"].present?
        topic
      end
    end
  end

  # Imports published Drupal comments on 'article' and 'forum' nodes as
  # replies in the topics created for those nodes.
  #
  # The paged query filters on the same node types as the count query and as
  # import_blog_topics ('article'), so comments on imported blog articles are
  # not silently dropped. Comments whose node was not imported are reported
  # and skipped.
  #
  # When a comment has a parent comment (pid > 0; Drupal stores 0 for
  # top-level comments) and that parent was imported as a post other than the
  # first post, the reply is linked to it via :reply_to_post_number.
  def import_replies
    puts "", "creating replies in topics"

    total_count = mysql_query(<<-SQL).first["count"]
        SELECT COUNT(*) count
          FROM comment c,
               node n
         WHERE n.nid = c.nid
           AND c.status = 1
           AND n.type IN ('article', 'forum')
           AND n.status = 1
    SQL

    batches(BATCH_SIZE) do |offset|
      results = mysql_query(<<-SQL).to_a
        SELECT c.cid, c.pid, c.nid, c.uid, c.created,
               f.comment_body_value body
          FROM comment c,
               field_data_comment_body f,
               node n
         WHERE c.cid = f.entity_id
           AND n.nid = c.nid
           AND c.status = 1
           AND n.type IN ('article', 'forum')
           AND n.status = 1
         ORDER BY c.cid ASC
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if results.empty?

      # Skip the whole batch when every comment in it was imported already.
      next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }

      create_posts(results, total: total_count, offset: offset) do |row|
        topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
        topic_id = topic_mapping && topic_mapping[:topic_id]

        unless topic_id
          puts "No topic found for comment #{row["cid"]}"
          next nil
        end

        reply = {
          id: "cid:#{row["cid"]}",
          topic_id: topic_id,
          user_id: user_id_from_imported_user_id(row["uid"]) || -1,
          raw: preprocess_raw(row["body"]),
          created_at: Time.zone.at(row["created"]),
        }

        if row["pid"].to_i > 0
          parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
          reply[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > 1
        end

        reply
      end
    end
  end

  # Imports Drupal flaggings with fid 5 or 6 as Discourse likes.
  #
  # A flagging with fid 5 is resolved against an imported node ("nid:<id>");
  # any other selected flagging (fid 6) is resolved against an imported
  # comment ("cid:<id>"). Flaggings whose user or post was not imported are
  # skipped.
  # NOTE(review): fids 5 and 6 are presumably the node-like and comment-like
  # flags on the source site; confirm the flag ids for your install.
  #
  # The paged query is ordered by flagging_id so that LIMIT/OFFSET pages are
  # stable; without an ORDER BY rows may be skipped or repeated across pages.
  def import_likes
    puts "", "importing post likes"

    batches(BATCH_SIZE) do |offset|
      likes = mysql_query(<<-SQL).to_a
        SELECT flagging_id,
               fid,
               entity_id,
               uid
          FROM flagging
         WHERE fid = 5
            OR fid = 6
      ORDER BY flagging_id
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if likes.empty?

      likes.each do |l|
        identifier = l["fid"] == 5 ? "nid" : "cid"
        next unless user_id = user_id_from_imported_user_id(l["uid"])
        next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l["entity_id"]}")
        next unless user = User.find_by(id: user_id)
        next unless post = Post.find_by(id: post_id)

        begin
          PostActionCreator.like(user, post)
        rescue StandardError => e
          # Best effort: one failed like must not stop the import, but say so.
          puts "", "Failed to import like #{l["flagging_id"]}: #{e.message}"
        end
      end
    end
  end

  # Marks every topic that carries the `import_solved` custom field as
  # solved by recording its last post as the accepted answer.
  #
  # Topics are looked up with find_by so that a custom field pointing at a
  # topic that no longer exists is skipped; `find` would raise
  # ActiveRecord::RecordNotFound there and abort the remaining work.
  # NOTE(review): the last post is used as the accepted answer because the
  # imported flag does not identify which reply solved the topic.
  def mark_topics_as_solved
    puts "", "marking topics as solved"

    solved_topics =
      TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)

    solved_topics.each do |topic_id|
      next unless topic = Topic.find_by(id: topic_id)
      next unless post = topic.posts.last
      post_id = post.id

      PostCustomField.create!(post_id: post_id, name: "is_accepted_answer", value: true)
      TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post_id)
    end
  end

  # Creates a SingleSignOnRecord for every user that has an `import_id`
  # custom field, using that import id as the external id and the user's
  # current e-mail address as the external e-mail.
  #
  # Users are looked up with find_by so that a custom field whose user no
  # longer exists is skipped instead of raising and aborting the import.
  # A record that cannot be created is reported and the import moves on.
  def import_sso_records
    puts "", "importing sso records"

    start_time = Time.now
    current_count = 0

    users = UserCustomField.where(name: "import_id")

    total_count = users.count

    return if users.empty?

    users.each do |ids|
      external_id = ids.value
      next unless user = User.find_by(id: ids.user_id)

      begin
        current_count += 1
        print_status(current_count, total_count, start_time)
        SingleSignOnRecord.create!(
          user_id: user.id,
          external_id: external_id,
          external_email: user.email,
          last_payload: "",
        )
      rescue StandardError => e
        # Best effort: keep going, but do not hide why the record was skipped.
        puts "", "Failed to create SSO record for user #{user.id}: #{e.message}"
      end
    end
  end

  # Attaches Drupal file attachments to the posts they belong to.
  #
  # Every row of field_data_field_post_attachment (joined with file_managed)
  # is resolved to an imported post: rows with entity_type "comment" map to
  # "cid:<entity_id>", all others to "nid:<entity_id>". The file is uploaded
  # through find_upload and its HTML is appended to the post's raw text with
  # a revision, unless the post already contains it.
  #
  # The paged query is ordered so that LIMIT/OFFSET pages are stable; without
  # an ORDER BY rows may be skipped or repeated across pages. Users and posts
  # are looked up with find_by so that a missing record skips the attachment
  # instead of raising and aborting the import. A summary of imported and
  # failed attachments is printed at the end.
  def import_attachments
    puts "", "importing attachments"

    current_count = 0
    success_count = 0
    fail_count = 0

    total_count = mysql_query(<<-SQL).first["count"]
      SELECT count(field_post_attachment_fid) count
        FROM field_data_field_post_attachment
    SQL

    batches(BATCH_SIZE) do |offset|
      attachments = mysql_query(<<-SQL).to_a
          SELECT *
            FROM field_data_field_post_attachment fp
       LEFT JOIN file_managed fm
              ON fp.field_post_attachment_fid = fm.fid
        ORDER BY fp.entity_type, fp.entity_id, fp.field_post_attachment_fid
           LIMIT #{BATCH_SIZE}
          OFFSET #{offset}
      SQL

      break if attachments.empty?

      attachments.each do |attachment|
        current_count += 1
        print_status current_count, total_count

        identifier = attachment["entity_type"] == "comment" ? "cid" : "nid"
        next unless user_id = user_id_from_imported_user_id(attachment["uid"])
        next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment["entity_id"]}")
        # Only attachments whose uploader still exists are imported.
        next unless User.find_by(id: user_id)
        next unless post = Post.find_by(id: post_id)

        begin
          new_raw = post.raw.dup
          upload, filename = find_upload(post, attachment)

          unless upload
            fail_count += 1
            next
          end

          upload_html = html_for_upload(upload, filename)
          new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html)

          if new_raw != post.raw
            PostRevisor.new(post).revise!(
              post.user,
              { raw: new_raw },
              bypass_bump: true,
              edit_reason: "Import attachment from Drupal",
            )
          else
            puts "", "Skipped upload: already imported"
          end

          success_count += 1
        rescue => e
          fail_count += 1
          puts e
        end
      end
    end

    puts "", "attachments imported: #{success_count}, failed: #{fail_count}"
  end

  # Creates a "/node/<nid>" permalink for every listable topic whose
  # `import_id` custom field has the form "nid:<digits>", so that old Drupal
  # node URLs keep resolving to the imported topic.
  #
  # Topics without an import id, or with an import id that does not contain a
  # node id, are skipped; previously such topics produced a permalink for the
  # bare "/node/" path. A failure on one topic is reported and does not stop
  # the loop.
  def create_permalinks
    puts "", "creating permalinks..."

    Topic.listable_topics.find_each do |topic|
      begin
        tcf = topic.custom_fields
        node_id = tcf && tcf["import_id"].to_s[/nid:(\d+)/, 1]
        next unless node_id

        Permalink.create(url: "/node/#{node_id}", topic_id: topic.id)
      rescue => e
        puts e.message
        puts "Permalink creation failed for id #{topic.id}"
      end
    end
  end

  # Locates the file of a Drupal attachment on disk and uploads it.
  #
  # @param post [Post] post the attachment belongs to; its user (or -1 when
  #   the post has no user) becomes the owner of the upload
  # @param attachment [Hash] joined attachment row; reads "uri" and "filename"
  # @return [Array, nil] `[upload, filename]` on success, or nil when the
  #   attachment has no "public://upload/..." URI, the file is missing from
  #   ATTACHMENT_DIR, or the upload is invalid
  #
  # Names of missing files are appended to /tmp/attachments_failed.txt. A
  # missing file now returns early instead of still attempting the upload.
  def find_upload(post, attachment)
    # The URI is nil when the LEFT JOIN found no file_managed row.
    relative_path = attachment["uri"].to_s[%r{public://upload/(.+)}, 1]

    unless relative_path
      puts "Attachment #{attachment["filename"]} has no usable public://upload/ uri"
      return
    end

    real_filename = CGI.unescapeHTML(relative_path)
    file = File.join(ATTACHMENT_DIR, real_filename)

    unless File.exist?(file)
      puts "Attachment file #{attachment["filename"]} doesn't exist"

      tmpfile = "attachments_failed.txt"
      filename = File.join("/tmp/", tmpfile)
      File.open(filename, "a") { |f| f.puts attachment["filename"] }
      return
    end

    upload = create_upload(post.user&.id || -1, file, real_filename)

    if upload.nil? || upload.errors.any?
      puts "Upload not valid"
      puts upload.errors.inspect if upload
      return
    end

    [upload, real_filename]
  end

  # Converts Drupal BBCode quotes in a post body into Discourse markup.
  #
  #   * `[quote]...[/quote]` becomes a Markdown blockquote: the quoted text is
  #     placed on its own lines and every non-empty line is prefixed with "> ".
  #   * `[quote=name]...[/quote]` becomes `[quote="name"]...[/quote]` on its
  #     own lines.
  #
  # @param raw [String, nil] original body
  # @return [String, nil] converted, stripped body; nil when `raw` is blank
  #
  # The input string is no longer modified in place, so frozen strings are
  # accepted, and a quote containing only blank lines is kept rather than
  # erased (the in-place `gsub!` returned nil when it changed nothing, which
  # replaced the whole quote with an empty string).
  def preprocess_raw(raw)
    return if raw.blank?

    # quotes on new lines
    text =
      raw.gsub(%r{\[quote\](.+?)\[/quote\]}im) do
        "\n#{Regexp.last_match(1)}\n".gsub(/\n(.+?)/) { "\n> #{Regexp.last_match(1)}" }
      end

    # [QUOTE=<username>]...[/QUOTE]
    text =
      text.gsub(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
        username = Regexp.last_match(1)
        quote = Regexp.last_match(2)
        "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
      end

    text.strip
  end

  # Rewrites links to the old site inside every post so that they point at
  # the imported topics and replies.
  #
  #   * https://site.com/forum/topic/<nid> becomes
  #     https://community.site.com/t/-/<topic id>
  #   * https://site.com/comment/<cid>#comment-<n> becomes
  #     https://community.site.com/t/-/<topic id>/<post number>
  #
  # A link whose target was not imported (or no longer exists) is left
  # untouched. Previously the substitution block returned nil for such links,
  # which removed the URL from the post. Posts are only saved when their text
  # actually changed; a failure on one post is reported and the loop goes on.
  def postprocess_posts
    puts "", "postprocessing posts"

    current = 0
    max = Post.count

    Post.find_each do |post|
      begin
        raw = post.raw
        new_raw = raw.dup

        # replace old topic to new topic links
        new_raw.gsub!(%r{https://site.com/forum/topic/(\d+)}im) do |link|
          post_id = post_id_from_imported_post_id("nid:#{$1}")
          target = post_id && Post.find_by(id: post_id)
          next link unless target

          "https://community.site.com/t/-/#{target.topic_id}"
        end

        # replace old comment to reply links
        new_raw.gsub!(%r{https://site.com/comment/(\d+)#comment-\d+}im) do |link|
          post_id = post_id_from_imported_post_id("cid:#{$1}")
          target = post_id && Post.find_by(id: post_id)
          next link unless target

          "https://community.site.com/t/-/#{target.topic_id}/#{target.post_number}"
        end

        if raw != new_raw
          post.raw = new_raw
          post.save
        end
      rescue StandardError
        puts "", "Failed rewrite on post: #{post.id}"
      ensure
        print_status(current += 1, max)
      end
    end
  end

  # Refreshes the gravatar of every user, creating the user's avatar record
  # first when it does not exist. A failure for one user is reported with
  # that user's id and does not stop the loop.
  def import_gravatars
    puts "", "importing gravatars"
    current = 0
    max = User.count
    User.find_each do |user|
      begin
        user.create_user_avatar(user_id: user.id) unless user.user_avatar
        user.user_avatar.update_gravatar!
      rescue StandardError
        # Double quotes are required here so that the user id is interpolated.
        puts "", "Failed avatar update on user #{user.id}"
      ensure
        print_status(current += 1, max)
      end
    end
  end

  # Converts a Unix timestamp (seconds since the epoch) into a DateTime.
  #
  # @param time [String, Integer] epoch seconds; integers are accepted as
  #   well as strings, since `DateTime.strptime` itself only takes a String
  # @return [DateTime]
  def parse_datetime(time)
    DateTime.strptime(time.to_s, "%s")
  end

  # Runs `sql` on the Drupal database connection with row caching enabled
  # and returns whatever the client's `query` call returns.
  def mysql_query(sql)
    query_options = { cache_rows: true }
    @client.query(sql, query_options)
  end
end

# Run the import only when this file is executed directly, not when it is
# loaded by another script.
ImportScripts::Drupal.new.perform if __FILE__ == $0