discourse/script/import_scripts/vanilla_mysql.rb

# frozen_string_literal: true

require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'

class ImportScripts::VanillaSQL < ImportScripts::Base

  VANILLA_DB = "vanilla_mysql"
  TABLE_PREFIX = "GDN_"
  ATTACHMENTS_BASE_DIR = nil # "/absolute/path/to/attachments" set the absolute path if you have attachments
  BATCH_SIZE = 1000
  CONVERT_HTML = true

  def initialize
    super
    @htmlentities = HTMLEntities.new
    @client = Mysql2::Client.new(
      host: "localhost",
      username: "root",
      password: "pa$$word",
      database: VANILLA_DB
    )

    @import_tags = false
    begin
      r = @client.query("select count(*) count from #{TABLE_PREFIX}Tag where countdiscussions > 0")
      @import_tags = true if r.first["count"].to_i > 0
    rescue => e
      puts "Tags won't be imported. #{e.message}"
    end
  end

  def execute
    if @import_tags
      SiteSetting.tagging_enabled = true
      SiteSetting.max_tags_per_topic = 10
    end

    import_users
    import_avatars
    import_categories
    import_topics
    import_posts

    update_tl0

    create_permalinks
  end

  def import_users
    puts '', "creating users"

    @user_is_deleted = false
    @last_deleted_username = nil
    username = nil
    @last_user_id = -1
    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']

    batches(BATCH_SIZE) do |offset|
      results = mysql_query(
        "SELECT UserID, Name, Title, Location, About, Email,
                DateInserted, DateLastActive, InsertIPAddress, Admin
         FROM #{TABLE_PREFIX}User
         WHERE UserID > #{@last_user_id}
         ORDER BY UserID ASC
         LIMIT #{BATCH_SIZE};")

      break if results.size < 1
      @last_user_id = results.to_a.last['UserID']
      next if all_records_exist? :users, results.map { |u| u['UserID'].to_i }

      create_users(results, total: total_count, offset: offset) do |user|
        next if user['Email'].blank?
        next if user['Name'].blank?
        next if @lookup.user_id_from_imported_user_id(user['UserID'])
        if user['Name'] == '[Deleted User]'
          # EVERY deleted user record in Vanilla has the same username: [Deleted User]
          # Save our UserNameSuggester some pain:
          @user_is_deleted = true
          username = @last_deleted_username || user['Name']
        else
          @user_is_deleted = false
          username = user['Name']
        end

        { id: user['UserID'],
          email: user['Email'],
          username: username,
          name: user['Name'],
          created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']),
          bio_raw: user['About'],
          registration_ip_address: user['InsertIPAddress'],
          last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']),
          location: user['Location'],
          admin: user['Admin'] == 1,
          post_create_action: proc do |newuser|
            if @user_is_deleted
              @last_deleted_username = newuser.username
            end
          end }
      end
    end
  end

  def import_avatars
    if ATTACHMENTS_BASE_DIR && File.exists?(ATTACHMENTS_BASE_DIR)
      puts "", "importing user avatars"

      User.find_each do |u|
        next unless u.custom_fields["import_id"]

        r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first
        next if r.nil?
        photo = r["photo"]
        next unless photo.present?

        # Possible encoded values:
        # 1. cf://uploads/userpics/820/Y0AFUQYYM6QN.jpg
        # 2. ~cf/userpics2/cf566487133f1f538e02da96f9a16b18.jpg
        # 3. ~cf/userpics/txkt8kw1wozn.jpg

        photo_real_filename = nil
        parts = photo.squeeze("/").split("/")
        if parts[0] == "cf:"
          photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
        elsif parts[0] == "~cf"
          photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
        else
          puts "UNKNOWN FORMAT: #{photo}"
          next
        end

        if !File.exists?(photo_path)
          puts "Path to avatar file not found! Skipping. #{photo_path}"
          next
        end

        photo_real_filename = find_photo_file(photo_path, parts.last)
        if photo_real_filename.nil?
          puts "Couldn't find file for #{photo}. Skipping."
          next
        end

        print "."

        upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
        if upload.persisted?
          u.import_mode = false
          u.create_user_avatar
          u.import_mode = true
          u.user_avatar.update(custom_upload_id: upload.id)
          u.update(uploaded_avatar_id: upload.id)
        else
          puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
        end
      end
    end
  end

  def find_photo_file(path, base_filename)
    base_guess = base_filename.dup
    full_guess = File.join(path, base_guess) # often an exact match exists

    return full_guess if File.exists?(full_guess)

    # Otherwise, the file exists but with a prefix:
    # The p prefix seems to be the full file, so try to find that one first.
    ['p', 't', 'n'].each do |prefix|
      full_guess = File.join(path, "#{prefix}#{base_guess}")
      return full_guess if File.exists?(full_guess)
    end

    # Didn't find it.
    nil
  end

  def import_categories
    puts "", "importing categories..."

    categories = mysql_query("
                              SELECT CategoryID, Name, Description
                              FROM #{TABLE_PREFIX}Category
                              ORDER BY CategoryID ASC
                            ").to_a

    create_categories(categories) do |category|
      {
        id: category['CategoryID'],
        name: CGI.unescapeHTML(category['Name']),
        description: CGI.unescapeHTML(category['Description'])
      }
    end
  end

  def import_topics
    puts "", "importing topics..."

    tag_names_sql = "select t.name as tag_name from GDN_Tag t, GDN_TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';"

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count']

    @last_topic_id = -1

    batches(BATCH_SIZE) do |offset|
      discussions = mysql_query(
        "SELECT DiscussionID, CategoryID, Name, Body,
                DateInserted, InsertUserID
         FROM #{TABLE_PREFIX}Discussion
         WHERE DiscussionID > #{@last_topic_id}
         ORDER BY DiscussionID ASC
         LIMIT #{BATCH_SIZE};")

      break if discussions.size < 1
      @last_topic_id = discussions.to_a.last['DiscussionID']
      next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s }

      create_posts(discussions, total: total_count, offset: offset) do |discussion|
        {
          id: "discussion#" + discussion['DiscussionID'].to_s,
          user_id: user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID,
          title: discussion['Name'],
          category: category_id_from_imported_category_id(discussion['CategoryID']),
          raw: clean_up(discussion['Body']),
          created_at: Time.zone.at(discussion['DateInserted']),
          post_create_action: proc do |post|
            if @import_tags
              tag_names = @client.query(tag_names_sql.gsub('{discussionid}', discussion['DiscussionID'].to_s)).map { |row| row['tag_name'] }
              DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
            end
          end
        }
      end
    end
  end

  def import_posts
    puts "", "importing posts..."

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count']
    @last_post_id = -1
    batches(BATCH_SIZE) do |offset|
      comments = mysql_query(
        "SELECT CommentID, DiscussionID, Body,
                DateInserted, InsertUserID
         FROM #{TABLE_PREFIX}Comment
         WHERE CommentID > #{@last_post_id}
         ORDER BY CommentID ASC
         LIMIT #{BATCH_SIZE};")

      break if comments.size < 1
      @last_post_id = comments.to_a.last['CommentID']
      next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment['CommentID'].to_s }

      create_posts(comments, total: total_count, offset: offset) do |comment|
        next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
        next if comment['Body'].blank?
        {
          id: "comment#" + comment['CommentID'].to_s,
          user_id: user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID,
          topic_id: t[:topic_id],
          raw: clean_up(comment['Body']),
          created_at: Time.zone.at(comment['DateInserted'])
        }
      end
    end
  end

  def clean_up(raw)
    return "" if raw.blank?

    # decode HTML entities
    raw = @htmlentities.decode(raw)

    # fix whitespaces
    raw = raw.gsub(/(\\r)?\\n/, "\n")
      .gsub("\\t", "\t")

    # [HTML]...[/HTML]
    raw = raw.gsub(/\[html\]/i, "\n```html\n")
      .gsub(/\[\/html\]/i, "\n```\n")

    # [PHP]...[/PHP]
    raw = raw.gsub(/\[php\]/i, "\n```php\n")
      .gsub(/\[\/php\]/i, "\n```\n")

    # [HIGHLIGHT="..."]
    raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }

    # [CODE]...[/CODE]
    # [HIGHLIGHT]...[/HIGHLIGHT]
    raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
      .gsub(/\[\/?highlight\]/i, "\n```\n")

    # [SAMP]...[/SAMP]
    raw.gsub!(/\[\/?samp\]/i, "`")

    unless CONVERT_HTML
      # replace all chevrons with HTML entities
      # NOTE: must be done
      #  - AFTER all the "code" processing
      #  - BEFORE the "quote" processing
      raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
        .gsub("<", "&lt;")
        .gsub("\u2603", "<")

      raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
        .gsub(">", "&gt;")
        .gsub("\u2603", ">")
    end

    # [URL=...]...[/URL]
    raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }

    # [IMG]...[/IMG]
    raw.gsub!(/\[\/?img\]/i, "")

    # [URL]...[/URL]
    # [MP3]...[/MP3]
    raw = raw.gsub(/\[\/?url\]/i, "")
      .gsub(/\[\/?mp3\]/i, "")

    # [QUOTE]...[/QUOTE]
    raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }

    # [YOUTUBE]<id>[/YOUTUBE]
    raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [youtube=425,350]id[/youtube]
    raw.gsub!(/\[youtube="?(.+?)"?\](.+)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }

    # [MEDIA=youtube]id[/MEDIA]
    raw.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [VIDEO=youtube;<id>]...[/VIDEO]
    raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # Convert image bbcode
    raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')

    # Remove the color tag
    raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
    raw.gsub!(/\[\/color\]/i, "")

    # remove attachments
    raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")

    # sanitize img tags
    # This regexp removes everything between the first and last img tag. The .* is too much.
    # If it's needed, it needs to be fixed.
    # raw.gsub!(/\<img.*src\="([^\"]+)\".*\>/i) {"\n<img src='#{$1}'>\n"}

    raw
  end

  def staff_guardian
    @_staff_guardian ||= Guardian.new(Discourse.system_user)
  end

  def mysql_query(sql)
    @client.query(sql)
    # @client.query(sql, cache_rows: false) #segfault: cache_rows: false causes segmentation fault
  end

  def create_permalinks
    puts '', 'Creating redirects...', ''

    User.find_each do |u|
      ucf = u.custom_fields
      if ucf && ucf["import_id"] && ucf["import_username"]
        Permalink.create(url: "profile/#{ucf['import_id']}/#{ucf['import_username']}", external_url: "/users/#{u.username}") rescue nil
        print '.'
      end
    end

    Post.find_each do |post|
      pcf = post.custom_fields
      if pcf && pcf["import_id"]
        topic = post.topic
        id = pcf["import_id"].split('#').last
        if post.post_number == 1
          slug = Slug.for(topic.title) # probably matches what vanilla would do...
          Permalink.create(url: "discussion/#{id}/#{slug}", topic_id: topic.id) rescue nil
        else
          Permalink.create(url: "discussion/comment/#{id}", post_id: post.id) rescue nil
        end
        print '.'
      end
    end
  end

end

ImportScripts::VanillaSQL.new.perform
DEV: enable frozen string literal on all files This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging 2019-05-03 06:17:27 +08:00			`# frozen_string_literal: true`

add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`require "mysql2"`
			`require File.expand_path(File.dirname(__FILE__) + "/base.rb")`
			`require 'htmlentities'`

			`class ImportScripts::VanillaSQL < ImportScripts::Base`

			`VANILLA_DB = "vanilla_mysql"`
			`TABLE_PREFIX = "GDN_"`
FEATURE: import script for vanilla_mysql can import user avatars if you have the files 2016-04-02 03:57:20 +08:00			`ATTACHMENTS_BASE_DIR = nil # "/absolute/path/to/attachments" set the absolute path if you have attachments`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`BATCH_SIZE = 1000`
Vanilla importer: add an option for whether vanilla_mysql.rb should convert html markup into literals 2016-01-16 05:47:05 +08:00			`CONVERT_HTML = true`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`def initialize`
			`super`
			`@htmlentities = HTMLEntities.new`
			`@client = Mysql2::Client.new(`
			`host: "localhost",`
			`username: "root",`
			`password: "pa$$word",`
			`database: VANILLA_DB`
			`)`
FEATURE: vanilla_mysql importer can import tags 2016-06-28 04:17:00 +08:00
			`@import_tags = false`
			`begin`
			`r = @client.query("select count(*) count from #{TABLE_PREFIX}Tag where countdiscussions > 0")`
			`@import_tags = true if r.first["count"].to_i > 0`
			`rescue => e`
			`puts "Tags won't be imported. #{e.message}"`
			`end`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`end`

			`def execute`
FEATURE: vanilla_mysql importer can import tags 2016-06-28 04:17:00 +08:00			`if @import_tags`
			`SiteSetting.tagging_enabled = true`
			`SiteSetting.max_tags_per_topic = 10`
			`end`

add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`import_users`
FEATURE: import script for vanilla_mysql can import user avatars if you have the files 2016-04-02 03:57:20 +08:00			`import_avatars`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`import_categories`
			`import_topics`
			`import_posts`
vanilla_mysql faster users when resuming, create_permalinks 2016-05-17 23:16:41 +08:00
			`update_tl0`

			`create_permalinks`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`end`

			`def import_users`
			`puts '', "creating users"`

Vanilla MySQL: improve performance when the db is full of users with the same username: [Deleted User] 2016-01-13 07:44:14 +08:00			`@user_is_deleted = false`
			`@last_deleted_username = nil`
			`username = nil`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`@last_user_id = -1`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`batches(BATCH_SIZE) do \|offset\|`
			`results = mysql_query(`
			`"SELECT UserID, Name, Title, Location, About, Email,`
			`DateInserted, DateLastActive, InsertIPAddress, Admin`
			`FROM #{TABLE_PREFIX}User`
FIX: invalid SQL in vanilla_mysql import script 2018-01-20 00:53:07 +08:00			`WHERE UserID > #{@last_user_id}`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`ORDER BY UserID ASC`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`LIMIT #{BATCH_SIZE};")`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`break if results.size < 1`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`@last_user_id = results.to_a.last['UserID']`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`next if all_records_exist? :users, results.map { \|u\| u['UserID'].to_i }`
FEATURE: Skip batches if all records exist Update all import scripts to take advantage of all_records_exist? 2015-09-22 07:48:42 +08:00
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`create_users(results, total: total_count, offset: offset) do \|user\|`
			`next if user['Email'].blank?`
			`next if user['Name'].blank?`
vanilla_mysql faster users when resuming, create_permalinks 2016-05-17 23:16:41 +08:00			`next if @lookup.user_id_from_imported_user_id(user['UserID'])`
Vanilla MySQL: improve performance when the db is full of users with the same username: [Deleted User] 2016-01-13 07:44:14 +08:00			`if user['Name'] == '[Deleted User]'`
			`# EVERY deleted user record in Vanilla has the same username: [Deleted User]`
			`# Save our UserNameSuggester some pain:`
			`@user_is_deleted = true`
			`username = @last_deleted_username \|\| user['Name']`
			`else`
			`@user_is_deleted = false`
			`username = user['Name']`
			`end`

add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`{ id: user['UserID'],`
			`email: user['Email'],`
Vanilla MySQL: improve performance when the db is full of users with the same username: [Deleted User] 2016-01-13 07:44:14 +08:00			`username: username,`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`name: user['Name'],`
			`created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']),`
			`bio_raw: user['About'],`
			`registration_ip_address: user['InsertIPAddress'],`
			`last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']),`
			`location: user['Location'],`
Vanilla MySQL: improve performance when the db is full of users with the same username: [Deleted User] 2016-01-13 07:44:14 +08:00			`admin: user['Admin'] == 1,`
			`post_create_action: proc do \|newuser\|`
			`if @user_is_deleted`
			`@last_deleted_username = newuser.username`
			`end`
			`end }`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`end`
			`end`
			`end`

FEATURE: import script for vanilla_mysql can import user avatars if you have the files 2016-04-02 03:57:20 +08:00			`def import_avatars`
			`if ATTACHMENTS_BASE_DIR && File.exists?(ATTACHMENTS_BASE_DIR)`
			`puts "", "importing user avatars"`

			`User.find_each do \|u\|`
			`next unless u.custom_fields["import_id"]`

			`r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first`
			`next if r.nil?`
			`photo = r["photo"]`
			`next unless photo.present?`

			`# Possible encoded values:`
			`# 1. cf://uploads/userpics/820/Y0AFUQYYM6QN.jpg`
			`# 2. ~cf/userpics2/cf566487133f1f538e02da96f9a16b18.jpg`
			`# 3. ~cf/userpics/txkt8kw1wozn.jpg`

			`photo_real_filename = nil`
			`parts = photo.squeeze("/").split("/")`
			`if parts[0] == "cf:"`
			`photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")`
			`elsif parts[0] == "~cf"`
			`photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")`
			`else`
			`puts "UNKNOWN FORMAT: #{photo}"`
			`next`
			`end`

			`if !File.exists?(photo_path)`
			`puts "Path to avatar file not found! Skipping. #{photo_path}"`
			`next`
			`end`

			`photo_real_filename = find_photo_file(photo_path, parts.last)`
			`if photo_real_filename.nil?`
			`puts "Couldn't find file for #{photo}. Skipping."`
			`next`
			`end`

			`print "."`

			`upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))`
			`if upload.persisted?`
			`u.import_mode = false`
			`u.create_user_avatar`
			`u.import_mode = true`
			`u.user_avatar.update(custom_upload_id: upload.id)`
			`u.update(uploaded_avatar_id: upload.id)`
			`else`
			`puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"`
			`end`
			`end`
			`end`
			`end`

			`def find_photo_file(path, base_filename)`
			`base_guess = base_filename.dup`
			`full_guess = File.join(path, base_guess) # often an exact match exists`

			`return full_guess if File.exists?(full_guess)`

			`# Otherwise, the file exists but with a prefix:`
			`# The p prefix seems to be the full file, so try to find that one first.`
			`['p', 't', 'n'].each do \|prefix\|`
			`full_guess = File.join(path, "#{prefix}#{base_guess}")`
			`return full_guess if File.exists?(full_guess)`
			`end`

			`# Didn't find it.`
			`nil`
			`end`

add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`def import_categories`
			`puts "", "importing categories..."`

			`categories = mysql_query("`
			`SELECT CategoryID, Name, Description`
			`FROM #{TABLE_PREFIX}Category`
			`ORDER BY CategoryID ASC`
			`").to_a`

			`create_categories(categories) do \|category\|`
			`{`
			`id: category['CategoryID'],`
			`name: CGI.unescapeHTML(category['Name']),`
			`description: CGI.unescapeHTML(category['Description'])`
			`}`
			`end`
			`end`

			`def import_topics`
			`puts "", "importing topics..."`

FEATURE: vanilla_mysql importer can import tags 2016-06-28 04:17:00 +08:00			`tag_names_sql = "select t.name as tag_name from GDN_Tag t, GDN_TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';"`

add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count']`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`@last_topic_id = -1`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`batches(BATCH_SIZE) do \|offset\|`
			`discussions = mysql_query(`
			`"SELECT DiscussionID, CategoryID, Name, Body,`
			`DateInserted, InsertUserID`
			`FROM #{TABLE_PREFIX}Discussion`
FIX: invalid SQL in vanilla_mysql import script 2018-01-20 00:53:07 +08:00			`WHERE DiscussionID > #{@last_topic_id}`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`ORDER BY DiscussionID ASC`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`LIMIT #{BATCH_SIZE};")`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`break if discussions.size < 1`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`@last_topic_id = discussions.to_a.last['DiscussionID']`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`next if all_records_exist? :posts, discussions.map { \|t\| "discussion#" + t['DiscussionID'].to_s }`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`create_posts(discussions, total: total_count, offset: offset) do \|discussion\|`
			`{`
			`id: "discussion#" + discussion['DiscussionID'].to_s,`
			`user_id: user_id_from_imported_user_id(discussion['InsertUserID']) \|\| Discourse::SYSTEM_USER_ID,`
			`title: discussion['Name'],`
			`category: category_id_from_imported_category_id(discussion['CategoryID']),`
			`raw: clean_up(discussion['Body']),`
FEATURE: vanilla_mysql importer can import tags 2016-06-28 04:17:00 +08:00			`created_at: Time.zone.at(discussion['DateInserted']),`
			`post_create_action: proc do \|post\|`
			`if @import_tags`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`tag_names = @client.query(tag_names_sql.gsub('{discussionid}', discussion['DiscussionID'].to_s)).map { \|row\| row['tag_name'] }`
FEATURE: vanilla_mysql importer can import tags 2016-06-28 04:17:00 +08:00			`DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)`
			`end`
			`end`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`}`
			`end`
			`end`
			`end`

			`def import_posts`
			`puts "", "importing posts..."`

			`total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count']`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`@last_post_id = -1`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`batches(BATCH_SIZE) do \|offset\|`
			`comments = mysql_query(`
			`"SELECT CommentID, DiscussionID, Body,`
			`DateInserted, InsertUserID`
			`FROM #{TABLE_PREFIX}Comment`
FIX: invalid SQL in vanilla_mysql import script 2018-01-20 00:53:07 +08:00			`WHERE CommentID > #{@last_post_id}`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`ORDER BY CommentID ASC`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`LIMIT #{BATCH_SIZE};")`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`break if comments.size < 1`
Optimize import script - vanilla_mysql.rb Use WHERE id > last_id instead of OFFSET because OFFSET causes performance issues 2017-06-19 13:00:55 +08:00			`@last_post_id = comments.to_a.last['CommentID']`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`next if all_records_exist? :posts, comments.map { \|comment\| "comment#" + comment['CommentID'].to_s }`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`create_posts(comments, total: total_count, offset: offset) do \|comment\|`
			`next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)`
			`next if comment['Body'].blank?`
			`{`
			`id: "comment#" + comment['CommentID'].to_s,`
			`user_id: user_id_from_imported_user_id(comment['InsertUserID']) \|\| Discourse::SYSTEM_USER_ID,`
			`topic_id: t[:topic_id],`
			`raw: clean_up(comment['Body']),`
			`created_at: Time.zone.at(comment['DateInserted'])`
			`}`
			`end`
			`end`
			`end`

			`def clean_up(raw)`
			`return "" if raw.blank?`

			`# decode HTML entities`
			`raw = @htmlentities.decode(raw)`

			`# fix whitespaces`
			`raw = raw.gsub(/(\\r)?\\n/, "\n")`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`.gsub("\\t", "\t")`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`# [HTML]...[/HTML]`
			raw = raw.gsub(/\[html\]/i, "\n```html\n")
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			.gsub(/\[\/html\]/i, "\n```\n")
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`# [PHP]...[/PHP]`
			raw = raw.gsub(/\[php\]/i, "\n```php\n")
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			.gsub(/\[\/php\]/i, "\n```\n")
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`# [HIGHLIGHT="..."]`
			raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }

			`# [CODE]...[/CODE]`
			`# [HIGHLIGHT]...[/HIGHLIGHT]`
			raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			.gsub(/\[\/?highlight\]/i, "\n```\n")
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`# [SAMP]...[/SAMP]`
			raw.gsub!(/\[\/?samp\]/i, "`")

Vanilla importer: add an option for whether vanilla_mysql.rb should convert html markup into literals 2016-01-16 05:47:05 +08:00			`unless CONVERT_HTML`
			`# replace all chevrons with HTML entities`
			`# NOTE: must be done`
			`# - AFTER all the "code" processing`
			`# - BEFORE the "quote" processing`
			raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`.gsub("<", "<")`
			`.gsub("\u2603", "<")`
Vanilla importer: add an option for whether vanilla_mysql.rb should convert html markup into literals 2016-01-16 05:47:05 +08:00
			raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`.gsub(">", ">")`
			`.gsub("\u2603", ">")`
Vanilla importer: add an option for whether vanilla_mysql.rb should convert html markup into literals 2016-01-16 05:47:05 +08:00			`end`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`# [URL=...]...[/URL]`
			`raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }`

			`# [IMG]...[/IMG]`
			`raw.gsub!(/\[\/?img\]/i, "")`

			`# [URL]...[/URL]`
			`# [MP3]...[/MP3]`
			`raw = raw.gsub(/\[\/?url\]/i, "")`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`.gsub(/\[\/?mp3\]/i, "")`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`# [QUOTE]...[/QUOTE]`
			`raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }`

			`# [YOUTUBE]<id>[/YOUTUBE]`
			`raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }`

			`# [youtube=425,350]id[/youtube]`
			`raw.gsub!(/\[youtube="?(.+?)"?\](.+)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }`

			`# [MEDIA=youtube]id[/MEDIA]`
			`raw.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }`

			`# [VIDEO=youtube;<id>]...[/VIDEO]`
			`raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }`

			`# Convert image bbcode`
			`raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')`

			`# Remove the color tag`
			`raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")`
			`raw.gsub!(/\[\/color\]/i, "")`

			`# remove attachments`
			`raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")`

			`# sanitize img tags`
FIX: vanilla_mysql importer removes everything between the first and last img tags in a post 2016-05-26 00:41:32 +08:00			`# This regexp removes everything between the first and last img tag. The .* is too much.`
			`# If it's needed, it needs to be fixed.`
			`# raw.gsub!(/\<img.src\="([^\"]+)\".\>/i) {"\n<img src='#{$1}'>\n"}`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00
			`raw`
			`end`

FEATURE: vanilla_mysql importer can import tags 2016-06-28 04:17:00 +08:00			`def staff_guardian`
			`@_staff_guardian \|\|= Guardian.new(Discourse.system_user)`
			`end`

add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`def mysql_query(sql)`
cache_rows: false causes segfault 2016-01-13 07:45:19 +08:00			`@client.query(sql)`
			`# @client.query(sql, cache_rows: false) #segfault: cache_rows: false causes segmentation fault`
add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`end`

vanilla_mysql faster users when resuming, create_permalinks 2016-05-17 23:16:41 +08:00			`def create_permalinks`
			`puts '', 'Creating redirects...', ''`

			`User.find_each do \|u\|`
			`ucf = u.custom_fields`
			`if ucf && ucf["import_id"] && ucf["import_username"]`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`Permalink.create(url: "profile/#{ucf['import_id']}/#{ucf['import_username']}", external_url: "/users/#{u.username}") rescue nil`
FIX: importer performance of update_tl0, and give feedback during vanilla_mysql's create_permalinks 2016-07-06 22:58:43 +08:00			`print '.'`
vanilla_mysql faster users when resuming, create_permalinks 2016-05-17 23:16:41 +08:00			`end`
			`end`

			`Post.find_each do \|post\|`
			`pcf = post.custom_fields`
			`if pcf && pcf["import_id"]`
			`topic = post.topic`
			`id = pcf["import_id"].split('#').last`
			`if post.post_number == 1`
			`slug = Slug.for(topic.title) # probably matches what vanilla would do...`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`Permalink.create(url: "discussion/#{id}/#{slug}", topic_id: topic.id) rescue nil`
vanilla_mysql faster users when resuming, create_permalinks 2016-05-17 23:16:41 +08:00			`else`
Add rubocop to our build. (#5004) 2017-07-28 09:20:09 +08:00			`Permalink.create(url: "discussion/comment/#{id}", post_id: post.id) rescue nil`
vanilla_mysql faster users when resuming, create_permalinks 2016-05-17 23:16:41 +08:00			`end`
FIX: importer performance of update_tl0, and give feedback during vanilla_mysql's create_permalinks 2016-07-06 22:58:43 +08:00			`print '.'`
vanilla_mysql faster users when resuming, create_permalinks 2016-05-17 23:16:41 +08:00			`end`
			`end`
			`end`

add Vanilla (MySQL) importer 2015-06-29 20:45:52 +08:00			`end`

			`ImportScripts::VanillaSQL.new.perform`