diff --git a/script/import_scripts/vbulletin5.rb b/script/import_scripts/vbulletin5.rb
new file mode 100644
index 00000000000..21a9f851f2b
--- /dev/null
+++ b/script/import_scripts/vbulletin5.rb
@@ -0,0 +1,634 @@
+require 'mysql2'
+require File.expand_path(File.dirname(__FILE__) + "/base.rb")
+require 'htmlentities'
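+
+# Importer for vBulletin 5.x. Edit the constants below (and the database
+# credentials in `initialize`) to match your site, then run the script from
+# the Discourse root directory, e.g.:
+#
+#   bundle exec ruby script/import_scripts/vbulletin5.rb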
+
+class ImportScripts::VBulletin5 < ImportScripts::Base
+ BATCH_SIZE = 1000
+ DBPREFIX = "vb_"
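+ # nodeid of the channel that contains all forum categories; adjust if your
+ # vBulletin 5 install uses a different root node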
+ ROOT_NODE = 2
+
+ # CHANGE THESE BEFORE RUNNING THE IMPORTER
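+ # ATTACHMENT_DIR and AVATAR_DIR are only read when vBulletin keeps files on
+ # disk; file data stored in the database is exported directly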
+ DATABASE = "yourforum"
+ TIMEZONE = "America/Los_Angeles"
+ ATTACHMENT_DIR = '/home/discourse/yourforum/customattachments/'
+ AVATAR_DIR = '/home/discourse/yourforum/avatars/'
+
+ def initialize
+ super
+
+ @old_username_to_new_usernames = {}
+
+ @tz = TZInfo::Timezone.get(TIMEZONE)
+
+ @htmlentities = HTMLEntities.new
+
+ @client = Mysql2::Client.new(
+ host: "localhost",
+ username: "root",
+ database: DATABASE,
+ password: "password"
+ )
+
+ end
+
+ def execute
+ import_groups
+ import_users
+ import_categories
+ import_topics
+ import_posts
+ import_attachments
+ close_topics
+ post_process_posts
+ end
+
+ def import_groups
+ puts "", "importing groups..."
+
+ groups = mysql_query <<-SQL
+ SELECT usergroupid, title
+ FROM #{DBPREFIX}usergroup
+ ORDER BY usergroupid
+ SQL
+
+ create_groups(groups) do |group|
+ {
+ id: group["usergroupid"],
+ name: @htmlentities.decode(group["title"]).strip
+ }
+ end
+ end
+
+ def import_users
+ puts "", "importing users"
+
+ user_count = mysql_query("SELECT COUNT(userid) count FROM #{DBPREFIX}user").first["count"]
+
+ batches(BATCH_SIZE) do |offset|
+ users = mysql_query <<-SQL
+ SELECT u.userid, u.username, u.homepage, u.usertitle, u.usergroupid, u.joindate, u.email,
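+ -- vBulletin 5 keeps a hash "scheme" and "token" per user: blowfish:10
+ -- tokens are bcrypt hashes, while legacy tokens are "md5hash salt",
+ -- which we re-join with ":" for the password import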
+ CASE WHEN u.scheme='blowfish:10' THEN token
+ WHEN u.scheme='legacy' THEN REPLACE(token, ' ', ':')
+ END AS password,
+ IF(ug.title = 'Administrators', 1, 0) AS admin
+ FROM #{DBPREFIX}user u
+ LEFT JOIN #{DBPREFIX}usergroup ug ON ug.usergroupid = u.usergroupid
+ ORDER BY userid
+ LIMIT #{BATCH_SIZE}
+ OFFSET #{offset}
+ SQL
+
+ break if users.size < 1
+
+ # disabled line below, caused issues
+ # next if all_records_exist? :users, users.map {|u| u["userid"].to_i}
+
+ create_users(users, total: user_count, offset: offset) do |user|
+ username = @htmlentities.decode(user["username"]).strip
+ {
+ id: user["userid"],
+ name: username,
+ username: username,
+ email: user["email"].presence || fake_email,
+ admin: user['admin'] == 1,
+ password: user["password"],
+ website: user["homepage"].strip,
+ title: @htmlentities.decode(user["usertitle"]).strip,
+ primary_group_id: group_id_from_imported_group_id(user["usergroupid"]),
+ created_at: parse_timestamp(user["joindate"]),
+ post_create_action: proc do |u|
+ @old_username_to_new_usernames[user["username"]] = u.username
+ import_profile_picture(user, u)
+ import_profile_background(user, u)
+ end
+ }
+ end
+ end
+ end
+
+ def import_profile_picture(old_user, imported_user)
+ query = mysql_query <<-SQL
+ SELECT filedata, filename
+ FROM #{DBPREFIX}customavatar
+ WHERE userid = #{old_user["userid"]}
+ ORDER BY dateline DESC
+ LIMIT 1
+ SQL
+
+ picture = query.first
+
+ return if picture.nil?
+
+ if picture['filedata']
+ file = Tempfile.new("profile-picture")
+ file.write(picture["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
+ file.rewind
+ upload = Upload.create_for(imported_user.id, file, picture["filename"], file.size)
+ else
+ filename = File.join(AVATAR_DIR, picture['filename'])
+ unless File.exist?(filename)
+ puts "Avatar file doesn't exist: #{filename}"
+ return nil
+ end
+ upload = create_upload(imported_user.id, filename, picture['filename'])
+ end
+
+ return if !upload.persisted?
+
+ imported_user.create_user_avatar
+ imported_user.user_avatar.update(custom_upload_id: upload.id)
+ imported_user.update(uploaded_avatar_id: upload.id)
+ ensure
+ file.close rescue nil
+ file.unlink rescue nil
+ end
+
+ def import_profile_background(old_user, imported_user)
+ query = mysql_query <<-SQL
+ SELECT filedata, filename
+ FROM #{DBPREFIX}customprofilepic
+ WHERE userid = #{old_user["userid"]}
+ ORDER BY dateline DESC
+ LIMIT 1
+ SQL
+
+ background = query.first
+
+ return if background.nil?
+
+ file = Tempfile.new("profile-background")
+ file.write(background["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
+ file.rewind
+
+ upload = Upload.create_for(imported_user.id, file, background["filename"], file.size)
+
+ return if !upload.persisted?
+
+ imported_user.user_profile.update(profile_background: upload.url)
+ ensure
+ file.close rescue nil
+ file.unlink rescue nil
+ end
+
+ def import_categories
+ puts "", "importing top level categories..."
+
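+    # contenttypeid 23 is assumed to be "Channel" and 22 "Text" throughout this
+    # script; vBulletin 5 assigns contenttypeids per install, so check your
+    # `contenttype` table if these don't match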
+ categories = mysql_query("SELECT nodeid AS forumid, title, description, displayorder, parentid
+ FROM #{DBPREFIX}node
+ WHERE parentid=#{ROOT_NODE}
+ UNION
+ SELECT nodeid, title, description, displayorder, parentid
+ FROM #{DBPREFIX}node
+ WHERE contenttypeid = 23
+ AND parentid IN (SELECT nodeid FROM #{DBPREFIX}node WHERE parentid=#{ROOT_NODE})").to_a
+
+ top_level_categories = categories.select { |c| c["parentid"] == ROOT_NODE }
+
+ create_categories(top_level_categories) do |category|
+ {
+ id: category["forumid"],
+ name: @htmlentities.decode(category["title"]).strip,
+ position: category["displayorder"],
+ description: @htmlentities.decode(category["description"]).strip
+ }
+ end
+
+ puts "", "importing child categories..."
+
+ children_categories = categories.select { |c| c["parentid"] != ROOT_NODE }
+ top_level_category_ids = Set.new(top_level_categories.map { |c| c["forumid"] })
+
+ # cut down the tree to only 2 levels of categories
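+ # (walk each child up its ancestor chain until it hangs off a top-level
+ # category, since Discourse only supports two levels of categories)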
+ children_categories.each do |cc|
+ while !top_level_category_ids.include?(cc["parentid"])
+ cc["parentid"] = categories.detect { |c| c["forumid"] == cc["parentid"] }["parentid"]
+ end
+ end
+
+ create_categories(children_categories) do |category|
+ {
+ id: category["forumid"],
+ name: @htmlentities.decode(category["title"]).strip,
+ position: category["displayorder"],
+ description: @htmlentities.decode(category["description"]).strip,
+ parent_category_id: category_id_from_imported_category_id(category["parentid"])
+ }
+ end
+ end
+
+ def import_topics
+ puts "", "importing topics..."
+
+ # keep track of closed topics
+ @closed_topic_ids = []
+
+ topic_count = mysql_query("select count(nodeid) cnt from #{DBPREFIX}node where parentid in (
+ select nodeid from #{DBPREFIX}node where contenttypeid=23 ) and contenttypeid=22;").first["cnt"]
+
+ batches(BATCH_SIZE) do |offset|
+ topics = mysql_query <<-SQL
+ SELECT t.nodeid AS threadid, t.title, t.parentid AS forumid, t.open, t.userid AS postuserid, t.publishdate AS dateline,
+ nv.count AS views, 1 AS visible, t.sticky,
+ CONVERT(CAST(rawtext AS BINARY) USING utf8) AS raw
+ FROM #{DBPREFIX}node t
+ LEFT JOIN #{DBPREFIX}nodeview nv ON nv.nodeid=t.nodeid
+ LEFT JOIN #{DBPREFIX}text txt ON txt.nodeid=t.nodeid
+ WHERE t.parentid in ( select nodeid from #{DBPREFIX}node where contenttypeid=23 )
+ AND t.contenttypeid = 22
+ ORDER BY t.nodeid
+ LIMIT #{BATCH_SIZE}
+ OFFSET #{offset}
+ SQL
+
+ break if topics.size < 1
+
+ # disabled line below, caused issues
+ # next if all_records_exist? :posts, topics.map {|t| "thread-#{t["threadid"]}" }
+
+ create_posts(topics, total: topic_count, offset: offset) do |topic|
+ raw = preprocess_post_raw(topic["raw"]) rescue nil
+ next if raw.blank?
+ topic_id = "thread-#{topic["threadid"]}"
+ @closed_topic_ids << topic_id if topic["open"].to_s == "0"
+ t = {
+ id: topic_id,
+ user_id: user_id_from_imported_user_id(topic["postuserid"]) || Discourse::SYSTEM_USER_ID,
+ title: @htmlentities.decode(topic["title"]).strip[0...255],
+ category: category_id_from_imported_category_id(topic["forumid"]),
+ raw: raw,
+ created_at: parse_timestamp(topic["dateline"]),
+ visible: topic["visible"].to_i == 1,
+ views: topic["views"],
+ }
+ t[:pinned_at] = t[:created_at] if topic["sticky"].to_i == 1
+ t
+ end
+ end
+ end
+
+ def import_posts
+ puts "", "importing posts..."
+
+ # make sure `firstpostid` is indexed
+ begin
+ mysql_query("CREATE INDEX firstpostid_index ON thread (firstpostid)")
+ rescue
+ end
+
+ post_count = mysql_query("SELECT COUNT(nodeid) cnt FROM #{DBPREFIX}node WHERE parentid NOT IN (
+ SELECT nodeid FROM #{DBPREFIX}node WHERE contenttypeid=23 ) AND contenttypeid=22;").first["cnt"]
+
+ batches(BATCH_SIZE) do |offset|
+ posts = mysql_query <<-SQL
+ SELECT p.nodeid AS postid, p.userid AS userid, p.parentid AS threadid,
+ CONVERT(CAST(rawtext AS BINARY) USING utf8) AS raw, p.publishdate AS dateline,
+ 1 AS visible, p.parentid AS parentid
+ FROM #{DBPREFIX}node p
+ LEFT JOIN #{DBPREFIX}nodeview nv ON nv.nodeid=p.nodeid
+ LEFT JOIN #{DBPREFIX}text txt ON txt.nodeid=p.nodeid
+ WHERE p.parentid NOT IN ( select nodeid from #{DBPREFIX}node where contenttypeid=23 )
+ AND p.contenttypeid = 22
+ ORDER BY postid
+ LIMIT #{BATCH_SIZE}
+ OFFSET #{offset}
+ SQL
+
+ break if posts.size < 1
+
+ # disabled line below, caused issues
+ # next if all_records_exist? :posts, posts.map {|p| p["postid"] }
+
+ create_posts(posts, total: post_count, offset: offset) do |post|
+ raw = preprocess_post_raw(post["raw"])
+ next if raw.blank?
+ next unless topic = topic_lookup_from_imported_post_id("thread-#{post["threadid"]}")
+ p = {
+ id: post["postid"],
+ user_id: user_id_from_imported_user_id(post["userid"]) || Discourse::SYSTEM_USER_ID,
+ topic_id: topic[:topic_id],
+ raw: raw,
+ created_at: parse_timestamp(post["dateline"]),
+ hidden: post["visible"].to_i == 0,
+ }
+ if parent = topic_lookup_from_imported_post_id(post["parentid"])
+ p[:reply_to_post_number] = parent[:post_number]
+ end
+ p
+ end
+ end
+ end
+
+ # find the uploaded file information from the db
+ def find_upload(post, attachment_id)
+ sql = "SELECT a.filedataid, a.filename, fd.userid, LENGTH(fd.filedata) AS dbsize, filedata
+ FROM #{DBPREFIX}attach a
+ LEFT JOIN #{DBPREFIX}filedata fd ON fd.filedataid = a.filedataid
+ WHERE a.nodeid = #{attachment_id}"
+ results = mysql_query(sql)
+
+ unless (row = results.first)
+ puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
+ return nil
+ end
+
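+    # filesystem attachments live in nested directories named after the digits
+    # of the owner's userid, e.g. userid 123 => 1/2/3/<filedataid>.attach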
+ filename = File.join(ATTACHMENT_DIR, row['userid'].to_s.split('').join('/'), "#{row['filedataid']}.attach")
+ real_filename = row['filename']
+ real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
+
+ unless File.exist?(filename)
+ if row['dbsize'].to_i == 0
+ puts "Attachment file #{row['filedataid']} doesn't exist"
+ return nil
+ end
+
+ tmpfile = 'attach_' + row['filedataid'].to_s
+ filename = File.join('/tmp/', tmpfile)
+ File.open(filename, 'wb') { |f|
+ f.write(row['filedata'])
+ }
+ end
+
+ upload = create_upload(post.user.id, filename, real_filename)
+
+ if upload.nil? || !upload.valid?
+ puts "Upload not valid :("
+ puts upload.errors.inspect if upload
+ return nil
+ end
+
+ return upload, real_filename
+ rescue Mysql2::Error => e
+ puts "SQL Error"
+ puts e.message
+ puts sql
+ return nil
+ end
+
+ def import_attachments
+ puts '', 'importing attachments...'
+
+ current_count = 0
+ total_count = mysql_query("SELECT COUNT(nodeid) cnt FROM #{DBPREFIX}node WHERE contenttypeid=22 ").first["cnt"]
+
+ success_count = 0
+ fail_count = 0
+
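+    # matches vBulletin 5 attachment tags of the form [ATTACH...]n<nodeid>[/ATTACH];
+    # the digits after the "n" are the attachment's nodeid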
+ attachment_regex = /\[attach[^\]]*\]n(\d+)\[\/attach\]/i
+
+ Post.find_each do |post|
+ current_count += 1
+ print_status current_count, total_count
+
+ new_raw = post.raw.dup
+ new_raw.gsub!(attachment_regex) do |s|
+ matches = attachment_regex.match(s)
+ attachment_id = matches[1]
+
+ upload, filename = find_upload(post, attachment_id)
+ unless upload
+ fail_count += 1
+ next
+ end
+ html_for_upload(upload, filename)
+ end
+
+ if new_raw != post.raw
+ PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: 'Import attachments from vBulletin' })
+ end
+
+ success_count += 1
+ end
+ end
+
+ def close_topics
+ puts "", "Closing topics..."
+
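+    # topics were imported with import_id "thread-<threadid>", so the ids
+    # collected in import_topics can be matched against post_custom_fields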
+ sql = <<-SQL
+ WITH closed_topic_ids AS (
+ SELECT t.id AS topic_id
+ FROM post_custom_fields pcf
+ JOIN posts p ON p.id = pcf.post_id
+ JOIN topics t ON t.id = p.topic_id
+ WHERE pcf.name = 'import_id'
+ AND pcf.value IN (?)
+ )
+ UPDATE topics
+ SET closed = true
+ WHERE id IN (SELECT topic_id FROM closed_topic_ids)
+ SQL
+
+ Topic.exec_sql(sql, @closed_topic_ids)
+ end
+
+ def post_process_posts
+ puts "", "Postprocessing posts..."
+
+ current = 0
+ max = Post.count
+
+ Post.find_each do |post|
+ begin
+ new_raw = postprocess_post_raw(post.raw)
+ if new_raw != post.raw
+ post.raw = new_raw
+ post.save
+ end
+ rescue PrettyText::JavaScriptError
+ nil
+ ensure
+ print_status(current += 1, max)
+ end
+ end
+ end
+
+ def preprocess_post_raw(raw)
+ return "" if raw.blank?
+
+ # decode HTML entities
+ raw = @htmlentities.decode(raw)
+
+ # fix whitespaces
+ raw = raw.gsub(/(\\r)?\\n/, "\n")
+ .gsub("\\t", "\t")
+
+ # [HTML]...[/HTML]
+ raw = raw.gsub(/\[html\]/i, "\n```html\n")
+ .gsub(/\[\/html\]/i, "\n```\n")
+
+ # [PHP]...[/PHP]
+ raw = raw.gsub(/\[php\]/i, "\n```php\n")
+ .gsub(/\[\/php\]/i, "\n```\n")
+
+ # [HIGHLIGHT="..."]
+ raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }
+
+ # [CODE]...[/CODE]
+ # [HIGHLIGHT]...[/HIGHLIGHT]
+ raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
+ .gsub(/\[\/?highlight\]/i, "\n```\n")
+
+ # [SAMP]...[/SAMP]
+ raw = raw.gsub(/\[\/?samp\]/i, "`")
+
+ # replace all chevrons with HTML entities
+ # NOTE: must be done
+ # - AFTER all the "code" processing
+ # - BEFORE the "quote" processing
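+ # (chevrons inside inline code spans are shielded with a placeholder
+ # character, the rest are escaped to HTML entities, then restored)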
+ raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
+ .gsub("<", "&lt;")
+ .gsub("\u2603", "<")
+
+ raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
+ .gsub(">", "&gt;")
+ .gsub("\u2603", ">")
+
+ # [URL=...]...[/URL]
+ raw.gsub!(/\[url="?(.+?)"?\](.+?)\[\/url\]/i) { "[#{$2}](#{$1})" }
+
+ # [URL]...[/URL]
+ # [MP3]...[/MP3]
+ raw = raw.gsub(/\[\/?url\]/i, "")
+ .gsub(/\[\/?mp3\]/i, "")
+
+ # [MENTION][/MENTION]
+ raw = raw.gsub(/\[mention\](.+?)\[\/mention\]/i) do
+ old_username = $1
+ if @old_username_to_new_usernames.has_key?(old_username)
+ old_username = @old_username_to_new_usernames[old_username]
+ end
+ "@#{old_username}"
+ end
+
+ # [USER=][/USER]
+ raw = raw.gsub(/\[user="?(\d+)"?\](.+?)\[\/user\]/i) do
+ user_id, old_username = $1, $2
+ if @old_username_to_new_usernames.has_key?(old_username)
+ new_username = @old_username_to_new_usernames[old_username]
+ else
+ new_username = old_username
+ end
+ "@#{new_username}"
+ end
+
+ # [FONT=blah] and [COLOR=blah]
+ # the /i flag already matches any case, so one pattern per tag is enough
+ raw.gsub! /\[color=.*?\](.*?)\[\/color\]/im, '\1'
+ raw.gsub! /\[font=.*?\](.*?)\[\/font\]/im, '\1'
+
+ # [CENTER]...[/CENTER]
+ raw.gsub! /\[CENTER\](.*?)\[\/CENTER\]/im, '\1'
+
+ # [LIST]...[/LIST]: drop the tags but keep the items as markdown bullets
+ raw.gsub! /\[\/?list(=\d+)?\]/im, "\n"
+ raw.gsub! /\[\*\]/im, "\n- "
+
+ # [QUOTE]...[/QUOTE]
+ raw = raw.gsub(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }
+
+ # [QUOTE=]...[/QUOTE]
+ raw = raw.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
+ old_username, quote = $1, $2
+
+ if @old_username_to_new_usernames.has_key?(old_username)
+ old_username = @old_username_to_new_usernames[old_username]
+ end
+ "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
+ end
+
+ # [YOUTUBE][/YOUTUBE]
+ raw = raw.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" }
+
+ # [VIDEO=youtube;]...[/VIDEO]
+ raw = raw.gsub(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }
+
+ raw
+ end
+
+ def postprocess_post_raw(raw)
+ # [QUOTE=;]...[/QUOTE]
+ raw = raw.gsub(/\[quote=([^;]+);n(\d+)\](.+?)\[\/quote\]/im) do
+ old_username, post_id, quote = $1, $2, $3
+
+ if @old_username_to_new_usernames.has_key?(old_username)
+ old_username = @old_username_to_new_usernames[old_username]
+ end
+
+ if topic_lookup = topic_lookup_from_imported_post_id(post_id)
+ post_number = topic_lookup[:post_number]
+ topic_id = topic_lookup[:topic_id]
+ "\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n"
+ else
+ "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
+ end
+ end
+
+ # remove attachments
+ raw = raw.gsub(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")
+
+ # [THREAD]<thread_id>[/THREAD]
+ # ==> http://my.discourse.org/t/slug/<topic_id>
+ raw = raw.gsub(/\[thread\](\d+)\[\/thread\]/i) do
+ thread_id = $1
+ if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
+ topic_lookup[:url]
+ else
+ $&
+ end
+ end
+
+ # [THREAD=<thread_id>]...[/THREAD]
+ # ==> [...](http://my.discourse.org/t/slug/<topic_id>)
+ raw = raw.gsub(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do
+ thread_id, link = $1, $2
+ if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
+ url = topic_lookup[:url]
+ "[#{link}](#{url})"
+ else
+ $&
+ end
+ end
+
+ # [POST]<post_id>[/POST]
+ # ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
+ raw = raw.gsub(/\[post\](\d+)\[\/post\]/i) do
+ post_id = $1
+ if topic_lookup = topic_lookup_from_imported_post_id(post_id)
+ topic_lookup[:url]
+ else
+ $&
+ end
+ end
+
+ # [POST=<post_id>]...[/POST]
+ # ==> [...](http://my.discourse.org/t/slug/<topic_id>/<post_number>)
+ raw = raw.gsub(/\[post=(\d+)\](.+?)\[\/post\]/i) do
+ post_id, link = $1, $2
+ if topic_lookup = topic_lookup_from_imported_post_id(post_id)
+ url = topic_lookup[:url]
+ "[#{link}](#{url})"
+ else
+ $&
+ end
+ end
+
+ raw
+ end
+
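+ # vBulletin stores datelines as Unix epoch seconds; interpret them in the
+ # forum's configured timezone before handing them to Discourse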
+ def parse_timestamp(timestamp)
+ Time.zone.at(@tz.utc_to_local(timestamp))
+ end
+
+ def fake_email
+ SecureRandom.hex << "@domain.com"
+ end
+
+ def mysql_query(sql)
+ @client.query(sql, cache_rows: false)
+ end
+
+end
+
+ImportScripts::VBulletin5.new.perform