mirror of
https://github.com/discourse/discourse.git
synced 2025-01-24 20:37:15 +08:00
495 lines
13 KiB
Ruby
495 lines
13 KiB
Ruby
require_relative "base"
|
|
require "mysql2"
|
|
require "htmlentities"
|
|
|
|
class BulkImport::VBulletin < BulkImport::Base
|
|
|
|
# Far-future date used as the suspension end when a ban row has no positive
# lift date (see import_users).
SUSPENDED_TILL ||= Date.new(3000, 1, 1)

# Maps a MySQL character set name (the DB_CHARSET setting) to the Ruby
# encoding used by normalize_charset. A nil value presumably marks a MySQL
# charset that has no Ruby counterpart -- confirm before relying on it.
CHARSET_MAP = {
  "armscii8" => nil,
  "ascii" => Encoding::US_ASCII,
  "big5" => Encoding::Big5,
  "binary" => Encoding::ASCII_8BIT,
  "cp1250" => Encoding::Windows_1250,
  "cp1251" => Encoding::Windows_1251,
  "cp1256" => Encoding::Windows_1256,
  "cp1257" => Encoding::Windows_1257,
  "cp850" => Encoding::CP850,
  "cp852" => Encoding::CP852,
  "cp866" => Encoding::IBM866,
  "cp932" => Encoding::Windows_31J,
  "dec8" => nil,
  "eucjpms" => Encoding::EucJP_ms,
  "euckr" => Encoding::EUC_KR,
  "gb2312" => Encoding::EUC_CN,
  "gbk" => Encoding::GBK,
  "geostd8" => nil,
  "greek" => Encoding::ISO_8859_7,
  "hebrew" => Encoding::ISO_8859_8,
  "hp8" => nil,
  "keybcs2" => nil,
  "koi8r" => Encoding::KOI8_R,
  "koi8u" => Encoding::KOI8_U,
  "latin1" => Encoding::ISO_8859_1,
  "latin2" => Encoding::ISO_8859_2,
  "latin5" => Encoding::ISO_8859_9,
  "latin7" => Encoding::ISO_8859_13,
  "macce" => Encoding::MacCentEuro,
  "macroman" => Encoding::MacRoman,
  "sjis" => Encoding::SHIFT_JIS,
  "swe7" => nil,
  "tis620" => Encoding::TIS_620,
  "ucs2" => Encoding::UTF_16BE,
  "ujis" => Encoding::EucJP_ms,
  "utf8" => Encoding::UTF_8,
  # MySQL's 4-byte UTF-8 charset is still UTF-8 on the Ruby side; without this
  # entry the lookup returns nil and text normalization cannot work.
  "utf8mb4" => Encoding::UTF_8,
}.freeze
|
|
|
|
# Connects to the source vBulletin database and probes for optional columns.
#
# Connection settings come from the environment:
#   DB_HOST     - passed to the client as-is (may be unset)
#   DB_USERNAME - defaults to "root"
#   DB_PASSWORD - passed to the client as-is (may be unset)
#   DB_NAME     - defaults to "vbulletin"
#   DB_CHARSET  - defaults to "utf8"; also looked up in CHARSET_MAP
def initialize
  super

  host = ENV["DB_HOST"]
  username = ENV["DB_USERNAME"] || "root"
  password = ENV["DB_PASSWORD"]
  database = ENV["DB_NAME"] || "vbulletin"
  charset = ENV["DB_CHARSET"] || "utf8"

  @html_entities = HTMLEntities.new
  @encoding = CHARSET_MAP[charset]

  @client = Mysql2::Client.new(
    host: host,
    username: username,
    password: password,
    database: database,
    encoding: charset
  )

  # Every query returns rows as positional arrays and does not cache them.
  @client.query_options.merge!(as: :array, cache_rows: false)

  # The database name ends up inside a quoted SQL literal, so escape it;
  # a name containing a quote would otherwise break the probe query.
  schema_name = @client.escape(database)

  thanks_columns = mysql_query(<<-SQL).to_a
    SELECT `COLUMN_NAME`
    FROM `INFORMATION_SCHEMA`.`COLUMNS`
    WHERE `TABLE_SCHEMA`='#{schema_name}'
    AND `TABLE_NAME`='user'
    AND `COLUMN_NAME` LIKE 'post_thanks_%'
  SQL

  # True when the `user` table has at least one post_thanks_* column.
  @has_post_thanks = !thanks_columns.empty?
end
|
|
|
|
# Runs every import step, one after another, in a fixed order.
def execute
  steps = %i[
    import_groups
    import_users
    import_group_users

    import_user_emails
    import_user_stats

    import_user_passwords
    import_user_salts
    import_user_profiles

    import_categories
    import_topics
    import_posts

    import_private_topics
    import_topic_allowed_users
    import_private_posts
  ]

  steps.each { |step| send(step) }
end
|
|
|
|
# Streams rows from the `usergroup` table with an id above
# @last_imported_group_id and hands them to create_groups.
def import_groups
  puts "Importing groups..."

  sql = <<-SQL
    SELECT usergroupid, title, description, usertitle
    FROM usergroup
    WHERE usergroupid > #{@last_imported_group_id}
    ORDER BY usergroupid
  SQL

  create_groups(mysql_stream(sql)) do |row|
    group_id, title, description, user_title = row

    {
      imported_id: group_id,
      name: normalize_text(title),
      bio_raw: normalize_text(description),
      title: normalize_text(user_title),
    }
  end
end
|
|
|
|
# Streams rows from `user` (left-joined with `userban`) with an id above
# @last_imported_user_id and hands them to create_users.
def import_users
  puts "Importing users..."

  sql = <<-SQL
    SELECT user.userid, username, email, joindate, birthday, ipaddress, user.usergroupid, bandate, liftdate
    FROM user
    LEFT JOIN userban ON userban.userid = user.userid
    WHERE user.userid > #{@last_imported_user_id}
    ORDER BY user.userid
  SQL

  create_users(mysql_stream(sql)) do |row|
    # The email column is selected but not used here.
    user_id, username, _email, joined_at, birthday, ip_address, group_id, banned_at, ban_lifted_at = row

    attrs = {
      imported_id: user_id,
      username: normalize_text(username),
      created_at: Time.zone.at(joined_at),
      date_of_birth: parse_birthday(birthday),
      primary_group_id: group_id_from_imported_id(group_id),
    }

    # Keep only the first dotted-quad found in the stored address.
    attrs[:ip_address] = ip_address[/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if ip_address.present?

    if banned_at
      attrs[:suspended_at] = Time.zone.at(banned_at)
      # A lift date that is not positive falls back to SUSPENDED_TILL.
      attrs[:suspended_till] =
        if ban_lifted_at > 0
          Time.zone.at(ban_lifted_at)
        else
          SUSPENDED_TILL
        end
    end

    attrs
  end
end
|
|
|
|
# Streams (userid, email, joindate) from `user` for ids above
# @last_imported_user_id and hands them to create_user_emails.
def import_user_emails
  puts "Importing user emails..."

  sql = <<-SQL
    SELECT user.userid, email, joindate
    FROM user
    WHERE user.userid > #{@last_imported_user_id}
    ORDER BY user.userid
  SQL

  create_user_emails(mysql_stream(sql)) do |row|
    user_id, email, joined_at = row

    # The source user id serves as both the record id and the owner id.
    {
      imported_id: user_id,
      imported_user_id: user_id,
      email: email,
      created_at: Time.zone.at(joined_at)
    }
  end
end
|
|
|
|
# Streams per-user counters from `user` (joined with `post` and `thread`) for
# ids above @last_imported_user_id and hands them to create_user_stats.
# The two post_thanks columns are selected only when @has_post_thanks is set.
def import_user_stats
  puts "Importing user stats..."

  thanks_columns = (", post_thanks_user_amount, post_thanks_thanked_times" if @has_post_thanks)

  sql = <<-SQL
    SELECT user.userid, joindate, posts, COUNT(thread.threadid) AS threads, post.dateline
    #{thanks_columns}
    FROM user
    LEFT OUTER JOIN post ON post.postid = user.lastpostid
    LEFT OUTER JOIN thread ON user.userid = thread.postuserid
    WHERE user.userid > #{@last_imported_user_id}
    GROUP BY user.userid
    ORDER BY user.userid
  SQL

  create_user_stats(mysql_stream(sql)) do |row|
    user_id, joined_at, post_count, topic_count, post_dateline = row

    stats = {
      imported_id: user_id,
      imported_user_id: user_id,
      new_since: Time.zone.at(joined_at),
      post_count: post_count,
      topic_count: topic_count,
      # NOTE(review): this dateline comes from the post joined on
      # user.lastpostid -- confirm that is the intended source for this field.
      first_post_created_at: post_dateline && Time.zone.at(post_dateline)
    }

    if @has_post_thanks
      stats[:likes_given] = row[5]
      stats[:likes_received] = row[6]
    end

    stats
  end
end
|
|
|
|
# Streams (usergroupid, userid) pairs from `user` for ids above
# @last_imported_user_id and hands them to create_group_users, translating
# both ids through the imported-id lookups.
def import_group_users
  puts "Importing group users..."

  sql = <<-SQL
    SELECT usergroupid, userid
    FROM user
    WHERE userid > #{@last_imported_user_id}
  SQL

  create_group_users(mysql_stream(sql)) do |row|
    source_group_id, source_user_id = row

    {
      group_id: group_id_from_imported_id(source_group_id),
      user_id: user_id_from_imported_id(source_user_id),
    }
  end
end
|
|
|
|
# Stores each user's `password` column value as a "password" custom field
# on the imported user.
def import_user_passwords
  puts "Importing user passwords..."

  sql = <<-SQL
    SELECT userid, password
    FROM user
    WHERE userid > #{@last_imported_user_id}
    ORDER BY userid
  SQL

  create_custom_fields("user", "password", mysql_stream(sql)) do |row|
    source_user_id, password = row

    {
      record_id: user_id_from_imported_id(source_user_id),
      value: password,
    }
  end
end
|
|
|
|
# Stores each user's non-empty `salt` column value as a "salt" custom field
# on the imported user. Rows with a NULL or empty salt are filtered out in SQL.
def import_user_salts
  puts "Importing user salts..."

  sql = <<-SQL
    SELECT userid, salt
    FROM user
    WHERE userid > #{@last_imported_user_id}
    AND LENGTH(COALESCE(salt, '')) > 0
    ORDER BY userid
  SQL

  create_custom_fields("user", "salt", mysql_stream(sql)) do |row|
    source_user_id, salt = row

    {
      record_id: user_id_from_imported_id(source_user_id),
      value: salt,
    }
  end
end
|
|
|
|
# Streams (userid, homepage, profilevisits) from `user` for ids above
# @last_imported_user_id and hands them to create_user_profiles.
def import_user_profiles
  puts "Importing user profiles..."

  sql = <<-SQL
    SELECT userid, homepage, profilevisits
    FROM user
    WHERE userid > #{@last_imported_user_id}
    ORDER BY userid
  SQL

  create_user_profiles(mysql_stream(sql)) do |row|
    source_user_id, homepage, visits = row

    profile_user_id = user_id_from_imported_id(source_user_id)

    # Any homepage that URI cannot parse (including a missing one) becomes nil.
    website =
      begin
        URI.parse(homepage).to_s
      rescue StandardError
        nil
      end

    {
      user_id: profile_user_id,
      website: website,
      views: visits,
    }
  end
end
|
|
|
|
# Imports rows of the `forum` table as categories, flattened to two levels.
#
# Forums whose parentid is -1 become parent categories. Every other forum is
# re-parented to its top-level ancestor, then imported as a child category.
def import_categories
  puts "Importing categories..."

  categories = mysql_query(<<-SQL).to_a
    SELECT forumid, parentid, title, description, displayorder
    FROM forum
    WHERE forumid > #{@last_imported_category_id}
    ORDER BY forumid
  SQL

  return if categories.empty?

  parent_categories, children_categories = categories.partition { |c| c[1] == -1 }

  parent_category_ids = Set.new(parent_categories.map { |c| c[0] })

  # Index rows by forumid so walking up the tree is a hash lookup instead of
  # a linear scan per step.
  categories_by_id = categories.each_with_object({}) { |c, index| index[c[0]] = c }

  # cut down the tree to only 2 levels of categories
  children_categories.each do |cc|
    visited = Set.new

    until parent_category_ids.include?(cc[1])
      ancestor = categories_by_id[cc[1]]

      # Stop when the parent is not part of this batch (it cannot be walked
      # any further) or when the parent chain loops back on itself. The
      # parentid is then left as-is for category_id_from_imported_id.
      # NOTE(review): confirm that lookup resolves parents from earlier runs.
      break if ancestor.nil? || !visited.add?(cc[1])

      cc[1] = ancestor[1]
    end
  end

  puts "Importing parent categories..."
  create_categories(parent_categories) do |row|
    {
      imported_id: row[0],
      name: normalize_text(row[2]),
      description: normalize_text(row[3]),
      position: row[4],
    }
  end

  puts "Importing children categories..."
  create_categories(children_categories) do |row|
    {
      imported_id: row[0],
      name: normalize_text(row[2]),
      description: normalize_text(row[3]),
      position: row[4],
      parent_category_id: category_id_from_imported_id(row[1]),
    }
  end
end
|
|
|
|
# Streams rows from `thread` (only threads that have at least one post) with
# an id above @last_imported_topic_id and hands them to create_topics.
def import_topics
  puts "Importing topics..."

  sql = <<-SQL
    SELECT threadid, title, forumid, postuserid, open, dateline, views, visible, sticky
    FROM thread
    WHERE threadid > #{@last_imported_topic_id}
    AND EXISTS (SELECT 1 FROM post WHERE post.threadid = thread.threadid)
    ORDER BY threadid
  SQL

  create_topics(mysql_stream(sql)) do |row|
    thread_id, title, forum_id, poster_id, open_flag, dateline, views, visible_flag, sticky_flag = row

    created_at = Time.zone.at(dateline)

    topic = {
      imported_id: thread_id,
      title: normalize_text(title),
      category_id: category_id_from_imported_id(forum_id),
      user_id: user_id_from_imported_id(poster_id),
      closed: open_flag == 0,
      created_at: created_at,
      views: views,
      visible: visible_flag == 1,
    }

    # Sticky threads are pinned as of their creation time.
    topic[:pinned_at] = created_at if sticky_flag == 1

    topic
  end
end
|
|
|
|
# Streams rows from `post` (joined with `thread`) with an id above
# @last_imported_post_id and hands them to create_posts.
def import_posts
  puts "Importing posts..."

  sql = <<-SQL
    SELECT postid, post.threadid, parentid, userid, post.dateline, post.visible, pagetext
    FROM post
    JOIN thread ON thread.threadid = post.threadid
    WHERE postid > #{@last_imported_post_id}
    ORDER BY postid
  SQL

  create_posts(mysql_stream(sql)) do |row|
    post_id, thread_id, parent_id, author_id, dateline, visible_flag, page_text = row

    topic_id = topic_id_from_imported_id(thread_id)
    parent_topic_id = topic_id_from_imported_post_id(parent_id)

    # Only link to the parent post when it resolves to the same topic.
    reply_to_post_number = (post_number_from_imported_id(parent_id) if topic_id == parent_topic_id)

    {
      imported_id: post_id,
      topic_id: topic_id,
      reply_to_post_number: reply_to_post_number,
      user_id: user_id_from_imported_id(author_id),
      created_at: Time.zone.at(dateline),
      hidden: visible_flag == 0,
      raw: normalize_text(page_text),
    }
  end
end
|
|
|
|
# Imports rows of `pmtext` as private-message topics.
#
# Rows are grouped by [cleaned title, sorted participant ids]: only the first
# row of each group creates a topic. The mapping from that key to the topic's
# imported id is kept in @imported_topics, which import_private_posts reads.
# Imported ids are shifted by PRIVATE_OFFSET (defined outside this file).
def import_private_topics
  puts "Importing private topics..."

  @imported_topics = {}

  topics = mysql_stream <<-SQL
    SELECT pmtextid, title, fromuserid, touserarray, dateline
    FROM pmtext
    WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
    ORDER BY pmtextid
  SQL

  create_topics(topics) do |row|
    title = extract_pm_title(row[1])
    # Sender plus every integer entry (`i:<n>`) found in touserarray;
    # presumably a PHP-serialized recipient list -- confirm.
    user_ids = [row[2], row[3].scan(/i:(\d+)/)].flatten.map(&:to_i).sort
    key = [title, user_ids]

    next if @imported_topics.key?(key)
    @imported_topics[key] = row[0] + PRIVATE_OFFSET

    {
      archetype: Archetype.private_message,
      imported_id: row[0] + PRIVATE_OFFSET,
      # extract_pm_title already ran normalize_text on the title. Running it a
      # second time would decode HTML entities twice and, for non-UTF-8
      # charsets, transcode text that has already been converted.
      title: title,
      user_id: user_id_from_imported_id(row[2]),
      created_at: Time.zone.at(row[4]),
    }
  end
end
|
|
|
|
# Builds the (topic, user) access list for imported private-message topics.
#
# For every `pmtext` row that maps to an imported topic, each integer entry
# (`i:<n>`) found in touserarray is resolved to an imported user and the pair
# is handed to create_topic_allowed_users. Rows or users that do not resolve
# are skipped.
def import_topic_allowed_users
  puts "Importing topic allowed users..."

  allowed_users = []

  rows = mysql_stream(<<-SQL)
    SELECT pmtextid, touserarray
    FROM pmtext
    WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
    ORDER BY pmtextid
  SQL

  rows.each do |row|
    topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET)
    next unless topic_id

    row[1].scan(/i:(\d+)/).flatten.each do |id|
      # scan yields Strings; convert so the lookup receives an Integer like
      # at every other call site in this file.
      user_id = user_id_from_imported_id(id.to_i)
      next unless user_id

      allowed_users << [topic_id, user_id]
    end
  end

  create_topic_allowed_users(allowed_users) do |row|
    {
      topic_id: row[0],
      user_id: row[1],
    }
  end
end
|
|
|
|
# Imports rows of `pmtext` as posts inside the private-message topics.
#
# Each row is matched to its topic through @imported_topics, using the same
# [cleaned title, sorted participant ids] key built when the topics were
# created. Rows whose topic cannot be resolved are skipped.
def import_private_posts
  puts "Importing private posts..."

  sql = <<-SQL
    SELECT pmtextid, title, fromuserid, touserarray, dateline, message
    FROM pmtext
    WHERE pmtextid > #{@last_imported_private_post_id - PRIVATE_OFFSET}
    ORDER BY pmtextid
  SQL

  create_posts(mysql_stream(sql)) do |row|
    pm_id, raw_title, sender_id, recipients, dateline, message = row

    title = extract_pm_title(raw_title)
    participant_ids = [sender_id, recipients.scan(/i:(\d+)/)].flatten.map(&:to_i).sort
    key = [title, participant_ids]

    topic_id = topic_id_from_imported_id(@imported_topics[key])
    next unless topic_id

    {
      imported_id: pm_id + PRIVATE_OFFSET,
      topic_id: topic_id,
      user_id: user_id_from_imported_id(sender_id),
      created_at: Time.zone.at(dateline),
      raw: normalize_text(message),
    }
  end
end
|
|
|
|
# Returns the normalized title with a leading "Re:" prefix removed
# (case-insensitive, optional whitespace around the colon). The pattern is
# anchored with ^, so it applies at the start of every line of the title.
def extract_pm_title(title)
  cleaned = normalize_text(title).scrub
  cleaned.gsub(/^Re\s*:\s*/i, "")
end
|
|
|
|
# Returns +text+ converted through normalize_charset, with invalid byte
# sequences scrubbed and HTML entities decoded. A blank +text+ is treated as
# the empty string.
def normalize_text(text)
  source = text.presence || ""
  cleaned = normalize_charset(source).scrub
  @html_entities.decode(cleaned)
end
|
|
|
|
# Returns +text+ unchanged when the configured encoding is UTF-8 (or when
# +text+ is nil). Otherwise encodes it to @encoding and re-tags the resulting
# bytes as UTF-8 -- presumably to recover UTF-8 data stored in a column
# declared with another charset; confirm against the source database.
def normalize_charset(text)
  return text if @encoding == Encoding::UTF_8 || !text

  text.encode(@encoding).force_encoding(Encoding::UTF_8)
end
|
|
|
|
# Parses a "month-day-year" birthday string into a Date.
#
# Returns nil for a blank value or one that cannot be parsed. Every character
# other than digits and "-" is dropped before parsing. Years before 1904 are
# moved to 1904, keeping the month and day.
def parse_birthday(birthday)
  return if birthday.blank?

  parsed =
    begin
      Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
    rescue StandardError
      nil
    end
  return if parsed.nil?

  return parsed unless parsed.year < 1904

  Date.new(1904, parsed.month, parsed.day)
end
|
|
|
|
# Runs +sql+ on the source database with the client's `stream: true` option
# and returns the client's result object.
def mysql_stream(sql)
  @client.query(sql, stream: true)
end
|
|
|
|
# Runs +sql+ on the source database with the client's default query options
# and returns the client's result object.
def mysql_query(sql)
  @client.query(sql)
end
|
|
|
|
end
|
|
|
|
# Script entry point: build the importer and start it.
importer = BulkImport::VBulletin.new
importer.run
|