mirror of
https://github.com/discourse/discourse.git
synced 2024-12-15 08:33:49 +08:00
30990006a9
This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging
307 lines
9.6 KiB
Ruby
307 lines
9.6 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "mysql2"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
# Before running this script, paste these lines into your shell,
|
|
# then use arrow keys to edit the values
|
|
=begin
|
|
export DB_HOST="localhost"
|
|
export DB_NAME="mybb"
|
|
export DB_PW=""
|
|
export DB_USER="root"
|
|
export TABLE_PREFIX="mybb_"
|
|
export BASE="" # prefix prepended to the old forum URLs when permalinks are created
|
|
=end
|
|
|
|
# Call it like this:
|
|
# RAILS_ENV=production ruby script/import_scripts/mybb.rb
|
|
class ImportScripts::MyBB < ImportScripts::Base
|
|
|
|
# Connection settings for the source MyBB database. Each one falls back to
# the default shown here when the environment variable of the same name is
# not set.
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "mybb"
DB_PW ||= ENV['DB_PW'] || ""
DB_USER ||= ENV['DB_USER'] || "root"

# Prefix put in front of every MyBB table name in the queries below.
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "mybb_"

# Number of rows requested by each paginated query.
BATCH_SIZE = 1000

# Prefix prepended to the old forum URLs when permalinks are created. The
# setup instructions at the top of the file tell the user to export BASE, so
# the environment value is honoured here; it defaults to "" when unset.
BASE = ENV['BASE'] || ""

# When true, the individual "old URL --> new URL" lines are not printed.
QUIET = true
|
|
|
|
# Runs the parent initializer, then creates the Mysql2 client used by
# mysql_query from the DB_* constants and keeps it in @client.
def initialize
  super

  connection_options = {
    host: DB_HOST,
    username: DB_USER,
    password: DB_PW,
    database: DB_NAME,
  }
  @client = Mysql2::Client.new(connection_options)
end
|
|
|
|
# Sets the disable_emails site setting to "non-staff" and then runs every
# import phase, one after the other.
def execute
  SiteSetting.disable_emails = "non-staff"

  # The phases run strictly in this order. import_posts looks up imported
  # user and category ids, so those presumably have to exist first.
  %i[
    import_users
    import_categories
    import_posts
    import_private_messages
    create_permalinks
    suspend_users
  ].each { |phase| send(phase) }
end
|
|
|
|
# Imports every MyBB user whose user group is not titled 'Banned'.
#
# Users are read BATCH_SIZE rows at a time, ordered by uid. A batch whose ids
# have all been imported already is skipped, and the loop stops at the first
# empty batch.
def import_users
  puts '', "creating users"

  count_row = mysql_query("SELECT count(*) count
                             FROM #{TABLE_PREFIX}users u
                             JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
                            WHERE g.title != 'Banned';").first
  total_count = count_row['count']

  batches(BATCH_SIZE) do |offset|
    rows = mysql_query(
      "SELECT uid id, email email, username, regdate, g.title `group`
         FROM #{TABLE_PREFIX}users u
         JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
        WHERE g.title != 'Banned'
        ORDER BY u.uid ASC
        LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if rows.size.zero?

    imported_ids = rows.map { |row| row["id"].to_i }
    next if all_records_exist?(:users, imported_ids)

    create_users(rows, total: total_count, offset: offset) do |row|
      group_title = row['group']

      {
        id: row['id'],
        email: row['email'],
        username: row['username'],
        # regdate is handed to Time.zone.at, so it is presumably a Unix timestamp
        created_at: Time.zone.at(row['regdate']),
        moderator: group_title == 'Super Moderators',
        admin: group_title == 'Administrators',
      }
    end
  end
end
|
|
|
|
# Imports every MyBB forum as a category. Names are cut to 50 characters by
# the query, and names and descriptions are HTML-unescaped.
def import_categories
  # Ordered by parent id first, so forums with a lower pid come before the
  # forums nested under them (presumably so the parent lookup below succeeds).
  forums = mysql_query("
    SELECT fid id, pid parent_id, left(name, 50) name, description
      FROM #{TABLE_PREFIX}forums
     ORDER BY pid ASC, fid ASC
  ")

  create_categories(forums) do |forum|
    category = {
      id: forum['id'],
      name: CGI.unescapeHTML(forum['name']),
      description: CGI.unescapeHTML(forum['description']),
    }

    parent_id = forum['parent_id']
    if parent_id.to_i > 0
      category[:parent_category_id] = category_id_from_imported_category_id(parent_id)
    end

    category
  end
end
|
|
|
|
# Imports every MyBB post: a thread's first post becomes a topic, every other
# post becomes a reply in the topic created for its thread.
#
# Posts are read BATCH_SIZE rows at a time. Each row is turned into the hash
# given to create_posts, or into nil when the post has to be skipped because
# the first post of its thread cannot be found among the imported posts.
def import_posts
  puts "", "creating topics and posts"

  total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}posts").first["count"]

  batches(BATCH_SIZE) do |offset|
    # p.dateline is not unique, so p.pid is added as a tie-breaker. Without
    # it the order of posts sharing a timestamp is undefined and LIMIT/OFFSET
    # paging may return a row twice or not at all.
    results = mysql_query("
      SELECT p.pid id,
             p.tid topic_id,
             t.fid category_id,
             t.subject title,
             t.firstpost first_post_id,
             p.uid user_id,
             p.message raw,
             p.dateline post_time
        FROM #{TABLE_PREFIX}posts p,
             #{TABLE_PREFIX}threads t
       WHERE p.tid = t.tid
    ORDER BY p.dateline, p.pid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")

    break if results.size < 1

    next if all_records_exist?(:posts, results.map { |m| m['id'].to_i })

    create_posts(results, total: total_count, offset: offset) do |m|
      # If you have imported a phpbb forum to mybb previously there might
      # be a problem with #{TABLE_PREFIX}threads.firstpost. If these ids are wrong
      # the thread cannot be imported to discourse as the topic post is
      # missing. This query retrieves the first_post_id manually. As it
      # will decrease the performance it is commented out by default.
      # m['first_post_id'] = mysql_query("
      #   SELECT p.pid id
      #   FROM #{TABLE_PREFIX}posts p,
      #        #{TABLE_PREFIX}threads t
      #   WHERE p.tid = #{m['topic_id']} AND t.tid = #{m['topic_id']}
      #   ORDER BY p.dateline
      #   LIMIT 1
      # ").first['id']

      mapped = {
        id: m['id'],
        # posts whose author was not imported are attributed to user id -1
        user_id: user_id_from_imported_user_id(m['user_id']) || -1,
        raw: process_mybb_post(m['raw'], m['id']),
        created_at: Time.zone.at(m['post_time']),
      }

      if m['id'] == m['first_post_id']
        # First post of the thread: it opens a new topic.
        mapped[:category] = category_id_from_imported_category_id(m['category_id'])
        mapped[:title] = CGI.unescapeHTML(m['title'])
      else
        parent = topic_lookup_from_imported_post_id(m['first_post_id'])

        unless parent
          puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
          next # the block yields nil for a skipped post
        end

        mapped[:topic_id] = parent[:topic_id]
      end

      mapped
    end
  end
end
|
|
|
|
# Placeholder phase: nothing is imported, only a notice is printed.
def import_private_messages
  puts ""
  puts "private messages are not implemented"
end
|
|
|
|
# Placeholder phase: nothing is suspended, only a notice is printed.
def suspend_users
  puts ''
  puts "banned users are not implemented"
end
|
|
|
|
# Discourse usernames don't allow spaces
#
# Returns a copy of the username in which every run of whitespace is replaced
# by a single underscore. The argument itself is not modified, so frozen
# strings are accepted.
#
# username - the author name taken from a quote tag
# post_id  - id of the post being processed; only used in the warning text
def convert_username(username, post_id)
  count = 0
  converted = username.gsub(/\s+/) do
    count += 1
    '_'
  end

  # Warn on MyBB bug that places post text in the quote line - http://community.mybb.com/thread-180526.html
  puts "Warning: probably incorrect quote in post #{post_id}" if count > 5

  converted
end
|
|
|
|
# Take an original post id and return the migrated topic id and post number for it
#
# quoted_post_id - original (MyBB) id of the post being quoted
# post_id        - original id of the post containing the quote; only used
#                  in the messages printed below
#
# Returns "post:<post_number>, topic:<topic_id>", or "" (after printing a
# message) when the quoted post was never imported or no longer exists.
def post_id_to_post_num_and_topic(quoted_post_id, post_id)
  quoted_post_id_from_imported = post_id_from_imported_post_id(quoted_post_id.to_i)

  unless quoted_post_id_from_imported
    puts "Original post #{post_id} quotes nonexistent post #{quoted_post_id}"
    return ""
  end

  # find_by returns nil for a missing record instead of raising, so unrelated
  # failures are no longer swallowed and misreported as "could not find".
  post = Post.find_by(id: quoted_post_id_from_imported)

  unless post
    puts "Could not find migrated post #{quoted_post_id_from_imported} quoted by original post #{post_id} as #{quoted_post_id}"
    return ""
  end

  "post:#{post.post_number}, topic:#{post.topic_id}"
end
|
|
|
|
# Converts the raw text of a MyBB post into the text stored in Discourse.
#
# raw       - the post text as read from the MyBB database (not modified)
# import_id - original id of the post; only used in warnings from the helpers
#
# Returns a new string with quote tags rewritten, smiley and link markup
# reduced to plain text / markdown, bbcode hashes and video tags removed and
# HTML entities unescaped.
def process_mybb_post(raw, import_id)
  s = raw.dup

  # convert the quote line
  s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) do
    # Copy the captures before calling helpers that run their own regexes.
    quoted_author = Regexp.last_match(1)
    quoted_post_id = Regexp.last_match(2)

    username = convert_username(quoted_author, import_id)
    reference = post_id_to_post_num_and_topic(quoted_post_id, import_id)

    # An unresolved quote yields an empty reference; leave it out so the tag
    # does not end in a dangling ", ".
    reference.empty? ? "[quote=\"#{username}\"]" : "[quote=\"#{username}, #{reference}\"]"
  end

  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  # The middle part is matched lazily so that two smilies on one line are
  # converted separately and the text between them is kept.
  s.gsub!(/<!-- s(\S+) -->(?:.*?)<!-- s(?:\S+) -->/, '\1')

  # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
  # Lazy quantifiers keep each link on a line separate.
  s.gsub!(/<!-- \w --><a(?:.+?)href="(\S+?)"(?:.*?)>(.+?)<\/a><!-- \w -->/, '[\2](\1)')

  # Many phpbb bbcode tags have a hash attached to them. Examples:
  #   [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky]
  #   [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
  s.gsub!(/:(?:\w{8})\]/, ']')

  # Remove mybb video tags.
  s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')

  s = CGI.unescapeHTML(s)

  # phpBB shortens link text like this, which breaks our markdown processing:
  #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
  #
  # Work around it for now:
  s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')

  s
end
|
|
|
|
# Creates Permalink records for the old forum URLs (user-N.html, forum-N.html
# and thread-N.html, each prefixed with BASE) pointing at the imported users,
# categories and topics.
def create_permalinks
  puts '', 'Creating redirects...', ''

  SiteSetting.permalink_normalizations = '/(\\w+)-(\\d+)[-.].*/\\1-\\2.html'

  create_user_permalinks
  create_category_permalinks
  create_topic_permalinks
end

# Creates one permalink from the given attributes and ignores any
# StandardError raised while doing so: permalink creation is best-effort and
# a failing record must not abort the run.
def create_permalink_ignoring_errors(attributes)
  Permalink.create(attributes)
rescue StandardError
  nil
end

# Adds a "user-<import_id>.html" permalink for every user that has both the
# import_id and import_username custom fields.
def create_user_permalinks
  puts '', 'Users...', ''
  total_users = User.count
  start_time = Time.now
  count = 0

  User.find_each do |u|
    ucf = u.custom_fields
    count += 1

    if ucf && ucf["import_id"] && ucf["import_username"]
      create_permalink_ignoring_errors(
        url: "#{BASE}/user-#{ucf['import_id']}.html",
        external_url: "/u/#{u.username}"
      )
    end

    print_status(count, total_users, start_time)
  end
end

# Adds a "forum-<import_id>.html" permalink for every category that has an
# import_id custom field.
def create_category_permalinks
  puts '', 'Categories...', ''
  total_categories = Category.count
  start_time = Time.now
  count = 0

  Category.find_each do |cat|
    count += 1

    id = cat.custom_fields["import_id"]
    next unless id

    puts("forum-#{id}.html --> /c/#{cat.id}") unless QUIET

    create_permalink_ignoring_errors(url: "#{BASE}/forum-#{id}.html", category_id: cat.id)
    print_status(count, total_categories, start_time)
  end
end

# Adds a "thread-<tid>.html" permalink for every thread whose first post was
# imported.
def create_topic_permalinks
  puts '', 'Topics...', ''

  # Only first posts are iterated below, so the progress total is the number
  # of rows that query returns rather than the number of all posts.
  total_topics = mysql_query("
    SELECT count(*) count
      FROM #{TABLE_PREFIX}posts p,
           #{TABLE_PREFIX}threads t
     WHERE p.tid = t.tid
       AND t.firstpost=p.pid
  ").first['count']
  start_time = Time.now
  count = 0

  puts '', 'Posts...', ''

  batches(BATCH_SIZE) do |offset|
    # p.pid breaks ties between equal datelines so that LIMIT/OFFSET paging
    # is deterministic and no thread is skipped or visited twice.
    results = mysql_query("
      SELECT p.pid id,
             p.tid topic_id
        FROM #{TABLE_PREFIX}posts p,
             #{TABLE_PREFIX}threads t
       WHERE p.tid = t.tid
         AND t.firstpost=p.pid
    ORDER BY p.dateline, p.pid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")

    break if results.size < 1

    results.each do |post|
      count += 1

      topic = topic_lookup_from_imported_post_id(post['id'])
      next unless topic

      id = post['topic_id']
      create_permalink_ignoring_errors(url: "#{BASE}/thread-#{id}.html", topic_id: topic[:topic_id])

      puts("#{BASE}/thread-#{id}.html --> http://localhost:3000/t/#{topic[:topic_id]}") unless QUIET

      print_status(count, total_topics, start_time)
    end
  end
end

private :create_permalink_ignoring_errors,
        :create_user_permalinks,
        :create_category_permalinks,
        :create_topic_permalinks
|
|
|
|
# Runs the given SQL on the client created in initialize, passing
# cache_rows: false, and returns whatever the client returns.
def mysql_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, query_options)
end
|
|
end
|
|
|
|
# Build the importer and start the run.
importer = ImportScripts::MyBB.new
importer.perform
|