mirror of
https://github.com/discourse/discourse.git
synced 2024-11-24 14:19:49 +08:00
921e2213b8
* IPB import script replace PHP code tags with proper markdown remove excess newlines in code blocks decode HTML entities in code blocks add replacement for list items proper handling of attachments that are not images fix typo improved quote handling fix code style complaint from travis-ci build
1029 lines
38 KiB
Ruby
1029 lines
38 KiB
Ruby
# coding: utf-8
|
|
require "mysql2"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
require 'htmlentities'
|
|
begin
|
|
require 'reverse_markdown' # https://github.com/jqr/php-serialize
|
|
rescue LoadError
|
|
puts
|
|
puts 'reverse_markdown not found.'
|
|
puts 'Add to Gemfile, like this: '
|
|
puts
|
|
puts "echo gem \\'reverse_markdown\\' >> Gemfile"
|
|
puts "bundle install"
|
|
exit
|
|
end
|
|
|
|
# Before running this script, paste these lines into your shell,
|
|
# then use arrow keys to edit the values
|
|
=begin
|
|
export DB_HOST="localhost"
|
|
export DB_NAME="ipboard"
|
|
export DB_PW="ipboard"
|
|
export DB_USER="ipboard"
|
|
export TABLE_PREFIX="ipb_"
|
|
export IMPORT_AFTER="1970-01-01"
|
|
export UPLOADS="http://example.com/uploads"
|
|
export URL="http://example.com/"
|
|
export AVATARS_DIR="/imports/avatars/"
|
|
export USERDIR="user"
|
|
=end
|
|
|
|
class ImportScripts::IpboardSQL < ImportScripts::Base
|
|
|
|
# Connection and import settings. Each value can be overridden by exporting
# the environment variable of the same name before running the script.
DB_HOST ||= ENV.fetch('DB_HOST', "localhost")
DB_NAME ||= ENV.fetch('DB_NAME', "ipboard")
DB_PW ||= ENV.fetch('DB_PW', "ipboard")
DB_USER ||= ENV.fetch('DB_USER', "ipboard")
TABLE_PREFIX ||= ENV.fetch('TABLE_PREFIX', "ipb_")
IMPORT_AFTER ||= ENV.fetch('IMPORT_AFTER', "1970-01-01")
UPLOADS ||= ENV.fetch('UPLOADS', "http://UPLOADS+LOCATION+IS+NOT+SET/uploads")
USERDIR ||= ENV.fetch('USERDIR', "user")
URL ||= ENV.fetch('URL', "https://forum.example.com")
AVATARS_DIR ||= ENV.fetch('AVATARS_DIR', '/home/pfaffman/data/example.com/avatars/')

# Fixed settings (not configurable through the environment).
BATCH_SIZE = 1000
ID_FIRST = true
QUIET = true
DEBUG = false
GALLERY_CAT_ID = 1234567
GALLERY_CAT_NAME = 'galeria'
EMO_DIR ||= ENV.fetch('EMO_DIR', "default")

# Table names depend on which schema layout the source database uses.
# NOTE(review): GROUPS_TABLE, PROFILE_TABLE and ATTACHMENT_TABLE are only
# defined in the non-OLD_FORMAT branch, so code that reads them will fail
# with a NameError if OLD_FORMAT is switched to true.
OLD_FORMAT = false
if OLD_FORMAT
  MEMBERS_TABLE = "#{TABLE_PREFIX}core_members"
  FORUMS_TABLE = "#{TABLE_PREFIX}forums_forums"
  POSTS_TABLE = "#{TABLE_PREFIX}forums_posts"
  TOPICS_TABLE = "#{TABLE_PREFIX}forums_topics"
else
  MEMBERS_TABLE = "#{TABLE_PREFIX}members"
  FORUMS_TABLE = "#{TABLE_PREFIX}forums"
  POSTS_TABLE = "#{TABLE_PREFIX}posts"
  TOPICS_TABLE = "#{TABLE_PREFIX}topics"
  GROUPS_TABLE = "#{TABLE_PREFIX}groups"
  PROFILE_TABLE = "#{TABLE_PREFIX}profile_portal"
  ATTACHMENT_TABLE = "#{TABLE_PREFIX}attachments"
end
|
|
|
|
# TODO: replace ipb_ with TABLE_PREFIX
|
|
|
|
#################
# Site settings #
#################
# These assignments run while the class body is evaluated, i.e. as soon as
# the script is loaded and before any import step starts.

# don't send any emails
SiteSetting.disable_emails = "non-staff"
# don't send digests (so you can enable email without users noticing)
SiteSetting.disable_digest_emails = true
# keep site and users private
SiteSetting.login_required = true
SiteSetting.hide_user_profiles_from_public = true
# if site is made available, don't let it get indexed
SiteSetting.allow_index_in_robots_txt = false
# don't notify users when images in their posts get downloaded
SiteSetting.disable_edit_notifications = true
# SiteSetting.force_hostname='forum.dev1dev.com'
SiteSetting.title = "IPB Import"

if ID_FIRST
  # TODO figure this out
  puts("WARNING: permalink_normalizations not set!!!")
  sleep(1)
  #raw = "[ORIGINAL POST](#{URL}/topic/#{id}-#{slug})\n\n" + raw
  #SiteSetting.permalink_normalizations='/topic/(.*t)\?.*/\1'
else
  # remove stuff after a "?" and work for urls that end in .html
  SiteSetting.permalink_normalizations = '/(.*t)[?.].*/\1'
  #raw = "[ORIGINAL POST](#{URL}/#{slug}-#{id}t)\n\n" + raw
end
|
|
|
|
# Sets up the importer and opens the connection to the IP.Board database.
#
# Prints a warning when IMPORT_AFTER is later than 1970-01-01, runs the
# base class initializer, creates the HTML entity decoder and then connects
# with the DB_* settings. When the connection cannot be established, the
# error message and the settings in use are printed and the script exits
# with a non-zero status.
def initialize
  print_warning("Importing data after #{IMPORT_AFTER}") if IMPORT_AFTER > "1970-01-01"

  super
  @htmlentities = HTMLEntities.new
  begin
    @client = Mysql2::Client.new(
      host: DB_HOST,
      username: DB_USER,
      password: DB_PW,
      database: DB_NAME
    )
  rescue StandardError => e
    # StandardError rather than Exception, so Ctrl-C (Interrupt) and
    # SystemExit are not reported as a database login problem.
    puts '=' * 50
    puts e.message
    puts <<EOM
Cannot log in to database.

Hostname: #{DB_HOST}
Username: #{DB_USER}
Password: #{DB_PW}
database: #{DB_NAME}

You should set these variables:

export DB_HOST="localhost"
export DB_NAME="ipboard"
export DB_PW="ipboard"
export DB_USER="ipboard"
export TABLE_PREFIX="ipb_"
export IMPORT_AFTER="1970-01-01"
export URL="http://example.com"
export UPLOADS=
export USERDIR="user"

Exiting.
EOM
    # Signal the failure to the calling shell.
    exit 1
  end
end
|
|
|
|
# Runs every import stage in order: users and categories first, then
# topics, replies and private messages, and finally the update_tl0 and
# create_permalinks steps.
def execute
  import_users
  import_categories
  import_topics
  import_posts
  import_private_messages

  # The gallery importers are deliberately left out:
  #   import_image_categories -- not supported
  #   import_gallery_topics   -- NOT SUPPORTED
  update_tl0
  create_permalinks
end
|
|
|
|
# Imports members, joined with their profile and group rows, as Discourse
# users.
#
# Only members whose last_activity is after IMPORT_AFTER are read, in
# batches of BATCH_SIZE ordered by member_id. Rows with a blank email or
# username, with id 0, or that are already known to the lookup are skipped.
# Once a user has been created, their avatar is attached and members flagged
# as banned are suspended.
def import_users
  puts '', "creating users"

  total_count = mysql_query("SELECT count(*) count FROM #{MEMBERS_TABLE}
                             WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'));").first['count']

  batches(BATCH_SIZE) do |offset|
    #notes: no location, url,
    # The query used to select two columns under the alias last_seen_at
    # (FROM_UNIXTIME(last_activity) and last_visit). With hash rows the later
    # column overwrites the earlier one, so only last_visit is kept here.
    results = mysql_query("
      SELECT member_id id,
             name username,
             member_group_id usergroup,
             email,
             pp_thumb_photo avatar_url,
             # pp_main_photo avatar_url,
             # avatar_location avatar_url,
             # TODO consider joining ibf_profile_portal.avatar_location and avatar_type
             FROM_UNIXTIME(joined) created_at,
             ip_address registration_ip_address,
             member_banned banned,
             bday_year, bday_month, bday_day,
             g_title member_type,
             last_visit last_seen_at
      FROM #{MEMBERS_TABLE}, #{PROFILE_TABLE}, #{GROUPS_TABLE}
      WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      AND member_id=pp_member_id
      AND member_group_id = g_id
      order by member_id ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};")

    break if results.size < 1

    next if all_records_exist? :users, results.map { |u| u['id'].to_i }

    create_users(results, total: total_count, offset: offset) do |user|
      next if user['email'].blank?
      next if user['username'].blank?
      next if user['id'] == 0
      next if @lookup.user_id_from_imported_user_id(user['id'])

      # TODO: what about timezones?
      # TODO: bday_year/bday_month/bday_day are selected but the birthday is
      # not imported yet.
      {
        id: user['id'],
        email: user['email'],
        username: user['username'],
        avatar_url: user['avatar_url'],
        title: user['member_type'],
        created_at: user['created_at'].nil? ? 0 : Time.zone.at(user['created_at']),
        # bio_raw: user['bio_raw'],
        registration_ip_address: user['registration_ip_address'],
        last_seen_at: user['last_seen_at'].nil? ? 0 : Time.zone.at(user['last_seen_at']),
        # Staff flags are derived from the group title prefix.
        admin: /^Admin/.match(user['member_type']) ? true : false,
        moderator: /^MOD/.match(user['member_type']) ? true : false,
        post_create_action: proc do |newuser|
          import_ipb_avatar(newuser, user['avatar_url'])
          suspend_user(newuser) if user['banned'] != 0
        end
      }
    end
  end
end

# Helper for import_users: uploads the avatar file found at
# AVATARS_DIR + avatar_url and makes it the custom avatar of +newuser+.
# Does nothing when avatar_url is nil or empty; a missing file, an upload
# that did not persist and file system errors are reported with puts.
def import_ipb_avatar(newuser, avatar_url)
  return if avatar_url.nil? || avatar_url.empty?

  photo_path = AVATARS_DIR + avatar_url
  # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
  unless File.exist?(photo_path)
    puts "avatar file not found at #{photo_path}"
    return
  end

  begin
    upload = create_upload(newuser.id, photo_path, File.basename(photo_path))
    if upload && upload.persisted?
      # import_mode is switched off only while the avatar record is created.
      newuser.import_mode = false
      newuser.create_user_avatar
      newuser.import_mode = true
      newuser.user_avatar.update(custom_upload_id: upload.id)
      newuser.update(uploaded_avatar_id: upload.id)
    else
      puts "Error: Upload did not persist for #{photo_path}!"
    end
  rescue SystemCallError => err
    puts "Could not import avatar #{photo_path}: #{err.message}"
  end
end
|
|
|
|
# Suspends +user+ from now until 200 years in the future, switches off all
# four email options on their user_option record, and saves the user. A
# successful save is recorded in the staff action log as the system user;
# a failed save is reported with puts.
def suspend_user(user)
  ban_reason = 'Account deactivated by administrator'

  user.suspended_at = Time.now
  user.suspended_till = 200.years.from_now

  email_settings = user.user_option
  email_settings.email_digests = false
  email_settings.email_private_messages = false
  email_settings.email_direct = false
  email_settings.email_always = false
  email_settings.save!

  unless user.save
    puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
    return
  end

  StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
end
|
|
|
|
# Joins JSON_FILES_DIR with the part of +relpath+ that precedes the first
# "?" (for example, a path with its query string removed).
# NOTE(review): JSON_FILES_DIR is not defined in the visible part of this
# file -- confirm it exists before relying on this method.
def file_full_path(relpath)
  path_without_query = relpath.split("?").first
  File.join(JSON_FILES_DIR, path_without_query)
end
|
|
|
|
# Creates one parent category (GALLERY_CAT_ID / GALLERY_CAT_NAME) and, below
# it, one category per row of the gallery_categories table. Display names
# come from the gallery language strings; when two categories share a title,
# the later one gets its id appended. Imported ids carry a 'gal' suffix.
def import_image_categories
  puts "", "importing image categories..."

  categories = mysql_query("
    SELECT category_id id,
           category_name_seo name,
           category_parent_id as parent_id
    FROM #{TABLE_PREFIX}gallery_categories
    ORDER BY id ASC
  ").to_a

  category_names = mysql_query("
    SELECT DISTINCT word_key, word_default title
    FROM #{TABLE_PREFIX}core_sys_lang_words where word_app='gallery'
    AND word_key REGEXP 'gallery_category_[0-9]+$'
    ORDER BY word_key ASC
  ").to_a

  # Maps the gallery category id (as a string) to its display title.
  cat_map = {}
  puts "Creating gallery_cat_map"
  category_names.each do |entry|
    title = entry['title']
    word_key = entry['word_key']
    puts "Processing #{word_key}: #{title}"
    id = word_key.gsub('gallery_category_', '')
    next if cat_map[id]

    # Disambiguate a title that is already taken by appending the id.
    cat_map[id] =
      if cat_map.has_value?(title)
        title + " " + id
      else
        title
      end
    puts "#{id} => #{cat_map[id]}"
  end

  parent_params = { id: GALLERY_CAT_ID, name: GALLERY_CAT_NAME }
  create_category(parent_params, parent_params[:id])

  create_categories(categories) do |category|
    id = (category['id']).to_s
    name = CGI.unescapeHTML(cat_map[id])
    {
      id: id + 'gal',
      name: name,
      parent_category_id: @lookup.category_id_from_imported_category_id(GALLERY_CAT_ID),
      color: random_category_color
    }
  end
end
|
|
|
|
# Imports the forums table as Discourse categories.
#
# Forums whose parent_id is -1 are created first as top-level categories;
# all other forums are then created as children of the category that was
# imported for their parent_id, each with a random color.
def import_categories
  puts "", "importing categories..."

  categories = mysql_query("
    SELECT id,
           name name,
           parent_id as parent_id
    FROM #{FORUMS_TABLE}
    ORDER BY parent_id ASC
  ").to_a

  # The column is aliased as "parent_id". The previous code read
  # "parent.id", which is never present, so no forum was ever treated as
  # top-level.
  top_level_categories, children_categories =
    categories.partition { |c| c["parent_id"].to_i == -1 }

  create_categories(top_level_categories) do |category|
    {
      id: category['id'].to_s,
      name: category['name'],
    }
  end

  create_categories(children_categories) do |category|
    {
      id: category['id'].to_s,
      name: category['name'],
      parent_category_id: @lookup.category_id_from_imported_category_id(category['parent_id']),
      color: random_category_color
    }
  end
end
|
|
|
|
# Imports the first post of every topic (posts with new_topic = 1 dated
# after IMPORT_AFTER) as a new Discourse topic.
#
# Rows are read in batches of BATCH_SIZE ordered by post date. Each topic is
# registered under the import id "discussion#<tid>", which the reply and
# quote handling in this file use to find it again. Topics flagged as
# pinned are pinned at their creation time.
def import_topics
  puts "", "importing topics..."

  total_count = mysql_query("SELECT count(*) count FROM #{POSTS_TABLE}
                             WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
                             AND new_topic=1;").first['count']

  batches(BATCH_SIZE) do |offset|
    discussions = mysql_query(<<-SQL
      SELECT #{TOPICS_TABLE}.tid tid,
             #{TOPICS_TABLE}.forum_id category,
             #{POSTS_TABLE}.pid pid,
             #{TOPICS_TABLE}.title title,
             #{TOPICS_TABLE}.pinned pinned,
             #{POSTS_TABLE}.post raw,
             #{TOPICS_TABLE}.title_seo as slug,
             FROM_UNIXTIME(#{POSTS_TABLE}.post_date) created_at,
             #{POSTS_TABLE}.author_id user_id
      FROM #{POSTS_TABLE}, #{TOPICS_TABLE}
      WHERE #{POSTS_TABLE}.topic_id = #{TOPICS_TABLE}.tid
      AND #{POSTS_TABLE}.new_topic = 1
      AND #{POSTS_TABLE}.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY #{POSTS_TABLE}.post_date ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL
    )

    break if discussions.size < 1
    next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['tid'].to_s }

    create_posts(discussions, total: total_count, offset: offset) do |discussion|
      # Converted once and reused for both created_at and pinned_at.
      created_at = Time.zone.at(discussion['created_at'])
      {
        id: "discussion#" + discussion['tid'].to_s,
        user_id: user_id_from_imported_user_id(discussion['user_id']) || Discourse::SYSTEM_USER_ID,
        title: CGI.unescapeHTML(discussion['title']),
        category: category_id_from_imported_category_id(discussion['category'].to_s),
        raw: clean_up(discussion['raw']),
        pinned_at: discussion['pinned'].to_i == 1 ? created_at : nil,
        created_at: created_at,
      }
    end
  end
end
|
|
|
|
# Extracts the integer values from a PHP-serialized array of integers such
# as 'a:3:{i:0;i:22629;i:1;i:21837;i:2;i:22234;}' (the default argument is
# a sample value).
#
# @param invited_members [String, nil] serialized member id list
# @return [Array<Integer>] the member ids in their serialized order; an
#   empty array when the input is nil, blank or an empty array ('a:0:{}')
# @raise [ArgumentError] when a non-blank input has no 'a:<count>:' header
def array_from_members_string(invited_members = 'a:3:{i:0;i:22629;i:1;i:21837;i:2;i:22234;}')
  serialized = invited_members.to_s
  return [] if serialized.strip.empty?

  header = /a:\d+:/
  unless serialized =~ header
    raise ArgumentError, "not a serialized PHP array: #{invited_members.inspect}"
  end

  # Each element is serialized as "i:<index>;i:<value>;" -- only the value
  # is of interest.
  serialized.sub(header, "").scan(/i:\d+;i:(\d+);/).map { |(value)| value.to_i }
end
|
|
|
|
# Imports private conversations (message_topics joined with message_posts,
# dated after IMPORT_AFTER) as Discourse private messages.
#
# Rows are read in batches of BATCH_SIZE ordered by conversation and then
# message id. Every message is registered under the import id "pm-<msg_id>".
# The first importable message of a conversation becomes a private-message
# topic addressed to the recipient and invited members; later messages of
# the same conversation are added to that topic as replies. Messages with an
# empty body, or whose recipient list cannot be read, are skipped.
def import_private_messages
  puts "", "importing private messages..."

  topic_count = mysql_query("SELECT COUNT(msg_id) count FROM #{TABLE_PREFIX}message_posts").first["count"]

  # conversation id (msg_topic_id) => import id of the message that opened
  # the conversation in Discourse. Kept across batches because one
  # conversation can span a batch boundary.
  starter_import_ids = {}

  batches(BATCH_SIZE) do |offset|
    private_messages = mysql_query(<<-SQL
      SELECT msg_id pmtextid,
             msg_topic_id topic_id,
             msg_author_id fromuserid,
             mt_title title,
             msg_post message,
             mt_invited_members touserarray,
             mt_to_member_id to_user_id,
             msg_is_first_post first_post,
             msg_date dateline
      FROM #{TABLE_PREFIX}message_topics, #{TABLE_PREFIX}message_posts
      WHERE msg_topic_id = mt_id
      AND msg_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY msg_topic_id, msg_id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL
    )

    puts "Processing #{private_messages.count} messages"
    break if private_messages.count < 1
    puts "Processing . . . "

    # Split off messages imported by an earlier run. The earliest imported
    # message of a conversation is remembered as its starter, so replies
    # still find their topic when the script is re-run.
    pending_messages = []
    private_messages.each do |pm|
      import_id = "pm-#{pm['pmtextid']}"
      if @lookup.post_already_imported?(import_id)
        starter_import_ids[pm['topic_id']] ||= import_id
      else
        pending_messages << pm
      end
    end

    create_posts(pending_messages, total: topic_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = "pm-#{m['pmtextid']}"
      mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID
      mapped[:raw] =
        begin
          clean_up(m['message'])
        rescue StandardError
          nil # a body that cannot be cleaned is skipped like an empty one
        end
      mapped[:created_at] = Time.zone.at(m['dateline'])
      title = @htmlentities.decode(m['title']).strip[0...255]

      next if mapped[:raw].blank?

      # users who are part of this private message.
      target_usernames = []
      begin
        to_user_array = [ m['to_user_id'] ] + array_from_members_string(m['touserarray'])
      rescue
        puts "#{m['pmtextid']} -- #{m['touserarray']}"
        skip = true
      end

      begin
        to_user_array.each do |to_user|
          user_id = user_id_from_imported_user_id(to_user)
          username = User.find_by(id: user_id).try(:username)
          target_usernames << username if username
          if user_id
            puts "Found user: #{to_user} -- #{user_id} -- #{username}"
          else
            puts "Can't find user: #{to_user}"
          end
        end
      rescue
        puts "skipping pm-#{m['pmtextid']} `to_user_array` is broken -- #{to_user_array.inspect}"
        skip = true
      end

      # A message is a reply when the conversation's starter has already
      # been imported. The lookup returns a hash; its :topic_id entry is the
      # Discourse topic id. (The previous code built the lookup key from a
      # variable that was still nil, so no reply was ever found.)
      starter_import_id = starter_import_ids[m['topic_id']]
      parent = starter_import_id && topic_lookup_from_imported_post_id(starter_import_id)

      if parent # reply
        mapped[:topic_id] = parent[:topic_id]
        puts "Reply message #{parent[:topic_id]}: #{m['topic_id']}: from #{m['fromuserid']} (#{mapped[:user_id]})" unless QUIET
      else # topic post message
        starter_import_ids[m['topic_id']] = mapped[:id]
        puts "New message: #{m['topic_id']}: #{title} from #{m['fromuserid']} (#{mapped[:user_id]})" unless QUIET
        mapped[:title] = title
        mapped[:archetype] = Archetype.private_message
        mapped[:target_usernames] = target_usernames.join(',')
        if mapped[:target_usernames].size < 1 # pm with yourself?
          mapped[:target_usernames] = "system"
          puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})"
        end
      end

      skip ? nil : mapped
    end
  end
end
|
|
|
|
# Builds one post per gallery album from the album's images (not called by
# execute; marked as not supported there).
#
# Images are read in batches of BATCH_SIZE ordered by album. While the
# album id stays the same, each image's caption and upload URL are appended
# to an accumulated text; when the album id changes, the text accumulated
# so far is emitted as a post for the previous album.
#
# NOTE(review): the accumulated text is only restarted inside the
# `if DEBUG` branch, so with DEBUG off it keeps growing across albums; the
# description appended on an album change comes from the first row of the
# batch; and the last album of a batch is never emitted. Confirm the
# intended behavior before enabling this method.
def import_gallery_topics
  # pfaffman: I'm not clear whether this is an IPBoard thing or from some other system
  puts "", "importing gallery albums..."

  gallery_count = 0
  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}gallery_images
                    ;").first['count']

  # NOTE: for imports with huge numbers of galleries, this needs to use limits

  batches(BATCH_SIZE) do |offset|
    images = mysql_query(<<-SQL
      SELECT #{TABLE_PREFIX}gallery_albums.album_id tid,
             #{TABLE_PREFIX}gallery_albums.album_category_id category,
             #{TABLE_PREFIX}gallery_albums.album_owner_id user_id,
             #{TABLE_PREFIX}gallery_albums.album_name title,
             #{TABLE_PREFIX}gallery_albums.album_description raw,
             #{TABLE_PREFIX}gallery_albums.album_type,
             #{TABLE_PREFIX}gallery_images.image_caption caption,
             #{TABLE_PREFIX}gallery_images.image_description description,
             #{TABLE_PREFIX}gallery_images.image_masked_file_name masked,
             #{TABLE_PREFIX}gallery_images.image_id image_id,
             #{TABLE_PREFIX}gallery_images.image_medium_file_name medium,
             #{TABLE_PREFIX}gallery_images.image_original_file_name orig,
             FROM_UNIXTIME(#{TABLE_PREFIX}gallery_albums.album_last_img_date) created_at,
             #{TABLE_PREFIX}gallery_images.image_file_name filename
      FROM #{TABLE_PREFIX}gallery_albums, #{TABLE_PREFIX}gallery_images
      WHERE #{TABLE_PREFIX}gallery_images.image_album_id=#{TABLE_PREFIX}gallery_albums.album_id
      ORDER BY #{TABLE_PREFIX}gallery_albums.album_id, image_date DESC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};

    SQL

    )

    break if images.size < 1
    next if all_records_exist? :posts, images.map { |t| "gallery#" + t['tid'].to_s + t['image_id'].to_s }

    # State carried from one row to the next inside the block below.
    last_id = images.first['tid']
    raw = "Gallery ID: #{last_id}\n" + clean_up(images.first['raw'])
    raw += "#{clean_up(images.first['description'])}\n"
    last_gallery = images.first.dup

    create_posts(images, total: total_count, offset: offset) do |gallery|
      id = gallery['tid'].to_i
      #puts "ID: #{id}, last_id: #{last_id}, image: #{gallery['image_id']}"

      # Same album as the previous row: keep accumulating, emit nothing.
      if id == last_id
        raw += "### #{gallery['caption']}\n"
        raw += "#{UPLOADS}/#{gallery['orig']}\n"
        last_gallery = gallery.dup
        next
      end

      # Album changed: snapshot what was accumulated for the previous album.
      insert_raw = raw.dup
      last_id = gallery['tid']
      if DEBUG
        raw = "Gallery ID: #{last_id}\n" + clean_up(gallery['raw'])
        raw += "Cat: #{last_gallery['category'].to_s} - #{category_id_from_imported_category_id(last_gallery['category'].to_s + 'gal')}"
      end
      raw += "#{clean_up(images.first['description'])}\n"
      raw += "### #{gallery['caption']}\n"
      raw += "User #{gallery['user_id']}, image_id: #{gallery['image_id']}\n" if DEBUG
      raw += "#{UPLOADS}/#{gallery['orig']}\n"
      gallery_count += 1
      puts "#{gallery_count}--Cat: #{last_gallery['category'].to_s} ==> #{category_id_from_imported_category_id(last_gallery['category'].to_s + 'gal')}" unless QUIET

      {
        id: "gallery#" + last_gallery['tid'].to_s + last_gallery['image_id'].to_s,
        user_id: user_id_from_imported_user_id(last_gallery['user_id']) || Discourse::SYSTEM_USER_ID,
        title: CGI.unescapeHTML(last_gallery['title']),
        category: category_id_from_imported_category_id(last_gallery['category'].to_s + 'gal'),
        raw: insert_raw,
      }
    end
  end
end
|
|
|
|
# TODO: use this to figure out to pin posts
|
|
# Fills +mapped+ with the attributes of a topic's first post, taken from
# +row+ (symbol keys): category, title (HTML-unescaped, stripped, at most
# 255 characters), pinning, and a post_create_action that creates the
# topic's permalink. Returns +mapped+.
# NOTE(review): Constants and @permalink_importer are not defined in the
# visible part of this file -- verify before calling.
def map_first_post(row, mapped)
  mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id])
  mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255]

  # Anything other than a normal topic is pinned at its creation time.
  if !(row[:topic_type] == Constants::POST_NORMAL)
    mapped[:pinned_at] = mapped[:created_at]
  end
  mapped[:pinned_globally] = row[:topic_type] == Constants::POST_GLOBAL

  mapped[:post_create_action] = proc { |post|
    @permalink_importer.create_for_topic(post.topic, row[:topic_id])
  }

  mapped
end
|
|
|
|
# Imports gallery posts with new_topic = 0 dated after IMPORT_AFTER as
# replies to the topic registered under "discussion#<tid>". Rows without a
# matching topic or with a blank body are skipped.
# NOTE(review): the SELECT reads gallery_comments.tid, but that table is not
# part of the FROM clause -- verify the query before enabling this method.
def import_comments
  puts "", "importing gallery comments..."

  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}gallery_comments;").first['count']

  batches(BATCH_SIZE) do |offset|
    comments = mysql_query(<<-SQL

      SELECT #{TABLE_PREFIX}gallery_comments.tid tid,
             #{TABLE_PREFIX}gallery_topics.forum_id category,
             #{TABLE_PREFIX}gallery_posts.pid pid,
             #{TABLE_PREFIX}gallery_topics.title title,
             #{TABLE_PREFIX}gallery_posts.post raw,
             FROM_UNIXTIME(#{TABLE_PREFIX}gallery_posts.post_date) created_at,
             #{TABLE_PREFIX}gallery_posts.author_id user_id
      FROM #{TABLE_PREFIX}gallery_posts, #{TABLE_PREFIX}gallery_topics
      WHERE #{TABLE_PREFIX}gallery_posts.topic_id = #{TABLE_PREFIX}gallery_topics.tid
      AND #{TABLE_PREFIX}gallery_posts.new_topic = 0
      AND #{TABLE_PREFIX}gallery_posts.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY #{TABLE_PREFIX}gallery_posts.post_date ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}

    SQL
    )

    break if comments.size < 1

    import_ids = comments.map { |comment| "comment#" + comment['pid'].to_s }
    next if all_records_exist?(:posts, import_ids)

    create_posts(comments, total: total_count, offset: offset) do |comment|
      parent = topic_lookup_from_imported_post_id("discussion#" + comment['tid'].to_s)
      next unless parent
      next if comment['raw'].blank?

      {
        id: "comment#" + comment['pid'].to_s,
        user_id: user_id_from_imported_user_id(comment['user_id']) || Discourse::SYSTEM_USER_ID,
        topic_id: parent[:topic_id],
        raw: clean_up(comment['raw']),
        created_at: Time.zone.at(comment['created_at'])
      }
    end
  end
end
|
|
|
|
# Imports replies: posts with new_topic = 0 dated after IMPORT_AFTER.
#
# Rows are read in batches of BATCH_SIZE ordered by post date and attached
# to the topic registered under "discussion#<tid>". Each reply is registered
# under the import id "comment#<pid>". Rows without a matching topic or with
# a blank body are skipped.
def import_posts
  puts "", "importing posts..."

  total_count = mysql_query("SELECT count(*) count FROM #{POSTS_TABLE}
                             WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
                             AND new_topic=0;").first['count']

  batches(BATCH_SIZE) do |offset|
    comments = mysql_query(<<-SQL
      SELECT #{TOPICS_TABLE}.tid tid,
             #{TOPICS_TABLE}.forum_id category,
             #{POSTS_TABLE}.pid pid,
             #{TOPICS_TABLE}.title title,
             #{POSTS_TABLE}.post raw,
             FROM_UNIXTIME(#{POSTS_TABLE}.post_date) created_at,
             #{POSTS_TABLE}.author_id user_id
      FROM #{POSTS_TABLE}, #{TOPICS_TABLE}
      WHERE #{POSTS_TABLE}.topic_id = #{TOPICS_TABLE}.tid
      AND #{POSTS_TABLE}.new_topic = 0
      AND #{POSTS_TABLE}.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY #{POSTS_TABLE}.post_date ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL
    )

    break if comments.size < 1

    import_ids = comments.map { |comment| "comment#" + comment['pid'].to_s }
    next if all_records_exist?(:posts, import_ids)

    create_posts(comments, total: total_count, offset: offset) do |comment|
      parent = topic_lookup_from_imported_post_id("discussion#" + comment['tid'].to_s)
      next unless parent
      next if comment['raw'].blank?

      {
        id: "comment#" + comment['pid'].to_s,
        user_id: user_id_from_imported_user_id(comment['user_id']) || Discourse::SYSTEM_USER_ID,
        topic_id: parent[:topic_id],
        raw: clean_up(comment['raw']),
        created_at: Time.zone.at(comment['created_at'])
      }
    end
  end
end
|
|
|
|
# Rewrites every <blockquote class="ipsQuote"> in +raw+ into a [quote] block
# wrapped in a <div>, and returns the resulting HTML.
#
# When the quoted post or topic can be found through the import lookups
# ("comment#<id>" first, then "discussion#<id>"), the quote carries the
# user, post number and topic id; otherwise only the username, or a bare
# [quote] when there is no usable username.
#
# this makes proper quotes with user/topic/post references.
# I'm not clear if it is for just some bizarre imported data, or it might ever be useful
# It should be integrated into the Nokogiri section of clean_up, though.
def nokogiri_fix_blockquotes(raw)
  @doc = Nokogiri::XML("<html>" + raw + "</html>")

  # handle <blockquote>s with links to original post
  @doc.css('blockquote[class=ipsQuote]').each do |b|
    imported_post_id = b['data-ipsquote-contentcommentid'].to_s
    # Fall back to data-cid when data-ipsquote-contentid is absent. The
    # previous `a.to_s || b.to_s` could never reach the fallback because
    # to_s never returns nil.
    content_id = (b['data-ipsquote-contentid'] || b['data-cid']).to_s
    topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id)
    post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id)
    post = topic_lookup ? topic_lookup[:post_number] : nil
    topic = topic_lookup ? topic_lookup[:topic_id] : nil
    post ||= post_lookup ? post_lookup[:post_number] : nil
    topic ||= post_lookup ? post_lookup[:topic_id] : nil

    # TODO: consider: <blockquote class="ipsStyle_spoiler" data-ipsspoiler="">
    # consider: <pre class="ipsCode prettyprint">
    # TODO make sure it's the imported username
    # TODO: do _s still get \-escaped?
    username = b['data-ipsquote-username'] || b['data-author']

    quote =
      if post && topic && username
        "\n[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n"
      elsif username && username.length > 1
        "\n[quote=\"#{username}\"]\n\n"
      else
        "\n[quote]\n"
      end

    # Diagnostic output follows the QUIET switch like the rest of the file;
    # the one-second pause per quote that accompanied it has been removed.
    puts "QUOTE: #{quote}" unless QUIET
    b.content = quote + b.content + "\n[/quote]\n"
    b.name = 'div'
  end

  @doc.to_html
end
|
|
|
|
# Converts an IP.Board post body into Discourse-flavoured markup.
#
# Replaces line-break and empty-paragraph HTML with blank lines, converts
# [hr], [url], [media], [php], [code] and [list] BBCode, rewrites
# [quote name='..' post='..'] tags into Discourse quotes (with post and
# topic references when the quoted post was imported), and replaces
# [attachment=<id>:<name>] tags with links to the file under UPLOADS.
#
# @param raw [String, nil] post body; modified in place
# @return [String] the converted body, or "" when +raw+ is blank
def clean_up(raw)
  return "" if raw.blank?

  raw.gsub!(/<#EMO_DIR#>/, EMO_DIR)
  # TODO what about uploads?
  # raw.gsub!(/<fileStore.core_Attachment>/,UPLOADS)
  raw.gsub!(/<br>/, "\n\n")
  raw.gsub!(/<br \/>/, "\n\n")
  # Matches an "empty" paragraph holding either a plain space or &nbsp;.
  raw.gsub!(/<p>(?:&nbsp;| )<\/p>/, "\n\n")
  raw.gsub!(/\[hr\]/, "\n***\n")
  # Decode the apostrophe entity. NOTE(review): the listing this was taken
  # from showed a no-op "'" => "'" substitution, presumably the entity after
  # HTML decoding -- confirm against the upstream file.
  raw.gsub!(/&#39;/, "'")
  raw.gsub!(/\[url="(.+?)"\]http.+?\[\/url\]/, "\\1\n")
  raw.gsub!(/\[media\](.+?)\[\/media\]/, "\n\\1\n\n")
  # Code blocks: collapse the doubled newlines created by the <br> handling
  # above and decode HTML entities inside the block.
  raw.gsub!(/\[php\](.+?)\[\/php\]/m) { "\n\n```php\n\n" + @htmlentities.decode(Regexp.last_match(1).gsub(/\n\n/, "\n")) + "\n\n```\n\n" }
  raw.gsub!(/\[code\](.+?)\[\/code\]/m) { "\n\n```\n\n" + @htmlentities.decode(Regexp.last_match(1).gsub(/\n\n/, "\n")) + "\n\n```\n\n" }
  raw.gsub!(/\[list\](.+?)\[\/list\]/m) { "\n" + Regexp.last_match(1).gsub(/\[\*\]/, "\n- ") + "\n\n" }
  raw.gsub!(/\[quote\]/, "\n[quote]\n")
  raw.gsub!(/\[\/quote\]/, "\n[/quote]\n")
  raw.gsub!(/date=\'(.+?)\'/, '')
  raw.gsub!(/timestamp=\'(.+?)\' /, '')

  quote_regex = /\[quote name=\'(.+?)\'\s+post=\'(\d+?)\'\s*\]/
  while (quote = quote_regex.match(raw))
    # get IPB post number and find Discourse post and topic number
    puts "----------------------------------------\nName: #{quote[1]}, post: #{quote[2]}" unless QUIET
    imported_post_id = quote[2].to_s
    topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id)
    post_lookup = topic_lookup_from_imported_post_id("discussion#" + imported_post_id)
    puts "topic_lookup: #{topic_lookup}, post: #{post_lookup}" unless QUIET
    post_num = topic_lookup ? topic_lookup[:post_number] : nil
    topic_num = topic_lookup ? topic_lookup[:topic_id] : nil
    post_num ||= post_lookup ? post_lookup[:post_number] : nil
    topic_num ||= post_lookup ? post_lookup[:topic_id] : nil

    # Fix or leave bogus username?
    # NOTE(review): quote[1] is the quoted name, not an import id; confirm
    # what find_user_by_import_id returns for it.
    username = find_user_by_import_id(quote[1]) || quote[1]
    puts "username: #{username}, post_id: #{post_num}, topic_id: #{topic_num}" unless QUIET
    puts "Before fixing a quote: #{raw}\n**************************************** " unless QUIET
    post_string = post_num ? ", post:#{post_num}" : ""
    topic_string = topic_num ? ", topic:#{topic_num}" : ""
    replacement = "\n[quote=\"#{username}#{post_string}#{topic_string}\"]\n\n"
    # Replace only the tag that was just looked up. gsub! rewrote every
    # quote in the post with the first quote's attribution. The block form
    # keeps backslashes in the name from being read as back-references.
    raw.sub!(quote_regex) { replacement }
    puts "AFTER!!!!!!!!!!!!1: #{raw}" unless QUIET
  end

  # The file name part stops at the first "]" so that text following the
  # tag on the same line is left alone.
  attach_regex = /\[attachment=(\d+?):[^\]]+\]/
  while (attach = attach_regex.match(raw))
    attach_id = attach[1]
    attachments =
      mysql_query("SELECT attach_location as loc,
                          attach_file as filename
                   FROM #{ATTACHMENT_TABLE}
                   WHERE attach_id=#{attach_id}")
    if attachments.count < 1
      puts "Attachment #{attach_id} not found."
      attach_string = "Attachment #{attach_id} not found."
    else
      attachment = attachments.first
      attach_url = "#{UPLOADS}/#{attachment['loc'].gsub(' ', '%20')}"
      if attachment['filename'].match(/(png|jpg|jpeg|gif)$/)
        # images are rendered as a link that contains the image
        attach_string = "#{attach_id}\n\n[![#{attachment['filename']}](#{attach_url})](#{attach_url})\n"
      else
        # other attachments are simple download links
        attach_string = "#{attach_id}\n\n[#{attachment['filename']}](#{attach_url})\n"
      end
    end
    raw.sub!(attach_regex) { attach_string }
  end

  raw
end
|
|
|
|
# Returns one randomly chosen entry from the "|"-separated
# SiteSetting.category_colors list.
def random_category_color
  palette = SiteSetting.category_colors.split('|')
  pick = rand(palette.count)
  palette[pick]
end
|
|
|
|
# Legacy converter from IPBoard post HTML to Discourse Markdown.
#
# This was for a forum that appeared to have lots of customizations.
# It did a good job of handling quotes and whatnot, but it is not known
# which version of IPBoard it was written for.
#
# Steps: expand IPBoard URL placeholders, rewrite quote blockquotes into
# [quote] BBCode, replace embeds and embedded-content iframes with plain
# URLs, drop quote citations, then let ReverseMarkdown convert the rest.
#
# @param raw [String] post body; the placeholder substitutions mutate it in place
# @return [String] the converted text, or "" when +raw+ is blank
def old_clean_up(raw)
  return "" if raw.blank?

  raw.gsub!(/<___base_url___>/, URL)
  raw.gsub!(/<fileStore.core_Emoticons>/, UPLOADS)
  raw.gsub!(/<fileStore.core_Attachment>/, UPLOADS)
  raw.gsub!(/<br>/, "\n")

  @doc = Nokogiri::XML("<html>" + raw + "</html>")

  # handle <blockquote>s with links to original post
  @doc.css('blockquote[class=ipsQuote]').each do |b|
    imported_post_id = b['data-ipsquote-contentcommentid'].to_s
    content_type = b['data-ipsquote-contenttype'].to_s
    content_class = b['data-ipsquote-contentclass'].to_s
    # NOTE(review): this line used to read `... .to_s || b['data-cid'].to_s`.
    # `to_s` never returns nil, so that fallback could never run and has been
    # dropped. Confirm that `data-cid` really holds a topic id before wiring
    # it in as a fallback.
    content_id = b['data-ipsquote-contentid'].to_s

    comment_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id)
    discussion_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id)

    # prefer the quoted comment itself, fall back to the quoted discussion
    post = comment_lookup ? comment_lookup[:post_number] : nil
    topic = comment_lookup ? comment_lookup[:topic_id] : nil
    post ||= discussion_lookup ? discussion_lookup[:post_number] : nil
    topic ||= discussion_lookup ? discussion_lookup[:topic_id] : nil

    # TODO: consider: <blockquote class="ipsStyle_spoiler" data-ipsspoiler="">
    # consider: <pre class="ipsCode prettyprint">
    username = b['data-ipsquote-username'] || b['data-author']

    # NOTE(review): these diagnostics are collected but never written to the
    # post or to a log; decide where they should go.
    new_text = ""
    if DEBUG
      if content_class.length > 0 && content_class != "forums_Topic"
        new_text += "UNEXPECTED CONTENT CLASS! #{content_class}<br>\n"
      end
      if content_type.length > 0 && content_type != "forums"
        new_text += "UNEXPECTED CONTENT TYPE! #{content_type}<br>\n"
      end
    end

    quote =
      if post && topic && username
        "[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n"
      elsif username && username.length > 1
        "[quote=\"#{username}\"]\n\n"
      else
        "[quote]\n"
      end

    b.content = quote + b.content + "\n[/quote]\n"
    b.name = 'div'
  end

  # replace embedded objects with their bare source URL
  @doc.css('object param embed').each do |embed|
    embed.replace("\n#{embed['src']}\n")
  end

  # handle <iframe data-embedcontent>s with links to original post
  # no examples in recent import
  @doc.css('iframe[data-embedcontent]').each do |d|
    markup = d.to_s
    imported_post_id = markup[/\-([0-9]+)t/, 1]
    if imported_post_id
      puts "Searching for #{imported_post_id}" unless QUIET
      topic_lookup = topic_lookup_from_imported_post_id("discussion#" + imported_post_id)
      topic = topic_lookup ? topic_lookup[:topic_id] : nil
      if topic
        # "-1" is a sentinel that is not expected to match any imported comment
        content_id = markup[/comment=([0-9]+)&/, 1] || "-1"
        post_lookup = topic_lookup_from_imported_post_id("comment#" + content_id)
        # Link to the referenced comment when it was imported. The previous
        # code read the post number from topic_lookup, ignoring post_lookup,
        # so every embed pointed at the discussion's own post.
        post = (post_lookup && post_lookup[:post_number]) || 1
        d.content = "#{URL}/t/#{topic}/#{post}"
      end
    end
    d.name = 'div'
  end

  # the citation line duplicates what the [quote] tag already says
  @doc.css('div[class=ipsQuote_citation]').each do |d|
    d.remove
  end

  # let ReverseMarkdown handle the rest
  raw = ReverseMarkdown.convert(@doc.to_html)

  # remove tabs at start of line to avoid everything being a <pre>
  raw = raw.gsub(/^\t+/, "")

  # un \-escape _s in usernames in [quote]s
  raw.gsub!(/^\[quote=.+?_.*$/) { |line| line.gsub('\_', '_') }

  raw
end
|
|
|
|
# Returns a Guardian built for Discourse.system_user, creating it on the
# first call and reusing the same instance afterwards.
def staff_guardian
  return @_staff_guardian if @_staff_guardian

  @_staff_guardian = Guardian.new(Discourse.system_user)
end
|
|
|
|
# Runs +sql+ through @client and returns whatever @client.query returns.
#
# @param sql [String] the statement to execute
def mysql_query(sql)
  # Do not switch to @client.query(sql, cache_rows: false):
  # cache_rows: false causes a segmentation fault.
  result_set = @client.query(sql)
  result_set
end
|
|
|
|
# Creates Permalink records that map old IPBoard URLs onto imported content.
#
# * Users with both "import_id" and "import_username" custom fields get
#   "<USERDIR>/<import_id>-<escaped username>" pointing at /users/<username>.
# * First posts with an "import_id" custom field get a topic permalink,
#   "topic/<id>-<slug>" when ID_FIRST is set, otherwise "<slug>-<id>t".
# * Categories with an "import_id" custom field get "/forum/<id>-<slug>".
#
# Progress is printed as one dot per processed record. Failures while
# creating an individual permalink are ignored (best effort).
def create_permalinks
  puts '', 'Creating redirects...', ''

  # TODO: permalink normalizations: /(.*t)\?.*/\1

  # URI.escape was removed in Ruby 3.0; the RFC 2396 parser offers the same
  # escaping and also exists on older Rubies.
  uri_escaper = URI::RFC2396_Parser.new

  puts '', 'Users...', ''
  User.find_each do |u|
    ucf = u.custom_fields
    next unless ucf && ucf["import_id"] && ucf["import_username"]

    username = uri_escaper.escape(ucf["import_username"])
    Permalink.create(url: "#{USERDIR}/#{ucf['import_id']}-#{username}", external_url: "/users/#{u.username}") rescue nil
    print '.'
  end

  puts '', 'Posts...', ''
  Post.find_each do |post|
    pcf = post.custom_fields
    next unless pcf && pcf["import_id"]

    # Only first posts get a (topic) permalink; there does not seem to be a
    # way to build permalinks for the other posts.
    if post.post_number == 1
      topic = post.topic
      # import ids look like "<kind>#<id>"; keep the part after the '#'
      id = pcf["import_id"].split('#').last
      slug = topic.slug
      if ID_FIRST
        Permalink.create(url: "topic/#{id}-#{slug}", topic_id: topic.id) rescue nil
        print_warning("#{URL}/topic/#{id}-#{slug} --> http://localhost:3000/t/#{topic.id}") unless QUIET
      else
        Permalink.create(url: "#{slug}-#{id}t", topic_id: topic.id) rescue nil
        print_warning("#{URL}/#{slug}-#{id}t --> http://localhost:3000/t/#{topic.id}") unless QUIET
      end
    end
    print '.'
  end

  puts '', 'Categories...', ''
  Category.find_each do |cat|
    id = cat.custom_fields["import_id"]
    next unless id

    slug = cat['slug']
    print_warning("#{URL}/forum/#{id}-#{slug} --> /c/#{slug}") unless QUIET
    Permalink.create(url: "/forum/#{id}-#{slug}", category_id: cat.id) rescue nil
    print '.'
  end
end
|
|
|
|
# Writes +message+, converted to a string, as one line on standard error.
#
# @param message [#to_s] text to report
def print_warning(message)
  text = message.to_s
  $stderr.puts(text)
end
|
|
|
|
end
|
|
|
|
# Script entry point: build the importer and call #perform on it as soon as
# this file is loaded.
ImportScripts::IpboardSQL.new.perform
|