# discourse/script/import_scripts/bbpress.rb
#
# Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.
#
# 517 lines
# 16 KiB
# Ruby
# Raw Normal View History

# frozen_string_literal: true
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Bbpress < ImportScripts::Base
# Connection settings for the source bbPress (WordPress) MySQL database.
# Each one falls back to the literal default when its environment variable
# is not set.
BB_PRESS_HOST ||= ENV.fetch("BBPRESS_HOST", "localhost")
BB_PRESS_USER ||= ENV.fetch("BBPRESS_USER", "root")
BB_PRESS_PW ||= ENV.fetch("BBPRESS_PW", "")
BB_PRESS_DB ||= ENV.fetch("BBPRESS_DB", "bbpress")
# Table-name prefix interpolated into the SQL queries of this importer.
BB_PRESS_PREFIX ||= ENV.fetch("BBPRESS_PREFIX", "wp_")
# Root directory under which attachment files are looked up.
BB_PRESS_ATTACHMENTS_DIR ||= ENV.fetch("BBPRESS_ATTACHMENTS_DIR", "/path/to/attachments")
# Row limit applied to each batched query.
BATCH_SIZE ||= 1000
# Sets up the importer: runs the base-class initializer, creates the HTML
# entity decoder used when converting code blocks, and opens the MySQL
# connection to the bbPress database using the BB_PRESS_* settings.
def initialize
  super
  @he = HTMLEntities.new
  @client =
    Mysql2::Client.new(
      host: BB_PRESS_HOST,
      username: BB_PRESS_USER,
      database: BB_PRESS_DB,
      password: BB_PRESS_PW,
    )
end
# (blame timestamp: 2016-03-08 01:21:09 +08:00)
# Runs every import step in order. Users and categories come first because
# the later steps look them up by their imported ids.
def execute
  import_users
  import_anonymous_users
  import_categories
  import_topics_and_posts
  import_private_messages
  import_attachments
  create_permalinks
end
# (blame timestamp: 2016-03-08 01:21:09 +08:00)
# Imports every user that has an e-mail address containing "@" and authored
# at least one forum, topic or reply. Users are read in ascending id order,
# BATCH_SIZE at a time, using the last seen id as the cursor.
def import_users
  puts "", "importing users..."

  last_user_id = -1

  total_users = bbpress_query(<<~SQL).first["cnt"]
    SELECT COUNT(DISTINCT(u.id)) AS cnt
    FROM #{BB_PRESS_PREFIX}users u
    LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
    WHERE p.post_type IN ('forum', 'reply', 'topic')
    AND user_email LIKE '%@%'
  SQL

  batches(BATCH_SIZE) do |offset|
    users = bbpress_query(<<~SQL).to_a
      SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass
      FROM #{BB_PRESS_PREFIX}users u
      LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
      WHERE user_email LIKE '%@%'
      AND p.post_type IN ('forum', 'reply', 'topic')
      AND u.id > #{last_user_id}
      GROUP BY u.id
      ORDER BY u.id
      LIMIT #{BATCH_SIZE}
    SQL

    break if users.empty?

    # Advance the cursor before any `next`, so a skipped batch is not re-read.
    last_user_id = users[-1]["id"].to_i
    user_ids = users.map { |u| u["id"].to_i }

    next if all_records_exist?(:users, user_ids)

    user_ids_sql = user_ids.join(",")

    # user_id => profile description, taken from usermeta
    users_description = {}
    bbpress_query(<<~SQL).each { |um| users_description[um["user_id"]] = um["description"] }
      SELECT user_id, meta_value description
      FROM #{BB_PRESS_PREFIX}usermeta
      WHERE user_id IN (#{user_ids_sql})
      AND meta_key = 'description'
    SQL

    # user_id => last activity value, taken from usermeta
    users_last_activity = {}
    bbpress_query(<<~SQL).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] }
      SELECT user_id, meta_value last_activity
      FROM #{BB_PRESS_PREFIX}usermeta
      WHERE user_id IN (#{user_ids_sql})
      AND meta_key = 'last_activity'
    SQL

    create_users(users, total: total_users, offset: offset) do |u|
      {
        id: u["id"].to_i,
        username: u["user_nicename"],
        password: u["user_pass"],
        email: u["user_email"].downcase,
        name: u["display_name"].presence || u["user_nicename"],
        created_at: u["user_registered"],
        website: u["user_url"],
        bio_raw: users_description[u["id"]],
        last_seen_at: users_last_activity[u["id"]],
      }
    end
  end
end
# Creates one user per distinct anonymous poster name found in postmeta
# (`_bbp_anonymous_*` keys). The poster name doubles as the import id.
#
# Every created user gets a unique e-mail address: a missing/blank address,
# or one already claimed by another name (compared case-insensitively, since
# addresses are downcased before creation), is replaced by a generated
# `anonymous_<hex>@no-email.invalid` address.
def import_anonymous_users
  puts "", "importing anonymous users..."

  meta_fields = {
    "_bbp_anonymous_email" => "email",
    "_bbp_anonymous_name" => "name",
    "_bbp_anonymous_website" => "website",
  }

  # post_id => { "email" => ..., "name" => ..., "website" => ... }
  anon_posts = Hash.new { |posts, post_id| posts[post_id] = {} }

  # gather anonymous users via postmeta table
  bbpress_query(<<~SQL).each do |pm|
    SELECT post_id, meta_key, meta_value
    FROM #{BB_PRESS_PREFIX}postmeta
    WHERE meta_key LIKE '_bbp_anonymous%'
  SQL
    post = anon_posts[pm["post_id"]]
    field = meta_fields[pm["meta_key"]]
    post[field] = pm["meta_value"] if field
  end

  # gather every existent username
  # name => { "email" => ..., "website" => ... }
  anon_names = {}
  anon_posts.each_value do |post|
    entry = (anon_names[post["name"]] ||= {})
    # overwriting email address, one user can only use one email address
    entry["email"] = post["email"]
    entry["website"] = post["website"] if post["website"] != ""
  end

  # make sure every user name has a unique, non-empty email address
  seen_emails = {}
  anon_names.each_value do |entry|
    normalized = entry["email"].to_s.strip.downcase
    if normalized.empty? || seen_emails.key?(normalized)
      entry["email"] = "anonymous_#{SecureRandom.hex}@no-email.invalid"
    else
      seen_emails[normalized] = true
    end
  end

  create_users(anon_names) do |name, attrs|
    { id: name, email: attrs["email"].downcase, name: name, website: attrs["website"] }
  end
end
# (blame timestamp: 2016-03-08 01:21:09 +08:00)
# Imports bbPress forums (posts of type 'forum' with a non-empty post_name)
# as categories. Rows are ordered by post_parent, then id; a forum whose
# post_parent is greater than zero is attached to the category imported for
# that parent.
def import_categories
  puts "", "importing categories..."

  categories = bbpress_query(<<~SQL)
    SELECT id, post_name, post_parent
    FROM #{BB_PRESS_PREFIX}posts
    WHERE post_type = 'forum'
    AND LENGTH(COALESCE(post_name, '')) > 0
    ORDER BY post_parent, id
  SQL

  create_categories(categories) do |c|
    category = { id: c["id"], name: c["post_name"] }

    if (parent_id = c["post_parent"].to_i) > 0
      category[:parent_category_id] = category_id_from_imported_category_id(parent_id)
    end

    category
  end
end
# (blame timestamp: 2016-03-08 01:21:09 +08:00)
# Imports all non-spam topics and replies in ascending id order, BATCH_SIZE
# rows at a time. For each batch it also loads the 'Likes' postmeta and the
# anonymous author name, which is used as a fallback import id when
# post_author does not resolve to an imported user.
def import_topics_and_posts
  puts "", "importing topics and posts..."

  last_post_id = -1

  total_posts = bbpress_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
    FROM #{BB_PRESS_PREFIX}posts
    WHERE post_status <> 'spam'
    AND post_type IN ('topic', 'reply')
  SQL

  batches(BATCH_SIZE) do |offset|
    posts = bbpress_query(<<~SQL).to_a
      SELECT id,
      post_author,
      post_date,
      post_content,
      post_title,
      post_type,
      post_parent
      FROM #{BB_PRESS_PREFIX}posts
      WHERE post_status <> 'spam'
      AND post_type IN ('topic', 'reply')
      AND id > #{last_post_id}
      ORDER BY id
      LIMIT #{BATCH_SIZE}
    SQL

    break if posts.empty?

    # Advance the cursor before any `next`, so a skipped batch is not re-read.
    last_post_id = posts[-1]["id"].to_i
    post_ids = posts.map { |p| p["id"].to_i }

    next if all_records_exist?(:posts, post_ids)

    post_ids_sql = post_ids.join(",")

    # post_id => like count
    posts_likes = {}
    bbpress_query(<<~SQL).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i }
      SELECT post_id, meta_value likes
      FROM #{BB_PRESS_PREFIX}postmeta
      WHERE post_id IN (#{post_ids_sql})
      AND meta_key = 'Likes'
    SQL

    # post_id => anonymous author name
    anon_names = {}
    bbpress_query(<<~SQL).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] }
      SELECT post_id, meta_value
      FROM #{BB_PRESS_PREFIX}postmeta
      WHERE post_id IN (#{post_ids_sql})
      AND meta_key = '_bbp_anonymous_name'
    SQL

    create_posts(posts, total: total_posts, offset: offset) do |p|
      # Registered author first, then the anonymous name, then user id -1.
      user_id =
        user_id_from_imported_user_id(p["post_author"]) ||
          find_user_by_import_id(p["post_author"]).try(:id) ||
          user_id_from_imported_user_id(anon_names[p["id"]]) ||
          find_user_by_import_id(anon_names[p["id"]]).try(:id) || -1

      post = {
        id: p["id"],
        user_id: user_id,
        raw: p["post_content"],
        created_at: p["post_date"],
        like_count: posts_likes[p["id"]],
      }

      if post[:raw].present?
        # Turn <pre><code>…</code></pre> into a fenced block with entities decoded.
        post[:raw].gsub!(%r{\<pre\>\<code(=[a-z]*)?\>(.*?)\</code\>\</pre\>}im) do
          "```\n#{@he.decode(Regexp.last_match(2))}\n```"
        end
      end

      if p["post_type"] == "topic"
        post[:category] = category_id_from_imported_category_id(p["post_parent"])
        post[:title] = CGI.unescapeHTML(p["post_title"])
      elsif (parent = topic_lookup_from_imported_post_id(p["post_parent"]))
        post[:topic_id] = parent[:topic_id]
        post[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
      else
        # Parent was not imported: yield nil for this reply.
        puts "Skipping #{p["id"]}: #{p["post_content"].to_s[0..40]}"
        next nil
      end

      post
    end
  end
end
# Runs both attachment importers, postmeta-based first.
def import_attachments
  %i[import_attachments_from_postmeta import_attachments_from_bb_attachments].each do |importer|
    send(importer)
  end
end
# Attaches files referenced by '_wp_attached_file' postmeta rows to the
# imported post matching the attachment's parent. Rows whose file is missing
# on disk, whose post was not imported, or whose upload did not persist are
# passed over; the upload markup is appended only if the post does not
# already contain it.
def import_attachments_from_postmeta
  puts "", "Importing attachments from 'postmeta'..."

  processed = 0
  cursor = -1

  total_attachments = bbpress_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
    FROM #{BB_PRESS_PREFIX}postmeta pm
    JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
    WHERE pm.meta_key = '_wp_attached_file'
    AND p.post_parent > 0
  SQL

  batches(BATCH_SIZE) do |_offset|
    rows = bbpress_query(<<~SQL).to_a
      SELECT pm.meta_id id, pm.meta_value, p.post_parent post_id
      FROM #{BB_PRESS_PREFIX}postmeta pm
      JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
      WHERE pm.meta_key = '_wp_attached_file'
      AND p.post_parent > 0
      AND pm.meta_id > #{cursor}
      ORDER BY pm.meta_id
      LIMIT #{BATCH_SIZE}
    SQL

    break if rows.empty?

    cursor = rows[-1]["id"].to_i

    rows.each do |row|
      processed += 1
      print_status(processed, total_attachments, get_start_time("attachments_from_postmeta"))

      path = File.join(BB_PRESS_ATTACHMENTS_DIR, row["meta_value"])
      next unless File.exist?(path)

      post = Post.find_by(id: post_id_from_imported_post_id(row["post_id"]))
      next unless post

      filename = File.basename(row["meta_value"])
      upload = create_upload(post.user.id, path, filename)
      next unless upload&.persisted?

      html = html_for_upload(upload, filename)
      # Already embedded in the post: nothing to append or link.
      next if post.raw[html]

      post.raw << "\n\n" << html
      post.save!

      next if PostUpload.where(post: post, upload: upload).exists?
      PostUpload.create!(post: post, upload: upload)
    end
  end
end
# Attaches files listed in the bb_attachments table (restricted to non-spam
# topics and replies) to their imported posts. Files are located on disk via
# find_attachment; rows with no file, no imported post or a non-persisted
# upload are passed over, and the markup is appended only once per post.
def import_attachments_from_bb_attachments
  puts "", "Importing attachments from 'bb_attachments'..."

  processed = 0
  cursor = -1

  total_attachments = bbpress_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
    FROM #{BB_PRESS_PREFIX}bb_attachments
    WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
  SQL

  batches(BATCH_SIZE) do |_offset|
    rows = bbpress_query(<<~SQL).to_a
      SELECT id, filename, post_id
      FROM #{BB_PRESS_PREFIX}bb_attachments
      WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
      AND id > #{cursor}
      ORDER BY id
      LIMIT #{BATCH_SIZE}
    SQL

    break if rows.empty?

    cursor = rows[-1]["id"].to_i

    rows.each do |row|
      processed += 1
      print_status(
        processed,
        total_attachments,
        get_start_time("attachments_from_bb_attachments"),
      )

      path = find_attachment(row["filename"], row["id"])
      next unless path

      post = Post.find_by(id: post_id_from_imported_post_id(row["post_id"]))
      next unless post

      upload = create_upload(post.user.id, path, row["filename"])
      next unless upload&.persisted?

      html = html_for_upload(upload, row["filename"])
      # Already embedded in the post: nothing to append or link.
      next if post.raw[html]

      post.raw << "\n\n" << html
      post.save!

      next if PostUpload.where(post: post, upload: upload).exists?
      PostUpload.create!(post: post, upload: upload)
    end
  end
end
# Returns the path of the file named "<id>.<filename>" somewhere below
# BB_PRESS_ATTACHMENTS_DIR/vf-attachs, or nil when there is none. The
# directory listing is globbed once and cached in @attachments.
def find_attachment(filename, id)
  @attachments ||= Dir[File.join(BB_PRESS_ATTACHMENTS_DIR, "vf-attachs", "**", "*.*")]

  # The leading slash keeps id 12 from matching a file stored for id 112.
  wanted_suffix = "/#{id}.#{filename}"
  @attachments.find { |candidate| candidate.end_with?(wanted_suffix) }
end
# Creates a permalink for every imported non-spam topic, using the path of
# the topic's guid URL with a trailing slash removed. Topics that were not
# imported are skipped. This step is best-effort: a failure for one topic
# (for example an unparsable guid) is reported and does not stop the import.
def create_permalinks
  puts "", "creating permalinks..."

  last_topic_id = -1

  batches(BATCH_SIZE) do |_offset|
    topics = bbpress_query(<<~SQL).to_a
      SELECT id,
      guid
      FROM #{BB_PRESS_PREFIX}posts
      WHERE post_status <> 'spam'
      AND post_type IN ('topic')
      AND id > #{last_topic_id}
      ORDER BY id
      LIMIT #{BATCH_SIZE}
    SQL

    break if topics.empty?

    last_topic_id = topics[-1]["id"].to_i

    topics.each do |t|
      topic = topic_lookup_from_imported_post_id(t["id"])
      # No lookup entry means the topic was not imported; nothing to link to.
      next unless topic

      begin
        Permalink.create(url: URI.parse(t["guid"]).path.chomp("/"), topic_id: topic[:topic_id])
      rescue StandardError => e
        puts "Could not create permalink for topic #{t["id"]}: #{e.message}"
      end
    end
  end
end
# Imports BuddyPress private messages as Discourse private-message topics.
#
# Pass 1 reads bp_messages_recipients and collects, per thread, the imported
# user ids of its recipients. Pass 2 walks bp_messages_messages ordered by
# thread and send date: a message whose thread has no "bb_thread_id" topic
# custom field yet starts a new private-message topic and records that field;
# any other message becomes a reply in the topic the field points at.
#
# Both passes page with LIMIT/OFFSET. Paging pass 2 with an `id >` cursor
# would skip or repeat rows, because the rows are not ordered by id.
def import_private_messages
  puts "", "importing private messages..."

  total_posts =
    bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first[
      "count"
    ]

  total_count =
    bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first[
      "count"
    ]

  # thread_id => imported user ids of the thread's recipients (no duplicates)
  recipients_by_thread = Hash.new { |threads, thread_id| threads[thread_id] = [] }
  current_count = 0

  batches(BATCH_SIZE) do |offset|
    rows = bbpress_query(<<~SQL).to_a
      SELECT thread_id, user_id
      FROM #{BB_PRESS_PREFIX}bp_messages_recipients
      ORDER BY id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if rows.empty?

    rows.each do |row|
      current_count += 1
      print_status(current_count, total_count, get_start_time("private_messages"))

      recipients = recipients_by_thread[row["thread_id"]]
      user_id = user_id_from_imported_user_id(row["user_id"])
      recipients << user_id if user_id && !recipients.include?(user_id)
    end
  end

  batches(BATCH_SIZE) do |offset|
    # `id` is a tie-breaker so the ordering, and thus the paging, is stable.
    posts = bbpress_query(<<~SQL).to_a
      SELECT id,
      thread_id,
      date_sent,
      sender_id,
      subject,
      message
      FROM #{BB_PRESS_PREFIX}bp_messages_messages
      ORDER BY thread_id, date_sent, id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if posts.empty?

    create_posts(posts, total: total_posts, offset: offset) do |post|
      sender_id =
        user_id_from_imported_user_id(post["sender_id"]) ||
          find_user_by_import_id(post["sender_id"])&.id || -1

      if (tcf = TopicCustomField.where(name: "bb_thread_id", value: post["thread_id"]).first)
        # Reply. The topic id comes from the stored custom field, so it is
        # also available for threads whose first message was imported by an
        # earlier run and for threads without any recipient rows.
        {
          id: "pm#{post["id"]}",
          topic_id: tcf.topic_id,
          user_id: sender_id,
          raw: post["message"],
          created_at: post["date_sent"],
        }
      else
        # First post of the thread
        {
          id: "pm#{post["id"]}",
          archetype: Archetype.private_message,
          user_id: sender_id,
          title: post["subject"],
          raw: post["message"],
          created_at: post["date_sent"],
          target_usernames:
            User.where(id: recipients_by_thread[post["thread_id"]]).pluck(:username),
          post_create_action:
            proc do |new_post|
              if (topic = new_post.topic)
                # Remember which topic this thread maps to for later replies.
                TopicCustomField.create(
                  topic_id: topic.id,
                  name: "bb_thread_id",
                  value: post["thread_id"],
                )
              else
                puts "Error in post_create_action! Can't find topic!"
              end
            end,
        }
      end
    end
  end
end
# (blame timestamp: 2016-03-08 01:21:09 +08:00)
# Runs +sql+ on the bbPress connection with the client's row caching turned
# off and returns whatever the client's `query` call returns.
def bbpress_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, query_options)
end
end
# Start the import as soon as this file is loaded: build the importer and
# call `perform` on it (`perform` is not defined in this file — presumably
# provided by the base importer required at the top; confirm there).
ImportScripts::Bbpress.new.perform