mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 01:47:22 +08:00
8a5d97ef3f
Discourse stopped using PostUpload in 9db8f00b3d
. Since then, these importers have been writing to the table, but any data was totally unused. This commit updates the easy cases to use UploadReference, and adds an error to the discourse_merger import script, which needs more significant work.
513 lines
16 KiB
Ruby
513 lines
16 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "mysql2"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
class ImportScripts::Bbpress < ImportScripts::Base
|
|
BB_PRESS_HOST ||= ENV["BBPRESS_HOST"] || "localhost"
|
|
BB_PRESS_DB ||= ENV["BBPRESS_DB"] || "bbpress"
|
|
BATCH_SIZE ||= 1000
|
|
BB_PRESS_PW ||= ENV["BBPRESS_PW"] || ""
|
|
BB_PRESS_USER ||= ENV["BBPRESS_USER"] || "root"
|
|
BB_PRESS_PREFIX ||= ENV["BBPRESS_PREFIX"] || "wp_"
|
|
BB_PRESS_ATTACHMENTS_DIR ||= ENV["BBPRESS_ATTACHMENTS_DIR"] || "/path/to/attachments"
|
|
|
|
def initialize
|
|
super
|
|
|
|
@he = HTMLEntities.new
|
|
|
|
@client =
|
|
Mysql2::Client.new(
|
|
host: BB_PRESS_HOST,
|
|
username: BB_PRESS_USER,
|
|
database: BB_PRESS_DB,
|
|
password: BB_PRESS_PW,
|
|
)
|
|
end
|
|
|
|
def execute
|
|
import_users
|
|
import_anonymous_users
|
|
import_categories
|
|
import_topics_and_posts
|
|
import_private_messages
|
|
import_attachments
|
|
create_permalinks
|
|
end
|
|
|
|
def import_users
|
|
puts "", "importing users..."
|
|
|
|
last_user_id = -1
|
|
total_users = bbpress_query(<<-SQL).first["cnt"]
|
|
SELECT COUNT(DISTINCT(u.id)) AS cnt
|
|
FROM #{BB_PRESS_PREFIX}users u
|
|
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
|
|
WHERE p.post_type IN ('forum', 'reply', 'topic')
|
|
AND user_email LIKE '%@%'
|
|
SQL
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
users = bbpress_query(<<-SQL).to_a
|
|
SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass
|
|
FROM #{BB_PRESS_PREFIX}users u
|
|
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
|
|
WHERE user_email LIKE '%@%'
|
|
AND p.post_type IN ('forum', 'reply', 'topic')
|
|
AND u.id > #{last_user_id}
|
|
GROUP BY u.id
|
|
ORDER BY u.id
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if users.empty?
|
|
|
|
last_user_id = users[-1]["id"]
|
|
user_ids = users.map { |u| u["id"].to_i }
|
|
|
|
next if all_records_exist?(:users, user_ids)
|
|
|
|
user_ids_sql = user_ids.join(",")
|
|
|
|
users_description = {}
|
|
bbpress_query(<<-SQL).each { |um| users_description[um["user_id"]] = um["description"] }
|
|
SELECT user_id, meta_value description
|
|
FROM #{BB_PRESS_PREFIX}usermeta
|
|
WHERE user_id IN (#{user_ids_sql})
|
|
AND meta_key = 'description'
|
|
SQL
|
|
|
|
users_last_activity = {}
|
|
bbpress_query(<<-SQL).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] }
|
|
SELECT user_id, meta_value last_activity
|
|
FROM #{BB_PRESS_PREFIX}usermeta
|
|
WHERE user_id IN (#{user_ids_sql})
|
|
AND meta_key = 'last_activity'
|
|
SQL
|
|
|
|
create_users(users, total: total_users, offset: offset) do |u|
|
|
{
|
|
id: u["id"].to_i,
|
|
username: u["user_nicename"],
|
|
password: u["user_pass"],
|
|
email: u["user_email"].downcase,
|
|
name: u["display_name"].presence || u["user_nicename"],
|
|
created_at: u["user_registered"],
|
|
website: u["user_url"],
|
|
bio_raw: users_description[u["id"]],
|
|
last_seen_at: users_last_activity[u["id"]],
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_anonymous_users
|
|
puts "", "importing anonymous users..."
|
|
|
|
anon_posts = Hash.new
|
|
anon_names = Hash.new
|
|
emails = Array.new
|
|
|
|
# gather anonymous users via postmeta table
|
|
bbpress_query(<<-SQL).each do |pm|
|
|
SELECT post_id, meta_key, meta_value
|
|
FROM #{BB_PRESS_PREFIX}postmeta
|
|
WHERE meta_key LIKE '_bbp_anonymous%'
|
|
SQL
|
|
anon_posts[pm["post_id"]] = Hash.new if not anon_posts[pm["post_id"]]
|
|
|
|
if pm["meta_key"] == "_bbp_anonymous_email"
|
|
anon_posts[pm["post_id"]]["email"] = pm["meta_value"]
|
|
end
|
|
if pm["meta_key"] == "_bbp_anonymous_name"
|
|
anon_posts[pm["post_id"]]["name"] = pm["meta_value"]
|
|
end
|
|
if pm["meta_key"] == "_bbp_anonymous_website"
|
|
anon_posts[pm["post_id"]]["website"] = pm["meta_value"]
|
|
end
|
|
end
|
|
|
|
# gather every existent username
|
|
anon_posts.each do |id, post|
|
|
anon_names[post["name"]] = Hash.new if not anon_names[post["name"]]
|
|
# overwriting email address, one user can only use one email address
|
|
anon_names[post["name"]]["email"] = post["email"]
|
|
anon_names[post["name"]]["website"] = post["website"] if post["website"] != ""
|
|
end
|
|
|
|
# make sure every user name has a unique email address
|
|
anon_names.each do |k, name|
|
|
if not emails.include? name["email"]
|
|
emails.push (name["email"])
|
|
else
|
|
name["email"] = "anonymous_#{SecureRandom.hex}@no-email.invalid"
|
|
end
|
|
end
|
|
|
|
create_users(anon_names) do |k, n|
|
|
{ id: k, email: n["email"].downcase, name: k, website: n["website"] }
|
|
end
|
|
end
|
|
|
|
def import_categories
|
|
puts "", "importing categories..."
|
|
|
|
categories = bbpress_query(<<-SQL)
|
|
SELECT id, post_name, post_parent
|
|
FROM #{BB_PRESS_PREFIX}posts
|
|
WHERE post_type = 'forum'
|
|
AND LENGTH(COALESCE(post_name, '')) > 0
|
|
ORDER BY post_parent, id
|
|
SQL
|
|
|
|
create_categories(categories) do |c|
|
|
category = { id: c["id"], name: c["post_name"] }
|
|
if (parent_id = c["post_parent"].to_i) > 0
|
|
category[:parent_category_id] = category_id_from_imported_category_id(parent_id)
|
|
end
|
|
category
|
|
end
|
|
end
|
|
|
|
def import_topics_and_posts
|
|
puts "", "importing topics and posts..."
|
|
|
|
last_post_id = -1
|
|
total_posts = bbpress_query(<<-SQL).first["count"]
|
|
SELECT COUNT(*) count
|
|
FROM #{BB_PRESS_PREFIX}posts
|
|
WHERE post_status <> 'spam'
|
|
AND post_type IN ('topic', 'reply')
|
|
SQL
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
posts = bbpress_query(<<-SQL).to_a
|
|
SELECT id,
|
|
post_author,
|
|
post_date,
|
|
post_content,
|
|
post_title,
|
|
post_type,
|
|
post_parent
|
|
FROM #{BB_PRESS_PREFIX}posts
|
|
WHERE post_status <> 'spam'
|
|
AND post_type IN ('topic', 'reply')
|
|
AND id > #{last_post_id}
|
|
ORDER BY id
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if posts.empty?
|
|
|
|
last_post_id = posts[-1]["id"].to_i
|
|
post_ids = posts.map { |p| p["id"].to_i }
|
|
|
|
next if all_records_exist?(:posts, post_ids)
|
|
|
|
post_ids_sql = post_ids.join(",")
|
|
|
|
posts_likes = {}
|
|
bbpress_query(<<-SQL).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i }
|
|
SELECT post_id, meta_value likes
|
|
FROM #{BB_PRESS_PREFIX}postmeta
|
|
WHERE post_id IN (#{post_ids_sql})
|
|
AND meta_key = 'Likes'
|
|
SQL
|
|
|
|
anon_names = {}
|
|
bbpress_query(<<-SQL).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] }
|
|
SELECT post_id, meta_value
|
|
FROM #{BB_PRESS_PREFIX}postmeta
|
|
WHERE post_id IN (#{post_ids_sql})
|
|
AND meta_key = '_bbp_anonymous_name'
|
|
SQL
|
|
|
|
create_posts(posts, total: total_posts, offset: offset) do |p|
|
|
skip = false
|
|
|
|
user_id =
|
|
user_id_from_imported_user_id(p["post_author"]) ||
|
|
find_user_by_import_id(p["post_author"]).try(:id) ||
|
|
user_id_from_imported_user_id(anon_names[p["id"]]) ||
|
|
find_user_by_import_id(anon_names[p["id"]]).try(:id) || -1
|
|
|
|
post = {
|
|
id: p["id"],
|
|
user_id: user_id,
|
|
raw: p["post_content"],
|
|
created_at: p["post_date"],
|
|
like_count: posts_likes[p["id"]],
|
|
}
|
|
|
|
if post[:raw].present?
|
|
post[:raw].gsub!(%r{\<pre\>\<code(=[a-z]*)?\>(.*?)\</code\>\</pre\>}im) do
|
|
"```\n#{@he.decode($2)}\n```"
|
|
end
|
|
end
|
|
|
|
if p["post_type"] == "topic"
|
|
post[:category] = category_id_from_imported_category_id(p["post_parent"])
|
|
post[:title] = CGI.unescapeHTML(p["post_title"])
|
|
else
|
|
if parent = topic_lookup_from_imported_post_id(p["post_parent"])
|
|
post[:topic_id] = parent[:topic_id]
|
|
post[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
|
|
else
|
|
puts "Skipping #{p["id"]}: #{p["post_content"][0..40]}"
|
|
skip = true
|
|
end
|
|
end
|
|
|
|
skip ? nil : post
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_attachments
|
|
import_attachments_from_postmeta
|
|
import_attachments_from_bb_attachments
|
|
end
|
|
|
|
def import_attachments_from_postmeta
|
|
puts "", "Importing attachments from 'postmeta'..."
|
|
|
|
count = 0
|
|
last_attachment_id = -1
|
|
|
|
total_attachments = bbpress_query(<<-SQL).first["count"]
|
|
SELECT COUNT(*) count
|
|
FROM #{BB_PRESS_PREFIX}postmeta pm
|
|
JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
|
|
WHERE pm.meta_key = '_wp_attached_file'
|
|
AND p.post_parent > 0
|
|
SQL
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
attachments = bbpress_query(<<-SQL).to_a
|
|
SELECT pm.meta_id id, pm.meta_value, p.post_parent post_id
|
|
FROM #{BB_PRESS_PREFIX}postmeta pm
|
|
JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
|
|
WHERE pm.meta_key = '_wp_attached_file'
|
|
AND p.post_parent > 0
|
|
AND pm.meta_id > #{last_attachment_id}
|
|
ORDER BY pm.meta_id
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if attachments.empty?
|
|
last_attachment_id = attachments[-1]["id"].to_i
|
|
|
|
attachments.each do |a|
|
|
print_status(count += 1, total_attachments, get_start_time("attachments_from_postmeta"))
|
|
path = File.join(BB_PRESS_ATTACHMENTS_DIR, a["meta_value"])
|
|
if File.exist?(path)
|
|
if post = Post.find_by(id: post_id_from_imported_post_id(a["post_id"]))
|
|
filename = File.basename(a["meta_value"])
|
|
upload = create_upload(post.user.id, path, filename)
|
|
if upload&.persisted?
|
|
html = html_for_upload(upload, filename)
|
|
if !post.raw[html]
|
|
post.raw << "\n\n" << html
|
|
post.save!
|
|
UploadReference.ensure_exist!(upload_ids: [upload.id], target: post)
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_attachments_from_bb_attachments
|
|
puts "", "Importing attachments from 'bb_attachments'..."
|
|
|
|
count = 0
|
|
last_attachment_id = -1
|
|
|
|
total_attachments = bbpress_query(<<-SQL).first["count"]
|
|
SELECT COUNT(*) count
|
|
FROM #{BB_PRESS_PREFIX}bb_attachments
|
|
WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
|
|
SQL
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
attachments = bbpress_query(<<-SQL).to_a
|
|
SELECT id, filename, post_id
|
|
FROM #{BB_PRESS_PREFIX}bb_attachments
|
|
WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
|
|
AND id > #{last_attachment_id}
|
|
ORDER BY id
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if attachments.empty?
|
|
last_attachment_id = attachments[-1]["id"].to_i
|
|
|
|
attachments.each do |a|
|
|
print_status(
|
|
count += 1,
|
|
total_attachments,
|
|
get_start_time("attachments_from_bb_attachments"),
|
|
)
|
|
if path = find_attachment(a["filename"], a["id"])
|
|
if post = Post.find_by(id: post_id_from_imported_post_id(a["post_id"]))
|
|
upload = create_upload(post.user.id, path, a["filename"])
|
|
if upload&.persisted?
|
|
html = html_for_upload(upload, a["filename"])
|
|
if !post.raw[html]
|
|
post.raw << "\n\n" << html
|
|
post.save!
|
|
UploadReference.ensure_exist!(upload_ids: [upload.id], target: post)
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def find_attachment(filename, id)
|
|
@attachments ||= Dir[File.join(BB_PRESS_ATTACHMENTS_DIR, "vf-attachs", "**", "*.*")]
|
|
@attachments.find { |p| p.end_with?("/#{id}.#{filename}") }
|
|
end
|
|
|
|
def create_permalinks
|
|
puts "", "creating permalinks..."
|
|
|
|
last_topic_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
topics = bbpress_query(<<-SQL).to_a
|
|
SELECT id,
|
|
guid
|
|
FROM #{BB_PRESS_PREFIX}posts
|
|
WHERE post_status <> 'spam'
|
|
AND post_type IN ('topic')
|
|
AND id > #{last_topic_id}
|
|
ORDER BY id
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if topics.empty?
|
|
last_topic_id = topics[-1]["id"].to_i
|
|
|
|
topics.each do |t|
|
|
topic = topic_lookup_from_imported_post_id(t["id"])
|
|
begin
|
|
Permalink.create(url: URI.parse(t["guid"]).path.chomp("/"), topic_id: topic[:topic_id])
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_private_messages
|
|
puts "", "importing private messages..."
|
|
|
|
last_post_id = -1
|
|
total_posts =
|
|
bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first[
|
|
"count"
|
|
]
|
|
|
|
threads = {}
|
|
|
|
total_count =
|
|
bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first[
|
|
"count"
|
|
]
|
|
current_count = 0
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
rows = bbpress_query(<<-SQL).to_a
|
|
SELECT thread_id, user_id
|
|
FROM #{BB_PRESS_PREFIX}bp_messages_recipients
|
|
ORDER BY id
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset}
|
|
SQL
|
|
|
|
break if rows.empty?
|
|
|
|
rows.each do |row|
|
|
current_count += 1
|
|
print_status(current_count, total_count, get_start_time("private_messages"))
|
|
|
|
threads[row["thread_id"]] ||= { target_user_ids: [], imported_topic_id: nil }
|
|
user_id = user_id_from_imported_user_id(row["user_id"])
|
|
if user_id && !threads[row["thread_id"]][:target_user_ids].include?(user_id)
|
|
threads[row["thread_id"]][:target_user_ids] << user_id
|
|
end
|
|
end
|
|
end
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
posts = bbpress_query(<<-SQL).to_a
|
|
SELECT id,
|
|
thread_id,
|
|
date_sent,
|
|
sender_id,
|
|
subject,
|
|
message
|
|
FROM wp_bp_messages_messages
|
|
WHERE id > #{last_post_id}
|
|
ORDER BY thread_id, date_sent
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if posts.empty?
|
|
|
|
last_post_id = posts[-1]["id"].to_i
|
|
|
|
create_posts(posts, total: total_posts, offset: offset) do |post|
|
|
if tcf = TopicCustomField.where(name: "bb_thread_id", value: post["thread_id"]).first
|
|
{
|
|
id: "pm#{post["id"]}",
|
|
topic_id: threads[post["thread_id"]][:imported_topic_id],
|
|
user_id:
|
|
user_id_from_imported_user_id(post["sender_id"]) ||
|
|
find_user_by_import_id(post["sender_id"])&.id || -1,
|
|
raw: post["message"],
|
|
created_at: post["date_sent"],
|
|
}
|
|
else
|
|
# First post of the thread
|
|
{
|
|
id: "pm#{post["id"]}",
|
|
archetype: Archetype.private_message,
|
|
user_id:
|
|
user_id_from_imported_user_id(post["sender_id"]) ||
|
|
find_user_by_import_id(post["sender_id"])&.id || -1,
|
|
title: post["subject"],
|
|
raw: post["message"],
|
|
created_at: post["date_sent"],
|
|
target_usernames:
|
|
User.where(id: threads[post["thread_id"]][:target_user_ids]).pluck(:username),
|
|
post_create_action:
|
|
proc do |new_post|
|
|
if topic = new_post.topic
|
|
threads[post["thread_id"]][:imported_topic_id] = topic.id
|
|
TopicCustomField.create(
|
|
topic_id: topic.id,
|
|
name: "bb_thread_id",
|
|
value: post["thread_id"],
|
|
)
|
|
else
|
|
puts "Error in post_create_action! Can't find topic!"
|
|
end
|
|
end,
|
|
}
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def bbpress_query(sql)
|
|
@client.query(sql, cache_rows: false)
|
|
end
|
|
end
|
|
|
|
ImportScripts::Bbpress.new.perform
|