# frozen_string_literal: true

require_relative '../base.rb'
require_relative './redis'
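
# Imports a NodeBB forum into Discourse, reading NodeBB's data directly
# from its Redis database (see ./redis.rb for the adapter).
#
# A minimal sketch of an invocation, assuming a standard Discourse
# checkout (paths and environment are illustrative, not prescriptive):
#
#   cd /var/www/discourse
#   RAILS_ENV=production bundle exec ruby script/import_scripts/nodebb/nodebb.rb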
class ImportScripts::NodeBB < ImportScripts::Base
  # CHANGE THESE BEFORE RUNNING THE IMPORTER
  # ATTACHMENT_DIR needs to be an absolute, not a relative, path
  ATTACHMENT_DIR = '/Users/orlando/www/orlando/NodeBB/public/uploads'
  BATCH_SIZE = 2000

  def initialize
    super

    adapter = NodeBB::Redis

    @client = adapter.new(
      host: "localhost",
      port: "6379",
      db: 14
    )

    load_merged_posts
  end

  def load_merged_posts
    puts 'loading merged posts with topics...'

    # NodeBB posts that were merged into topics as their
    # first post; only the keys are checked later
    #
    # { nodebb_post_id => discourse_id }
    @merged_posts_map = {}

    PostCustomField.where(name: 'import_merged_post_id').pluck(:post_id, :value).each do |post_id, import_id|
      post = Post.find(post_id)
      topic_id = post.topic_id
      nodebb_post_id = post.custom_fields['import_merged_post_id']

      @merged_posts_map[nodebb_post_id] = topic_id
    end
  end
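
  # Entry point invoked by ImportScripts::Base#perform; each step below
  # streams records out of Redis and creates the matching Discourse
  # records, skipping anything that was already imported.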
  def execute
    import_groups
    import_categories
    import_users
    add_users_to_groups
    import_topics
    import_posts
    import_attachments
    post_process_posts
  end

  def import_groups
    puts '', 'importing groups'

    groups = @client.groups
    total_count = groups.count
    progress_count = 0
    start_time = Time.now

    create_groups(groups) do |group|
      {
        id: group["name"],
        name: group["slug"]
      }
    end
  end
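
  # NodeBB keeps every attribute as a string in Redis, which is why
  # flags are compared against "0"/"1" throughout this importer.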
  def import_categories
    puts "", "importing top level categories..."

    category_map = @client.categories
    category_ids = category_map.keys
    categories = category_map.values

    top_level_categories = categories.select { |c| c["parentCid"] == "0" && c["disabled"] != "1" }

    create_categories(top_level_categories) do |category|
      {
        id: category["cid"],
        name: category["name"],
        position: category["order"],
        description: category["description"],
      }
    end

    puts "", "importing child categories..."

    children_categories = categories.select { |c| c["parentCid"] != "0" && c["disabled"] != "1" }
    top_level_category_ids = Set.new(top_level_categories.map { |c| c["cid"] })

    # cut the tree down to only two levels of categories
    children_categories.each do |cc|
      while !top_level_category_ids.include?(cc["parentCid"])
        cc["parentCid"] = categories.detect { |c| c["cid"] == cc["parentCid"] }["parentCid"]
      end
    end

    create_categories(children_categories) do |category|
      {
        id: category["cid"],
        name: category["name"],
        position: category["order"],
        description: category["description"],
        parent_category_id: category_id_from_imported_category_id(category["parentCid"])
      }
    end

    # preserve the old NodeBB category URLs; rescue swallows duplicates on re-runs
    categories.each do |source_category|
      cid = category_id_from_imported_category_id(source_category['cid'])
      Permalink.create(url: "/category/#{source_category['slug']}", category_id: cid) rescue nil
    end
  end
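
  # Banned NodeBB users come over as suspended Discourse users, and
  # accounts without an email address get a generated placeholder.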
  def import_users
    puts "", "importing users"

    users = @client.users
    user_count = users.count

    # we use this group to grant admin to users
    admin_group = @client.group("administrators")

    create_users(users, total: user_count) do |user|
      username = user["username"]
      email = user["email"]

      # skip users without a username
      next unless username

      # fake email for users without one
      email = fake_email if email.blank?

      # use suspension to handle banned users
      if user["banned"] == "1"
        suspended_at = Time.now
        suspended_till = Time.now + 100.years
      end

      {
        id: user["uid"],
        name: user["fullname"],
        username: username,
        email: email,
        admin: admin_group["member_ids"].include?(user["uid"]),
        website: user["website"],
        location: user["location"],
        suspended_at: suspended_at,
        suspended_till: suspended_till,
        primary_group_id: group_id_from_imported_group_id(user["groupTitle"]),
        created_at: user["joindate"],
        bio_raw: user["aboutme"],
        active: true,
        custom_fields: {
          import_pass: user["password"]
        },
        post_create_action: proc do |u|
          import_profile_picture(user, u)
          import_profile_background(user, u)
        end
      }
    end
  end

  def import_profile_picture(old_user, imported_user)
    picture = old_user["picture"]

    return if picture.blank?

    # URI#scheme returns nil for internal (relative) URLs
    uri = URI.parse(picture)
    is_external = uri.scheme

    if is_external
      # download the external image
      begin
        string_io = open(picture, read_timeout: 5)
      rescue Net::ReadTimeout
        puts "timeout downloading avatar for user #{imported_user.id}"
        return nil
      end

      # bail out if the download failed
      return unless string_io

      # try to get the filename from the headers
      if string_io.meta["content-disposition"]
        filename = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)[2]
      end

      # fall back to the path
      filename = File.basename(picture) unless filename

      # can't determine filename, skip upload
      if !filename
        puts "Can't determine filename, skipping avatar upload for user #{imported_user.id}"
        return
      end

      # write a tmp file
      file = Tempfile.new(filename, encoding: 'ascii-8bit')
      file.write string_io.read
      file.rewind

      upload = UploadCreator.new(file, filename).create_for(imported_user.id)
    else
      # remove "/assets/uploads" and "/uploads" prefixes from the path
      picture = picture.gsub("/assets/uploads", "")
      picture = picture.gsub("/uploads", "")
      filepath = File.join(ATTACHMENT_DIR, picture)
      filename = File.basename(picture)

      unless File.exist?(filepath)
        puts "Avatar file doesn't exist: #{filepath}"
        return nil
      end

      upload = create_upload(imported_user.id, filepath, filename)
    end

    return if !upload.persisted?

    imported_user.create_user_avatar
    imported_user.user_avatar.update(custom_upload_id: upload.id)
    imported_user.update(uploaded_avatar_id: upload.id)
  ensure
    string_io.close rescue nil
    file.close rescue nil
    file.unlink rescue nil
  end
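
  # Same flow as import_profile_picture, but for the NodeBB cover
  # image, which becomes the user's Discourse profile background.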
  def import_profile_background(old_user, imported_user)
    picture = old_user["cover:url"]

    return if picture.blank?

    # URI#scheme returns nil for internal (relative) URLs
    uri = URI.parse(picture)
    is_external = uri.scheme

    if is_external
      begin
        string_io = open(picture, read_timeout: 5)
      rescue Net::ReadTimeout
        return nil
      end

      if string_io.meta["content-disposition"]
        filename = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)[2]
      end

      filename = File.basename(picture) unless filename

      # can't determine filename, skip upload
      if !filename
        puts "Can't determine filename, skipping background upload for user #{imported_user.id}"
        return
      end

      # write a tmp file
      file = Tempfile.new(filename, encoding: 'ascii-8bit')
      file.write string_io.read
      file.rewind

      upload = UploadCreator.new(file, filename).create_for(imported_user.id)
    else
      # remove "/assets/uploads" and "/uploads" prefixes from the path
      picture = picture.gsub("/assets/uploads", "")
      picture = picture.gsub("/uploads", "")
      filepath = File.join(ATTACHMENT_DIR, picture)
      filename = File.basename(picture)

      unless File.exist?(filepath)
        puts "Background file doesn't exist: #{filepath}"
        return nil
      end

      upload = create_upload(imported_user.id, filepath, filename)
    end

    return if !upload.persisted?

    imported_user.user_profile.upload_profile_background(upload)
  ensure
    string_io.close rescue nil
    file.close rescue nil
    file.unlink rescue nil
  end
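
  # Membership is applied in a separate pass so that users and groups
  # both already exist; the import_users_added custom field lets
  # re-runs skip groups that have been processed.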
  def add_users_to_groups
    puts "", "adding users to groups..."

    groups = @client.groups
    total_count = groups.count
    progress_count = 0
    start_time = Time.now

    groups.each do |group|
      dgroup = find_group_by_import_id(group["name"])

      # do nothing if we migrated this group already
      next if dgroup.custom_fields['import_users_added']

      group_member_ids = group["member_ids"].map { |uid| user_id_from_imported_user_id(uid) }
      group_owner_ids = group["owner_ids"].map { |uid| user_id_from_imported_user_id(uid) }

      # add members
      dgroup.bulk_add(group_member_ids)

      # reload group
      dgroup.reload

      # add owners
      owners = User.find(group_owner_ids)
      owners.each { |owner| dgroup.add_owner(owner) }

      dgroup.custom_fields['import_users_added'] = true
      dgroup.save

      progress_count += 1
      print_status(progress_count, total_count, start_time)
    end
  end
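
  # Topics and posts share one import-id namespace, so ids are prefixed
  # with "t" for topics and "p" for posts before being recorded.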
  def import_topics
    puts "", "importing topics..."

    topic_count = @client.topic_count

    batches(BATCH_SIZE) do |offset|
      topics = @client.topics(offset, BATCH_SIZE)

      break if topics.size < 1

      create_posts(topics, total: topic_count, offset: offset) do |topic|
        # skip deleted topics
        if topic["deleted"] == "1"
          puts "Topic with id #{topic["tid"]} was deleted, skipping"
          next
        end

        topic_id = "t#{topic["tid"]}"
        raw = topic["mainpost"]["content"]

        data = {
          id: topic_id,
          user_id: user_id_from_imported_user_id(topic["uid"]) || Discourse::SYSTEM_USER_ID,
          title: topic["title"],
          category: category_id_from_imported_category_id(topic["cid"]),
          raw: raw,
          created_at: topic["timestamp"],
          views: topic["viewcount"],
          closed: topic["locked"] == "1",
          post_create_action: proc do |p|
            # keep track of this to use in import_posts
            p.custom_fields["import_merged_post_id"] = topic["mainPid"]
            p.save
            @merged_posts_map[topic["mainPid"]] = p.id
          end
        }

        data[:pinned_at] = data[:created_at] if topic["pinned"] == "1"

        data
      end

      topics.each do |import_topic|
        topic = topic_lookup_from_imported_post_id("t#{import_topic["tid"]}")
        Permalink.create(url: "/topic/#{import_topic['slug']}", topic_id: topic[:topic_id]) rescue nil
      end
    end
  end
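
  # Posts whose NodeBB id appears in @merged_posts_map already became
  # the first post of a topic above, so they are skipped here.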
  def import_posts
    puts "", "importing posts..."

    post_count = @client.post_count

    batches(BATCH_SIZE) do |offset|
      posts = @client.posts(offset, BATCH_SIZE)

      break if posts.size < 1

      create_posts(posts, total: post_count, offset: offset) do |post|
        # skip if it's a merged post
        next if @merged_posts_map[post["pid"]]

        # skip if it's deleted
        next if post["deleted"] == "1"

        raw = post["content"]
        post_id = "p#{post["pid"]}"

        next if raw.blank?
        topic = topic_lookup_from_imported_post_id("t#{post["tid"]}")

        unless topic
          puts "Topic with id #{post["tid"]} not found, skipping"
          next
        end

        data = {
          id: post_id,
          user_id: user_id_from_imported_user_id(post["uid"]) || Discourse::SYSTEM_USER_ID,
          topic_id: topic[:topic_id],
          raw: raw,
          created_at: post["timestamp"],
          post_create_action: proc do |p|
            post["upvoted_by"].each do |upvoter_id|
              # a bare User instance carrying only the id is enough for PostActionCreator
              user = User.new
              user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
              PostActionCreator.like(user, p)
            end
          end
        }

        if post['toPid']
          # look up the parent as a topic first
          parent_id = topic_lookup_from_imported_post_id("t#{post['toPid']}").try(:[], :post_number)

          # then as a regular post if the topic lookup missed
          parent_id ||= topic_lookup_from_imported_post_id("p#{post['toPid']}").try(:[], :post_number)

          if parent_id
            data[:reply_to_post_number] = parent_id
          else
            puts "Post with id #{post["toPid"]} not found for reply"
          end
        end

        data
      end
    end
  end
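
  # Rewrites NodeBB-internal /post/:id and /topic/:id links to their
  # new Discourse URLs; the import_post_processing custom field marks
  # posts already handled so re-runs stay cheap.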
  def post_process_posts
    puts "", "Postprocessing posts..."

    current = 0
    max = Post.count
    start_time = Time.now

    Post.find_each do |post|
      begin
        next if post.custom_fields['import_post_processing']

        new_raw = postprocess_post(post)
        if new_raw != post.raw
          post.raw = new_raw
          post.custom_fields['import_post_processing'] = true
          post.save
        end
      ensure
        print_status(current += 1, max, start_time)
      end
    end
  end
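
  # Scans every post for markdown links into NodeBB's upload directory
  # and replaces each one with a proper Discourse upload.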
  def import_attachments
    puts '', 'importing attachments...'

    current = 0
    max = Post.count
    start_time = Time.now

    Post.find_each do |post|
      current += 1
      print_status(current, max, start_time)

      new_raw = post.raw.dup
      new_raw.gsub!(/\[(.*)\]\((\/assets\/uploads\/files\/.*)\)/) do
        image_md = Regexp.last_match[0]
        text, filepath = $1, $2
        filepath = filepath.gsub("/assets/uploads", ATTACHMENT_DIR)

        # if the file exists,
        # upload the attachment and return html for it
        if File.exist?(filepath)
          filename = File.basename(filepath)
          upload = create_upload(post.user_id, filepath, filename)

          html_for_upload(upload, filename)
        else
          puts "File with path #{filepath} not found for post #{post.id}, upload will be broken"
          image_md
        end
      end

      if new_raw != post.raw
        PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from NodeBB')
      end
    end
  end

  def postprocess_post(post)
    raw = post.raw

    # [link to post](/post/:id)
    raw = raw.gsub(/\[(.*)\]\(\/post\/(\d+).*\)/) do
      text, post_id = $1, $2

      if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
        url = topic_lookup[:url]
        "[#{text}](#{url})"
      else
        "/404"
      end
    end

    # [link to topic](/topic/:id)
    raw = raw.gsub(/\[(.*)\]\(\/topic\/(\d+).*\)/) do
      text, topic_id = $1, $2

      if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
        url = topic_lookup[:url]
        "[#{text}](#{url})"
      else
        "/404"
      end
    end

    raw
  end
end
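
# perform (defined in ImportScripts::Base) drives the import: it runs
# the shared setup, calls execute above, then applies the standard
# post-import fixups.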
ImportScripts::NodeBB.new.perform