mirror of
https://github.com/discourse/discourse.git
synced 2025-01-18 21:02:48 +08:00
nodebb importer (#5842)
This commit is contained in:
parent
0800098f1a
commit
3287f47f07
527
script/import_scripts/nodebb/nodebb.rb
Normal file
527
script/import_scripts/nodebb/nodebb.rb
Normal file
|
@ -0,0 +1,527 @@
|
|||
require_relative '../base.rb'
|
||||
require_relative './redis'
|
||||
|
||||
class ImportScripts::NodeBB < ImportScripts::Base
|
||||
# CHANGE THESE BEFORE RUNNING THE IMPORTER
|
||||
# ATTACHMENT_DIR needs to be absolute, not relative path
|
||||
ATTACHMENT_DIR = '/Users/orlando/www/orlando/NodeBB/public/uploads'
|
||||
BATCH_SIZE = 2000
|
||||
|
||||
def initialize
|
||||
super
|
||||
|
||||
adapter = NodeBB::Redis
|
||||
|
||||
@client = adapter.new(
|
||||
host: "localhost",
|
||||
port: "6379",
|
||||
db: 0
|
||||
)
|
||||
|
||||
load_merged_posts
|
||||
end
|
||||
|
||||
def load_merged_posts
|
||||
puts 'loading merged posts with topics...'
|
||||
|
||||
# we keep here the posts that were merged
|
||||
# as topics
|
||||
#
|
||||
# { post_id: discourse_post_id }
|
||||
@merged_posts_map = {}
|
||||
|
||||
PostCustomField.where(name: 'import_merged_post_id').pluck(:post_id, :value).each do |post_id, import_id|
|
||||
post = Post.find(post_id)
|
||||
topic_id = post.topic_id
|
||||
nodebb_post_id = post.custom_fields['import_merged_post_id']
|
||||
|
||||
@merged_posts_map[nodebb_post_id] = topic_id
|
||||
end
|
||||
end
|
||||
|
||||
def execute
|
||||
import_groups
|
||||
import_categories
|
||||
import_users
|
||||
add_users_to_groups
|
||||
import_topics
|
||||
import_posts
|
||||
import_attachments
|
||||
post_process_posts
|
||||
end
|
||||
|
||||
def import_groups
|
||||
puts '', 'importing groups'
|
||||
|
||||
groups = @client.groups
|
||||
total_count = groups.count
|
||||
progress_count = 0
|
||||
start_time = Time.now
|
||||
|
||||
create_groups(groups) do |group|
|
||||
{
|
||||
id: group["name"],
|
||||
name: group["slug"]
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def import_categories
|
||||
puts "", "importing top level categories..."
|
||||
|
||||
category_map = @client.categories
|
||||
category_ids = category_map.keys
|
||||
categories = category_map.values
|
||||
|
||||
top_level_categories = categories.select { |c| c["parentCid"] == "0" }
|
||||
|
||||
create_categories(top_level_categories) do |category|
|
||||
{
|
||||
id: category["cid"],
|
||||
name: category["name"],
|
||||
position: category["order"],
|
||||
description: category["description"],
|
||||
}
|
||||
end
|
||||
|
||||
puts "", "importing child categories..."
|
||||
|
||||
children_categories = categories.select { |c| c["parentCid"] != "0" }
|
||||
top_level_category_ids = Set.new(top_level_categories.map { |c| c["cid"] })
|
||||
|
||||
# cut down the tree to only 2 levels of categories
|
||||
children_categories.each do |cc|
|
||||
while !top_level_category_ids.include?(cc["parentCid"])
|
||||
cc["parentCid"] = categories.detect { |c| c["cid"] == cc["parentCid"] }["parentCid"]
|
||||
end
|
||||
end
|
||||
|
||||
create_categories(children_categories) do |category|
|
||||
{
|
||||
id: category["cid"],
|
||||
name: category["name"],
|
||||
position: category["order"],
|
||||
description: category["description"],
|
||||
parent_category_id: category_id_from_imported_category_id(category["parentCid"])
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def import_users
|
||||
puts "", "importing users"
|
||||
|
||||
users = @client.users
|
||||
user_count = users.count
|
||||
|
||||
# we use this group to grant admin to users
|
||||
admin_group = @client.group("administrators")
|
||||
|
||||
create_users(users, total: user_count) do |user|
|
||||
username = user["username"]
|
||||
email = user["email"]
|
||||
|
||||
# skip users without username
|
||||
next unless username
|
||||
|
||||
# fake email for users without email
|
||||
email = fake_email if email.blank?
|
||||
|
||||
# use user.suspended to handle banned users
|
||||
if user["banned"] == "1"
|
||||
suspended_at = Time.now
|
||||
suspended_till = Time.now + 100.years
|
||||
end
|
||||
|
||||
{
|
||||
id: user["uid"],
|
||||
name: user["fullname"],
|
||||
username: username,
|
||||
email: email,
|
||||
admin: admin_group["member_ids"].include?(user["uid"]),
|
||||
website: user["website"],
|
||||
location: user["location"],
|
||||
suspended_at: suspended_at,
|
||||
suspended_till: suspended_till,
|
||||
primary_group_id: group_id_from_imported_group_id(user["groupTitle"]),
|
||||
created_at: user["joindate"],
|
||||
custom_fields: {
|
||||
import_pass: user["password"]
|
||||
},
|
||||
post_create_action: proc do |u|
|
||||
import_profile_picture(user, u)
|
||||
import_profile_background(user, u)
|
||||
end
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def import_profile_picture(old_user, imported_user)
|
||||
picture = old_user["picture"]
|
||||
|
||||
return if picture.blank?
|
||||
|
||||
# URI.scheme returns nil for internal URLs
|
||||
uri = URI.parse(picture)
|
||||
is_external = uri.scheme
|
||||
|
||||
if is_external
|
||||
# download external image
|
||||
begin
|
||||
string_io = open(picture, read_timeout: 5)
|
||||
rescue Net::ReadTimeout
|
||||
puts "timeout downloading avatar for user #{imported_user.id}"
|
||||
return nil
|
||||
end
|
||||
|
||||
# continue if download failed
|
||||
return unless string_io
|
||||
|
||||
# try to get filename from headers
|
||||
if string_io.meta["content-disposition"]
|
||||
filename = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)[2]
|
||||
end
|
||||
|
||||
# try to get it from path
|
||||
filename = File.basename(picture) unless filename
|
||||
|
||||
# can't determine filename, skip upload
|
||||
if !filename
|
||||
puts "Can't determine filename, skipping avatar upload for user #{imported_user.id}"
|
||||
return
|
||||
end
|
||||
|
||||
# write tmp file
|
||||
file = Tempfile.new(filename, encoding: 'ascii-8bit')
|
||||
file.write string_io.read
|
||||
file.rewind
|
||||
|
||||
upload = UploadCreator.new(file, filename).create_for(imported_user.id)
|
||||
else
|
||||
# remove "/assets/uploads/" from attachment
|
||||
picture = picture.gsub("/assets/uploads", "")
|
||||
filepath = File.join(ATTACHMENT_DIR, picture)
|
||||
filename = File.basename(picture)
|
||||
|
||||
unless File.exists?(filepath)
|
||||
puts "Avatar file doesn't exist: #{filename}"
|
||||
return nil
|
||||
end
|
||||
|
||||
upload = create_upload(imported_user.id, filepath, filename)
|
||||
end
|
||||
|
||||
return if !upload.persisted?
|
||||
|
||||
imported_user.create_user_avatar
|
||||
imported_user.user_avatar.update(custom_upload_id: upload.id)
|
||||
imported_user.update(uploaded_avatar_id: upload.id)
|
||||
ensure
|
||||
string_io.close rescue nil
|
||||
file.close rescue nil
|
||||
file.unlind rescue nil
|
||||
end
|
||||
|
||||
def import_profile_background(old_user, imported_user)
|
||||
picture = old_user["cover:url"]
|
||||
|
||||
return if picture.blank?
|
||||
|
||||
# URI returns nil for invalid URLs
|
||||
uri = URI.parse(picture)
|
||||
is_external = uri.scheme
|
||||
|
||||
if is_external
|
||||
begin
|
||||
string_io = open(picture, read_timeout: 5)
|
||||
rescue Net::ReadTimeout
|
||||
return nil
|
||||
end
|
||||
|
||||
if string_io.meta["content-disposition"]
|
||||
filename = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)[2]
|
||||
end
|
||||
|
||||
filename = File.basename(picture) unless filename
|
||||
|
||||
# can't determine filename, skip upload
|
||||
if !filename
|
||||
puts "Can't determine filename, skipping background upload for user #{imported_user.id}"
|
||||
return
|
||||
end
|
||||
|
||||
# write tmp file
|
||||
file = Tempfile.new(filename, encoding: 'ascii-8bit')
|
||||
file.write string_io.read
|
||||
file.rewind
|
||||
|
||||
upload = UploadCreator.new(file, filename).create_for(imported_user.id)
|
||||
else
|
||||
# remove "/assets/uploads/" from attachment
|
||||
picture = picture.gsub("/assets/uploads", "")
|
||||
filepath = File.join(ATTACHMENT_DIR, picture)
|
||||
filename = File.basename(picture)
|
||||
|
||||
unless File.exists?(filepath)
|
||||
puts "Background file doesn't exist: #{filename}"
|
||||
return nil
|
||||
end
|
||||
|
||||
upload = create_upload(imported_user.id, filepath, filename)
|
||||
end
|
||||
|
||||
return if !upload.persisted?
|
||||
|
||||
imported_user.user_profile.update(profile_background: upload.url)
|
||||
ensure
|
||||
string_io.close rescue nil
|
||||
file.close rescue nil
|
||||
file.unlink rescue nil
|
||||
end
|
||||
|
||||
def add_users_to_groups
|
||||
puts "", "adding users to groups..."
|
||||
|
||||
groups = @client.groups
|
||||
total_count = groups.count
|
||||
progress_count = 0
|
||||
start_time = Time.now
|
||||
|
||||
@client.groups.each do |group|
|
||||
dgroup = find_group_by_import_id(group["name"])
|
||||
|
||||
# do thing if we migrated this group already
|
||||
next if dgroup.custom_fields['import_users_added']
|
||||
|
||||
group_member_ids = group["member_ids"].map { |uid| user_id_from_imported_user_id(uid) }
|
||||
group_owner_ids = group["owner_ids"].map { |uid| user_id_from_imported_user_id(uid) }
|
||||
|
||||
# add members
|
||||
dgroup.bulk_add(group_member_ids)
|
||||
|
||||
# reload group
|
||||
dgroup.reload
|
||||
|
||||
# add owners
|
||||
owners = User.find(group_owner_ids)
|
||||
owners.each { |owner| dgroup.add_owner(owner) }
|
||||
|
||||
dgroup.custom_fields['import_users_added'] = true
|
||||
dgroup.save
|
||||
|
||||
progress_count += 1
|
||||
print_status(progress_count, total_count, start_time)
|
||||
end
|
||||
end
|
||||
|
||||
def import_topics
|
||||
puts "", "importing topics..."
|
||||
|
||||
topic_count = @client.topic_count
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
topics = @client.topics(offset, BATCH_SIZE)
|
||||
|
||||
break if topics.size < 1
|
||||
|
||||
create_posts(topics, total: topic_count, offset: offset) do |topic|
|
||||
# skip if is deleted
|
||||
if topic["deleted"] == "1"
|
||||
puts "Topic with id #{topic["tid"]} was deleted, skipping"
|
||||
next
|
||||
end
|
||||
|
||||
topic_id = "t#{topic["tid"]}"
|
||||
raw = topic["mainpost"]["content"]
|
||||
|
||||
data = {
|
||||
id: topic_id,
|
||||
user_id: user_id_from_imported_user_id(topic["uid"]) || Discourse::SYSTEM_USER_ID,
|
||||
title: topic["title"],
|
||||
category: category_id_from_imported_category_id(topic["cid"]),
|
||||
raw: raw,
|
||||
created_at: topic["timestamp"],
|
||||
views: topic["viewcount"],
|
||||
closed: topic["locked"] == "1",
|
||||
post_create_action: proc do |p|
|
||||
# keep track of this to use in import_posts
|
||||
p.custom_fields["import_merged_post_id"] = topic["mainPid"]
|
||||
p.save
|
||||
@merged_posts_map[topic["mainPid"]] = p.id
|
||||
end
|
||||
}
|
||||
|
||||
data[:pinned_at] = data[:created_at] if topic["pinned"] == "1"
|
||||
|
||||
data
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def import_posts
|
||||
puts "", "importing posts..."
|
||||
|
||||
post_count = @client.post_count
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
posts = @client.posts(offset, BATCH_SIZE)
|
||||
|
||||
break if posts.size < 1
|
||||
|
||||
create_posts(posts, total: post_count, offset: offset) do |post|
|
||||
# skip if it's merged_post
|
||||
next if @merged_posts_map[post["pid"]]
|
||||
|
||||
# skip if it's deleted
|
||||
next if post["deleted"] == "1"
|
||||
|
||||
raw = post["content"]
|
||||
post_id = "p#{post["pid"]}"
|
||||
|
||||
next if raw.blank?
|
||||
topic = topic_lookup_from_imported_post_id("t#{post["tid"]}")
|
||||
|
||||
unless topic
|
||||
puts "Topic with id #{post["tid"]} not found, skipping"
|
||||
next
|
||||
end
|
||||
|
||||
data = {
|
||||
id: post_id,
|
||||
user_id: user_id_from_imported_user_id(post["uid"]) || Discourse::SYSTEM_USER_ID,
|
||||
topic_id: topic[:topic_id],
|
||||
raw: raw,
|
||||
created_at: post["timestamp"],
|
||||
post_create_action: proc do |p|
|
||||
post["upvoted_by"].each do |upvoter_id|
|
||||
user = User.new
|
||||
user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
|
||||
|
||||
begin
|
||||
PostAction.act(user, p, PostActionType.types[:like])
|
||||
rescue PostAction::AlreadyActed
|
||||
end
|
||||
end
|
||||
end
|
||||
}
|
||||
|
||||
if post['toPid']
|
||||
# Look reply to topic
|
||||
parent_id = topic_lookup_from_imported_post_id("t#{post['toPid']}").try(:[], :post_number)
|
||||
|
||||
# Look reply post if topic is missing
|
||||
parent_id ||= topic_lookup_from_imported_post_id("p#{post['toPid']}").try(:[], :post_number)
|
||||
|
||||
if parent_id
|
||||
data[:reply_to_post_number] = parent_id
|
||||
else
|
||||
puts "Post with id #{post["toPid"]} not found for reply"
|
||||
end
|
||||
end
|
||||
|
||||
data
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def post_process_posts
|
||||
puts "", "Postprocessing posts..."
|
||||
|
||||
current = 0
|
||||
max = Post.count
|
||||
start_time = Time.now
|
||||
|
||||
Post.find_each do |post|
|
||||
begin
|
||||
next if post.custom_fields['import_post_processing']
|
||||
|
||||
new_raw = postprocess_post(post)
|
||||
if new_raw != post.raw
|
||||
post.raw = new_raw
|
||||
post.custom_fields['import_post_processing'] = true
|
||||
post.save
|
||||
end
|
||||
ensure
|
||||
print_status(current += 1, max, start_time)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def import_attachments
|
||||
puts '', 'importing attachments...'
|
||||
|
||||
current = 0
|
||||
max = Post.count
|
||||
start_time = Time.now
|
||||
|
||||
Post.find_each do |post|
|
||||
current += 1
|
||||
print_status(current, max, start_time)
|
||||
|
||||
new_raw = post.raw.dup
|
||||
new_raw.gsub!(/\[(.*)\]\((\/assets\/uploads\/files\/.*)\)/) do
|
||||
image_md = Regexp.last_match[0]
|
||||
text, filepath = $1, $2
|
||||
filepath = filepath.gsub("/assets/uploads", ATTACHMENT_DIR)
|
||||
|
||||
# if file exists
|
||||
# upload attachment and return html for it
|
||||
if File.exists?(filepath)
|
||||
filename = File.basename(filepath)
|
||||
upload = create_upload(post.user_id, filepath, filename)
|
||||
|
||||
html_for_upload(upload, filename)
|
||||
else
|
||||
puts "File with path #{filepath} not found for post #{post.id}, upload will be broken"
|
||||
image_md
|
||||
end
|
||||
end
|
||||
|
||||
if new_raw != post.raw
|
||||
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from NodeBB')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def postprocess_post(post)
|
||||
raw = post.raw
|
||||
|
||||
# [link to post](/post/:id)
|
||||
raw = raw.gsub(/\[(.*)\]\(\/post\/(\d+).*\)/) do
|
||||
text, post_id = $1, $2
|
||||
|
||||
if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
|
||||
url = topic_lookup[:url]
|
||||
"[#{text}](#{url})"
|
||||
else
|
||||
"/404"
|
||||
end
|
||||
end
|
||||
|
||||
# [link to topic](/topic/:id)
|
||||
raw = raw.gsub(/\[(.*)\]\(\/topic\/(\d+).*\)/) do
|
||||
text, topic_id = $1, $2
|
||||
|
||||
if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
|
||||
url = topic_lookup[:url]
|
||||
"[#{text}](#{url})"
|
||||
else
|
||||
"/404"
|
||||
end
|
||||
end
|
||||
|
||||
# @username with dash to underscore
|
||||
raw = raw.gsub(/@([a-zA-Z0-9-]+)/) do
|
||||
username = $1
|
||||
|
||||
username.gsub('-', '_')
|
||||
end
|
||||
|
||||
raw
|
||||
end
|
||||
|
||||
def fake_email
|
||||
SecureRandom.hex << "@domain.com"
|
||||
end
|
||||
end
|
||||
|
||||
ImportScripts::NodeBB.new.perform
|
109
script/import_scripts/nodebb/redis.rb
Normal file
109
script/import_scripts/nodebb/redis.rb
Normal file
|
@ -0,0 +1,109 @@
|
|||
require 'redis'
|
||||
|
||||
module NodeBB
|
||||
class Redis
|
||||
attr_reader :redis
|
||||
|
||||
def initialize(params)
|
||||
@redis = ::Redis.new(params)
|
||||
end
|
||||
|
||||
def groups
|
||||
group_keys = redis.zrange('groups:visible:createtime', 0, -1)
|
||||
|
||||
group_keys.map { |group_key| group(group_key) }
|
||||
end
|
||||
|
||||
def group(id)
|
||||
group = redis.hgetall("group:#{id}")
|
||||
group["createtime"] = timestamp_to_date(group["createtime"])
|
||||
group["member_ids"] = redis.zrange("group:#{id}:members", 0, -1)
|
||||
group["owner_ids"] = redis.smembers("group:#{id}:owners")
|
||||
|
||||
group
|
||||
end
|
||||
|
||||
def users
|
||||
user_keys = redis.zrange('users:joindate', 0, -1)
|
||||
|
||||
user_keys.map { |user_key| user(user_key) }
|
||||
end
|
||||
|
||||
def user(id)
|
||||
user = redis.hgetall("user:#{id}")
|
||||
|
||||
user["joindate"] = timestamp_to_date(user["joindate"])
|
||||
user["lastonline"] = timestamp_to_date(user["lastonline"])
|
||||
|
||||
user
|
||||
end
|
||||
|
||||
def categories
|
||||
category_keys = redis.zrange('categories:cid', 0, -1)
|
||||
|
||||
{}.tap do |categories|
|
||||
category_keys.each do |category_key|
|
||||
category = redis.hgetall("category:#{category_key}")
|
||||
|
||||
categories[category['cid']] = category
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def topics(offset = 0, page_size = 2000)
|
||||
# redis get keys inclusive
|
||||
# so we move the offset a bit to continue in the next item
|
||||
offset = offset + 1 unless offset == 0
|
||||
from = offset
|
||||
to = page_size + offset
|
||||
|
||||
topic_keys = redis.zrange('topics:tid', from, to)
|
||||
|
||||
topic_keys.map { |topic_key| topic(topic_key) }
|
||||
end
|
||||
|
||||
def topic(id)
|
||||
topic = redis.hgetall("topic:#{id}")
|
||||
|
||||
topic["lastposttime"] = timestamp_to_date(topic["lastposttime"])
|
||||
topic["timestamp"] = timestamp_to_date(topic["timestamp"])
|
||||
topic["mainpost"] = post(topic["mainPid"])
|
||||
|
||||
topic
|
||||
end
|
||||
|
||||
def topic_count
|
||||
redis.zcard('topics:tid')
|
||||
end
|
||||
|
||||
def posts(offset = 0, page_size = 2000)
|
||||
# redis get keys inclusive
|
||||
# so we move the offset a bit to continue in the next item
|
||||
offset = offset + 1 unless offset == 0
|
||||
from = offset
|
||||
to = page_size + offset
|
||||
|
||||
post_keys = redis.zrange('posts:pid', from, to)
|
||||
|
||||
post_keys.map { |post_key| post(post_key) }
|
||||
end
|
||||
|
||||
def post(id)
|
||||
post = redis.hgetall("post:#{id}")
|
||||
post["timestamp"] = timestamp_to_date(post["timestamp"])
|
||||
post["upvoted_by"] = redis.smembers("pid:#{id}:upvote")
|
||||
|
||||
post
|
||||
end
|
||||
|
||||
def post_count
|
||||
redis.zcard('posts:pid')
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def timestamp_to_date(createtime)
|
||||
Time.at(createtime[0..-4].to_i).utc if createtime
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue
Block a user