# discourse/script/import_scripts/nodebb/nodebb.rb

# frozen_string_literal: true
require_relative '../base.rb'
require_relative './redis'
require 'open-uri' # Kernel#open is used below to download remote avatars and backgrounds
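# Imports a NodeBB forum into Discourse by reading NodeBB's database through
# the Redis adapter required above. Discourse import scripts are typically run
# from the Discourse root (the exact invocation may vary per install), e.g.:
#
#   bundle exec ruby script/import_scripts/nodebb/nodebb.rb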
class ImportScripts::NodeBB < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
  # ATTACHMENT_DIR must be an absolute path, not a relative one
ATTACHMENT_DIR = '/Users/orlando/www/orlando/NodeBB/public/uploads'
BATCH_SIZE = 2000
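  # NodeBB supports several storage backends; this script assumes a Redis
  # backend and connects to it directly with the settings below.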
def initialize
super
adapter = NodeBB::Redis
@client = adapter.new(
host: "localhost",
port: "6379",
db: 14
)
load_merged_posts
end
def load_merged_posts
puts 'loading merged posts with topics...'
    # track the NodeBB posts that were merged into their topics
    # (each topic's main post), so import_posts can skip them
    #
    # { nodebb_post_id => discourse_id }
@merged_posts_map = {}
    PostCustomField.where(name: 'import_merged_post_id').pluck(:post_id, :value).each do |post_id, import_id|
      # the plucked value is already the NodeBB post id
      @merged_posts_map[import_id] = Post.find(post_id).topic_id
    end
end
def execute
import_groups
import_categories
import_users
add_users_to_groups
import_topics
import_posts
import_attachments
post_process_posts
end
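  # NodeBB identifies groups by name, so the NodeBB name is used as the
  # import id and the slug becomes the Discourse group name.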
def import_groups
puts '', 'importing groups'
    groups = @client.groups
create_groups(groups) do |group|
{
id: group["name"],
name: group["slug"]
}
end
end
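  # Categories are imported in two passes: top-level categories first, then
  # children, with deeper trees flattened because Discourse only supports one
  # level of subcategories.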
def import_categories
puts "", "importing top level categories..."
category_map = @client.categories
    categories = category_map.values
top_level_categories = categories.select { |c| c["parentCid"] == "0" && c["disabled"] != "1" }
create_categories(top_level_categories) do |category|
{
id: category["cid"],
name: category["name"],
position: category["order"],
description: category["description"],
}
end
puts "", "importing child categories..."
children_categories = categories.select { |c| c["parentCid"] != "0" && c["disabled"] != "1" }
top_level_category_ids = Set.new(top_level_categories.map { |c| c["cid"] })
    # flatten the tree: Discourse only supports two levels of categories
children_categories.each do |cc|
while !top_level_category_ids.include?(cc["parentCid"])
cc["parentCid"] = categories.detect { |c| c["cid"] == cc["parentCid"] }["parentCid"]
end
end
create_categories(children_categories) do |category|
{
id: category["cid"],
name: category["name"],
position: category["order"],
description: category["description"],
parent_category_id: category_id_from_imported_category_id(category["parentCid"])
}
end
categories.each do |source_category|
cid = category_id_from_imported_category_id(source_category['cid'])
Permalink.create(url: "/category/#{source_category['slug']}", category_id: cid) rescue nil
end
end
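  # The original NodeBB password hash is stored in the import_pass custom
  # field so that a password-migration plugin (e.g. discourse-migratepassword)
  # can let users keep their old credentials.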
def import_users
puts "", "importing users"
users = @client.users
user_count = users.count
    # members of the administrators group become Discourse admins
admin_group = @client.group("administrators")
create_users(users, total: user_count) do |user|
username = user["username"]
email = user["email"]
# skip users without username
next unless username
# fake email for users without email
email = fake_email if email.blank?
      # suspend banned users indefinitely
if user["banned"] == "1"
suspended_at = Time.now
suspended_till = Time.now + 100.years
end
{
id: user["uid"],
name: user["fullname"],
username: username,
email: email,
admin: admin_group["member_ids"].include?(user["uid"]),
website: user["website"],
location: user["location"],
suspended_at: suspended_at,
suspended_till: suspended_till,
primary_group_id: group_id_from_imported_group_id(user["groupTitle"]),
created_at: user["joindate"],
bio_raw: user["aboutme"],
active: true,
custom_fields: {
import_pass: user["password"]
},
post_create_action: proc do |u|
import_profile_picture(user, u)
import_profile_background(user, u)
end
}
end
end
def import_profile_picture(old_user, imported_user)
picture = old_user["picture"]
return if picture.blank?
    # URI#scheme is nil for relative (locally-hosted) URLs
uri = URI.parse(picture)
is_external = uri.scheme
if is_external
# download external image
begin
string_io = open(picture, read_timeout: 5)
rescue Net::ReadTimeout
puts "timeout downloading avatar for user #{imported_user.id}"
return nil
end
      # bail out if the download failed
      return unless string_io
      # try to get the filename from the Content-Disposition header
      if string_io.meta["content-disposition"]
        match = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)
        filename = match[2] if match
      end
# try to get it from path
filename = File.basename(picture) unless filename
# can't determine filename, skip upload
if !filename
puts "Can't determine filename, skipping avatar upload for user #{imported_user.id}"
return
end
# write tmp file
file = Tempfile.new(filename, encoding: 'ascii-8bit')
file.write string_io.read
file.rewind
upload = UploadCreator.new(file, filename).create_for(imported_user.id)
else
# remove "/assets/uploads/" and "/uploads" from attachment
picture = picture.gsub("/assets/uploads", "")
picture = picture.gsub("/uploads", "")
filepath = File.join(ATTACHMENT_DIR, picture)
filename = File.basename(picture)
      unless File.exist?(filepath)
puts "Avatar file doesn't exist: #{filepath}"
return nil
end
upload = create_upload(imported_user.id, filepath, filename)
end
return if !upload.persisted?
imported_user.create_user_avatar
imported_user.user_avatar.update(custom_upload_id: upload.id)
imported_user.update(uploaded_avatar_id: upload.id)
ensure
string_io.close rescue nil
file.close rescue nil
    file.unlink rescue nil
end
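  # Mirrors import_profile_picture: external cover images are downloaded,
  # local ones are read from ATTACHMENT_DIR.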
def import_profile_background(old_user, imported_user)
picture = old_user["cover:url"]
return if picture.blank?
    # URI#scheme is nil for relative (locally-hosted) URLs
uri = URI.parse(picture)
is_external = uri.scheme
if is_external
begin
string_io = open(picture, read_timeout: 5)
rescue Net::ReadTimeout
return nil
end
      if string_io.meta["content-disposition"]
        match = string_io.meta["content-disposition"].match(/filename=(\"?)(.+)\1/)
        filename = match[2] if match
      end
filename = File.basename(picture) unless filename
# can't determine filename, skip upload
if !filename
puts "Can't determine filename, skipping background upload for user #{imported_user.id}"
return
end
# write tmp file
file = Tempfile.new(filename, encoding: 'ascii-8bit')
file.write string_io.read
file.rewind
upload = UploadCreator.new(file, filename).create_for(imported_user.id)
else
# remove "/assets/uploads/" and "/uploads" from attachment
picture = picture.gsub("/assets/uploads", "")
picture = picture.gsub("/uploads", "")
filepath = File.join(ATTACHMENT_DIR, picture)
filename = File.basename(picture)
      unless File.exist?(filepath)
puts "Background file doesn't exist: #{filepath}"
return nil
end
upload = create_upload(imported_user.id, filepath, filename)
end
return if !upload.persisted?
imported_user.user_profile.upload_profile_background(upload)
ensure
string_io.close rescue nil
file.close rescue nil
file.unlink rescue nil
end
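  # Memberships are applied in bulk; the import_users_added custom field
  # marks groups that were already processed so reruns can skip them.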
def add_users_to_groups
puts "", "adding users to groups..."
groups = @client.groups
total_count = groups.count
progress_count = 0
start_time = Time.now
    groups.each do |group|
dgroup = find_group_by_import_id(group["name"])
      # skip this group if we already migrated it
next if dgroup.custom_fields['import_users_added']
group_member_ids = group["member_ids"].map { |uid| user_id_from_imported_user_id(uid) }
group_owner_ids = group["owner_ids"].map { |uid| user_id_from_imported_user_id(uid) }
# add members
dgroup.bulk_add(group_member_ids)
# reload group
dgroup.reload
# add owners
owners = User.find(group_owner_ids)
owners.each { |owner| dgroup.add_owner(owner) }
dgroup.custom_fields['import_users_added'] = true
dgroup.save
progress_count += 1
print_status(progress_count, total_count, start_time)
end
end
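  # NodeBB stores a topic's first post as a regular post (mainPid). That post
  # becomes the Discourse topic here, and its id is recorded in
  # @merged_posts_map so import_posts can skip it later.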
def import_topics
puts "", "importing topics..."
topic_count = @client.topic_count
batches(BATCH_SIZE) do |offset|
topics = @client.topics(offset, BATCH_SIZE)
break if topics.size < 1
create_posts(topics, total: topic_count, offset: offset) do |topic|
        # skip deleted topics
if topic["deleted"] == "1"
puts "Topic with id #{topic["tid"]} was deleted, skipping"
next
end
topic_id = "t#{topic["tid"]}"
raw = topic["mainpost"]["content"]
data = {
id: topic_id,
user_id: user_id_from_imported_user_id(topic["uid"]) || Discourse::SYSTEM_USER_ID,
title: topic["title"],
category: category_id_from_imported_category_id(topic["cid"]),
raw: raw,
created_at: topic["timestamp"],
views: topic["viewcount"],
closed: topic["locked"] == "1",
post_create_action: proc do |p|
# keep track of this to use in import_posts
p.custom_fields["import_merged_post_id"] = topic["mainPid"]
p.save
@merged_posts_map[topic["mainPid"]] = p.id
end
}
data[:pinned_at] = data[:created_at] if topic["pinned"] == "1"
data
end
topics.each do |import_topic|
topic = topic_lookup_from_imported_post_id("t#{import_topic["tid"]}")
Permalink.create(url: "/topic/#{import_topic['slug']}", topic_id: topic[:topic_id]) rescue nil
end
end
end
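  # Imports the remaining posts, wiring up replies via toPid and re-creating
  # NodeBB upvotes as Discourse likes.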
def import_posts
puts "", "importing posts..."
post_count = @client.post_count
batches(BATCH_SIZE) do |offset|
posts = @client.posts(offset, BATCH_SIZE)
break if posts.size < 1
create_posts(posts, total: post_count, offset: offset) do |post|
        # skip if this is a topic's main post, it was already imported in import_topics
next if @merged_posts_map[post["pid"]]
# skip if it's deleted
next if post["deleted"] == "1"
raw = post["content"]
post_id = "p#{post["pid"]}"
next if raw.blank?
topic = topic_lookup_from_imported_post_id("t#{post["tid"]}")
unless topic
puts "Topic with id #{post["tid"]} not found, skipping"
next
end
data = {
id: post_id,
user_id: user_id_from_imported_user_id(post["uid"]) || Discourse::SYSTEM_USER_ID,
topic_id: topic[:topic_id],
raw: raw,
created_at: post["timestamp"],
post_create_action: proc do |p|
post["upvoted_by"].each do |upvoter_id|
              # build an unsaved user stub carrying just the imported id for the like action
              user = User.new
user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
PostActionCreator.like(user, p)
end
end
}
if post['toPid']
          # toPid is the parent post: first check whether it was imported as a
          # topic's main post, then fall back to a regular post lookup
          parent_id = topic_lookup_from_imported_post_id("t#{post['toPid']}").try(:[], :post_number)
          parent_id ||= topic_lookup_from_imported_post_id("p#{post['toPid']}").try(:[], :post_number)
if parent_id
data[:reply_to_post_number] = parent_id
else
puts "Post with id #{post["toPid"]} not found for reply"
end
end
data
end
end
end
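  # Rewrites internal NodeBB links (/post/:id and /topic/:id) in post bodies
  # to their new Discourse URLs.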
def post_process_posts
puts "", "Postprocessing posts..."
current = 0
max = Post.count
start_time = Time.now
Post.find_each do |post|
begin
next if post.custom_fields['import_post_processing']
new_raw = postprocess_post(post)
if new_raw != post.raw
post.raw = new_raw
post.custom_fields['import_post_processing'] = true
post.save
end
ensure
print_status(current += 1, max, start_time)
end
end
end
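  # Finds markdown links pointing at /assets/uploads/files/... in post
  # bodies, uploads those files from ATTACHMENT_DIR, and replaces the links
  # with Discourse upload markup.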
def import_attachments
puts '', 'importing attachments...'
current = 0
max = Post.count
start_time = Time.now
Post.find_each do |post|
current += 1
print_status(current, max, start_time)
new_raw = post.raw.dup
new_raw.gsub!(/\[(.*)\]\((\/assets\/uploads\/files\/.*)\)/) do
image_md = Regexp.last_match[0]
text, filepath = $1, $2
filepath = filepath.gsub("/assets/uploads", ATTACHMENT_DIR)
# if file exists
# upload attachment and return html for it
        if File.exist?(filepath)
filename = File.basename(filepath)
upload = create_upload(post.user_id, filepath, filename)
html_for_upload(upload, filename)
else
puts "File with path #{filepath} not found for post #{post.id}, upload will be broken"
image_md
end
end
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from NodeBB')
end
end
end
def postprocess_post(post)
raw = post.raw
# [link to post](/post/:id)
raw = raw.gsub(/\[(.*)\]\(\/post\/(\d+).*\)/) do
text, post_id = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
end
end
# [link to topic](/topic/:id)
raw = raw.gsub(/\[(.*)\]\(\/topic\/(\d+).*\)/) do
text, topic_id = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
end
end
raw
end
end
ImportScripts::NodeBB.new.perform