mirror of
https://github.com/discourse/discourse.git
synced 2024-12-05 07:53:42 +08:00
504 lines
15 KiB
Ruby
504 lines
15 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# AnswerHub Importer
|
|
#
|
|
# Based on having access to a mysql dump.
|
|
# Pass in the ENV variables listed below before running the script.
|
|
|
|
require_relative "base"
|
|
require "mysql2"
|
|
require "open-uri"
|
|
|
|
class ImportScripts::AnswerHub < ImportScripts::Base
|
|
DB_NAME ||= ENV["DB_NAME"] || "answerhub"
|
|
DB_PASS ||= ENV["DB_PASS"] || "answerhub"
|
|
DB_USER ||= ENV["DB_USER"] || "answerhub"
|
|
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "network1"
|
|
BATCH_SIZE ||= ENV["BATCH_SIZE"].to_i || 1000
|
|
ATTACHMENT_DIR = ENV["ATTACHMENT_DIR"] || ""
|
|
PROCESS_UPLOADS = ENV["PROCESS_UPLOADS"].to_i || 0
|
|
ANSWERHUB_DOMAIN = ENV["ANSWERHUB_DOMAIN"]
|
|
AVATAR_DIR = ENV["AVATAR_DIR"] || ""
|
|
SITE_ID = ENV["SITE_ID"].to_i || 0
|
|
CATEGORY_MAP_FROM = ENV["CATEGORY_MAP_FROM"].to_i || 0
|
|
CATEGORY_MAP_TO = ENV["CATEGORY_MAP_TO"].to_i || 0
|
|
SCRAPE_AVATARS = ENV["SCRAPE_AVATARS"].to_i || 0
|
|
|
|
def initialize
|
|
super
|
|
@client =
|
|
Mysql2::Client.new(host: "localhost", username: DB_USER, password: DB_PASS, database: DB_NAME)
|
|
@skip_updates = true
|
|
SiteSetting.tagging_enabled = true
|
|
SiteSetting.max_tags_per_topic = 10
|
|
end
|
|
|
|
def execute
|
|
puts "Now starting the AnswerHub Import"
|
|
puts "DB Name: #{DB_NAME}"
|
|
puts "Table Prefix: #{TABLE_PREFIX}"
|
|
puts
|
|
import_users
|
|
import_categories
|
|
import_topics
|
|
import_posts
|
|
import_groups
|
|
add_users_to_groups
|
|
add_moderators
|
|
add_admins
|
|
import_avatars
|
|
create_permalinks
|
|
end
|
|
|
|
def import_users
|
|
puts "", "creating users"
|
|
|
|
query =
|
|
"SELECT count(*) count
|
|
FROM #{TABLE_PREFIX}_authoritables
|
|
WHERE c_type = 'user'
|
|
AND c_active = 1
|
|
AND c_system <> 1;"
|
|
total_count = @client.query(query).first["count"]
|
|
puts "Total count: #{total_count}"
|
|
@last_user_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
query =
|
|
"SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description
|
|
FROM #{TABLE_PREFIX}_authoritables
|
|
WHERE c_type = 'user'
|
|
AND c_active = 1
|
|
AND c_system <> 1
|
|
AND c_id > #{@last_user_id}
|
|
LIMIT #{BATCH_SIZE};"
|
|
|
|
results = @client.query(query)
|
|
break if results.size < 1
|
|
@last_user_id = results.to_a.last["c_id"]
|
|
|
|
create_users(results, total: total_count, offset: offset) do |user|
|
|
# puts user['c_id'].to_s + ' ' + user['c_name']
|
|
next if @lookup.user_id_from_imported_user_id(user["c_id"])
|
|
{
|
|
id: user["c_id"],
|
|
email: "#{SecureRandom.hex}@invalid.invalid",
|
|
username: user["c_name"],
|
|
created_at: user["c_creation_date"],
|
|
bio_raw: user["c_description"],
|
|
last_seen_at: user["c_last_seen"],
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_categories
|
|
puts "", "importing categories..."
|
|
|
|
# Import parent categories first
|
|
query =
|
|
"SELECT c_id, c_name, c_plug, c_parent
|
|
FROM containers
|
|
WHERE c_type = 'space'
|
|
AND c_active = 1
|
|
AND c_parent = 7 OR c_parent IS NULL"
|
|
results = @client.query(query)
|
|
|
|
create_categories(results) do |c|
|
|
{ id: c["c_id"], name: c["c_name"], parent_category_id: check_parent_id(c["c_parent"]) }
|
|
end
|
|
|
|
# Import sub-categories
|
|
query =
|
|
"SELECT c_id, c_name, c_plug, c_parent
|
|
FROM containers
|
|
WHERE c_type = 'space'
|
|
AND c_active = 1
|
|
AND c_parent != 7 AND c_parent IS NOT NULL"
|
|
results = @client.query(query)
|
|
|
|
create_categories(results) do |c|
|
|
# puts c.inspect
|
|
{
|
|
id: c["c_id"],
|
|
name: c["c_name"],
|
|
parent_category_id: category_id_from_imported_category_id(check_parent_id(c["c_parent"])),
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_topics
|
|
puts "", "importing topics..."
|
|
|
|
count_query =
|
|
"SELECT count(*) count
|
|
FROM #{TABLE_PREFIX}_nodes
|
|
WHERE c_visibility <> 'deleted'
|
|
AND (c_type = 'question'
|
|
OR c_type = 'kbentry');"
|
|
total_count = @client.query(count_query).first["count"]
|
|
|
|
@last_topic_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
# Let's start with just question types
|
|
query =
|
|
"SELECT *
|
|
FROM #{TABLE_PREFIX}_nodes
|
|
WHERE c_id > #{@last_topic_id}
|
|
AND c_visibility <> 'deleted'
|
|
AND (c_type = 'question'
|
|
OR c_type = 'kbentry')
|
|
ORDER BY c_id ASC
|
|
LIMIT #{BATCH_SIZE};"
|
|
topics = @client.query(query)
|
|
|
|
break if topics.size < 1
|
|
@last_topic_id = topics.to_a.last["c_id"]
|
|
|
|
create_posts(topics, total: total_count, offset: offset) do |t|
|
|
user_id = user_id_from_imported_user_id(t["c_author"]) || Discourse::SYSTEM_USER_ID
|
|
body = process_mentions(t["c_body"])
|
|
body = process_uploads(body, user_id) if PROCESS_UPLOADS == 1
|
|
markdown_body = HtmlToMarkdown.new(body).to_markdown
|
|
{
|
|
id: t["c_id"],
|
|
user_id: user_id,
|
|
title: t["c_title"],
|
|
category: category_id_from_imported_category_id(t["c_primaryContainer"]),
|
|
raw: markdown_body,
|
|
created_at: t["c_creation_date"],
|
|
post_create_action:
|
|
proc do |post|
|
|
tag_names = t["c_topic_names"].split(",")
|
|
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
|
|
end,
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_posts
|
|
puts "", "importing posts..."
|
|
|
|
count_query =
|
|
"SELECT count(*) count
|
|
FROM #{TABLE_PREFIX}_nodes
|
|
WHERE c_visibility <> 'deleted'
|
|
AND (c_type = 'answer'
|
|
OR c_type = 'comment'
|
|
OR c_type = 'kbentry');"
|
|
total_count = @client.query(count_query).first["count"]
|
|
|
|
@last_post_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
query =
|
|
"SELECT *
|
|
FROM #{TABLE_PREFIX}_nodes
|
|
WHERE c_id > #{@last_post_id}
|
|
AND c_visibility <> 'deleted'
|
|
AND (c_type = 'answer'
|
|
OR c_type = 'comment'
|
|
OR c_type = 'kbentry')
|
|
ORDER BY c_id ASC
|
|
LIMIT #{BATCH_SIZE};"
|
|
posts = @client.query(query)
|
|
next if all_records_exist? :posts, posts.map { |p| p["c_id"] }
|
|
|
|
break if posts.size < 1
|
|
@last_post_id = posts.to_a.last["c_id"]
|
|
|
|
create_posts(posts, total: total_count, offset: offset) do |p|
|
|
t = topic_lookup_from_imported_post_id(p["c_originalParent"])
|
|
next unless t
|
|
|
|
reply_to_post_id = post_id_from_imported_post_id(p["c_parent"])
|
|
reply_to_post = reply_to_post_id.present? ? Post.find(reply_to_post_id) : nil
|
|
reply_to_post_number = reply_to_post.present? ? reply_to_post.post_number : nil
|
|
|
|
user_id = user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID
|
|
|
|
body = process_mentions(p["c_body"])
|
|
body = process_uploads(body, user_id) if PROCESS_UPLOADS == 1
|
|
|
|
markdown_body = HtmlToMarkdown.new(body).to_markdown
|
|
{
|
|
id: p["c_id"],
|
|
user_id: user_id,
|
|
topic_id: t[:topic_id],
|
|
reply_to_post_number: reply_to_post_number,
|
|
raw: markdown_body,
|
|
created_at: p["c_creation_date"],
|
|
post_create_action:
|
|
proc do |post_info|
|
|
begin
|
|
if p["c_type"] == "answer" && p["c_marked"] == 1
|
|
post = Post.find(post_info[:id])
|
|
if post
|
|
user_id =
|
|
user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID
|
|
current_user = User.find(user_id)
|
|
solved = DiscourseSolved.accept_answer!(post, current_user)
|
|
# puts "SOLVED: #{solved}"
|
|
end
|
|
end
|
|
rescue ActiveRecord::RecordInvalid
|
|
puts "SOLVED: Skipped post_id: #{post.id} because invalid"
|
|
end
|
|
end,
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_groups
|
|
puts "", "importing groups..."
|
|
|
|
query =
|
|
"SELECT c_id, c_name
|
|
FROM network6_authoritables
|
|
WHERE c_type='group'
|
|
AND c_id > 6;" # Ignore Anonymous, Users, Moderators, etc.
|
|
groups = @client.query(query)
|
|
|
|
create_groups(groups) do |group|
|
|
{ id: group["c_id"], name: group["c_name"], visibility_level: 1 }
|
|
end
|
|
end
|
|
|
|
def add_users_to_groups
|
|
puts "", "adding users to groups..."
|
|
|
|
query =
|
|
"SELECT c_id, c_name
|
|
FROM network6_authoritables
|
|
WHERE c_type='group'
|
|
AND c_id > 6;" # Ignore Anonymous, Users, Moderators, etc.
|
|
groups = @client.query(query)
|
|
|
|
members_query =
|
|
"SELECT *
|
|
FROM network6_authoritable_groups;"
|
|
group_members = @client.query(members_query)
|
|
|
|
total_count = groups.count
|
|
progress_count = 0
|
|
start_time = Time.now
|
|
|
|
group_members.map
|
|
groups.each do |group|
|
|
dgroup = find_group_by_import_id(group["c_id"])
|
|
|
|
next if dgroup.custom_fields["import_users_added"]
|
|
|
|
group_member_ids =
|
|
group_members
|
|
.map do |m|
|
|
user_id_from_imported_user_id(m["c_members"]) if m["c_groups"] == group["c_id"]
|
|
end
|
|
.compact
|
|
|
|
# add members
|
|
dgroup.bulk_add(group_member_ids)
|
|
|
|
# reload group
|
|
dgroup.reload
|
|
|
|
dgroup.custom_fields["import_users_added"] = true
|
|
dgroup.save
|
|
|
|
progress_count += 1
|
|
print_status(progress_count, total_count, start_time)
|
|
end
|
|
end
|
|
|
|
def add_moderators
|
|
puts "", "adding moderators..."
|
|
|
|
query =
|
|
"SELECT *
|
|
FROM network6_authoritable_groups
|
|
WHERE c_groups = 4;"
|
|
moderators = @client.query(query)
|
|
|
|
moderator_ids = moderators.map { |m| user_id_from_imported_user_id(m["c_members"]) }.compact
|
|
|
|
moderator_ids.each do |id|
|
|
user = User.find(id)
|
|
user.grant_moderation!
|
|
end
|
|
end
|
|
|
|
def add_admins
|
|
puts "", "adding admins..."
|
|
|
|
query =
|
|
"SELECT *
|
|
FROM network6_authoritable_groups
|
|
WHERE c_groups = 5 OR c_groups = 6;" # Super Users, Network Administrators
|
|
admins = @client.query(query)
|
|
|
|
admin_ids = admins.map { |a| user_id_from_imported_user_id(a["c_members"]) }.compact
|
|
|
|
admin_ids.each do |id|
|
|
user = User.find(id)
|
|
user.grant_admin!
|
|
end
|
|
end
|
|
|
|
def import_avatars
|
|
puts "", "importing user avatars"
|
|
query =
|
|
"SELECT *
|
|
FROM network6_user_preferences
|
|
WHERE c_key = 'avatarImage'"
|
|
avatars = @client.query(query)
|
|
|
|
avatars.each do |a|
|
|
begin
|
|
user_id = user_id_from_imported_user_id(a["c_user"])
|
|
user = User.find(user_id)
|
|
if user
|
|
filename = "avatar-#{user_id}.png"
|
|
path = File.join(AVATAR_DIR, filename)
|
|
next if !File.exist?(path)
|
|
|
|
# Scrape Avatars - Avatars are saved in the db, but it might be easier to just scrape them
|
|
if SCRAPE_AVATARS == 1
|
|
File.open(path, "wb") do |f|
|
|
f << open(
|
|
"https://#{ANSWERHUB_DOMAIN}/forums/users/#{a["c_user"]}/photo/view.html?s=240",
|
|
).read
|
|
end
|
|
end
|
|
|
|
upload = @uploader.create_upload(user.id, path, filename)
|
|
|
|
if upload.persisted?
|
|
user.import_mode = false
|
|
user.create_user_avatar
|
|
user.import_mode = true
|
|
user.user_avatar.update(custom_upload_id: upload.id)
|
|
user.update(uploaded_avatar_id: upload.id)
|
|
else
|
|
Rails.logger.error("Could not persist avatar for user #{user.username}")
|
|
end
|
|
end
|
|
rescue ActiveRecord::RecordNotFound
|
|
puts "Could not find User for user_id: #{a["c_user"]}"
|
|
end
|
|
end
|
|
end
|
|
|
|
def process_uploads(body, user_id)
|
|
if body.match(/<img src="\/forums\/storage\/attachments\/[\w-]*.[a-z]{3,4}">/)
|
|
# There could be multiple images in a post
|
|
images = body.scan(/<img src="\/forums\/storage\/attachments\/[\w-]*.[a-z]{3,4}">/)
|
|
|
|
images.each do |image|
|
|
filepath = File.basename(image).split('"')[0]
|
|
filepath = File.join(ATTACHMENT_DIR, filepath)
|
|
|
|
if File.exist?(filepath)
|
|
filename = File.basename(filepath)
|
|
upload = create_upload(user_id, filepath, filename)
|
|
image_html = html_for_upload(upload, filename)
|
|
original_image_html = '<img src="/forums/storage/attachments/' + filename + '">'
|
|
body.sub!(original_image_html, image_html)
|
|
end
|
|
end
|
|
end
|
|
# Non-images
|
|
if body.match(/<a href="\/forums\/storage\/attachments\/[\w-]*.[a-z]{3,4}">/)
|
|
# There could be multiple files in a post
|
|
files = body.scan(/<a href="\/forums\/storage\/attachments\/[\w-]*.[a-z]{3,4}">/)
|
|
|
|
files.each do |file|
|
|
filepath = File.basename(file).split('"')[0]
|
|
filepath = File.join(ATTACHMENT_DIR, filepath)
|
|
|
|
if File.exist?(filepath)
|
|
filename = File.basename(filepath)
|
|
upload = create_upload(user_id, filepath, filename)
|
|
file_html = html_for_upload(upload, filename)
|
|
original_file_html = '<a href="/forums/storage/attachments/' + filename + '">'
|
|
body.sub!(original_file_html, file_html)
|
|
end
|
|
end
|
|
end
|
|
|
|
body
|
|
end
|
|
|
|
def process_mentions(body)
|
|
raw = body.dup
|
|
|
|
# https://example.forum.com/forums/users/1469/XYZ_Rob.html
|
|
raw.gsub!(%r{(https://example.forum.com/forums/users/\d+/[\w_%-.]*.html)}) do
|
|
legacy_url = $1
|
|
import_user_id =
|
|
legacy_url.match(%r{https://example.forum.com/forums/users/(\d+)/[\w_%-.]*.html}).captures
|
|
|
|
user = @lookup.find_user_by_import_id(import_user_id[0])
|
|
if user.present?
|
|
# puts "/users/#{user.username}"
|
|
"/users/#{user.username}"
|
|
else
|
|
# puts legacy_url
|
|
legacy_url
|
|
end
|
|
end
|
|
|
|
# /forums/users/395/petrocket.html
|
|
raw.gsub!(%r{(/forums/users/\d+/[\w_%-.]*.html)}) do
|
|
legacy_url = $1
|
|
import_user_id = legacy_url.match(%r{/forums/users/(\d+)/[\w_%-.]*.html}).captures
|
|
|
|
# puts raw
|
|
user = @lookup.find_user_by_import_id(import_user_id[0])
|
|
if user.present?
|
|
# puts "/users/#{user.username}"
|
|
"/users/#{user.username}"
|
|
else
|
|
# puts legacy_url
|
|
legacy_url
|
|
end
|
|
end
|
|
|
|
raw
|
|
end
|
|
|
|
def create_permalinks
|
|
puts "", "Creating redirects...", ""
|
|
|
|
# https://example.forum.com/forums/questions/2005/missing-file.html
|
|
Topic.find_each do |topic|
|
|
pcf = topic.first_post.custom_fields
|
|
if pcf && pcf["import_id"]
|
|
id = pcf["import_id"]
|
|
slug = Slug.for(topic.title)
|
|
begin
|
|
Permalink.create(url: "questions/#{id}/#{slug}.html", topic_id: topic.id)
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
print "."
|
|
end
|
|
end
|
|
end
|
|
|
|
def staff_guardian
|
|
@_staff_guardian ||= Guardian.new(Discourse.system_user)
|
|
end
|
|
|
|
# Some category parent id's need to be adjusted
|
|
def check_parent_id(id)
|
|
return nil if SITE_ID > 0 && id == SITE_ID
|
|
return CATEGORY_MAP_TO if CATEGORY_MAP_FROM > 0 && id == CATEGORY_MAP_FROM
|
|
id
|
|
end
|
|
end
|
|
|
|
ImportScripts::AnswerHub.new.perform
|