mirror of
https://github.com/discourse/discourse.git
synced 2025-01-06 15:44:10 +08:00
532 lines
14 KiB
Ruby
532 lines
14 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "mysql2"
|
|
require "htmlentities"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
class ImportScripts::Drupal < ImportScripts::Base
|
|
DRUPAL_DB = ENV["DRUPAL_DB"] || "drupal"
|
|
VID = ENV["DRUPAL_VID"] || 1
|
|
BATCH_SIZE = 1000
|
|
ATTACHMENT_DIR = "/root/files/upload"
|
|
|
|
def initialize
|
|
super
|
|
|
|
@htmlentities = HTMLEntities.new
|
|
|
|
@client =
|
|
Mysql2::Client.new(
|
|
host: "localhost",
|
|
username: "root",
|
|
#password: "password",
|
|
database: DRUPAL_DB,
|
|
)
|
|
end
|
|
|
|
def execute
|
|
import_users
|
|
import_categories
|
|
|
|
# "Nodes" in Drupal are divided into types. Here we import two types,
|
|
# and will later import all the comments/replies for each node.
|
|
# You will need to figure out what the type names are on your install and edit the queries to match.
|
|
import_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]
|
|
|
|
import_forum_topics
|
|
|
|
import_replies
|
|
import_likes
|
|
mark_topics_as_solved
|
|
import_sso_records
|
|
import_attachments
|
|
postprocess_posts
|
|
create_permalinks
|
|
import_gravatars
|
|
end
|
|
|
|
def import_users
|
|
puts "", "importing users"
|
|
|
|
user_count = mysql_query("SELECT count(uid) count FROM users").first["count"]
|
|
|
|
last_user_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
users = mysql_query(<<-SQL).to_a
|
|
SELECT uid,
|
|
name username,
|
|
mail email,
|
|
created
|
|
FROM users
|
|
WHERE uid > #{last_user_id}
|
|
ORDER BY uid
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if users.empty?
|
|
|
|
last_user_id = users[-1]["uid"]
|
|
|
|
users.reject! { |u| @lookup.user_already_imported?(u["uid"]) }
|
|
|
|
create_users(users, total: user_count, offset: offset) do |user|
|
|
email = user["email"].presence || fake_email
|
|
email = fake_email if !EmailAddressValidator.valid_value?(email)
|
|
|
|
username = @htmlentities.decode(user["username"]).strip
|
|
|
|
{ id: user["uid"], name: username, email: email, created_at: Time.zone.at(user["created"]) }
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_categories
|
|
# You'll need to edit the following query for your Drupal install:
|
|
#
|
|
# * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
|
|
# * Table name may be term_data.
|
|
# * May need to select a vid other than 1
|
|
|
|
puts "", "importing categories"
|
|
|
|
categories = mysql_query(<<-SQL).to_a
|
|
SELECT tid,
|
|
name,
|
|
description
|
|
FROM taxonomy_term_data
|
|
WHERE vid = #{VID}
|
|
SQL
|
|
|
|
create_categories(categories) do |category|
|
|
{
|
|
id: category["tid"],
|
|
name: @htmlentities.decode(category["name"]).strip,
|
|
description: @htmlentities.decode(category["description"]).strip,
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_blog_topics
|
|
puts "", "importing blog topics"
|
|
|
|
unless Category.find_by_name("Blog")
|
|
create_category({ name: "Blog", description: "Articles from the blog" }, nil)
|
|
end
|
|
|
|
blogs = mysql_query(<<-SQL).to_a
|
|
SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
|
|
f.body_value body
|
|
FROM node n,
|
|
field_data_body f
|
|
WHERE n.type = 'article'
|
|
AND n.nid = f.entity_id
|
|
AND n.status = 1
|
|
SQL
|
|
|
|
category_id = Category.find_by_name("Blog").id
|
|
|
|
create_posts(blogs) do |topic|
|
|
{
|
|
id: "nid:#{topic["nid"]}",
|
|
user_id: user_id_from_imported_user_id(topic["uid"]) || -1,
|
|
category: category_id,
|
|
raw: topic["body"],
|
|
created_at: Time.zone.at(topic["created"]),
|
|
pinned_at: topic["sticky"].to_i == 1 ? Time.zone.at(topic["created"]) : nil,
|
|
title: topic["title"].try(:strip),
|
|
custom_fields: {
|
|
import_id: "nid:#{topic["nid"]}",
|
|
},
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_forum_topics
|
|
puts "", "importing forum topics"
|
|
|
|
total_count = mysql_query(<<-SQL).first["count"]
|
|
SELECT COUNT(*) count
|
|
FROM forum_index fi, node n
|
|
WHERE n.type = 'forum'
|
|
AND fi.nid = n.nid
|
|
AND n.status = 1
|
|
SQL
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
results = mysql_query(<<-SQL).to_a
|
|
SELECT fi.nid nid,
|
|
fi.title title,
|
|
fi.tid tid,
|
|
n.uid uid,
|
|
fi.created created,
|
|
fi.sticky sticky,
|
|
f.body_value body,
|
|
nc.totalcount views,
|
|
fl.timestamp solved
|
|
FROM forum_index fi
|
|
LEFT JOIN node n ON fi.nid = n.nid
|
|
LEFT JOIN field_data_body f ON f.entity_id = n.nid
|
|
LEFT JOIN flagging fl ON fl.entity_id = n.nid
|
|
AND fl.fid = 7
|
|
LEFT JOIN node_counter nc ON nc.nid = n.nid
|
|
WHERE n.type = 'forum'
|
|
AND n.status = 1
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset};
|
|
SQL
|
|
|
|
break if results.size < 1
|
|
|
|
next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }
|
|
|
|
create_posts(results, total: total_count, offset: offset) do |row|
|
|
raw = preprocess_raw(row["body"])
|
|
topic = {
|
|
id: "nid:#{row["nid"]}",
|
|
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
|
|
category: category_id_from_imported_category_id(row["tid"]),
|
|
raw: raw,
|
|
created_at: Time.zone.at(row["created"]),
|
|
pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
|
|
title: row["title"].try(:strip),
|
|
views: row["views"],
|
|
}
|
|
topic[:custom_fields] = { import_solved: true } if row["solved"].present?
|
|
topic
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_replies
|
|
puts "", "creating replies in topics"
|
|
|
|
total_count = mysql_query(<<-SQL).first["count"]
|
|
SELECT COUNT(*) count
|
|
FROM comment c,
|
|
node n
|
|
WHERE n.nid = c.nid
|
|
AND c.status = 1
|
|
AND n.type IN ('article', 'forum')
|
|
AND n.status = 1
|
|
SQL
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
results = mysql_query(<<-SQL).to_a
|
|
SELECT c.cid, c.pid, c.nid, c.uid, c.created,
|
|
f.comment_body_value body
|
|
FROM comment c,
|
|
field_data_comment_body f,
|
|
node n
|
|
WHERE c.cid = f.entity_id
|
|
AND n.nid = c.nid
|
|
AND c.status = 1
|
|
AND n.type IN ('blog', 'forum')
|
|
AND n.status = 1
|
|
ORDER BY c.cid ASC
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset}
|
|
SQL
|
|
|
|
break if results.size < 1
|
|
|
|
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
|
|
|
|
create_posts(results, total: total_count, offset: offset) do |row|
|
|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
|
|
if topic_mapping && topic_id = topic_mapping[:topic_id]
|
|
raw = preprocess_raw(row["body"])
|
|
h = {
|
|
id: "cid:#{row["cid"]}",
|
|
topic_id: topic_id,
|
|
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
|
|
raw: raw,
|
|
created_at: Time.zone.at(row["created"]),
|
|
}
|
|
if row["pid"]
|
|
parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
|
|
h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1)
|
|
end
|
|
h
|
|
else
|
|
puts "No topic found for comment #{row["cid"]}"
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_likes
|
|
puts "", "importing post likes"
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
likes = mysql_query(<<-SQL).to_a
|
|
SELECT flagging_id,
|
|
fid,
|
|
entity_id,
|
|
uid
|
|
FROM flagging
|
|
WHERE fid = 5
|
|
OR fid = 6
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset}
|
|
SQL
|
|
|
|
break if likes.empty?
|
|
|
|
likes.each do |l|
|
|
identifier = l["fid"] == 5 ? "nid" : "cid"
|
|
next unless user_id = user_id_from_imported_user_id(l["uid"])
|
|
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l["entity_id"]}")
|
|
next unless user = User.find_by(id: user_id)
|
|
next unless post = Post.find_by(id: post_id)
|
|
begin
|
|
PostActionCreator.like(user, post)
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def mark_topics_as_solved
|
|
puts "", "marking topics as solved"
|
|
|
|
solved_topics =
|
|
TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)
|
|
|
|
solved_topics.each do |topic_id|
|
|
next unless topic = Topic.find(topic_id)
|
|
next unless post = topic.posts.last
|
|
post_id = post.id
|
|
|
|
PostCustomField.create!(post_id: post_id, name: "is_accepted_answer", value: true)
|
|
TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post_id)
|
|
end
|
|
end
|
|
|
|
def import_sso_records
|
|
puts "", "importing sso records"
|
|
|
|
start_time = Time.now
|
|
current_count = 0
|
|
|
|
users = UserCustomField.where(name: "import_id")
|
|
|
|
total_count = users.count
|
|
|
|
return if users.empty?
|
|
|
|
users.each do |ids|
|
|
user_id = ids.user_id
|
|
external_id = ids.value
|
|
next unless user = User.find(user_id)
|
|
|
|
begin
|
|
current_count += 1
|
|
print_status(current_count, total_count, start_time)
|
|
SingleSignOnRecord.create!(
|
|
user_id: user.id,
|
|
external_id: external_id,
|
|
external_email: user.email,
|
|
last_payload: "",
|
|
)
|
|
rescue StandardError
|
|
next
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_attachments
|
|
puts "", "importing attachments"
|
|
|
|
current_count = 0
|
|
success_count = 0
|
|
fail_count = 0
|
|
|
|
total_count = mysql_query(<<-SQL).first["count"]
|
|
SELECT count(field_post_attachment_fid) count
|
|
FROM field_data_field_post_attachment
|
|
SQL
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
attachments = mysql_query(<<-SQL).to_a
|
|
SELECT *
|
|
FROM field_data_field_post_attachment fp
|
|
LEFT JOIN file_managed fm
|
|
ON fp.field_post_attachment_fid = fm.fid
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset}
|
|
SQL
|
|
|
|
break if attachments.size < 1
|
|
|
|
attachments.each do |attachment|
|
|
current_count += 1
|
|
print_status current_count, total_count
|
|
|
|
identifier = attachment["entity_type"] == "comment" ? "cid" : "nid"
|
|
next unless user_id = user_id_from_imported_user_id(attachment["uid"])
|
|
unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment["entity_id"]}")
|
|
next
|
|
end
|
|
next unless user = User.find(user_id)
|
|
next unless post = Post.find(post_id)
|
|
|
|
begin
|
|
new_raw = post.raw.dup
|
|
upload, filename = find_upload(post, attachment)
|
|
|
|
unless upload
|
|
fail_count += 1
|
|
next
|
|
end
|
|
|
|
upload_html = html_for_upload(upload, filename)
|
|
new_raw = "#{new_raw}\n\n#{upload_html}" if new_raw.exclude?(upload_html)
|
|
|
|
if new_raw != post.raw
|
|
PostRevisor.new(post).revise!(
|
|
post.user,
|
|
{ raw: new_raw },
|
|
bypass_bump: true,
|
|
edit_reason: "Import attachment from Drupal",
|
|
)
|
|
else
|
|
puts "", "Skipped upload: already imported"
|
|
end
|
|
|
|
success_count += 1
|
|
rescue => e
|
|
puts e
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def create_permalinks
|
|
puts "", "creating permalinks..."
|
|
|
|
Topic.listable_topics.find_each do |topic|
|
|
begin
|
|
tcf = topic.custom_fields
|
|
if tcf && tcf["import_id"]
|
|
node_id = tcf["import_id"][/nid:(\d+)/, 1]
|
|
slug = "/node/#{node_id}"
|
|
Permalink.create(url: slug, topic_id: topic.id)
|
|
end
|
|
rescue => e
|
|
puts e.message
|
|
puts "Permalink creation failed for id #{topic.id}"
|
|
end
|
|
end
|
|
end
|
|
|
|
def find_upload(post, attachment)
|
|
uri = attachment["uri"][%r{public://upload/(.+)}, 1]
|
|
real_filename = CGI.unescapeHTML(uri)
|
|
file = File.join(ATTACHMENT_DIR, real_filename)
|
|
|
|
unless File.exist?(file)
|
|
puts "Attachment file #{attachment["filename"]} doesn't exist"
|
|
|
|
tmpfile = "attachments_failed.txt"
|
|
filename = File.join("/tmp/", tmpfile)
|
|
File.open(filename, "a") { |f| f.puts attachment["filename"] }
|
|
end
|
|
|
|
upload = create_upload(post.user.id || -1, file, real_filename)
|
|
|
|
if upload.nil? || upload.errors.any?
|
|
puts "Upload not valid"
|
|
puts upload.errors.inspect if upload
|
|
return
|
|
end
|
|
|
|
[upload, real_filename]
|
|
end
|
|
|
|
def preprocess_raw(raw)
|
|
return if raw.blank?
|
|
# quotes on new lines
|
|
raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote|
|
|
quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" }
|
|
quote.gsub!(/\n(.+?)/) { "\n> #{$1}" }
|
|
end
|
|
|
|
# [QUOTE=<username>]...[/QUOTE]
|
|
raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
|
|
username, quote = $1, $2
|
|
"\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
|
|
end
|
|
|
|
raw.strip!
|
|
raw
|
|
end
|
|
|
|
def postprocess_posts
|
|
puts "", "postprocessing posts"
|
|
|
|
current = 0
|
|
max = Post.count
|
|
|
|
Post.find_each do |post|
|
|
begin
|
|
raw = post.raw
|
|
new_raw = raw.dup
|
|
|
|
# replace old topic to new topic links
|
|
new_raw.gsub!(%r{https://site.com/forum/topic/(\d+)}im) do
|
|
post_id = post_id_from_imported_post_id("nid:#{$1}")
|
|
next unless post_id
|
|
topic = Post.find(post_id).topic
|
|
"https://community.site.com/t/-/#{topic.id}"
|
|
end
|
|
|
|
# replace old comment to reply links
|
|
new_raw.gsub!(%r{https://site.com/comment/(\d+)#comment-\d+}im) do
|
|
post_id = post_id_from_imported_post_id("cid:#{$1}")
|
|
next unless post_id
|
|
post_ref = Post.find(post_id)
|
|
"https://community.site.com/t/-/#{post_ref.topic_id}/#{post_ref.post_number}"
|
|
end
|
|
|
|
if raw != new_raw
|
|
post.raw = new_raw
|
|
post.save
|
|
end
|
|
rescue StandardError
|
|
puts "", "Failed rewrite on post: #{post.id}"
|
|
ensure
|
|
print_status(current += 1, max)
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_gravatars
|
|
puts "", "importing gravatars"
|
|
current = 0
|
|
max = User.count
|
|
User.find_each do |user|
|
|
begin
|
|
user.create_user_avatar(user_id: user.id) unless user.user_avatar
|
|
user.user_avatar.update_gravatar!
|
|
rescue StandardError
|
|
puts "", 'Failed avatar update on user #{user.id}'
|
|
ensure
|
|
print_status(current += 1, max)
|
|
end
|
|
end
|
|
end
|
|
|
|
def parse_datetime(time)
|
|
DateTime.strptime(time, "%s")
|
|
end
|
|
|
|
def mysql_query(sql)
|
|
@client.query(sql, cache_rows: true)
|
|
end
|
|
end
|
|
|
|
ImportScripts::Drupal.new.perform if __FILE__ == $0
|