mirror of
https://github.com/discourse/discourse.git
synced 2024-12-14 12:43:41 +08:00
be28fc73a0
Refactors script to follow conventions of other importers and adds some features including like import, processing of post raw text, and, if needed, SSO import.
538 lines
14 KiB
Ruby
538 lines
14 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "mysql2"
|
|
require "htmlentities"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
class ImportScripts::Drupal < ImportScripts::Base
|
|
|
|
DRUPAL_DB = ENV['DRUPAL_DB'] || "drupal"
|
|
VID = ENV['DRUPAL_VID'] || 1
|
|
BATCH_SIZE = 1000
|
|
ATTACHMENT_DIR = "/root/files/upload"
|
|
|
|
def initialize
|
|
super
|
|
|
|
@htmlentities = HTMLEntities.new
|
|
|
|
@client = Mysql2::Client.new(
|
|
host: "localhost",
|
|
username: "root",
|
|
#password: "password",
|
|
database: DRUPAL_DB
|
|
)
|
|
end
|
|
|
|
def execute
|
|
|
|
import_users
|
|
import_categories
|
|
|
|
# "Nodes" in Drupal are divided into types. Here we import two types,
|
|
# and will later import all the comments/replies for each node.
|
|
# You will need to figure out what the type names are on your install and edit the queries to match.
|
|
if ENV['DRUPAL_IMPORT_BLOG']
|
|
import_blog_topics
|
|
end
|
|
|
|
import_forum_topics
|
|
|
|
import_replies
|
|
import_likes
|
|
mark_topics_as_solved
|
|
import_sso_records
|
|
import_attachments
|
|
postprocess_posts
|
|
create_permalinks
|
|
import_gravatars
|
|
end
|
|
|
|
def import_users
|
|
puts "", "importing users"
|
|
|
|
user_count = mysql_query("SELECT count(uid) count FROM users").first["count"]
|
|
|
|
last_user_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
users = mysql_query(<<-SQL
|
|
SELECT uid,
|
|
name username,
|
|
mail email,
|
|
created
|
|
FROM users
|
|
WHERE uid > #{last_user_id}
|
|
ORDER BY uid
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
).to_a
|
|
|
|
break if users.empty?
|
|
|
|
last_user_id = users[-1]["uid"]
|
|
|
|
users.reject! { |u| @lookup.user_already_imported?(u["uid"]) }
|
|
|
|
create_users(users, total: user_count, offset: offset) do |user|
|
|
email = user["email"].presence || fake_email
|
|
email = fake_email unless email[EmailValidator.email_regex]
|
|
|
|
username = @htmlentities.decode(user["username"]).strip
|
|
|
|
{
|
|
id: user["uid"],
|
|
name: username,
|
|
email: email,
|
|
created_at: Time.zone.at(user["created"])
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_categories
|
|
# You'll need to edit the following query for your Drupal install:
|
|
#
|
|
# * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
|
|
# * Table name may be term_data.
|
|
# * May need to select a vid other than 1
|
|
|
|
puts "", "importing categories"
|
|
|
|
categories = mysql_query(<<-SQL
|
|
SELECT tid,
|
|
name,
|
|
description
|
|
FROM taxonomy_term_data
|
|
WHERE vid = #{VID}
|
|
SQL
|
|
).to_a
|
|
|
|
create_categories(categories) do |category|
|
|
{
|
|
id: category['tid'],
|
|
name: @htmlentities.decode(category['name']).strip,
|
|
description: @htmlentities.decode(category['description']).strip
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_blog_topics
|
|
puts '', "importing blog topics"
|
|
|
|
create_category(
|
|
{
|
|
name: 'Blog',
|
|
description: "Articles from the blog"
|
|
},
|
|
nil) unless Category.find_by_name('Blog')
|
|
|
|
blogs = mysql_query(<<-SQL
|
|
SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
|
|
f.body_value body
|
|
FROM node n,
|
|
field_data_body f
|
|
WHERE n.type = 'article'
|
|
AND n.nid = f.entity_id
|
|
AND n.status = 1
|
|
SQL
|
|
).to_a
|
|
|
|
category_id = Category.find_by_name('Blog').id
|
|
|
|
create_posts(blogs) do |topic|
|
|
{
|
|
id: "nid:#{topic['nid']}",
|
|
user_id: user_id_from_imported_user_id(topic['uid']) || -1,
|
|
category: category_id,
|
|
raw: topic['body'],
|
|
created_at: Time.zone.at(topic['created']),
|
|
pinned_at: topic['sticky'].to_i == 1 ? Time.zone.at(topic['created']) : nil,
|
|
title: topic['title'].try(:strip),
|
|
custom_fields: { import_id: "nid:#{topic['nid']}" }
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_forum_topics
|
|
puts '', "importing forum topics"
|
|
|
|
total_count = mysql_query(<<-SQL
|
|
SELECT COUNT(*) count
|
|
FROM forum_index fi, node n
|
|
WHERE n.type = 'forum'
|
|
AND fi.nid = n.nid
|
|
AND n.status = 1
|
|
SQL
|
|
).first['count']
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
results = mysql_query(<<-SQL
|
|
SELECT fi.nid nid,
|
|
fi.title title,
|
|
fi.tid tid,
|
|
n.uid uid,
|
|
fi.created created,
|
|
fi.sticky sticky,
|
|
f.body_value body,
|
|
nc.totalcount views,
|
|
fl.timestamp solved
|
|
FROM forum_index fi
|
|
LEFT JOIN node n ON fi.nid = n.nid
|
|
LEFT JOIN field_data_body f ON f.entity_id = n.nid
|
|
LEFT JOIN flagging fl ON fl.entity_id = n.nid
|
|
AND fl.fid = 7
|
|
LEFT JOIN node_counter nc ON nc.nid = n.nid
|
|
WHERE n.type = 'forum'
|
|
AND n.status = 1
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset};
|
|
SQL
|
|
).to_a
|
|
|
|
break if results.size < 1
|
|
|
|
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
|
|
|
|
create_posts(results, total: total_count, offset: offset) do |row|
|
|
raw = preprocess_raw(row['body'])
|
|
topic = {
|
|
id: "nid:#{row['nid']}",
|
|
user_id: user_id_from_imported_user_id(row['uid']) || -1,
|
|
category: category_id_from_imported_category_id(row['tid']),
|
|
raw: raw,
|
|
created_at: Time.zone.at(row['created']),
|
|
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
|
|
title: row['title'].try(:strip),
|
|
views: row['views']
|
|
}
|
|
topic[:custom_fields] = { import_solved: true } if row['solved'].present?
|
|
topic
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_replies
|
|
puts '', "creating replies in topics"
|
|
|
|
total_count = mysql_query(<<-SQL
|
|
SELECT COUNT(*) count
|
|
FROM comment c,
|
|
node n
|
|
WHERE n.nid = c.nid
|
|
AND c.status = 1
|
|
AND n.type IN ('article', 'forum')
|
|
AND n.status = 1
|
|
SQL
|
|
).first['count']
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
results = mysql_query(<<-SQL
|
|
SELECT c.cid, c.pid, c.nid, c.uid, c.created,
|
|
f.comment_body_value body
|
|
FROM comment c,
|
|
field_data_comment_body f,
|
|
node n
|
|
WHERE c.cid = f.entity_id
|
|
AND n.nid = c.nid
|
|
AND c.status = 1
|
|
AND n.type IN ('blog', 'forum')
|
|
AND n.status = 1
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset}
|
|
SQL
|
|
).to_a
|
|
|
|
break if results.size < 1
|
|
|
|
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
|
|
|
|
create_posts(results, total: total_count, offset: offset) do |row|
|
|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
|
|
if topic_mapping && topic_id = topic_mapping[:topic_id]
|
|
raw = preprocess_raw(row['body'])
|
|
h = {
|
|
id: "cid:#{row['cid']}",
|
|
topic_id: topic_id,
|
|
user_id: user_id_from_imported_user_id(row['uid']) || -1,
|
|
raw: raw,
|
|
created_at: Time.zone.at(row['created']),
|
|
}
|
|
if row['pid']
|
|
parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
|
|
h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1)
|
|
end
|
|
h
|
|
else
|
|
puts "No topic found for comment #{row['cid']}"
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_likes
|
|
puts "", "importing post likes"
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
likes = mysql_query(<<-SQL
|
|
SELECT flagging_id,
|
|
fid,
|
|
entity_id,
|
|
uid
|
|
FROM flagging
|
|
WHERE fid = 5
|
|
OR fid = 6
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset}
|
|
SQL
|
|
).to_a
|
|
|
|
break if likes.empty?
|
|
|
|
likes.each do |l|
|
|
identifier = l['fid'] == 5 ? 'nid' : 'cid'
|
|
next unless user_id = user_id_from_imported_user_id(l['uid'])
|
|
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l['entity_id']}")
|
|
next unless user = User.find_by(id: user_id)
|
|
next unless post = Post.find_by(id: post_id)
|
|
PostActionCreator.like(user, post) rescue nil
|
|
end
|
|
end
|
|
end
|
|
|
|
def mark_topics_as_solved
|
|
puts "", "marking topics as solved"
|
|
|
|
solved_topics = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)
|
|
|
|
solved_topics.each do |topic_id|
|
|
next unless topic = Topic.find(topic_id)
|
|
next unless post = topic.posts.last
|
|
post_id = post.id
|
|
|
|
PostCustomField.create!(post_id: post_id, name: "is_accepted_answer", value: true)
|
|
TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post_id)
|
|
end
|
|
end
|
|
|
|
def import_sso_records
|
|
puts "", "importing sso records"
|
|
|
|
start_time = Time.now
|
|
current_count = 0
|
|
|
|
users = UserCustomField.where(name: "import_id")
|
|
|
|
total_count = users.count
|
|
|
|
return if users.empty?
|
|
|
|
users.each do |ids|
|
|
user_id = ids.user_id
|
|
external_id = ids.value
|
|
next unless user = User.find(user_id)
|
|
|
|
begin
|
|
current_count += 1
|
|
print_status(current_count, total_count, start_time)
|
|
SingleSignOnRecord.create!(user_id: user.id, external_id: external_id, external_email: user.email, last_payload: '')
|
|
rescue
|
|
next
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_attachments
|
|
puts "", "importing attachments"
|
|
|
|
current_count = 0
|
|
success_count = 0
|
|
fail_count = 0
|
|
|
|
total_count = mysql_query(<<-SQL
|
|
SELECT count(field_post_attachment_fid) count
|
|
FROM field_data_field_post_attachment
|
|
SQL
|
|
).first["count"]
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
attachments = mysql_query(<<-SQL
|
|
SELECT *
|
|
FROM field_data_field_post_attachment fp
|
|
LEFT JOIN file_managed fm
|
|
ON fp.field_post_attachment_fid = fm.fid
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset}
|
|
SQL
|
|
).to_a
|
|
|
|
break if attachments.size < 1
|
|
|
|
attachments.each do |attachment|
|
|
current_count += 1
|
|
print_status current_count, total_count
|
|
|
|
identifier = attachment['entity_type'] == "comment" ? "cid" : "nid"
|
|
next unless user_id = user_id_from_imported_user_id(attachment['uid'])
|
|
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment['entity_id']}")
|
|
next unless user = User.find(user_id)
|
|
next unless post = Post.find(post_id)
|
|
|
|
begin
|
|
new_raw = post.raw.dup
|
|
upload, filename = find_upload(post, attachment)
|
|
|
|
unless upload
|
|
fail_count += 1
|
|
next
|
|
end
|
|
|
|
upload_html = html_for_upload(upload, filename)
|
|
new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html)
|
|
|
|
if new_raw != post.raw
|
|
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: "Import attachment from Drupal")
|
|
else
|
|
puts '', 'Skipped upload: already imported'
|
|
end
|
|
|
|
success_count += 1
|
|
rescue => e
|
|
puts e
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def create_permalinks
|
|
puts '', 'creating permalinks...'
|
|
|
|
Topic.listable_topics.find_each do |topic|
|
|
begin
|
|
tcf = topic.custom_fields
|
|
if tcf && tcf['import_id']
|
|
node_id = tcf['import_id'][/nid:(\d+)/, 1]
|
|
slug = "/topic/#{node_id}"
|
|
Permalink.create(url: slug, topic_id: topic.id)
|
|
end
|
|
rescue => e
|
|
puts e.message
|
|
puts "Permalink creation failed for id #{topic.id}"
|
|
end
|
|
end
|
|
end
|
|
|
|
def find_upload(post, attachment)
|
|
uri = attachment['uri'][/public:\/\/upload\/(.+)/, 1]
|
|
real_filename = CGI.unescapeHTML(uri)
|
|
file = File.join(ATTACHMENT_DIR, real_filename)
|
|
|
|
unless File.exists?(file)
|
|
puts "Attachment file #{attachment['filename']} doesn't exist"
|
|
|
|
tmpfile = "attachments_failed.txt"
|
|
filename = File.join('/tmp/', tmpfile)
|
|
File.open(filename, 'a') { |f|
|
|
f.puts attachment['filename']
|
|
}
|
|
end
|
|
|
|
upload = create_upload(post.user.id || -1, file, real_filename)
|
|
|
|
if upload.nil? || upload.errors.any?
|
|
puts "Upload not valid"
|
|
puts upload.errors.inspect if upload
|
|
return
|
|
end
|
|
|
|
[upload, real_filename]
|
|
end
|
|
|
|
def preprocess_raw(raw)
|
|
return if raw.blank?
|
|
# quotes on new lines
|
|
raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote|
|
|
quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" }
|
|
quote.gsub!(/\n(.+?)/) { "\n> #{$1}" }
|
|
}
|
|
|
|
# [QUOTE=<username>]...[/QUOTE]
|
|
raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
|
|
username, quote = $1, $2
|
|
"\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
|
|
end
|
|
|
|
raw.strip!
|
|
raw
|
|
end
|
|
|
|
def postprocess_posts
|
|
puts '', 'postprocessing posts'
|
|
|
|
current = 0
|
|
max = Post.count
|
|
|
|
Post.find_each do |post|
|
|
begin
|
|
raw = post.raw
|
|
new_raw = raw.dup
|
|
|
|
# replace old topic to new topic links
|
|
new_raw.gsub!(/https:\/\/site.com\/forum\/topic\/(\d+)/im) do
|
|
post_id = post_id_from_imported_post_id("nid:#{$1}")
|
|
next unless post_id
|
|
topic = Post.find(post_id).topic
|
|
"https://community.site.com/t/-/#{topic.id}"
|
|
end
|
|
|
|
# replace old comment to reply links
|
|
new_raw.gsub!(/https:\/\/site.com\/comment\/(\d+)#comment-\d+/im) do
|
|
post_id = post_id_from_imported_post_id("cid:#{$1}")
|
|
next unless post_id
|
|
post_ref = Post.find(post_id)
|
|
"https://community.site.com/t/-/#{post_ref.topic_id}/#{post_ref.post_number}"
|
|
end
|
|
|
|
if raw != new_raw
|
|
post.raw = new_raw
|
|
post.save
|
|
end
|
|
rescue
|
|
puts '', "Failed rewrite on post: #{post.id}"
|
|
ensure
|
|
print_status(current += 1, max)
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_gravatars
|
|
puts '', 'importing gravatars'
|
|
current = 0
|
|
max = User.count
|
|
User.find_each do |user|
|
|
begin
|
|
user.create_user_avatar(user_id: user.id) unless user.user_avatar
|
|
user.user_avatar.update_gravatar!
|
|
rescue
|
|
puts '', 'Failed avatar update on user #{user.id}'
|
|
ensure
|
|
print_status(current += 1, max)
|
|
end
|
|
end
|
|
end
|
|
|
|
def parse_datetime(time)
|
|
DateTime.strptime(time, '%s')
|
|
end
|
|
|
|
def mysql_query(sql)
|
|
@client.query(sql, cache_rows: true)
|
|
end
|
|
|
|
end
|
|
|
|
if __FILE__ == $0
|
|
ImportScripts::Drupal.new.perform
|
|
end
|