mirror of
https://github.com/discourse/discourse.git
synced 2025-01-07 15:46:37 +08:00
3bf3b9a4a5
We validate the *format* of email addresses in many places with a match against a regex, often with very slightly different syntax. Adding a separate EmailAddressValidator simplifies the code in a few spots and feels cleaner. Deprecated the old location in case someone is using it in a plugin. No functionality change is in this commit. Note: the regex used at the moment does not support using address literals, e.g.: * localpart@[192.168.0.1] * localpart@[2001:db8::1]
538 lines
14 KiB
Ruby
538 lines
14 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "mysql2"
|
|
require "htmlentities"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
class ImportScripts::Drupal < ImportScripts::Base
|
|
|
|
# Name of the Drupal MySQL database to read from (override with DRUPAL_DB).
DRUPAL_DB = ENV['DRUPAL_DB'] || "drupal"
# Vocabulary id used to select the taxonomy terms imported as categories
# (override with DRUPAL_VID). Environment values are Strings, so the value is
# coerced with Integer(): VID is then always numeric when interpolated into
# SQL, and a non-numeric setting fails immediately instead of producing a
# broken query.
VID = Integer(ENV['DRUPAL_VID'] || 1)
# Number of rows requested per query and per batch.
BATCH_SIZE = 1000
# Local directory in which attachment files are looked up.
ATTACHMENT_DIR = "/root/files/upload"
|
|
|
|
# Sets up the importer: runs the base initializer, creates the HTML entity
# decoder and opens the MySQL connection to the database named by DRUPAL_DB.
def initialize
  super

  @htmlentities = HTMLEntities.new

  # Connection settings; edit these to match the local MySQL server.
  connection_options = {
    host: "localhost",
    username: "root",
    #password: "password",
    database: DRUPAL_DB
  }
  @client = Mysql2::Client.new(connection_options)
end
|
|
|
|
# Runs all import steps in their required order: users and categories first,
# then topics, then everything that hangs off the imported posts.
def execute
  import_users
  import_categories

  # Drupal groups its "nodes" by type. Two node types are imported as topics
  # here (blog articles are opt-in via DRUPAL_IMPORT_BLOG); the comments on
  # those nodes are imported afterwards as replies.
  # The type names differ between installs, so check yours and adjust the
  # queries accordingly.
  import_blog_topics if ENV['DRUPAL_IMPORT_BLOG']
  import_forum_topics

  import_replies
  import_likes
  mark_topics_as_solved
  import_sso_records
  import_attachments
  postprocess_posts
  create_permalinks
  import_gravatars
end
|
|
|
|
# Imports the rows of the Drupal users table, BATCH_SIZE at a time.
#
# Paging is done by uid: every query selects the rows whose uid is greater
# than the last uid seen, ordered by uid. Rows that @lookup reports as already
# imported are dropped before being handed to create_users. A missing or
# invalid email address is replaced with fake_email.
def import_users
  puts "", "importing users"

  user_count = mysql_query("SELECT count(uid) count FROM users").first["count"]
  last_user_id = -1

  batches(BATCH_SIZE) do |offset|
    sql = <<~SQL
      SELECT uid,
      name username,
      mail email,
      created
      FROM users
      WHERE uid > #{last_user_id}
      ORDER BY uid
      LIMIT #{BATCH_SIZE}
    SQL
    users = mysql_query(sql).to_a

    break if users.empty?

    # Remember where this page ended before filtering anything out.
    last_user_id = users.last["uid"]
    users = users.reject { |row| @lookup.user_already_imported?(row["uid"]) }

    create_users(users, total: user_count, offset: offset) do |user|
      email = user["email"].presence || fake_email
      email = fake_email unless EmailAddressValidator.valid_value?(email)

      {
        id: user["uid"],
        name: @htmlentities.decode(user["username"]).strip,
        email: email,
        created_at: Time.zone.at(user["created"])
      }
    end
  end
end
|
|
|
|
# Imports the taxonomy terms of vocabulary VID as categories, decoding HTML
# entities in each name and description.
def import_categories
  # The query below usually needs adjusting for your Drupal install:
  #
  # * Drupal permits duplicate category names, so some categories may have to
  #   be excluded or renamed here.
  # * The table may be called term_data.
  # * The vid to select may be something other than 1.

  puts "", "importing categories"

  sql = <<~SQL
    SELECT tid,
    name,
    description
    FROM taxonomy_term_data
    WHERE vid = #{VID}
  SQL
  categories = mysql_query(sql).to_a

  create_categories(categories) do |category|
    decoded_name = @htmlentities.decode(category['name']).strip
    decoded_description = @htmlentities.decode(category['description']).strip

    {
      id: category['tid'],
      name: decoded_name,
      description: decoded_description
    }
  end
end
|
|
|
|
# Imports published nodes of type 'article' as topics in a "Blog" category,
# creating that category first when it does not exist yet.
def import_blog_topics
  puts '', "importing blog topics"

  if Category.find_by_name('Blog').nil?
    blog_category = {
      name: 'Blog',
      description: "Articles from the blog"
    }
    create_category(blog_category, nil)
  end

  sql = <<~SQL
    SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
    f.body_value body
    FROM node n,
    field_data_body f
    WHERE n.type = 'article'
    AND n.nid = f.entity_id
    AND n.status = 1
  SQL
  blogs = mysql_query(sql).to_a

  category_id = Category.find_by_name('Blog').id

  create_posts(blogs) do |topic|
    import_id = "nid:#{topic['nid']}"
    created_at = Time.zone.at(topic['created'])
    sticky = topic['sticky'].to_i == 1

    {
      id: import_id,
      user_id: user_id_from_imported_user_id(topic['uid']) || -1,
      category: category_id,
      raw: topic['body'],
      created_at: created_at,
      # sticky nodes are pinned as of their creation time
      pinned_at: sticky ? created_at : nil,
      title: topic['title'].try(:strip),
      custom_fields: { import_id: import_id }
    }
  end
end
|
|
|
|
# Imports published nodes of type 'forum' as topics, BATCH_SIZE rows per query.
#
# Each row becomes a topic with the import id "nid:<node id>". Rows that have
# a flagging record with fid 7 are tagged with the import_solved custom field
# (presumably fid 7 is the "solved" flag on the source site; verify on your
# install).
#
# The paged query is ordered by node id: LIMIT/OFFSET without ORDER BY gives
# no guarantee about row order, so consecutive pages could repeat or skip
# topics.
def import_forum_topics
  puts '', "importing forum topics"

  total_count = mysql_query(<<~SQL).first['count']
    SELECT COUNT(*) count
    FROM forum_index fi, node n
    WHERE n.type = 'forum'
    AND fi.nid = n.nid
    AND n.status = 1
  SQL

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(<<~SQL).to_a
      SELECT fi.nid nid,
      fi.title title,
      fi.tid tid,
      n.uid uid,
      fi.created created,
      fi.sticky sticky,
      f.body_value body,
      nc.totalcount views,
      fl.timestamp solved
      FROM forum_index fi
      LEFT JOIN node n ON fi.nid = n.nid
      LEFT JOIN field_data_body f ON f.entity_id = n.nid
      LEFT JOIN flagging fl ON fl.entity_id = n.nid
      AND fl.fid = 7
      LEFT JOIN node_counter nc ON nc.nid = n.nid
      WHERE n.type = 'forum'
      AND n.status = 1
      ORDER BY fi.nid
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    SQL

    break if results.empty?

    next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }

    create_posts(results, total: total_count, offset: offset) do |row|
      created_at = Time.zone.at(row['created'])

      topic = {
        id: "nid:#{row['nid']}",
        user_id: user_id_from_imported_user_id(row['uid']) || -1,
        category: category_id_from_imported_category_id(row['tid']),
        raw: preprocess_raw(row['body']),
        created_at: created_at,
        # sticky nodes are pinned as of their creation time
        pinned_at: row['sticky'].to_i == 1 ? created_at : nil,
        title: row['title'].try(:strip),
        views: row['views']
      }
      topic[:custom_fields] = { import_solved: true } if row['solved'].present?
      topic
    end
  end
end
|
|
|
|
# Imports published comments on imported nodes as replies, BATCH_SIZE rows per
# query.
#
# Both queries filter on the same node types ('article' and 'forum'). The
# original fetch query used 'blog' while the count query and
# import_blog_topics use 'article', so comments on blog articles were counted
# but never fetched.
#
# Rows are ordered by comment id. That makes LIMIT/OFFSET paging
# deterministic, and it lets the parent lookup for reply_to_post_number find
# parents that were imported earlier in the run (assuming parent comments have
# lower ids than their children).
def import_replies
  puts '', "creating replies in topics"

  total_count = mysql_query(<<~SQL).first['count']
    SELECT COUNT(*) count
    FROM comment c,
    node n
    WHERE n.nid = c.nid
    AND c.status = 1
    AND n.type IN ('article', 'forum')
    AND n.status = 1
  SQL

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(<<~SQL).to_a
      SELECT c.cid, c.pid, c.nid, c.uid, c.created,
      f.comment_body_value body
      FROM comment c,
      field_data_comment_body f,
      node n
      WHERE c.cid = f.entity_id
      AND n.nid = c.nid
      AND c.status = 1
      AND n.type IN ('article', 'forum')
      AND n.status = 1
      ORDER BY c.cid
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if results.empty?

    next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }

    create_posts(results, total: total_count, offset: offset) do |row|
      topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
      topic_id = topic_mapping && topic_mapping[:topic_id]

      unless topic_id
        puts "No topic found for comment #{row['cid']}"
        next # the block yields nil, i.e. no post data for this comment
      end

      reply = {
        id: "cid:#{row['cid']}",
        topic_id: topic_id,
        user_id: user_id_from_imported_user_id(row['uid']) || -1,
        raw: preprocess_raw(row['body']),
        created_at: Time.zone.at(row['created']),
      }

      if row['pid']
        parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
        # only link to a parent that is itself a reply, not the first post
        reply[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > 1
      end

      reply
    end
  end
end
|
|
|
|
# Imports flagging rows with fid 5 or 6 as likes.
#
# fid 5 rows are resolved against node import ids ("nid:<entity_id>"), all
# other rows against comment import ids ("cid:<entity_id>"). Rows whose user
# or post cannot be resolved are skipped.
#
# The paged query is ordered by flagging_id so LIMIT/OFFSET pages neither
# overlap nor skip rows. A like that fails is reported and skipped rather than
# silently swallowed.
def import_likes
  puts "", "importing post likes"

  batches(BATCH_SIZE) do |offset|
    likes = mysql_query(<<~SQL).to_a
      SELECT flagging_id,
      fid,
      entity_id,
      uid
      FROM flagging
      WHERE fid = 5
      OR fid = 6
      ORDER BY flagging_id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if likes.empty?

    likes.each do |l|
      identifier = l['fid'] == 5 ? 'nid' : 'cid'
      next unless user_id = user_id_from_imported_user_id(l['uid'])
      next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l['entity_id']}")
      next unless user = User.find_by(id: user_id)
      next unless post = Post.find_by(id: post_id)

      begin
        PostActionCreator.like(user, post)
      rescue => e
        # best effort: one failed like must not stop the import
        puts "Failed to like post #{post_id} as user #{user_id}: #{e.message}"
      end
    end
  end
end
|
|
|
|
# Marks every topic carrying the import_solved custom field as solved by
# flagging its last post as the accepted answer.
#
# Topics that no longer exist, or that have no posts, are skipped. The lookup
# uses find_by because Topic.find raises for a missing record, which made the
# original `next unless` guard unreachable and aborted the run.
def mark_topics_as_solved
  puts "", "marking topics as solved"

  solved_topics = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)

  solved_topics.each do |topic_id|
    next unless topic = Topic.find_by(id: topic_id)
    next unless post = topic.posts.last
    post_id = post.id

    PostCustomField.create!(post_id: post_id, name: "is_accepted_answer", value: true)
    TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post_id)
  end
end
|
|
|
|
# Creates a SingleSignOnRecord for every user that has an import_id custom
# field, using that stored value as the external id.
#
# Users that no longer exist are skipped. find_by is used because User.find
# raises for a missing record, and in the original that call sat outside the
# begin/rescue. Records that cannot be created are skipped as before, but are
# now counted and reported at the end.
def import_sso_records
  puts "", "importing sso records"

  start_time = Time.now
  current_count = 0
  failed_count = 0

  users = UserCustomField.where(name: "import_id")
  total_count = users.count

  return if total_count.zero?

  users.find_each do |ids|
    external_id = ids.value
    next unless user = User.find_by(id: ids.user_id)

    begin
      current_count += 1
      print_status(current_count, total_count, start_time)
      SingleSignOnRecord.create!(user_id: user.id, external_id: external_id, external_email: user.email, last_payload: '')
    rescue
      # best effort: keep going, but remember that this one failed
      failed_count += 1
    end
  end

  puts "", "#{failed_count} sso records could not be created" if failed_count > 0
end
|
|
|
|
# Attaches Drupal file attachments to the posts they belong to.
#
# For each attachment row the owning post is resolved through its import id
# ("cid:<entity_id>" for comments, "nid:<entity_id>" otherwise), the file is
# uploaded through find_upload, and the upload markup is appended to the post
# unless the post already contains it.
#
# Changes from the original:
# * the paged query is ordered, so LIMIT/OFFSET pages neither overlap nor skip
#   rows;
# * users and posts are looked up with find_by, so a missing record skips the
#   attachment instead of raising outside the rescue;
# * rescued errors count as failures, and the success/failure totals are
#   printed at the end.
def import_attachments
  puts "", "importing attachments"

  current_count = 0
  success_count = 0
  fail_count = 0

  total_count = mysql_query(<<~SQL).first["count"]
    SELECT count(field_post_attachment_fid) count
    FROM field_data_field_post_attachment
  SQL

  batches(BATCH_SIZE) do |offset|
    attachments = mysql_query(<<~SQL).to_a
      SELECT *
      FROM field_data_field_post_attachment fp
      LEFT JOIN file_managed fm
      ON fp.field_post_attachment_fid = fm.fid
      ORDER BY fp.entity_type, fp.entity_id, fp.field_post_attachment_fid
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if attachments.empty?

    attachments.each do |attachment|
      current_count += 1
      print_status current_count, total_count

      identifier = attachment['entity_type'] == "comment" ? "cid" : "nid"
      next unless user_id = user_id_from_imported_user_id(attachment['uid'])
      next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment['entity_id']}")
      next unless User.find_by(id: user_id)
      next unless post = Post.find_by(id: post_id)

      begin
        new_raw = post.raw.dup
        upload, filename = find_upload(post, attachment)

        unless upload
          fail_count += 1
          next
        end

        upload_html = html_for_upload(upload, filename)
        new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html)

        if new_raw != post.raw
          PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: "Import attachment from Drupal")
        else
          puts '', 'Skipped upload: already imported'
        end

        success_count += 1
      rescue => e
        fail_count += 1
        puts e
      end
    end
  end

  puts "", "attachments imported: #{success_count}, failed: #{fail_count}"
end
|
|
|
|
# Creates a "/topic/<node id>" permalink for every listable topic whose
# import_id custom field has the form "nid:<digits>".
#
# Topics without an import id, or with one that does not contain a node id,
# are skipped. The original built the URL even when no node id could be
# extracted, producing a bogus "/topic/" permalink.
def create_permalinks
  puts '', 'creating permalinks...'

  Topic.listable_topics.find_each do |topic|
    begin
      tcf = topic.custom_fields
      import_id = tcf && tcf['import_id']
      next unless import_id

      node_id = import_id[/nid:(\d+)/, 1]
      next unless node_id

      Permalink.create(url: "/topic/#{node_id}", topic_id: topic.id)
    rescue => e
      puts e.message
      puts "Permalink creation failed for id #{topic.id}"
    end
  end
end
|
|
|
|
# Creates an upload for one attachment row.
#
# post       - the post the attachment belongs to; the upload is created for
#              the post's user, or for user id -1 when the post has no user.
# attachment - row hash; 'uri' (expected form "public://upload/<path>") and
#              'filename' are read.
#
# Returns [upload, real_filename] on success. Returns nil when the uri is
# missing or has an unexpected form, when the file is not found below
# ATTACHMENT_DIR, or when the upload is nil or has errors. Missing files are
# also appended to /tmp/attachments_failed.txt.
def find_upload(post, attachment)
  # to_s: the uri can be nil, and nil has no []
  uri = attachment['uri'].to_s[/public:\/\/upload\/(.+)/, 1]
  unless uri
    puts "Attachment #{attachment['filename']} has no usable uri"
    return
  end

  real_filename = CGI.unescapeHTML(uri)
  file = File.join(ATTACHMENT_DIR, real_filename)

  unless File.exist?(file)
    puts "Attachment file #{attachment['filename']} doesn't exist"

    tmpfile = "attachments_failed.txt"
    filename = File.join('/tmp/', tmpfile)
    File.open(filename, 'a') { |f|
      f.puts attachment['filename']
    }

    # there is nothing to upload, so do not hand a missing path to create_upload
    return
  end

  upload = create_upload(post.user&.id || -1, file, real_filename)

  if upload.nil? || upload.errors.any?
    puts "Upload not valid"
    puts upload.errors.inspect if upload
    return
  end

  [upload, real_filename]
end
|
|
|
|
# Converts Drupal BBCode quotes in a post body.
#
# * [quote]...[/quote] becomes a Markdown blockquote: the content is put on
#   its own lines and each non-empty line is prefixed with "> ".
# * [quote=<username>]...[/quote] becomes a [quote="<username>"] block on its
#   own lines.
#
# raw - the post body; may be nil or blank.
#
# Returns the converted, stripped text, or nil when raw is blank. The argument
# itself is left untouched.
#
# The original used gsub! inside the replacement block; gsub! returns nil when
# it changes nothing, so a quote with no non-empty line was replaced by an
# empty string. Non-mutating gsub avoids that and also works on frozen strings.
def preprocess_raw(raw)
  return if raw.blank?

  # quotes on new lines
  text = raw.gsub(/\[quote\](.+?)\[\/quote\]/im) do
    "\n#{Regexp.last_match(1)}\n".gsub(/\n(.+?)/) { "\n> #{Regexp.last_match(1)}" }
  end

  # [QUOTE=<username>]...[/QUOTE]
  text = text.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
    username, quote = Regexp.last_match(1), Regexp.last_match(2)
    "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
  end

  text.strip
end
|
|
|
|
# Rewrites links to the old site inside every post so they point at the
# imported topics and replies.
#
# * https://site.com/forum/topic/<nid>          -> https://community.site.com/t/-/<topic id>
# * https://site.com/comment/<cid>#comment-<n>  -> https://community.site.com/t/-/<topic id>/<post number>
#
# The host names are placeholders; edit them for your site.
#
# A link whose target was not imported, or no longer exists, is left exactly
# as it was. The original returned nil from the replacement block in that
# case, which deleted the link from the post, and a missing Post raised and
# discarded every rewrite for that post. The dot in the host name is escaped
# so it matches only a literal ".".
def postprocess_posts
  puts '', 'postprocessing posts'

  current = 0
  max = Post.count

  Post.find_each do |post|
    begin
      raw = post.raw
      new_raw = raw.dup

      # replace old topic to new topic links
      new_raw.gsub!(/https:\/\/site\.com\/forum\/topic\/(\d+)/im) do |link|
        post_id = post_id_from_imported_post_id("nid:#{Regexp.last_match(1)}")
        first_post = post_id && Post.find_by(id: post_id)
        next link unless first_post

        "https://community.site.com/t/-/#{first_post.topic_id}"
      end

      # replace old comment to reply links
      new_raw.gsub!(/https:\/\/site\.com\/comment\/(\d+)#comment-\d+/im) do |link|
        post_id = post_id_from_imported_post_id("cid:#{Regexp.last_match(1)}")
        post_ref = post_id && Post.find_by(id: post_id)
        next link unless post_ref

        "https://community.site.com/t/-/#{post_ref.topic_id}/#{post_ref.post_number}"
      end

      if raw != new_raw
        post.raw = new_raw
        post.save
      end
    rescue
      puts '', "Failed rewrite on post: #{post.id}"
    ensure
      print_status(current += 1, max)
    end
  end
end
|
|
|
|
# Refreshes the gravatar of every user, creating the user's avatar record
# first when it is missing. Failures are reported per user and do not stop
# the loop.
def import_gravatars
  puts '', 'importing gravatars'
  current = 0
  max = User.count
  User.find_each do |user|
    begin
      user.create_user_avatar(user_id: user.id) unless user.user_avatar
      user.user_avatar.update_gravatar!
    rescue
      # double quotes are required here: the original single-quoted string
      # printed the text "#{user.id}" literally instead of the id
      puts '', "Failed avatar update on user #{user.id}"
    ensure
      print_status(current += 1, max)
    end
  end
end
|
|
|
|
# Converts a Unix timestamp (seconds since the epoch) into a DateTime.
#
# time - the timestamp as a String or an Integer. Other methods in this file
#        receive epoch columns as numbers, and strptime only accepts a
#        String, hence the to_s.
#
# Returns a DateTime.
def parse_datetime(time)
  DateTime.strptime(time.to_s, '%s')
end
|
|
|
|
# Runs a SQL statement on the Drupal connection with the cache_rows option
# enabled and returns whatever the client returns.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, **query_options)
end
|
|
|
|
end
|
|
|
|
# Run the import only when this file is executed directly, not when it is
# loaded by another script.
ImportScripts::Drupal.new.perform if __FILE__ == $0
|