discourse/script/import_scripts/drupal_qa.rb
Sam Saffron 30990006a9 DEV: enable frozen string literal on all files
This reduces chances of errors where consumers of strings mutate inputs
and reduces memory usage of the app.

Test suite passes now, but there may be some stuff left, so we will run
a few sites on a branch prior to merging
2019-05-13 09:31:32 +08:00

243 lines
7.2 KiB
Ruby

# frozen_string_literal: true
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require File.expand_path(File.dirname(__FILE__) + "/drupal.rb")
class ImportScripts::DrupalQA < ImportScripts::Drupal
def categories_query
result = @client.query("SELECT n.nid, GROUP_CONCAT(ti.tid) AS tids
FROM node AS n
INNER JOIN taxonomy_index AS ti ON ti.nid = n.nid
WHERE n.type = 'question'
AND n.status = 1
GROUP BY n.nid")
categories = {}
result.each do |r|
tids = r['tids']
if tids.present?
tids = tids.split(',')
categories[tids[0].to_i] = true
end
end
@client.query("SELECT tid, name, description FROM taxonomy_term_data WHERE tid IN (#{categories.keys.join(',')})")
end
def create_forum_topics
puts '', "creating forum topics"
total_count = @client.query("
SELECT COUNT(*) count
FROM node n
WHERE n.type = 'question'
AND n.status = 1;").first['count']
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
SELECT n.nid,
n.title,
GROUP_CONCAT(t.tid) AS tid,
n.uid,
n.created,
f.body_value AS body
FROM node AS n
LEFT OUTER JOIN taxonomy_index AS t on t.nid = n.nid
INNER JOIN field_data_body AS f ON f.entity_id = n.nid
WHERE n.type = 'question'
AND n.status = 1
GROUP BY n.nid, n.title, n.uid, n.created, f.body_value
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
create_posts(results, total: total_count, offset: offset) do |row|
{
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: category_id_from_imported_category_id((row['tid'] || '').split(',')[0]),
raw: row['body'],
created_at: Time.zone.at(row['created']),
pinned_at: nil,
title: row['title'].try(:strip)
}
end
end
end
def create_direct_replies
puts '', "creating replies in topics"
total_count = @client.query("
SELECT COUNT(*) count
FROM node n
WHERE n.type = 'answer'
AND n.status = 1;").first['count']
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
SELECT n.nid AS cid,
q.field_answer_question_nid AS nid,
n.uid,
n.created,
f.body_value AS body
FROM node AS n
INNER JOIN field_data_field_answer_question AS q ON q.entity_id = n.nid
INNER JOIN field_data_body AS f ON f.entity_id = n.nid
WHERE n.status = 1
AND n.type = 'answer'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
}
h
else
puts "No topic found for answer #{row['cid']}"
nil
end
end
end
end
def create_nested_replies
puts '', "creating nested replies to posts in topics"
total_count = @client.query("
SELECT COUNT(c.cid) count
FROM node n
INNER JOIN comment AS c ON n.nid = c.nid
WHERE n.type = 'question'
AND n.status = 1;").first['count']
batch_size = 1000
batches(batch_size) do |offset|
# WARNING: If there are more than 1000000 this might have to be revisited
results = @client.query("
SELECT (c.cid + 1000000) as cid,
c.nid,
c.uid,
c.created,
cb.comment_body_value AS body
FROM node AS n
INNER JOIN comment AS c ON c.nid = n.nid
INNER JOIN field_data_comment_body AS cb ON cb.entity_id = c.cid
WHERE n.status = 1
AND n.type = 'question'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
}
h
else
puts "No topic found for comment #{row['cid']}"
nil
end
end
end
puts '', "creating nested replies to answers in topics"
total_count = @client.query("
SELECT COUNT(c.cid) count
FROM node n
INNER JOIN comment AS c ON n.nid = c.nid
WHERE n.type = 'answer'
AND n.status = 1;").first['count']
batch_size = 1000
batches(batch_size) do |offset|
# WARNING: If there are more than 1000000 this might have to be revisited
results = @client.query("
SELECT (c.cid + 1000000) as cid,
q.field_answer_question_nid AS nid,
c.uid,
c.created,
cb.comment_body_value AS body
FROM node AS n
INNER JOIN field_data_field_answer_question AS q ON q.entity_id = n.nid
INNER JOIN comment AS c ON c.nid = n.nid
INNER JOIN field_data_comment_body AS cb ON cb.entity_id = c.cid
WHERE n.status = 1
AND n.type = 'answer'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
}
h
else
puts "No topic found for comment #{row['cid']}"
nil
end
end
end
end
def create_replies
create_direct_replies
create_nested_replies
end
end
if __FILE__ == $0
ImportScripts::DrupalQA.new.perform
end