Merge pull request #4240 from gschlager/phpbb3-importer

FIX import of private messages from phpBB3
This commit is contained in:
Arpit Jalan 2016-05-28 21:49:40 +05:30
commit 665c5282c7
5 changed files with 105 additions and 195 deletions

View File

@ -161,82 +161,39 @@ module ImportScripts::PhpBB3
SQL
end
def count_messages(use_fixed_messages)
if use_fixed_messages
count(<<-SQL)
SELECT COUNT(*) AS count
FROM #{@table_prefix}_import_privmsgs
SQL
else
count(<<-SQL)
SELECT COUNT(*) AS count
FROM #{@table_prefix}_privmsgs
SQL
end
# Counts the rows of the privmsgs table that are not duplicates.
#
# A row is treated as a duplicate (and left out of the count) when another row
# with a lower msg_id has the same root_level, author_id, to_address and
# message_time.
#
# Returns whatever the `count` helper returns for the statement.
def count_messages
  sql = <<-SQL
SELECT COUNT(*) AS count
FROM #{@table_prefix}_privmsgs m
WHERE NOT EXISTS ( -- ignore duplicate messages
SELECT 1
FROM #{@table_prefix}_privmsgs x
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
AND x.to_address = m.to_address AND x.message_time = m.message_time
)
  SQL

  count(sql)
end
# Fetches the next batch of private messages with a msg_id greater than
# +last_msg_id+, together with the number of attachments of each message.
#
# use_fixed_messages - when truthy, root_msg_id is read from the
#                      <prefix>_import_privmsgs table; otherwise the
#                      root_level column of the message itself is used.
# last_msg_id        - only messages with a higher msg_id are returned.
#
# The batch is limited to @batch_size rows. The statement is handed to `query`
# together with :msg_id; the return value is whatever `query` returns.
def fetch_messages(use_fixed_messages, last_msg_id)
  sql =
    if use_fixed_messages
      <<-SQL
SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text,
IFNULL(a.attachment_count, 0) AS attachment_count
FROM #{@table_prefix}_privmsgs m
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
LEFT OUTER JOIN (
SELECT post_msg_id, COUNT(*) AS attachment_count
FROM #{@table_prefix}_attachments
WHERE topic_id = 0
GROUP BY post_msg_id
) a ON (m.msg_id = a.post_msg_id)
WHERE m.msg_id > #{last_msg_id}
ORDER BY i.root_msg_id, m.msg_id
LIMIT #{@batch_size}
      SQL
    else
      <<-SQL
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count
FROM #{@table_prefix}_privmsgs m
LEFT OUTER JOIN (
SELECT post_msg_id, COUNT(*) AS attachment_count
FROM #{@table_prefix}_attachments
WHERE topic_id = 0
GROUP BY post_msg_id
) a ON (m.msg_id = a.post_msg_id)
WHERE m.msg_id > #{last_msg_id}
ORDER BY m.root_level, m.msg_id
LIMIT #{@batch_size}
      SQL
    end

  query(sql, :msg_id)
end
# Fetches the to_address values of a message and of every message that has it
# as root.
#
# msg_id             - id of the message; matched against the message's own id
#                      and against the root id of other messages.
# use_fixed_messages - when truthy, ids are matched against the
#                      <prefix>_import_privmsgs table (msg_id / root_msg_id);
#                      otherwise against msg_id / root_level of the privmsgs
#                      table itself.
#
# Returns whatever `query` returns for the statement.
def fetch_message_participants(msg_id, use_fixed_messages)
  sql =
    if use_fixed_messages
      <<-SQL
SELECT m.to_address
FROM #{@table_prefix}_privmsgs m
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id}
      SQL
    else
      <<-SQL
SELECT m.to_address
FROM #{@table_prefix}_privmsgs m
WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id}
      SQL
    end

  query(sql)
end
def calculate_fixed_messages
drop_temp_import_message_table
create_temp_import_message_table
fill_temp_import_message_table
drop_import_message_table
create_import_message_table
fill_import_message_table
drop_temp_import_message_table
# Fetches the next batch of private messages with a msg_id greater than
# +last_msg_id+, ordered by msg_id and limited to @batch_size rows.
#
# Each row carries the message itself (root_level is exposed as root_msg_id),
# the author_id and to_address of its root message (joined via root_level;
# NULL when no such row exists) and the number of attachments with
# topic_id = 0 that belong to the message.
#
# Duplicates are skipped: a row is ignored when another row with a lower
# msg_id has the same root_level, author_id, to_address and message_time.
#
# The statement is handed to `query` together with :msg_id; the return value
# is whatever `query` returns.
def fetch_messages(last_msg_id)
  sql = <<-SQL
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
m.message_text, m.to_address, r.author_id AS root_author_id, r.to_address AS root_to_address, (
SELECT COUNT(*)
FROM #{@table_prefix}_attachments a
WHERE a.topic_id = 0 AND m.msg_id = a.post_msg_id
) AS attachment_count
FROM #{@table_prefix}_privmsgs m
LEFT OUTER JOIN #{@table_prefix}_privmsgs r ON (m.root_level = r.msg_id)
WHERE m.msg_id > #{last_msg_id}
AND NOT EXISTS ( -- ignore duplicate messages
SELECT 1
FROM #{@table_prefix}_privmsgs x
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
AND x.to_address = m.to_address AND x.message_time = m.message_time
)
ORDER BY m.msg_id
LIMIT #{@batch_size}
  SQL

  query(sql, :msg_id)
end
def count_bookmarks
@ -268,83 +225,5 @@ module ImportScripts::PhpBB3
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
SQL
end
protected
# Drops the table <prefix>_import_privmsgs_temp if it exists.
def drop_temp_import_message_table
  temp_table = "#{@table_prefix}_import_privmsgs_temp"
  query("DROP TABLE IF EXISTS #{temp_table}")
end
# Creates the table <prefix>_import_privmsgs_temp, keyed by msg_id, with the
# columns root_msg_id, recipient_id (nullable) and normalized_subject.
def create_temp_import_message_table
  ddl = <<-SQL
CREATE TABLE #{@table_prefix}_import_privmsgs_temp (
msg_id MEDIUMINT(8) NOT NULL,
root_msg_id MEDIUMINT(8) NOT NULL,
recipient_id MEDIUMINT(8),
normalized_subject VARCHAR(255) NOT NULL,
PRIMARY KEY (msg_id)
)
  SQL

  query(ddl)
end
# Fills <prefix>_import_privmsgs_temp from the privmsgs table:
# - duplicates are left out (a row is skipped when another row with a lower
#   msg_id has the same root_level, author_id, to_address and message_time)
# - recipient_id is the number that follows the first two characters of
#   to_address, but only for rows with root_level = 0 whose to_address
#   contains no ':'; otherwise it is NULL
# - normalized_subject is the lowercased message_subject without a leading
#   "Re: "
def fill_temp_import_message_table
  insert_sql = <<-SQL
INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject)
SELECT m.msg_id, m.root_level,
CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN
CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER)
ELSE NULL END AS recipient_id,
LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN
SUBSTRING(m.message_subject, 5)
ELSE m.message_subject END) AS normalized_subject
FROM #{@table_prefix}_privmsgs m
WHERE NOT EXISTS (
SELECT 1
FROM #{@table_prefix}_privmsgs x
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
AND x.to_address = m.to_address AND x.message_time = m.message_time
)
  SQL

  query(insert_sql)
end
# Drops the table <prefix>_import_privmsgs if it exists.
def drop_import_message_table
  import_table = "#{@table_prefix}_import_privmsgs"
  query("DROP TABLE IF EXISTS #{import_table}")
end
# Creates the table <prefix>_import_privmsgs (msg_id, root_msg_id), keyed by
# msg_id and with an index on root_msg_id.
def create_import_message_table
  ddl = <<-SQL
CREATE TABLE #{@table_prefix}_import_privmsgs (
msg_id MEDIUMINT(8) NOT NULL,
root_msg_id MEDIUMINT(8) NOT NULL,
PRIMARY KEY (msg_id),
INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id)
)
  SQL

  query(ddl)
end
# Fills <prefix>_import_privmsgs with a root_msg_id for every row of the
# temporary table.
#
# Rows that already have a non-zero root_msg_id keep it. For rows with
# root_msg_id = 0 the statement looks for the oldest earlier message
# (lower message_time, different msg_id) with the same normalized subject that
# was exchanged between the same author and recipient, in either direction,
# and uses its msg_id; when none is found the value stays 0.
def fill_import_message_table
  insert_sql = <<-SQL
INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
COALESCE((
SELECT a.msg_id
FROM #{@table_prefix}_privmsgs a
JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
(a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
AND b.normalized_subject = i.normalized_subject
AND a.msg_id <> m.msg_id
AND a.message_time < m.message_time
ORDER BY a.message_time
LIMIT 1
), 0) ELSE i.root_msg_id END AS root_msg_id
FROM #{@table_prefix}_privmsgs m
JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
  SQL

  query(insert_sql)
end
end
end

View File

@ -118,18 +118,13 @@ module ImportScripts::PhpBB3
end
def import_private_messages
if @settings.fix_private_messages
puts '', 'fixing private messages'
@database.calculate_fixed_messages
end
puts '', 'creating private messages'
total_count = @database.count_messages(@settings.fix_private_messages)
total_count = @database.count_messages
importer = @importers.message_importer
last_msg_id = 0
batches do |offset|
rows, last_msg_id = @database.fetch_messages(@settings.fix_private_messages, last_msg_id)
rows, last_msg_id = @database.fetch_messages(last_msg_id)
break if rows.size < 1
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))

View File

@ -14,7 +14,7 @@ module ImportScripts::PhpBB3
end
def map_to_import_ids(rows)
rows.map { |row| get_import_id(row) }
rows.map { |row| get_import_id(row[:msg_id]) }
end
@ -23,31 +23,38 @@ module ImportScripts::PhpBB3
attachments = import_attachments(row, user_id)
mapped = {
id: get_import_id(row),
id: get_import_id(row[:msg_id]),
user_id: user_id,
created_at: Time.zone.at(row[:message_time]),
raw: @text_processor.process_private_msg(row[:message_text], attachments)
}
if row[:root_msg_id] == 0
map_first_message(row, mapped)
root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address])
current_user_ids = sorted_user_ids(row[:author_id], row[:to_address])
topic_id = get_topic_id(row, root_user_ids, current_user_ids)
if topic_id.blank?
map_first_message(row, current_user_ids, mapped)
else
map_other_message(row, mapped)
map_other_message(row, topic_id, mapped)
end
end
protected
RE_PREFIX = 're: '
# Imports the attachments of a private message.
#
# row     - message row; :attachment_count and :msg_id are read.
# user_id - passed through to the attachment importer.
#
# Returns the result of @attachment_importer.import_attachments, or nil when
# attachment import is disabled in the settings or the message has none.
def import_attachments(row, user_id)
  return unless @settings.import_attachments
  return unless row[:attachment_count] > 0

  @attachment_importer.import_attachments(user_id, row[:msg_id])
end
def map_first_message(row, mapped)
mapped[:title] = CGI.unescapeHTML(row[:message_subject])
def map_first_message(row, current_user_ids, mapped)
mapped[:title] = get_topic_title(row)
mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id])
mapped[:target_usernames] = get_recipient_usernames(row)
mapped[:custom_fields] = {import_user_ids: current_user_ids.join(',')}
if mapped[:target_usernames].empty? # pm with yourself?
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
@ -57,36 +64,73 @@ module ImportScripts::PhpBB3
mapped
end
def map_other_message(row, mapped)
parent_msg_id = "pm:#{row[:root_msg_id]}"
parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)
if parent.blank?
puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
return nil
end
mapped[:topic_id] = parent[:topic_id]
def map_other_message(row, topic_id, mapped)
mapped[:topic_id] = topic_id
mapped
end
def get_usernames(msg_id, author_id)
# Find the users who are part of this private message.
# Found from the to_address of phpbb_privmsgs, by looking at
# all the rows with the same root_msg_id.
def get_recipient_user_ids(to_address)
return [] if to_address.blank?
# to_address looks like this: "u_91:u_1234:u_200"
# The "u_" prefix is discarded and the rest is a user_id.
import_user_ids = @database.fetch_message_participants(msg_id, @settings.fix_private_messages)
.map { |r| r[:to_address].split(':') }
.flatten!.uniq.map! { |u| u[2..-1] }
user_ids = to_address.split(':')
user_ids.uniq!
user_ids.map! { |u| u[2..-1].to_i }
end
def get_recipient_usernames(row)
author_id = row[:author_id].to_s
import_user_ids = get_recipient_user_ids(row[:to_address])
import_user_ids.map! do |import_user_id|
import_user_id.to_s == author_id.to_s ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username)
import_user_id.to_s == author_id ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username)
end.compact
end
def get_import_id(row)
"pm:#{row[:msg_id]}"
# Returns the message subject of +row+ with HTML entities unescaped.
def get_topic_title(row)
  subject = row[:message_subject]
  CGI.unescapeHTML(subject)
end
# Builds the import id of a private message: the msg_id prefixed with "pm:".
def get_import_id(msg_id)
  'pm:' + msg_id.to_s
end
# Returns the ids of everyone involved in a message - the recipients parsed
# from +to_address+ plus the author (unless +author_id+ is nil) - without
# duplicates and in ascending order.
def sorted_user_ids(author_id, to_address)
  recipients = get_recipient_user_ids(to_address)
  (recipients + [author_id].compact).uniq.sort
end
# Determines the Discourse topic_id the message in +row+ belongs to.
#
# row              - message row; :root_msg_id is read here.
# root_user_ids    - user ids involved in the root message.
# current_user_ids - user ids involved in the current message.
#
# Returns a topic_id, or nil when none could be found.
def get_topic_id(row, root_user_ids, current_user_ids)
  root_msg_id = row[:root_msg_id]
  looks_like_reply = root_msg_id != 0 && root_user_ids == current_user_ids

  # A root message, or a message whose participants differ from those of its
  # root message, is matched against existing Discourse topics instead.
  return find_topic_id(row, current_user_ids) unless looks_like_reply

  # This appears to be a reply: use the topic of the already imported root message.
  parent = @lookup.topic_lookup_from_imported_post_id(get_import_id(root_msg_id))
  parent.blank? ? nil : parent[:topic_id]
end
# Looks for an already imported private message topic that matches the
# current message.
#
# A post matches when its topic's lowercased title equals the lowercased
# message title (or that title without a leading "re: ") and its
# 'import_user_ids' custom field equals the comma-joined +current_user_ids+.
#
# Returns the topic_id of the match whose topic was created most recently, or
# nil when nothing matches.
def find_topic_id(row, current_user_ids)
  lowercase_title = get_topic_title(row).downcase
  candidate_titles =
    if lowercase_title.start_with?(RE_PREFIX)
      [lowercase_title, lowercase_title[RE_PREFIX.length..-1]]
    else
      [lowercase_title]
    end

  conditions = "LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids"
  bindings = {titles: candidate_titles, user_ids: current_user_ids.join(',')}

  newest_match = Post.select(:topic_id)
    .joins(:topic)
    .joins(:_custom_fields)
    .where([conditions, bindings])
    .order('topics.created_at DESC')
    .first

  newest_match.try(:topic_id)
end
end
end

View File

@ -52,12 +52,6 @@ import:
private_messages: true
polls: true
# This tries to fix Private Messages that were imported from phpBB2 to phpBB3.
# You should enable this option if you see duplicate messages or lots of related
# messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer'
# should be one topic named 'Importer' and consist of 3 posts).
fix_private_messages: false
# When true: each imported user will have the original username from phpBB as its name
# When false: the name of each user will be blank
username_as_name: false

View File

@ -18,7 +18,6 @@ module ImportScripts::PhpBB3
attr_reader :import_remote_avatars
attr_reader :import_gallery_avatars
attr_reader :fix_private_messages
attr_reader :use_bbcode_to_md
attr_reader :original_site_prefix
@ -45,7 +44,6 @@ module ImportScripts::PhpBB3
@import_remote_avatars = avatar_settings['remote']
@import_gallery_avatars = avatar_settings['gallery']
@fix_private_messages = import_settings['fix_private_messages']
@use_bbcode_to_md =import_settings['use_bbcode_to_md']
@original_site_prefix = import_settings['site_prefix']['original']