mirror of
https://github.com/discourse/discourse.git
synced 2025-01-19 07:12:45 +08:00
Merge pull request #4240 from gschlager/phpbb3-importer
FIX import of private messages from phpBB3
This commit is contained in:
commit
665c5282c7
|
@ -161,82 +161,39 @@ module ImportScripts::PhpBB3
|
|||
SQL
|
||||
end
|
||||
|
||||
def count_messages(use_fixed_messages)
|
||||
if use_fixed_messages
|
||||
count(<<-SQL)
|
||||
SELECT COUNT(*) AS count
|
||||
FROM #{@table_prefix}_import_privmsgs
|
||||
SQL
|
||||
else
|
||||
count(<<-SQL)
|
||||
SELECT COUNT(*) AS count
|
||||
FROM #{@table_prefix}_privmsgs
|
||||
SQL
|
||||
end
|
||||
# Builds the COUNT query for private messages and hands it to `count`.
#
# Rows are ignored when an earlier row (smaller msg_id) of the privmsgs table
# has the same root_level, author_id, to_address and message_time.
def count_messages
  sql = <<-SQL
    SELECT COUNT(*) AS count
    FROM #{@table_prefix}_privmsgs m
    WHERE NOT EXISTS ( -- ignore duplicate messages
      SELECT 1
      FROM #{@table_prefix}_privmsgs x
      WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
        AND x.to_address = m.to_address AND x.message_time = m.message_time
    )
  SQL

  count(sql)
end
|
||||
|
||||
# Selects one batch (at most @batch_size rows) of private messages whose
# msg_id is greater than last_msg_id and passes the SQL to `query` together
# with the :msg_id key.
#
# use_fixed_messages - when truthy, root_msg_id is read from the
#                      <prefix>_import_privmsgs table; otherwise the
#                      root_level column of the message is used.
# last_msg_id        - only rows with a larger msg_id are selected.
#
# NOTE(review): rows are filtered by msg_id but ordered by the root message
# first — confirm that batches cannot skip rows with this combination.
def fetch_messages(use_fixed_messages, last_msg_id)
  sql =
    if use_fixed_messages
      <<-SQL
        SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text,
          IFNULL(a.attachment_count, 0) AS attachment_count
        FROM #{@table_prefix}_privmsgs m
          JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
          LEFT OUTER JOIN (
            SELECT post_msg_id, COUNT(*) AS attachment_count
            FROM #{@table_prefix}_attachments
            WHERE topic_id = 0
            GROUP BY post_msg_id
          ) a ON (m.msg_id = a.post_msg_id)
        WHERE m.msg_id > #{last_msg_id}
        ORDER BY i.root_msg_id, m.msg_id
        LIMIT #{@batch_size}
      SQL
    else
      <<-SQL
        SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
          m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count
        FROM #{@table_prefix}_privmsgs m
          LEFT OUTER JOIN (
            SELECT post_msg_id, COUNT(*) AS attachment_count
            FROM #{@table_prefix}_attachments
            WHERE topic_id = 0
            GROUP BY post_msg_id
          ) a ON (m.msg_id = a.post_msg_id)
        WHERE m.msg_id > #{last_msg_id}
        ORDER BY m.root_level, m.msg_id
        LIMIT #{@batch_size}
      SQL
    end

  query(sql, :msg_id)
end
|
||||
|
||||
# Queries the to_address values of the message msg_id and of every message
# that names msg_id as its root.
#
# msg_id             - ID of the message whose participants are wanted.
# use_fixed_messages - when truthy, the root relation is taken from the
#                      <prefix>_import_privmsgs table instead of the
#                      root_level column.
def fetch_message_participants(msg_id, use_fixed_messages)
  sql =
    if use_fixed_messages
      <<-SQL
        SELECT m.to_address
        FROM #{@table_prefix}_privmsgs m
          JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
        WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id}
      SQL
    else
      <<-SQL
        SELECT m.to_address
        FROM #{@table_prefix}_privmsgs m
        WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id}
      SQL
    end

  query(sql)
end
|
||||
|
||||
def calculate_fixed_messages
|
||||
drop_temp_import_message_table
|
||||
create_temp_import_message_table
|
||||
fill_temp_import_message_table
|
||||
|
||||
drop_import_message_table
|
||||
create_import_message_table
|
||||
fill_import_message_table
|
||||
|
||||
drop_temp_import_message_table
|
||||
# Selects one batch (at most @batch_size rows, ordered by msg_id) of private
# messages whose msg_id is greater than last_msg_id and passes the SQL to
# `query` together with the :msg_id key.
#
# Every row also carries the author and to_address of its root message
# (NULL when no root row matches) and the number of attachments that belong
# to the message. Duplicate rows are left out: a row is a duplicate when an
# earlier row has the same root_level, author_id, to_address and message_time.
def fetch_messages(last_msg_id)
  sql = <<-SQL
    SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
      m.message_text, m.to_address, r.author_id AS root_author_id, r.to_address AS root_to_address, (
        SELECT COUNT(*)
        FROM #{@table_prefix}_attachments a
        WHERE a.topic_id = 0 AND m.msg_id = a.post_msg_id
      ) AS attachment_count
    FROM #{@table_prefix}_privmsgs m
      LEFT OUTER JOIN #{@table_prefix}_privmsgs r ON (m.root_level = r.msg_id)
    WHERE m.msg_id > #{last_msg_id}
      AND NOT EXISTS ( -- ignore duplicate messages
        SELECT 1
        FROM #{@table_prefix}_privmsgs x
        WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
          AND x.to_address = m.to_address AND x.message_time = m.message_time
      )
    ORDER BY m.msg_id
    LIMIT #{@batch_size}
  SQL

  query(sql, :msg_id)
end
|
||||
|
||||
def count_bookmarks
|
||||
|
@ -268,83 +225,5 @@ module ImportScripts::PhpBB3
|
|||
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
|
||||
SQL
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
# Drops the <prefix>_import_privmsgs_temp table if it exists.
def drop_temp_import_message_table
  temp_table = "#{@table_prefix}_import_privmsgs_temp"
  query("DROP TABLE IF EXISTS #{temp_table}")
end
|
||||
|
||||
# Creates the <prefix>_import_privmsgs_temp table: one row per message with
# its root message ID, an optional recipient ID and a normalized subject.
def create_temp_import_message_table
  sql = <<-SQL
    CREATE TABLE #{@table_prefix}_import_privmsgs_temp (
      msg_id MEDIUMINT(8) NOT NULL,
      root_msg_id MEDIUMINT(8) NOT NULL,
      recipient_id MEDIUMINT(8),
      normalized_subject VARCHAR(255) NOT NULL,
      PRIMARY KEY (msg_id)
    )
  SQL

  query(sql)
end
|
||||
|
||||
# this removes duplicate messages, converts the to_address to a number
|
||||
# and stores the message_subject in lowercase and without the prefix "Re: "
|
||||
def fill_temp_import_message_table
  # recipient_id is only filled for root-level messages with exactly one
  # entry in to_address (no ':' separator); the first two characters of
  # to_address are skipped before the value is cast to a number.
  sql = <<-SQL
    INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject)
    SELECT m.msg_id, m.root_level,
      CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN
        CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER)
      ELSE NULL END AS recipient_id,
      LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN
        SUBSTRING(m.message_subject, 5)
      ELSE m.message_subject END) AS normalized_subject
    FROM #{@table_prefix}_privmsgs m
    WHERE NOT EXISTS (
      SELECT 1
      FROM #{@table_prefix}_privmsgs x
      WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
        AND x.to_address = m.to_address AND x.message_time = m.message_time
    )
  SQL

  query(sql)
end
|
||||
|
||||
# Drops the <prefix>_import_privmsgs table if it exists.
def drop_import_message_table
  import_table = "#{@table_prefix}_import_privmsgs"
  query("DROP TABLE IF EXISTS #{import_table}")
end
|
||||
|
||||
# Creates the <prefix>_import_privmsgs table, which maps every msg_id to a
# root_msg_id and indexes the root_msg_id column.
def create_import_message_table
  sql = <<-SQL
    CREATE TABLE #{@table_prefix}_import_privmsgs (
      msg_id MEDIUMINT(8) NOT NULL,
      root_msg_id MEDIUMINT(8) NOT NULL,
      PRIMARY KEY (msg_id),
      INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id)
    )
  SQL

  query(sql)
end
|
||||
|
||||
# this tries to calculate the actual root_level (= msg_id of the first message in a
|
||||
# private conversation) based on subject, time, author and recipient
|
||||
def fill_import_message_table
  # For rows whose root_msg_id is 0 the subquery picks the oldest earlier
  # message with the same normalized subject that was exchanged between the
  # same two users (in either direction); 0 is kept when nothing matches.
  sql = <<-SQL
    INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
    SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
      COALESCE((
        SELECT a.msg_id
        FROM #{@table_prefix}_privmsgs a
          JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
        WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
               (a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
          AND b.normalized_subject = i.normalized_subject
          AND a.msg_id <> m.msg_id
          AND a.message_time < m.message_time
        ORDER BY a.message_time
        LIMIT 1
      ), 0) ELSE i.root_msg_id END AS root_msg_id
    FROM #{@table_prefix}_privmsgs m
      JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
  SQL

  query(sql)
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -118,18 +118,13 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
def import_private_messages
|
||||
if @settings.fix_private_messages
|
||||
puts '', 'fixing private messages'
|
||||
@database.calculate_fixed_messages
|
||||
end
|
||||
|
||||
puts '', 'creating private messages'
|
||||
total_count = @database.count_messages(@settings.fix_private_messages)
|
||||
total_count = @database.count_messages
|
||||
importer = @importers.message_importer
|
||||
last_msg_id = 0
|
||||
|
||||
batches do |offset|
|
||||
rows, last_msg_id = @database.fetch_messages(@settings.fix_private_messages, last_msg_id)
|
||||
rows, last_msg_id = @database.fetch_messages(last_msg_id)
|
||||
break if rows.size < 1
|
||||
|
||||
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
|
||||
|
|
|
@ -14,7 +14,7 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Converts database rows into the import IDs of their private messages.
#
# rows - enumerable of rows that respond to [:msg_id].
#
# Returns an Array with one import ID per row, in the order of rows.
def map_to_import_ids(rows)
  # get_import_id expects the bare message ID, not the whole row.
  rows.map { |row| get_import_id(row[:msg_id]) }
end
|
||||
|
||||
|
||||
|
@ -23,31 +23,38 @@ module ImportScripts::PhpBB3
|
|||
attachments = import_attachments(row, user_id)
|
||||
|
||||
mapped = {
|
||||
id: get_import_id(row),
|
||||
id: get_import_id(row[:msg_id]),
|
||||
user_id: user_id,
|
||||
created_at: Time.zone.at(row[:message_time]),
|
||||
raw: @text_processor.process_private_msg(row[:message_text], attachments)
|
||||
}
|
||||
|
||||
if row[:root_msg_id] == 0
|
||||
map_first_message(row, mapped)
|
||||
root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address])
|
||||
current_user_ids = sorted_user_ids(row[:author_id], row[:to_address])
|
||||
topic_id = get_topic_id(row, root_user_ids, current_user_ids)
|
||||
|
||||
if topic_id.blank?
|
||||
map_first_message(row, current_user_ids, mapped)
|
||||
else
|
||||
map_other_message(row, mapped)
|
||||
map_other_message(row, topic_id, mapped)
|
||||
end
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
RE_PREFIX = 're: '
|
||||
|
||||
# Imports the attachments of a private message.
#
# row     - message row; :attachment_count and :msg_id are read.
# user_id - passed through to the attachment importer.
#
# Returns the result of the attachment importer, or nil when importing
# attachments is disabled in the settings or the row has no attachments.
def import_attachments(row, user_id)
  return unless @settings.import_attachments && row[:attachment_count] > 0

  @attachment_importer.import_attachments(user_id, row[:msg_id])
end
|
||||
|
||||
def map_first_message(row, mapped)
|
||||
mapped[:title] = CGI.unescapeHTML(row[:message_subject])
|
||||
def map_first_message(row, current_user_ids, mapped)
|
||||
mapped[:title] = get_topic_title(row)
|
||||
mapped[:archetype] = Archetype.private_message
|
||||
mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id])
|
||||
mapped[:target_usernames] = get_recipient_usernames(row)
|
||||
mapped[:custom_fields] = {import_user_ids: current_user_ids.join(',')}
|
||||
|
||||
if mapped[:target_usernames].empty? # pm with yourself?
|
||||
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
|
||||
|
@ -57,36 +64,73 @@ module ImportScripts::PhpBB3
|
|||
mapped
|
||||
end
|
||||
|
||||
def map_other_message(row, mapped)
|
||||
parent_msg_id = "pm:#{row[:root_msg_id]}"
|
||||
parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)
|
||||
|
||||
if parent.blank?
|
||||
puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
|
||||
return nil
|
||||
end
|
||||
|
||||
mapped[:topic_id] = parent[:topic_id]
|
||||
# Marks the mapped post as a reply within an existing topic.
#
# row      - message row (not read here).
# topic_id - ID stored under :topic_id.
# mapped   - post attributes; modified in place.
#
# Returns the same mapped object.
def map_other_message(row, topic_id, mapped)
  mapped.tap { |post| post[:topic_id] = topic_id }
end
|
||||
|
||||
def get_usernames(msg_id, author_id)
|
||||
# Find the users who are part of this private message.
|
||||
# Found from the to_address of phpbb_privmsgs, by looking at
|
||||
# all the rows with the same root_msg_id.
|
||||
# Extracts the recipient user IDs from a to_address value.
#
# to_address - String such as "u_91:u_1234:u_200", or nil/blank.
#
# Returns an Array of Integer user IDs without duplicates, in the order in
# which they appear; an empty Array when to_address is blank.
def get_recipient_user_ids(to_address)
  return [] if to_address.blank?

  # Entries are separated by ':'. Only entries with the "u_" prefix are users;
  # the prefix is discarded and the rest is the user_id. Other entries
  # (e.g. groups, "g_5") are skipped so they are not mistaken for user IDs.
  to_address.split(':')
    .uniq
    .select { |entry| entry.start_with?('u_') }
    .map { |entry| entry[2..-1].to_i }
end
|
||||
|
||||
# Resolves the recipients of a message to usernames.
#
# row - message row; :author_id and :to_address are read.
#
# Returns an Array of usernames. The author of the message and recipients
# for which no username is found are left out.
def get_recipient_usernames(row)
  author_id = row[:author_id].to_s

  usernames = get_recipient_user_ids(row[:to_address]).map do |import_user_id|
    # The author is not a recipient of their own message.
    next if import_user_id.to_s == author_id

    @lookup.find_user_by_import_id(import_user_id).try(:username)
  end

  usernames.compact
end
|
||||
|
||||
def get_import_id(row)
|
||||
"pm:#{row[:msg_id]}"
|
||||
# Returns the subject of the message row with HTML entities unescaped.
def get_topic_title(row)
  subject = row[:message_subject]
  CGI.unescapeHTML(subject)
end
|
||||
|
||||
# Returns the import ID of a private message: the msg_id prefixed with "pm:".
def get_import_id(msg_id)
  'pm:' + msg_id.to_s
end
|
||||
|
||||
# Creates a sorted array consisting of the message's author and recipients.
|
||||
def sorted_user_ids(author_id, to_address)
  # A nil author contributes nothing; the union drops duplicate IDs.
  author_ids = author_id.nil? ? [] : [author_id]
  participant_ids = get_recipient_user_ids(to_address) | author_ids
  participant_ids.sort
end
|
||||
|
||||
# Determines the topic_id the message should be added to.
#
# row              - message row; :root_msg_id is read here.
# root_user_ids    - user IDs involved in the root message.
# current_user_ids - user IDs involved in the current message.
#
# Returns the topic_id, or nil when no topic is found.
def get_topic_id(row, root_user_ids, current_user_ids)
  # A root message, or a message whose participants differ from those of its
  # root message, is matched against existing topics by title and users.
  looks_like_root = row[:root_msg_id] == 0 || root_user_ids != current_user_ids
  return find_topic_id(row, current_user_ids) if looks_like_root

  # Otherwise this appears to be a reply: use the topic of the imported root message.
  parent = @lookup.topic_lookup_from_imported_post_id(get_import_id(row[:root_msg_id]))
  parent.blank? ? nil : parent[:topic_id]
end
|
||||
|
||||
# Tries to find a Discourse topic (private message) that has the same title as the current message.
|
||||
# The users involved in these messages must match too.
|
||||
def find_topic_id(row, current_user_ids)
  title = get_topic_title(row).downcase

  # Also accept the title without a leading "re: " so replies match the original topic.
  candidate_titles = [title]
  candidate_titles << title[RE_PREFIX.length..-1] if title.start_with?(RE_PREFIX)

  conditions = "LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids"
  bindings = {titles: candidate_titles, user_ids: current_user_ids.join(',')}

  # The most recently created matching topic wins.
  matching_post = Post.select(:topic_id)
    .joins(:topic)
    .joins(:_custom_fields)
    .where([conditions, bindings])
    .order('topics.created_at DESC')
    .first

  matching_post.try(:topic_id)
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -52,12 +52,6 @@ import:
|
|||
private_messages: true
|
||||
polls: true
|
||||
|
||||
# This tries to fix Private Messages that were imported from phpBB2 to phpBB3.
|
||||
# You should enable this option if you see duplicate messages or lots of related
|
||||
# messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer'
|
||||
# should be one topic named 'Importer' and consist of 3 posts).
|
||||
fix_private_messages: false
|
||||
|
||||
# When true: each imported user will have the original username from phpBB as its name
|
||||
# When false: the name of each user will be blank
|
||||
username_as_name: false
|
||||
|
|
|
@ -18,7 +18,6 @@ module ImportScripts::PhpBB3
|
|||
attr_reader :import_remote_avatars
|
||||
attr_reader :import_gallery_avatars
|
||||
|
||||
attr_reader :fix_private_messages
|
||||
attr_reader :use_bbcode_to_md
|
||||
|
||||
attr_reader :original_site_prefix
|
||||
|
@ -45,7 +44,6 @@ module ImportScripts::PhpBB3
|
|||
@import_remote_avatars = avatar_settings['remote']
|
||||
@import_gallery_avatars = avatar_settings['gallery']
|
||||
|
||||
@fix_private_messages = import_settings['fix_private_messages']
|
||||
@use_bbcode_to_md =import_settings['use_bbcode_to_md']
|
||||
|
||||
@original_site_prefix = import_settings['site_prefix']['original']
|
||||
|
|
Loading…
Reference in New Issue
Block a user