mirror of
https://github.com/discourse/discourse.git
synced 2024-11-25 19:03:45 +08:00
Improvements to phpBB3 import script (#10999)
* FEATURE: Import attachments * FEATURE: Add support for importing multiple forums in one * FEATURE: Add support for category and tag mapping * FEATURE: Import groups * FIX: Add spaces around images * FEATURE: Custom mapping of user rank to trust levels * FIX: Do not fail import if it cannot import polls * FIX: Optimize existing records lookup Co-authored-by: Gerhard Schlager <mail@gerhard-schlager.at> Co-authored-by: Jarek Radosz <jradosz@gmail.com>
This commit is contained in:
parent
82af278ae5
commit
a71b219c9a
|
@ -606,10 +606,15 @@ class ImportScripts::Base
|
|||
skipped += 1
|
||||
puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}"
|
||||
else
|
||||
result = BookmarkManager.new(user).create(post_id: post.id)
|
||||
begin
|
||||
manager = BookmarkManager.new(user)
|
||||
bookmark = manager.create(post_id: post.id)
|
||||
|
||||
created += 1 if result.errors.none?
|
||||
skipped += 1 if result.errors.any?
|
||||
created += 1 if manager.errors.none?
|
||||
skipped += 1 if manager.errors.any?
|
||||
rescue
|
||||
skipped += 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -57,6 +57,11 @@ module ImportScripts
|
|||
UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
|
||||
end
|
||||
|
||||
# Resolves the Discourse username that belongs to an imported user.
#
# @param import_id the user's ID in the source system
# @return [String, nil] the username, or nil when no Discourse user ID is
#   known for +import_id+ or no user row matches it
def find_username_by_import_id(import_id)
  discourse_user_id = user_id_from_imported_user_id(import_id)
  return if discourse_user_id.blank?

  User.where(id: discourse_user_id).pluck(:username).first
end
|
||||
|
||||
# Get the Discourse Category id based on the id of the source category
|
||||
def category_id_from_imported_category_id(import_id)
|
||||
@categories[import_id] || @categories[import_id.to_s]
|
||||
|
|
|
@ -22,13 +22,13 @@ module ImportScripts::PhpBB3
|
|||
if version.start_with?('3.0')
|
||||
require_relative 'database_3_0'
|
||||
Database_3_0.new(@database_client, @database_settings)
|
||||
elsif version.start_with?('3.1')
|
||||
elsif version.start_with?('3.1') || version.start_with?('3.2')
|
||||
require_relative 'database_3_1'
|
||||
Database_3_1.new(@database_client, @database_settings)
|
||||
else
|
||||
raise UnsupportedVersionError, <<~MSG
|
||||
Unsupported version (#{version}) of phpBB detected.
|
||||
Currently only 3.0.x and 3.1.x are supported by this importer.
|
||||
Currently only version 3.0, 3.1 and 3.2 are supported by this importer.
|
||||
MSG
|
||||
end
|
||||
end
|
||||
|
|
|
@ -53,6 +53,20 @@ module ImportScripts::PhpBB3
|
|||
SQL
|
||||
end
|
||||
|
||||
# Reads all groups from the phpBB groups table.
#
# @return whatever +query+ returns for the statement; each row carries
#   group_id, group_type, group_name and group_desc
def fetch_groups
  sql = <<-SQL
    SELECT g.group_id, g.group_type, g.group_name, g.group_desc
    FROM #{@table_prefix}groups g
  SQL

  query(sql)
end
|
||||
|
||||
# Reads all group memberships from the phpBB user_group table.
#
# @return whatever +query+ returns for the statement; each row carries
#   group_id, user_id and group_leader
def fetch_group_users
  sql = <<-SQL
    SELECT ug.group_id, ug.user_id, ug.group_leader
    FROM #{@table_prefix}user_group ug
  SQL

  query(sql)
end
|
||||
|
||||
def fetch_categories
|
||||
query(<<-SQL)
|
||||
SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_desc, x.first_post_time
|
||||
|
@ -213,12 +227,20 @@ module ImportScripts::PhpBB3
|
|||
SELECT b.user_id, t.topic_first_post_id
|
||||
FROM #{@table_prefix}bookmarks b
|
||||
JOIN #{@table_prefix}topics t ON (b.topic_id = t.topic_id)
|
||||
WHERE b.user_id > #{last_user_id} AND b.topic_id > #{last_topic_id}
|
||||
WHERE b.user_id > #{last_user_id}
|
||||
ORDER BY b.user_id, b.topic_id
|
||||
LIMIT #{@batch_size}
|
||||
SQL
|
||||
end
|
||||
|
||||
# Looks up the smiley definition for a smiley code.
#
# @param smiley_code [String] the code to search for in the smilies table
# @return the first matching row (emotion and smiley_url), or nil when
#   the query yields no row
def get_smiley(smiley_code)
  # The code is embedded in a quoted SQL literal, so a quote or backslash
  # inside it (for example in ":'(") must be escaped or the statement breaks.
  # NOTE(review): backslash escaping assumes the server accepts it inside
  # string literals (MySQL default mode) - confirm, or switch to the
  # database client's own escape routine if one is reachable from here.
  escaped_code = smiley_code.to_s.gsub(/['\\]/) { |char| "\\#{char}" }

  query(<<-SQL).first
    SELECT emotion, smiley_url
    FROM #{@table_prefix}smilies
    WHERE code = '#{escaped_code}'
  SQL
end
|
||||
|
||||
def get_config_values
|
||||
query(<<-SQL).first
|
||||
SELECT
|
||||
|
|
|
@ -27,8 +27,13 @@ module ImportScripts::PhpBB3
|
|||
def execute
|
||||
puts '', "importing from phpBB #{@php_config[:phpbb_version]}"
|
||||
|
||||
SiteSetting.tagging_enabled = true if @settings.tag_mappings.present?
|
||||
|
||||
import_users
|
||||
import_anonymous_users if @settings.import_anonymous_users
|
||||
import_groups
|
||||
import_user_groups
|
||||
import_new_categories
|
||||
import_categories
|
||||
import_posts
|
||||
import_private_messages if @settings.import_private_messages
|
||||
|
@ -67,12 +72,12 @@ module ImportScripts::PhpBB3
|
|||
|
||||
batches do |offset|
|
||||
rows, last_user_id = @database.fetch_users(last_user_id)
|
||||
rows = rows.to_a.uniq { |row| row[:user_id] }
|
||||
break if rows.size < 1
|
||||
|
||||
next if all_records_exist?(:users, importer.map_users_to_import_ids(rows))
|
||||
|
||||
create_users(rows, total: total_count, offset: offset) do |row|
|
||||
begin
|
||||
next if user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
|
||||
importer.map_user(row)
|
||||
rescue => e
|
||||
log_error("Failed to map user with ID #{row[:user_id]}", e)
|
||||
|
@ -91,10 +96,9 @@ module ImportScripts::PhpBB3
|
|||
rows, last_username = @database.fetch_anonymous_users(last_username)
|
||||
break if rows.size < 1
|
||||
|
||||
next if all_records_exist?(:users, importer.map_anonymous_users_to_import_ids(rows))
|
||||
|
||||
create_users(rows, total: total_count, offset: offset) do |row|
|
||||
begin
|
||||
next if user_id_from_imported_user_id(@settings.prefix(row[:post_username]))
|
||||
importer.map_anonymous_user(row)
|
||||
rescue => e
|
||||
log_error("Failed to map anonymous user with ID #{row[:user_id]}", e)
|
||||
|
@ -103,12 +107,74 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
end
|
||||
|
||||
# Creates one Discourse group per phpBB group row returned by the database.
#
# Rows with group_type 3 are skipped. When a site name is configured it is
# prepended to the group name. A row that fails to map is logged and skipped.
def import_groups
  puts '', 'creating groups'
  rows = @database.fetch_groups

  create_groups(rows) do |row|
    begin
      # presumably 3 is phpBB's "special" (built-in) group type - TODO confirm
      next if row[:group_type] == 3

      raw_name = @settings.site_name.present? ? "#{@settings.site_name}_#{row[:group_name]}" : row[:group_name]
      # Truncate to 20 characters first, then replace every character
      # outside the allowed set with an underscore.
      group_name = raw_name[0..19].gsub(/[^a-zA-Z0-9\-_. ]/, '_')

      # Best effort: fall back to the unprocessed description when the
      # text processor cannot handle it.
      bio_raw = begin
        @importers.text_processor.process_raw_text(row[:group_desc])
      rescue
        row[:group_desc]
      end

      {
        id: @settings.prefix(row[:group_id]),
        name: group_name,
        full_name: row[:group_name],
        bio_raw: bio_raw
      }
    rescue => e
      log_error("Failed to map group with ID #{row[:group_id]}", e)
      # Return nil explicitly so the block never hands log_error's return
      # value to create_groups as if it were a mapped group.
      nil
    end
  end
end
|
||||
|
||||
# Adds imported users to the imported groups they belonged to in phpBB.
#
# Memberships whose group or user has no imported counterpart are skipped.
# Failures while creating a membership are logged and do not stop the loop.
def import_user_groups
  puts '', 'creating user groups'
  rows = @database.fetch_group_users

  rows.each do |row|
    group_id = @lookup.group_id_from_imported_group_id(@settings.prefix(row[:group_id]))
    next unless group_id

    # A user that was not imported has no Discourse ID; without this guard
    # a membership with a nil user_id would be attempted.
    user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
    next unless user_id

    begin
      # The owner flag is applied only when a new membership is created. It
      # is not part of the lookup, so an existing membership for the same
      # user and group is reused as-is instead of attempting a second row.
      GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) do |group_user|
        group_user.owner = row[:group_leader]
      end
    rescue => e
      log_error("Failed to add user #{row[:user_id]} to group #{row[:group_id]}", e)
    end
  end
end
|
||||
|
||||
# Creates the additional categories listed in the settings.
#
# Each entry supplies forum_id, name and optionally parent_id. An entry
# equal to "SKIP" produces no category.
def import_new_categories
  puts '', 'creating new categories'

  create_categories(@settings.new_categories) do |definition|
    if definition == "SKIP"
      nil
    else
      import_id = @settings.prefix(definition[:forum_id])
      category_name = definition[:name]
      parent_import_id = @settings.prefix(definition[:parent_id])

      {
        id: import_id,
        name: category_name,
        parent_category_id: @lookup.category_id_from_imported_category_id(parent_import_id)
      }
    end
  end
end
|
||||
|
||||
# Creates Discourse categories from the phpBB forums.
#
# Forums whose category mapping is 'SKIP' produce no category; every other
# row is handed to the category importer.
def import_categories
  puts '', 'creating categories'
  forums = @database.fetch_categories
  category_importer = @importers.category_importer

  create_categories(forums) do |forum|
    skipped = @settings.category_mappings[forum[:forum_id].to_s] == 'SKIP'
    skipped ? nil : category_importer.map_category(forum)
  end
end
|
||||
|
@ -123,10 +189,9 @@ module ImportScripts::PhpBB3
|
|||
rows, last_post_id = @database.fetch_posts(last_post_id)
|
||||
break if rows.size < 1
|
||||
|
||||
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
|
||||
|
||||
create_posts(rows, total: total_count, offset: offset) do |row|
|
||||
begin
|
||||
next if post_id_from_imported_post_id(@settings.prefix(row[:post_id]))
|
||||
importer.map_post(row)
|
||||
rescue => e
|
||||
log_error("Failed to map post with ID #{row[:post_id]}", e)
|
||||
|
@ -145,10 +210,9 @@ module ImportScripts::PhpBB3
|
|||
rows, last_msg_id = @database.fetch_messages(last_msg_id)
|
||||
break if rows.size < 1
|
||||
|
||||
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
|
||||
|
||||
create_posts(rows, total: total_count, offset: offset) do |row|
|
||||
begin
|
||||
next if post_id_from_imported_post_id(@settings.prefix("pm:#{row[:msg_id]}"))
|
||||
importer.map_message(row)
|
||||
rescue => e
|
||||
log_error("Failed to map message with ID #{row[:msg_id]}", e)
|
||||
|
@ -168,7 +232,11 @@ module ImportScripts::PhpBB3
|
|||
break if rows.size < 1
|
||||
|
||||
create_bookmarks(rows, total: total_count, offset: offset) do |row|
|
||||
importer.map_bookmark(row)
|
||||
begin
|
||||
importer.map_bookmark(row)
|
||||
rescue => e
|
||||
log_error("Failed to map bookmark (#{row[:user_id]}, #{row[:topic_first_post_id]})", e)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,10 +2,14 @@
|
|||
|
||||
module ImportScripts::PhpBB3
|
||||
# Maps phpBB bookmark rows to the attributes used for creating bookmarks.
class BookmarkImporter
  # @param settings the importer settings; must respond to +prefix+
  def initialize(settings)
    @settings = settings
  end

  # Builds the bookmark attributes for one row.
  #
  # Both IDs are passed through +@settings.prefix+ so that they match the
  # import IDs under which users and posts were stored.
  #
  # @param row [Hash] a row with :user_id and :topic_first_post_id
  # @return [Hash] with the keys :user_id and :post_id
  def map_bookmark(row)
    {
      user_id: @settings.prefix(row[:user_id]),
      post_id: @settings.prefix(row[:topic_first_post_id])
    }
  end
end
|
||||
|
|
|
@ -5,20 +5,28 @@ module ImportScripts::PhpBB3
|
|||
# @param lookup [ImportScripts::LookupContainer]
|
||||
# @param text_processor [ImportScripts::PhpBB3::TextProcessor]
|
||||
# @param permalink_importer [ImportScripts::PhpBB3::PermalinkImporter]
|
||||
# @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, text_processor, permalink_importer, settings)
  @lookup = lookup
  @text_processor = text_processor
  @permalink_importer = permalink_importer
  @settings = settings
end
|
||||
|
||||
# Maps one phpBB forum row to the attributes of a Discourse category.
#
# @param row [Hash] a row with :forum_id, :parent_id and :forum_name
# @return [Hash, nil] the category attributes, or nil when the forum has an
#   entry in the category mappings and therefore gets no category of its own
def map_category(row)
  return if @settings.category_mappings[row[:forum_id].to_s]

  if row[:parent_id] && @settings.category_mappings[row[:parent_id].to_s]
    puts "parent category (#{row[:parent_id]}) was mapped, but children was not (#{row[:forum_id]})"
  end

  {
    id: @settings.prefix(row[:forum_id]),
    name: CGI.unescapeHTML(row[:forum_name]),
    parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
    post_create_action: proc do |category|
      update_category_description(category, row)
      @permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation
    end
  }
end
|
||||
|
@ -43,7 +51,7 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
if row[:forum_desc].present?
|
||||
changes = { raw: @text_processor.process_raw_text(row[:forum_desc]) }
|
||||
changes = { raw: (@text_processor.process_raw_text(row[:forum_desc]) rescue row[:forum_desc]) }
|
||||
opts = { revised_at: post.created_at, bypass_bump: true }
|
||||
post.revise(Discourse.system_user, changes, opts)
|
||||
end
|
||||
|
|
|
@ -32,7 +32,7 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Builds a category importer wired to the lookup, the text processor, the
# permalink importer and the settings.
def category_importer
  CategoryImporter.new(@lookup, text_processor, permalink_importer, @settings)
end
|
||||
|
||||
def post_importer
|
||||
|
@ -44,15 +44,13 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Builds a bookmark importer that uses the importer settings.
def bookmark_importer
  BookmarkImporter.new(@settings)
end
|
||||
|
||||
# Returns the permalink importer, creating it from the permalink settings
# on first use and reusing that instance afterwards.
def permalink_importer
  return @permalink_importer if @permalink_importer

  @permalink_importer = PermalinkImporter.new(@settings.permalinks)
end
|
||||
|
||||
protected
|
||||
|
||||
# Builds an attachment importer from the database, the uploader, the
# settings and the phpBB configuration.
def attachment_importer
  dependencies = [@database, @uploader, @settings, @phpbb_config]
  AttachmentImporter.new(*dependencies)
end
|
||||
|
@ -62,15 +60,15 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Builds a poll importer wired to the lookup, the database, the text
# processor and the settings.
def poll_importer
  PollImporter.new(@lookup, @database, text_processor, @settings)
end
|
||||
|
||||
# Returns the text processor, creating it on first use and reusing that
# instance afterwards.
def text_processor
  @text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config)
end
|
||||
|
||||
# Builds a smiley processor from the uploader, the database, the settings
# and the phpBB configuration.
def smiley_processor
  SmileyProcessor.new(@uploader, @database, @settings, @phpbb_config)
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -20,7 +20,7 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
def map_message(row)
|
||||
user_id = @lookup.user_id_from_imported_user_id(row[:author_id]) || Discourse.system_user.id
|
||||
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) || Discourse.system_user.id
|
||||
attachments = import_attachments(row, user_id)
|
||||
|
||||
mapped = {
|
||||
|
@ -84,7 +84,7 @@ module ImportScripts::PhpBB3
|
|||
import_user_ids = get_recipient_user_ids(row[:to_address])
|
||||
|
||||
import_user_ids.map! do |import_user_id|
|
||||
@lookup.find_user_by_import_id(import_user_id).try(:username)
|
||||
@lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username)
|
||||
end.compact
|
||||
end
|
||||
|
||||
|
@ -93,7 +93,7 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Builds the import ID of a private message.
#
# The "pm:" marker is added before the value is passed through
# +@settings.prefix+.
#
# @param msg_id the message ID from the source database
# @return the value returned by +@settings.prefix+
def get_import_id(msg_id)
  @settings.prefix("pm:#{msg_id}")
end
|
||||
|
||||
# Creates a sorted array consisting of the message's author and recipients.
|
||||
|
|
|
@ -39,7 +39,7 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
def create_for_post(post, import_id)
|
||||
return unless @settings.create_topic_links && post
|
||||
return unless @settings.create_post_links && post
|
||||
|
||||
url = "viewtopic.php?p=#{import_id}"
|
||||
|
||||
|
|
|
@ -5,10 +5,12 @@ module ImportScripts::PhpBB3
|
|||
# @param lookup [ImportScripts::LookupContainer]
|
||||
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
|
||||
# @param text_processor [ImportScripts::PhpBB3::TextProcessor]
|
||||
# @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, database, text_processor, settings)
  @lookup = lookup
  @database = database
  @text_processor = text_processor
  @settings = settings
end
|
||||
|
||||
# @param poll_data [ImportScripts::PhpBB3::PollData]
|
||||
|
@ -47,7 +49,7 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
def get_option_text(row)
|
||||
text = @text_processor.process_raw_text(row[:poll_option_text])
|
||||
text = @text_processor.process_raw_text(row[:poll_option_text]) rescue row[:poll_option_text]
|
||||
text.squish!
|
||||
text.gsub!(/^(\d+)\./, '\1\.')
|
||||
text
|
||||
|
@ -55,7 +57,7 @@ module ImportScripts::PhpBB3
|
|||
|
||||
# @param poll_data [ImportScripts::PhpBB3::PollData]
|
||||
def get_poll_text(poll_data)
|
||||
title = @text_processor.process_raw_text(poll_data.title)
|
||||
title = @text_processor.process_raw_text(poll_data.title) rescue poll_data.title
|
||||
text = +"#{title}\n\n"
|
||||
|
||||
arguments = ["results=always"]
|
||||
|
@ -118,7 +120,7 @@ module ImportScripts::PhpBB3
|
|||
|
||||
rows.each do |row|
|
||||
option_id = mapped_option_ids[row[:poll_option_id]]
|
||||
user_id = @lookup.user_id_from_imported_user_id(row[:user_id])
|
||||
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
|
||||
|
||||
if option_id.present? && user_id.present?
|
||||
PollVote.create!(poll: poll, poll_option_id: option_id, user_id: user_id)
|
||||
|
|
|
@ -18,22 +18,24 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Converts post rows to the import IDs under which the posts are stored.
#
# @param rows [Array<Hash>] rows with a :post_id entry
# @return [Array] one value from +@settings.prefix+ per row
def map_to_import_ids(rows)
  rows.map { |row| @settings.prefix(row[:post_id]) }
end
|
||||
|
||||
def map_post(row)
|
||||
imported_user_id = row[:post_username].blank? ? row[:poster_id] : row[:post_username]
|
||||
return if @settings.category_mappings[row[:forum_id].to_s] == 'SKIP'
|
||||
|
||||
imported_user_id = @settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username])
|
||||
user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || -1
|
||||
is_first_post = row[:post_id] == row[:topic_first_post_id]
|
||||
|
||||
attachments = import_attachments(row, user_id)
|
||||
|
||||
mapped = {
|
||||
id: row[:post_id],
|
||||
id: @settings.prefix(row[:post_id]),
|
||||
user_id: user_id,
|
||||
created_at: Time.zone.at(row[:post_time]),
|
||||
raw: @text_processor.process_post(row[:post_text], attachments),
|
||||
import_topic_id: row[:topic_id]
|
||||
import_topic_id: @settings.prefix(row[:topic_id])
|
||||
}
|
||||
|
||||
if is_first_post
|
||||
|
@ -54,14 +56,18 @@ module ImportScripts::PhpBB3
|
|||
def map_first_post(row, mapped)
|
||||
poll_data = add_poll(row, mapped) if @settings.import_polls
|
||||
|
||||
mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id])
|
||||
mapped[:category] = @lookup.category_id_from_imported_category_id(@settings.prefix(@settings.category_mappings[row[:forum_id].to_s])) ||
|
||||
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:forum_id]))
|
||||
mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255]
|
||||
mapped[:pinned_at] = mapped[:created_at] unless row[:topic_type] == Constants::POST_NORMAL
|
||||
mapped[:pinned_globally] = row[:topic_type] == Constants::POST_GLOBAL
|
||||
mapped[:views] = row[:topic_views]
|
||||
mapped[:post_create_action] = proc do |post|
|
||||
@permalink_importer.create_for_topic(post.topic, row[:topic_id])
|
||||
@permalink_importer.create_for_post(post, row[:post_id])
|
||||
if tags = @settings.tag_mappings[row[:forum_id].to_s].presence
|
||||
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tags)
|
||||
end
|
||||
@permalink_importer.create_for_topic(post.topic, row[:topic_id]) # skip @settings.prefix because ID is used in permalink generation
|
||||
@permalink_importer.create_for_post(post, row[:post_id]) # skip @settings.prefix because ID is used in permalink generation
|
||||
@poll_importer.update_poll(row[:topic_id], post, poll_data) if poll_data
|
||||
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
|
||||
end
|
||||
|
@ -70,16 +76,16 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
def map_other_post(row, mapped)
|
||||
parent = @lookup.topic_lookup_from_imported_post_id(row[:topic_first_post_id])
|
||||
parent = @lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id]))
|
||||
|
||||
if parent.blank?
|
||||
puts "Parent post #{row[:topic_first_post_id]} doesn't exist. Skipping #{row[:post_id]}: #{row[:topic_title][0..40]}"
|
||||
puts "Parent post #{@settings.prefix(row[:topic_first_post_id])} doesn't exist. Skipping #{@settings.prefix(row[:post_id])}: #{row[:topic_title][0..40]}"
|
||||
return nil
|
||||
end
|
||||
|
||||
mapped[:topic_id] = parent[:topic_id]
|
||||
mapped[:post_create_action] = proc do |post|
|
||||
@permalink_importer.create_for_post(post, row[:post_id])
|
||||
@permalink_importer.create_for_post(post, row[:post_id]) # skip @settings.prefix because ID is used in permalink generation
|
||||
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
|
||||
end
|
||||
|
||||
|
@ -91,9 +97,14 @@ module ImportScripts::PhpBB3
|
|||
|
||||
poll_data = PollData.new(row[:poll_title], row[:poll_max_options], row[:poll_end])
|
||||
poll_raw = @poll_importer.create_raw(row[:topic_id], poll_data)
|
||||
return if poll_data.options.size < 2
|
||||
|
||||
mapped_post[:raw] = poll_raw << "\n\n" << mapped_post[:raw]
|
||||
poll_data
|
||||
end
|
||||
|
||||
# Returns a guardian for the system user, created on first use and reused
# afterwards.
def staff_guardian
  return @_staff_guardian if @_staff_guardian

  @_staff_guardian = Guardian.new(Discourse.system_user)
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -12,14 +12,18 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Converts user rows to the import IDs under which the users are stored.
#
# @param rows [Array<Hash>] rows with a :user_id entry
# @return [Array] one value from +@settings.prefix+ per row
def map_users_to_import_ids(rows)
  rows.map { |row| @settings.prefix(row[:user_id]) }
end
|
||||
|
||||
def map_user(row)
|
||||
is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER
|
||||
|
||||
trust_level = row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1]
|
||||
trust_level = @settings.trust_level_for_posts(row[:user_posts], trust_level: trust_level)
|
||||
manual_locked_trust_level = trust_level > TrustLevel[1] ? trust_level : nil
|
||||
|
||||
{
|
||||
id: row[:user_id],
|
||||
id: @settings.prefix(row[:user_id]),
|
||||
email: row[:user_email],
|
||||
username: row[:username],
|
||||
password: @settings.import_passwords ? row[:user_password] : nil,
|
||||
|
@ -28,7 +32,8 @@ module ImportScripts::PhpBB3
|
|||
last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]),
|
||||
registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil),
|
||||
active: is_active_user,
|
||||
trust_level: row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1],
|
||||
trust_level: trust_level,
|
||||
manual_locked_trust_level: manual_locked_trust_level,
|
||||
approved: is_active_user,
|
||||
approved_by_id: is_active_user ? Discourse.system_user.id : nil,
|
||||
approved_at: is_active_user ? Time.now : nil,
|
||||
|
@ -45,14 +50,14 @@ module ImportScripts::PhpBB3
|
|||
end
|
||||
|
||||
# Converts anonymous-user rows to their import IDs. Anonymous users are
# identified by the username stored with the post.
#
# @param rows [Array<Hash>] rows with a :post_username entry
# @return [Array] one value from +@settings.prefix+ per row
def map_anonymous_users_to_import_ids(rows)
  rows.map { |row| @settings.prefix(row[:post_username]) }
end
|
||||
|
||||
def map_anonymous_user(row)
|
||||
username = row[:post_username]
|
||||
|
||||
{
|
||||
id: username,
|
||||
id: @settings.prefix(username),
|
||||
email: "anonymous_#{SecureRandom.hex}@no-email.invalid",
|
||||
username: username,
|
||||
name: @settings.username_as_name ? username : '',
|
||||
|
|
|
@ -11,6 +11,73 @@ database:
|
|||
batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine.
|
||||
|
||||
import:
|
||||
# Set this if you import multiple phpBB forums into a single Discourse forum.
|
||||
#
|
||||
# For example, when importing multiple sites, prefix all imported IDs
|
||||
# with 'first' to avoid conflicts. Subsequent import runs must have a
|
||||
# different 'site_name'.
|
||||
#
|
||||
# site_name: first
|
||||
#
|
||||
site_name:
|
||||
|
||||
# Create new categories
|
||||
#
|
||||
# For example, to create a parent category and a subcategory.
|
||||
#
|
||||
# new_categories:
|
||||
# - forum_id: foo
|
||||
# name: Foo Category
|
||||
# - forum_id: bar
|
||||
# name: Bar Category
|
||||
# parent_id: foo
|
||||
#
|
||||
new_categories:
|
||||
|
||||
# Category mappings
|
||||
#
|
||||
# For example, topics from phpBB category 1 and 2 will be imported
|
||||
# in the new "Foo Category" category, topics from phpBB category 3
|
||||
# will be imported in subcategory "Bar category", topics from phpBB
|
||||
# category 4 will be merged into category 5 and category 6 will be
|
||||
# skipped.
|
||||
#
|
||||
# category_mappings:
|
||||
# 1: foo
|
||||
# 2: foo
|
||||
# 3: bar
|
||||
# 4: 5
|
||||
# 6: SKIP
|
||||
#
|
||||
category_mappings:
|
||||
|
||||
# Tag mappings
|
||||
#
|
||||
# For example, imported topics from phpBB category 1 will be tagged
|
||||
# with 'first-category', etc.
|
||||
#
|
||||
# tag_mappings:
|
||||
# 1:
|
||||
# - first-category
|
||||
# 2:
|
||||
# - second-category
|
||||
# 3:
|
||||
# - third-category
|
||||
#
|
||||
tag_mappings:
|
||||
|
||||
# Rank to trust level mapping
|
||||
#
|
||||
# Map phpBB 3.x rank levels to trust level
|
||||
# Users with rank at least 3000 will have TL3, etc.
|
||||
#
|
||||
# rank_mapping:
|
||||
# trust_level_1: 200
|
||||
# trust_level_2: 1000
|
||||
# trust_level_3: 3000
|
||||
#
|
||||
rank_mapping:
|
||||
|
||||
# WARNING: Do not activate this option unless you know what you are doing.
|
||||
# It will probably break the BBCode to Markdown conversion and slow down your import.
|
||||
use_bbcode_to_md: false
|
||||
|
|
88
script/import_scripts/phpbb3/support/bbcode/markdown_node.rb
Normal file
88
script/import_scripts/phpbb3/support/bbcode/markdown_node.rb
Normal file
|
@ -0,0 +1,88 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module ImportScripts; end
module ImportScripts::PhpBB3; end

# Building blocks for converting BBCode XML into Markdown.
module ImportScripts::PhpBB3::BBCode
  # Line break types that can be assigned to a node's prefix or postfix.
  LINEBREAK_AUTO = :auto
  LINEBREAK_HARD = :hard
  LINEBREAK_HTML = :html

  # A node of the intermediate tree that is built from the XML document.
  # Creating a node with a parent appends it to that parent's children and
  # links it with the previously appended sibling.
  class MarkdownNode
    # @return [String]
    attr_reader :xml_node_name

    # @return [MarkdownNode, nil]
    attr_reader :parent

    # @return [Array<MarkdownNode>, nil] nil once {#skip_children} was called
    attr_reader :children

    # @return [MarkdownNode, nil]
    attr_accessor :previous_sibling

    # @return [MarkdownNode, nil]
    attr_accessor :next_sibling

    # @return [String]
    attr_accessor :text

    # @return [String]
    attr_accessor :prefix

    # @return [String]
    attr_accessor :postfix

    # @return [Integer]
    attr_accessor :prefix_linebreaks

    # @return [Integer]
    attr_accessor :postfix_linebreaks

    # @return [Symbol]
    attr_accessor :prefix_linebreak_type

    # @return [Symbol]
    attr_accessor :postfix_linebreak_type

    # @return [String]
    attr_accessor :prefix_children

    # @param xml_node_name [String]
    # @param parent [MarkdownNode, nil]
    def initialize(xml_node_name:, parent:)
      @xml_node_name = xml_node_name

      @text = +""
      @prefix = +""
      @postfix = +""

      @prefix_linebreaks = 0
      @postfix_linebreaks = 0

      @prefix_linebreak_type = LINEBREAK_AUTO
      @postfix_linebreak_type = LINEBREAK_AUTO

      @parent = parent
      @children = []

      if @parent
        # Link this node with the parent's current last child before
        # appending it.
        @previous_sibling = @parent.children.last
        @previous_sibling.next_sibling = self if @previous_sibling
        @parent.children << self
      end
    end

    # Uses the same text as prefix and postfix.
    #
    # @param text [String]
    def enclosed_with=(text)
      @prefix = @postfix = text
    end

    # Marks the node as having no children by setting them to nil.
    def skip_children
      @children = nil
    end

    # @return [String] a one-line summary of the node
    def to_s
      # children is nil after skip_children; report 0 instead of raising.
      child_count = children ? children.size : 0
      "name: #{xml_node_name}, prefix: #{prefix}, text: #{text}, children: #{child_count}, postfix: #{postfix}"
    end
  end
end
|
356
script/import_scripts/phpbb3/support/bbcode/xml_to_markdown.rb
Normal file
356
script/import_scripts/phpbb3/support/bbcode/xml_to_markdown.rb
Normal file
|
@ -0,0 +1,356 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'nokogiri'
|
||||
require_relative 'markdown_node'
|
||||
|
||||
module ImportScripts::PhpBB3::BBCode
|
||||
class XmlToMarkdown
|
||||
# @param xml [String] the XML to convert
# @param opts [Hash] optional settings read below: :username_from_user_id,
#   :smilie_to_emoji, :quoted_post_from_post_id, :upload_md_from_file,
#   :url_replacement, :allow_inline_code (default false) and
#   :traditional_linebreaks (default false)
def initialize(xml, opts = {})
  @username_from_user_id,
    @smilie_to_emoji,
    @quoted_post_from_post_id,
    @upload_md_from_file,
    @url_replacement = opts.values_at(
      :username_from_user_id,
      :smilie_to_emoji,
      :quoted_post_from_post_id,
      :upload_md_from_file,
      :url_replacement
    )
  @allow_inline_code = opts.fetch(:allow_inline_code) { false }
  @traditional_linebreaks = opts.fetch(:traditional_linebreaks) { false }

  @doc = Nokogiri::XML(xml)
  @list_stack = []
end
|
||||
|
||||
# Converts the parsed XML document into Markdown.
#
# @return [String] the Markdown without trailing whitespace
def convert
  preprocess_xml

  root_node = MarkdownNode.new(xml_node_name: "ROOT", parent: nil)
  visit(@doc.root, root_node)

  markdown = to_markdown(root_node)
  markdown.rstrip
end
|
||||
|
||||
private
|
||||
|
||||
# Elements that are removed from the document during preprocessing.
IGNORED_ELEMENTS = ["s", "e", "i"].freeze
# Elements whose first text child gets its leading whitespace stripped.
ELEMENTS_WITHOUT_LEADING_WHITESPACES = ["LIST", "LI"].freeze
# Parent elements inside which repeated line breaks become hard line breaks.
ELEMENTS_WITH_HARD_LINEBREAKS = ["B", "I", "U"].freeze
# NOTE(review): not referenced in the visible part of this file; presumably
# used when rendering line breaks - confirm.
EXPLICIT_LINEBREAK_THRESHOLD = 2
|
||||
|
||||
# Cleans up the document before it is visited: strips leading newlines
# (and the whitespace following them) from text nodes, drops text nodes
# that end up empty and removes the ignored elements.
def preprocess_xml
  @doc.traverse do |node|
    case node
    when Nokogiri::XML::Text
      without_leading_newlines = node.content.gsub(/\A\n+\s*/, "")
      node.content = without_leading_newlines
      node.content = node.content.lstrip if remove_leading_whitespaces?(node)
      node.remove if node.content.empty?
    else
      node.remove if IGNORED_ELEMENTS.include?(node.name)
    end
  end
end
|
||||
|
||||
# Tells whether leading whitespace should be stripped from a node.
#
# @return [Boolean] true when the node is the first child of one of the
#   elements listed in ELEMENTS_WITHOUT_LEADING_WHITESPACES
def remove_leading_whitespaces?(xml_node)
  container = xml_node.parent

  if container
    ELEMENTS_WITHOUT_LEADING_WHITESPACES.include?(container.name) &&
      container.children.first == xml_node
  else
    false
  end
end
|
||||
|
||||
# Visits an XML node and, recursively, all of its children.
#
# When a "visit_<name>" method exists and the Markdown parent still accepts
# children, a Markdown node is created and passed to that method. Children
# are visited with the new node, or with +md_parent+ when none was created.
# An "after_<name>" method, if present, is called once the children are done.
#
# @param xml_node the XML node to visit
# @param md_parent [MarkdownNode] the Markdown node to attach to
def visit(xml_node, md_parent)
  visitor = "visit_#{xml_node.name}"

  # respond_to? takes include_all as a positional boolean; true makes it
  # find the private visitor and hook methods.
  if respond_to?(visitor, true) && md_parent.children
    md_node = create_node(xml_node, md_parent)
    send(visitor, xml_node, md_node)
  end

  xml_node.children.each { |xml_child| visit(xml_child, md_node || md_parent) }

  after_hook = "after_#{xml_node.name}"
  send(after_hook, xml_node, md_node) if respond_to?(after_hook, true)
end
|
||||
|
||||
# Creates the Markdown node for an XML node.
#
# Consecutive "br" elements share one node: when the parent's last child
# already is a "br" node, that node is returned instead of a new one.
#
# @return [MarkdownNode]
def create_node(xml_node, md_parent)
  is_linebreak = xml_node.name == "br"
  previous = is_linebreak ? md_parent.children.last : nil
  return previous if previous&.xml_node_name == "br"

  MarkdownNode.new(xml_node_name: xml_node.name, parent: md_parent)
end
|
||||
|
||||
# Appends the text of an XML text node to the Markdown node's text.
def visit_text(xml_node, md_node)
  md_node.text.concat(text(xml_node))
end
|
||||
|
||||
# Renders bold text. A B element directly inside another B element adds no
# further markers.
def visit_B(xml_node, md_node)
  return if xml_node.parent&.name == 'B'

  md_node.enclosed_with = "**"
end
|
||||
|
||||
# Renders italic text. An I element directly inside another I element adds
# no further markers.
def visit_I(xml_node, md_node)
  return if xml_node.parent&.name == 'I'

  md_node.enclosed_with = "_"
end
|
||||
|
||||
# Renders underlined text with [u] tags. A U element directly inside
# another U element adds no further tags.
def visit_U(xml_node, md_node)
  return if xml_node.parent&.name == 'U'

  md_node.prefix = "[u]"
  md_node.postfix = "[/u]"
end
|
||||
|
||||
# Renders a code element. Single-line content becomes inline code when
# inline code is allowed; everything else becomes a fenced block. The
# element's content is used verbatim, so its children are skipped.
def visit_CODE(xml_node, md_node)
  code = xml_node.content
  inline = @allow_inline_code && !code.include?("\n")

  if inline
    md_node.enclosed_with = "`"
  else
    md_node.prefix = "```text\n"
    md_node.postfix = "\n```"
  end

  md_node.text = code.rstrip
  md_node.skip_children

  md_node.postfix_linebreaks = 2
  md_node.prefix_linebreaks = 2
  md_node.prefix_linebreak_type = LINEBREAK_HTML
end
|
||||
|
||||
# Starts a list. Top-level lists are surrounded by two line breaks, nested
# lists by one. A list without a "type" attribute is unordered. The list is
# pushed onto the list stack so that its items can be counted.
def visit_LIST(xml_node, md_node)
  linebreaks = @list_stack.size == 0 ? 2 : 1
  md_node.postfix_linebreaks = linebreaks
  md_node.prefix_linebreaks = linebreaks
  md_node.prefix_linebreak_type = LINEBREAK_HTML if @list_stack.size == 0

  is_unordered = xml_node.attribute('type').nil?
  @list_stack.push({ unordered: is_unordered, item_count: 0 })
end
|
||||
|
||||
# Called once all children of a list were visited; removes the list from
# the top of the list stack.
def after_LIST(xml_node, md_node)
  @list_stack.delete_at(-1)
end
|
||||
|
||||
# Renders a list item. The item is indented by two spaces per nesting
# level and gets "*" in unordered lists or its running number in ordered
# lists.
def visit_LI(xml_node, md_node)
  # An LI outside of any LIST has no list state. Render it as a top-level
  # bullet instead of failing on a nil list and a negative indentation.
  list = @list_stack.last || { unordered: true, item_count: 0 }
  depth = [@list_stack.size - 1, 0].max

  list[:item_count] += 1

  indentation = ' ' * 2 * depth
  symbol = list[:unordered] ? '*' : "#{list[:item_count]}."

  md_node.prefix = "#{indentation}#{symbol} "
  md_node.postfix_linebreaks = 1
end
|
||||
|
||||
# Renders an image as Markdown, surrounded by two line breaks. The image's
# children are skipped.
def visit_IMG(xml_node, md_node)
  source = xml_node.attribute('src')
  md_node.text = +"![](#{source})"

  md_node.postfix_linebreaks = 2
  md_node.prefix_linebreaks = 2

  md_node.skip_children
end
|
||||
|
||||
# Converts a link node into a bare URL or a Markdown link.
#
# When the link text equals the URL, only the (possibly replaced) URL is emitted
# and the children are skipped. Otherwise the children become the link text.
#
# @url_replacement may return nil for URLs it does not want to rewrite; in that
# case the original URL is kept.
def visit_URL(xml_node, md_node)
  original_url = xml_node.attribute('url').to_s
  url = CGI.unescapeHTML(original_url)
  url = @url_replacement.call(url) || url if @url_replacement

  if xml_node.content.strip == original_url
    md_node.text = url
    md_node.skip_children
  else
    md_node.prefix = "["
    md_node.postfix = "](#{url})"
  end
end
|
||||
|
||||
# Converts an email node into an autolink by wrapping it in angle brackets.
def visit_EMAIL(xml_node, md_node)
  md_node.prefix = "<"
  md_node.postfix = ">"
end
|
||||
|
||||
# Adds one linebreak after the node.
#
# When this results in more than one linebreak and the parent element is listed in
# ELEMENTS_WITH_HARD_LINEBREAKS, the linebreaks are marked as hard linebreaks.
def visit_br(xml_node, md_node)
  md_node.postfix_linebreaks += 1
  return unless md_node.postfix_linebreaks > 1

  if ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name)
    md_node.postfix_linebreak_type = LINEBREAK_HARD
  end
end
|
||||
|
||||
# Replaces a smiley node with the emoji returned by @smilie_to_emoji.
#
# Without a converter, or when the converter does not know the smiley (returns nil),
# the node is left untouched so that the original smiley text is kept.
def visit_E(xml_node, md_node)
  return unless @smilie_to_emoji

  emoji = @smilie_to_emoji.call(xml_node.content)
  return if emoji.nil?

  md_node.text = emoji
  md_node.skip_children
end
|
||||
|
||||
# Converts a quote node.
#
# In order of preference:
# * a [quote] tag that references the quoted post (username, post number, topic id),
# * a [quote] tag that only contains the username,
# * a plain Markdown blockquote (`> `) when neither is known.
def visit_QUOTE(xml_node, md_node)
  if (post = quoted_post(xml_node))
    md_node.prefix = %Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
    md_node.postfix = "\n[/quote]"
  elsif (username = quoted_username(xml_node))
    md_node.prefix = %Q{[quote="#{username}"]\n}
    md_node.postfix = "\n[/quote]"
  else
    md_node.prefix_children = "> "
  end

  md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
  md_node.prefix_linebreak_type = LINEBREAK_HTML
end
|
||||
|
||||
# Looks up the quoted post via @quoted_post_from_post_id.
#
# @return the result of the lookup, or nil when no lookup is configured or the
#   node has no numeric `post_id` attribute
def quoted_post(xml_node)
  return unless @quoted_post_from_post_id

  post_id = to_i(xml_node.attr("post_id"))
  @quoted_post_from_post_id.call(post_id) if post_id
end
|
||||
|
||||
# Determines the username to show in a quote.
#
# Prefers the username found by @username_from_user_id for the numeric `user_id`
# attribute and falls back to the `author` attribute of the node.
def quoted_username(xml_node)
  user_id = to_i(xml_node.attr("user_id")) if @username_from_user_id
  username = @username_from_user_id.call(user_id) if user_id

  username || xml_node.attr("author")
end
|
||||
|
||||
# Converts a string that consists solely of digits into an Integer.
#
# @return [Integer, nil] nil when the string is nil or contains anything but digits
def to_i(string)
  string.to_i if string&.match?(/\A\d+\z/)
end
|
||||
|
||||
# Replaces an inline attachment with the Markdown returned by @upload_md_from_file,
# which is called with the `filename` attribute and the numeric `index` attribute
# (nil when the attribute is missing or not numeric).
#
# The children are always skipped, even when no @upload_md_from_file is configured.
def visit_ATTACHMENT(xml_node, md_node)
  filename = xml_node.attr("filename")
  index = to_i(xml_node.attr("index"))

  md_node.text = @upload_md_from_file.call(filename, index) if @upload_md_from_file
  md_node.prefix_linebreaks = md_node.postfix_linebreaks = 1
  md_node.skip_children
end
|
||||
|
||||
# Maps a font size onto <small> (sizes 1-99) or <big> (sizes 101-200).
# A size of 100, any other value, or a non-numeric `size` attribute leaves the node unchanged.
def visit_SIZE(xml_node, md_node)
  size = to_i(xml_node.attr("size"))
  return if size.nil?

  tag =
    if size.between?(1, 99)
      'small'
    elsif size.between?(101, 200)
      'big'
    end
  return unless tag

  md_node.prefix = "<#{tag}>"
  md_node.postfix = "</#{tag}>"
end
|
||||
|
||||
# Returns the text of the XML node with HTML entities unescaped.
#
# NOTE: `escape_markdown` is accepted but currently has no effect, because the
# escaping below is commented out.
def text(xml_node, escape_markdown: true)
  unescaped = CGI.unescapeHTML(xml_node.text)
  # unescaped.gsub!(/[\\`*_{}\[\]()#+\-.!~]/) { |c| "\\#{c}" } if escape_markdown
  unescaped
end
|
||||
|
||||
# Renders the children of the given node into a Markdown string.
# Nodes that have children of their own are rendered recursively.
#
# @param md_parent [MarkdownNode]
# @return [String]
def to_markdown(md_parent)
  markdown = +""

  # The inherited prefix only depends on md_parent, so it is looked up once.
  parent_prefix = prefix_from_parent(md_parent)

  md_parent.children.each do |md_node|
    prefix = md_node.prefix
    text = md_node.children&.any? ? to_markdown(md_node) : md_node.text
    postfix = md_node.postfix

    # Linebreak nodes never get the inherited prefix. The first child only gets it
    # when the direct parent defines one.
    if parent_prefix && md_node.xml_node_name != "br" && (md_parent.prefix_children || !markdown.empty?)
      prefix = "#{parent_prefix}#{prefix}"
    end

    # Whitespace inside of code must stay exactly where it is.
    if md_node.xml_node_name != "CODE"
      text, prefix, postfix = hoist_whitespaces!(markdown, text, prefix, postfix)
    end

    add_linebreaks!(markdown, md_node.prefix_linebreaks, md_node.prefix_linebreak_type, parent_prefix)
    markdown << prefix
    markdown << text
    markdown << postfix
    add_linebreaks!(markdown, md_node.postfix_linebreaks, md_node.postfix_linebreak_type, parent_prefix)
  end

  markdown
end
|
||||
|
||||
# Moves leading and trailing whitespace of the text outside of the prefix and postfix,
# e.g. `** bold **` becomes ` **bold** `.
#
# Despite its name this method does not modify its arguments.
#
# @return [Array<String>] the adjusted text, prefix and postfix (in that order)
def hoist_whitespaces!(markdown, text, prefix, postfix)
  # Leading whitespace is pointless at the beginning of a line.
  text = text.lstrip if markdown.end_with?("\n")

  unless prefix.empty?
    # Keep one whitespace in front of the prefix unless the output already ends with one.
    if starts_with_whitespace?(text) && !ends_with_whitespace?(markdown)
      prefix = "#{text[0]}#{prefix}"
    end
    text = text.lstrip
  end

  unless postfix.empty?
    postfix = "#{postfix}#{text[-1]}" if ends_with_whitespace?(text)
    text = text.rstrip
  end

  [text, prefix, postfix]
end
|
||||
|
||||
# Walks up the tree, starting with the given node, and returns the first
# `prefix_children` that is set.
#
# @return the prefix, or nil when neither the node nor any of its ancestors defines one
def prefix_from_parent(md_parent)
  node = md_parent

  while node
    return node.prefix_children if node.prefix_children

    node = node.parent
  end

  nil
end
|
||||
|
||||
# Makes sure that the output ends with the required number of linebreaks.
# Nothing is added when the output is empty or already ends with enough linebreaks.
#
# @param markdown [String] the output generated so far; modified in place
# @param required_linebreak_count [Integer]
# @param linebreak_type one of the LINEBREAK_* constants
# @param prefix [String, nil] written in front of every linebreak but the first one
def add_linebreaks!(markdown, required_linebreak_count, linebreak_type, prefix = nil)
  return if required_linebreak_count == 0 || markdown.empty?

  # Counts the linebreaks ("\n", "\\\n" and "<br>\n") at the end of the output.
  existing_linebreak_count = markdown[/(?:\\?\n|<br>\n)*\z/].count("\n")

  if linebreak_type == LINEBREAK_HTML
    max_linebreak_count = [existing_linebreak_count, required_linebreak_count - 1].max + 1
    required_linebreak_count = max_linebreak_count if max_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD
  end

  return if existing_linebreak_count >= required_linebreak_count

  # The existing linebreaks are removed and all linebreaks are written again.
  rstrip!(markdown)
  alternative_linebreak_start_index = required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2

  required_linebreak_count.times do |index|
    line_end = linebreak(linebreak_type, index, alternative_linebreak_start_index, required_linebreak_count)

    # A line that only consists of the prefix must not end with whitespace.
    markdown << (line_end == "\n" ? prefix.rstrip : prefix) if prefix && index > 0
    markdown << line_end
  end
end
|
||||
|
||||
# Removes trailing whitespace and trailing linebreaks ("\n", "\\\n" and "<br>\n")
# from the given string in place.
def rstrip!(markdown)
  markdown.gsub!(/\s*(?:\\?\n|<br>\n)*\z/, '')
end
|
||||
|
||||
# Returns the linebreak to write at the given position of a run of linebreaks.
#
# * "<br>\n" for the last linebreak of an HTML-type run once the alternative form applies
# * "\\\n" (backslash + newline) for hard linebreaks, when @traditional_linebreaks
#   is set, or once the alternative form applies
# * "\n" otherwise
def linebreak(linebreak_type, linebreak_index, alternative_linebreak_start_index, required_linebreak_count)
  alternative = linebreak_index >= alternative_linebreak_start_index
  last = linebreak_index + 1 == required_linebreak_count

  if linebreak_type == LINEBREAK_HTML && alternative && last
    "<br>\n"
  elsif linebreak_type == LINEBREAK_HARD || @traditional_linebreaks || alternative
    "\\\n"
  else
    "\n"
  end
end
|
||||
|
||||
# @return [Boolean] true when the first character of the text is a whitespace
def starts_with_whitespace?(text)
  text.match?(/\A\s/)
end
|
||||
|
||||
# @return [Boolean] true when the last character of the text is a whitespace
def ends_with_whitespace?(text)
  text.match?(/\s\z/)
end
|
||||
end
|
||||
end
|
|
@ -1,14 +1,23 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'csv'
|
||||
require 'yaml'
|
||||
require_relative '../../base'
|
||||
|
||||
module ImportScripts::PhpBB3
|
||||
class Settings
|
||||
# Loads the import settings from a YAML file.
#
# The keys are stringified and wrapped in a hash with indifferent access, so the
# settings can be read with string or symbol keys.
#
# @param filename [String] path of the YAML file
# @return [Settings]
def self.load(filename)
  yaml = YAML.load_file(filename)
  Settings.new(yaml.deep_stringify_keys.with_indifferent_access)
end
|
||||
|
||||
attr_reader :site_name
|
||||
|
||||
attr_reader :new_categories
|
||||
attr_reader :category_mappings
|
||||
attr_reader :tag_mappings
|
||||
attr_reader :rank_mapping
|
||||
|
||||
attr_reader :import_anonymous_users
|
||||
attr_reader :import_attachments
|
||||
attr_reader :import_private_messages
|
||||
|
@ -34,6 +43,14 @@ module ImportScripts::PhpBB3
|
|||
|
||||
def initialize(yaml)
|
||||
import_settings = yaml['import']
|
||||
|
||||
@site_name = import_settings['site_name']
|
||||
|
||||
@new_categories = import_settings['new_categories']
|
||||
@category_mappings = import_settings['category_mappings']
|
||||
@tag_mappings = import_settings['tag_mappings']
|
||||
@rank_mapping = import_settings['rank_mapping']
|
||||
|
||||
@import_anonymous_users = import_settings['anonymous_users']
|
||||
@import_attachments = import_settings['attachments']
|
||||
@import_private_messages = import_settings['private_messages']
|
||||
|
@ -58,6 +75,20 @@ module ImportScripts::PhpBB3
|
|||
|
||||
@database = DatabaseSettings.new(yaml['database'])
|
||||
end
|
||||
|
||||
# Prefixes the value with the site name (`site_name:value`).
#
# @return the value itself when the site name or the value is blank
def prefix(val)
  return val unless @site_name.present? && val.present?

  "#{@site_name}:#{val}"
end
|
||||
|
||||
# Calculates the trust level for the given rank by using the configured rank mapping.
#
# The keys of the mapping look like `trust_level_2`, the values are the minimum rank
# required for that trust level. The highest matching trust level wins.
#
# @param rank the value that is compared (`>=`) with the values of the mapping
# @param trust_level [Integer] the minimum trust level that is returned
# @return [Integer]
def trust_level_for_posts(rank, trust_level: 0)
  return trust_level unless @rank_mapping.present?

  @rank_mapping.each do |key, min_rank|
    next unless rank >= min_rank

    mapped_level = key.gsub('trust_level_', '').to_i
    trust_level = mapped_level if mapped_level > trust_level
  end

  trust_level
end
|
||||
end
|
||||
|
||||
class DatabaseSettings
|
||||
|
|
|
@ -3,10 +3,12 @@
|
|||
module ImportScripts::PhpBB3
|
||||
class SmileyProcessor
|
||||
# @param uploader [ImportScripts::Uploader]
|
||||
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
|
||||
# @param settings [ImportScripts::PhpBB3::Settings]
|
||||
# @param phpbb_config [Hash]
|
||||
def initialize(uploader, settings, phpbb_config)
|
||||
def initialize(uploader, database, settings, phpbb_config)
|
||||
@uploader = uploader
|
||||
@database = database
|
||||
@smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])
|
||||
|
||||
@smiley_map = {}
|
||||
|
@ -16,12 +18,16 @@ module ImportScripts::PhpBB3
|
|||
|
||||
# Replaces all smilies in the given text (in place) with emojis.
def replace_smilies(text)
  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  # The closing comment repeats the smiley code, which does not have to start with a colon (e.g. 8-) ).
  text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/.+?" alt=".*?" title=".*?" \/><!-- s(?:\S+) -->/) do
    emoji($1)
  end
end

# Returns the replacement for the given smiley code.
#
# Known codes are taken from @smiley_map. For any other code the smiley is looked up
# in the database and uploaded; when that is not possible the smiley is kept as text.
def emoji(smiley_code)
  @smiley_map.fetch(smiley_code) do
    smiley = @database.get_smiley(smiley_code)
    uploaded = upload_smiley(smiley_code, smiley[:smiley_url], smiley_code, smiley[:emotion]) if smiley
    uploaded || smiley_as_text(smiley_code)
  end
end
|
||||
|
||||
|
@ -36,7 +42,7 @@ module ImportScripts::PhpBB3
|
|||
[':o', ':-o', ':eek:'] => ':astonished:',
|
||||
[':shock:'] => ':open_mouth:',
|
||||
[':?', ':-?', ':???:'] => ':confused:',
|
||||
['8-)', ':cool:'] => ':sunglasses:',
|
||||
['8)', '8-)', ':cool:'] => ':sunglasses:',
|
||||
[':lol:'] => ':laughing:',
|
||||
[':x', ':-x', ':mad:'] => ':angry:',
|
||||
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
|
||||
|
|
|
@ -1,48 +1,75 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative 'bbcode/xml_to_markdown'
|
||||
|
||||
module ImportScripts::PhpBB3
|
||||
class TextProcessor
|
||||
# @param lookup [ImportScripts::LookupContainer]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
# @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
# @param settings [ImportScripts::PhpBB3::Settings]
# @param phpbb_config [Hash] must contain the version string of phpBB as :phpbb_version
def initialize(lookup, database, smiley_processor, settings, phpbb_config)
  @lookup = lookup
  @database = database
  @smiley_processor = smiley_processor
  @he = HTMLEntities.new

  # Posts of phpBB 3.2 are converted with BBCode::XmlToMarkdown.
  @use_xml_to_markdown = phpbb_config[:phpbb_version].start_with?('3.2')

  @settings = settings
  @new_site_prefix = settings.new_site_prefix
  create_internal_link_regexps(settings.original_site_prefix)
end
|
||||
|
||||
# Converts the raw text of a post or private message into Markdown.
#
# When @use_xml_to_markdown is set, the text is converted by BBCode::XmlToMarkdown.
# Otherwise the BBCode pipeline (clean-up, optional BBCode to Markdown conversion,
# smilies, links, lists, code, attachments) is used.
#
# @param raw [String] the text as stored by phpBB
# @param attachments the attachments of the post (optional); inline attachments are
#   looked up by their index with `fetch`
# @return [String]
def process_raw_text(raw, attachments = nil)
  if @use_xml_to_markdown
    # Attachments that are referenced within the text get removed from this list.
    unreferenced_attachments = attachments&.dup

    converter = BBCode::XmlToMarkdown.new(
      raw,
      username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
      smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
      quoted_post_from_post_id: lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
      upload_md_from_file: (lambda do |filename, index|
        unreferenced_attachments[index] = nil
        attachments.fetch(index, filename).dup
      end if attachments),
      url_replacement: nil,
      allow_inline_code: false
    )

    text = converter.convert

    text.gsub!(@short_internal_link_regexp) do |link|
      replace_internal_link(link, $1, $2)
    end

    add_unreferenced_attachments(text, unreferenced_attachments)
  else
    text = raw.dup
    text = CGI.unescapeHTML(text)

    clean_bbcodes(text)
    if @settings.use_bbcode_to_md
      text = bbcode_to_md(text)
    end
    process_smilies(text)
    process_links(text)
    process_lists(text)
    process_code(text)
    fix_markdown(text)
    process_attachments(text, attachments) if attachments.present?

    text
  end
end
|
||||
|
||||
# Converts the raw text of a post into Markdown.
# This is best effort: the raw text is returned unchanged when the conversion fails.
def process_post(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError
  raw
end
|
||||
|
||||
# Converts the raw text of a private message into Markdown.
# This is best effort: the raw text is returned unchanged when the conversion fails.
def process_private_msg(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError
  raw
end
|
||||
|
||||
protected
|
||||
|
@ -139,6 +166,12 @@ module ImportScripts::PhpBB3
|
|||
attachments.fetch(index, real_filename)
|
||||
end
|
||||
|
||||
add_unreferenced_attachments(text, unreferenced_attachments)
|
||||
end
|
||||
|
||||
def add_unreferenced_attachments(text, unreferenced_attachments)
|
||||
return text unless unreferenced_attachments
|
||||
|
||||
unreferenced_attachments = unreferenced_attachments.compact
|
||||
text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
|
||||
text
|
||||
|
@ -161,6 +194,7 @@ module ImportScripts::PhpBB3
|
|||
|
||||
# Cleans up the generated Markdown in place and returns it.
#
# * [quote] and [/quote] tags are put on their own lines
# * images that are the only content of a line are surrounded by empty lines
def fix_markdown(text)
  text.gsub!(/(\n*\[\/?quote.*?\]\n*)/mi) { |q| "\n#{q.strip}\n" }

  # space out images single on line; the URL must not span multiple lines,
  # otherwise the lines following the image could become part of the match
  text.gsub!(/^!\[[^\]]*\]\([^\]\n]*\)$/i) { |img| "\n#{img.strip}\n" }

  text
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,817 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'rails_helper'
|
||||
require Rails.root.join('script/import_scripts/phpbb3/support/bbcode/xml_to_markdown')
|
||||
|
||||
RSpec.describe ImportScripts::PhpBB3::BBCode::XmlToMarkdown do
|
||||
# Converts the given XML with the described class and returns the resulting Markdown.
def convert(xml, opts = {})
  described_class.new(xml, opts).convert
end
|
||||
|
||||
it "converts unformatted text" do
|
||||
xml = '<t>unformatted text</t>'
|
||||
expect(convert(xml)).to eq('unformatted text')
|
||||
end
|
||||
|
||||
it "converts nested formatting" do
|
||||
xml = '<r><I><s>[i]</s>this is italic<B><s>[b]</s> and bold<e>[/b]</e></B> text<e>[/i]</e></I></r>'
|
||||
expect(convert(xml)).to eq('_this is italic **and bold** text_')
|
||||
end
|
||||
|
||||
context "bold text" do
|
||||
it "converts bold text" do
|
||||
xml = '<r><B><s>[b]</s>this is bold text<e>[/b]</e></B></r>'
|
||||
expect(convert(xml)).to eq('**this is bold text**')
|
||||
end
|
||||
|
||||
it "converts multi-line bold text" do
|
||||
xml = <<~XML
|
||||
<r><B><s>[b]</s>this is bold text<br/>
|
||||
on two lines<e>[/b]</e></B><br/>
|
||||
<br/>
|
||||
<B><s>[b]</s>this is bold text<br/>
|
||||
<br/>
|
||||
<br/>
|
||||
with two empty lines<e>[/b]</e></B></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
**this is bold text
|
||||
on two lines**
|
||||
|
||||
**this is bold text\\
|
||||
\\
|
||||
\\
|
||||
with two empty lines**
|
||||
MD
|
||||
end
|
||||
|
||||
it "ignores duplicate bold text" do
|
||||
xml = '<r><B><s>[b]</s><B><s>[b]</s>this is bold text<e>[/b]</e></B><e>[/b]</e></B></r>'
|
||||
expect(convert(xml)).to eq('**this is bold text**')
|
||||
end
|
||||
end
|
||||
|
||||
context "italic text" do
|
||||
it "converts italic text" do
|
||||
xml = '<r><I><s>[i]</s>this is italic text<e>[/i]</e></I></r>'
|
||||
expect(convert(xml)).to eq('_this is italic text_')
|
||||
end
|
||||
|
||||
it "converts multi-line italic text" do
|
||||
xml = <<~XML
|
||||
<r><I><s>[i]</s>this is italic text<br/>
|
||||
on two lines<e>[/i]</e></I><br/>
|
||||
<br/>
|
||||
<I><s>[i]</s>this is italic text<br/>
|
||||
<br/>
|
||||
<br/>
|
||||
with two empty lines<e>[/i]</e></I></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
_this is italic text
|
||||
on two lines_
|
||||
|
||||
_this is italic text\\
|
||||
\\
|
||||
\\
|
||||
with two empty lines_
|
||||
MD
|
||||
end
|
||||
|
||||
it "ignores duplicate italic text" do
|
||||
xml = '<r><I><s>[i]</s><I><s>[i]</s>this is italic text<e>[/i]</e></I><e>[/i]</e></I></r>'
|
||||
expect(convert(xml)).to eq('_this is italic text_')
|
||||
end
|
||||
end
|
||||
|
||||
context "underlined text" do
|
||||
it "converts underlined text" do
|
||||
xml = '<r><U><s>[u]</s>this is underlined text<e>[/u]</e></U></r>'
|
||||
expect(convert(xml)).to eq('[u]this is underlined text[/u]')
|
||||
end
|
||||
|
||||
it "converts multi-line underlined text" do
|
||||
xml = <<~XML
|
||||
<r><U><s>[u]</s>this is underlined text<br/>
|
||||
on two lines<e>[/u]</e></U><br/>
|
||||
<br/>
|
||||
<U><s>[u]</s>this is underlined text<br/>
|
||||
<br/>
|
||||
<br/>
|
||||
with two empty lines<e>[/u]</e></U></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
[u]this is underlined text
|
||||
on two lines[/u]
|
||||
|
||||
[u]this is underlined text\\
|
||||
\\
|
||||
\\
|
||||
with two empty lines[/u]
|
||||
MD
|
||||
end
|
||||
|
||||
it "ignores duplicate underlined text" do
|
||||
xml = '<r><U><s>[u]</s><U><s>[u]</s>this is underlined text<e>[/u]</e></U><e>[/u]</e></U></r>'
|
||||
expect(convert(xml)).to eq('[u]this is underlined text[/u]')
|
||||
end
|
||||
end
|
||||
|
||||
context "code blocks" do
|
||||
context "inline code blocks enabled" do
|
||||
let(:opts) { { allow_inline_code: true } }
|
||||
|
||||
it "converts single line code blocks" do
|
||||
xml = '<r><CODE><s>[code]</s>one line of code<e>[/code]</e></CODE></r>'
|
||||
expect(convert(xml, opts)).to eq('`one line of code`')
|
||||
end
|
||||
end
|
||||
|
||||
context "inline code blocks disabled" do
|
||||
it "converts single line code blocks" do
|
||||
xml = '<r>foo <CODE><s>[code]</s>some code<e>[/code]</e></CODE> bar</r>'
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
foo
|
||||
|
||||
```text
|
||||
some code
|
||||
```
|
||||
|
||||
bar
|
||||
MD
|
||||
end
|
||||
end
|
||||
|
||||
it "converts multi-line code blocks" do
|
||||
xml = <<~XML
|
||||
<r><CODE><s>[code]</s><i>
|
||||
</i> /\_/\
|
||||
( o.o )
|
||||
> ^ <
|
||||
<e>[/code]</e></CODE></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
```text
|
||||
/\_/\
|
||||
( o.o )
|
||||
> ^ <
|
||||
```
|
||||
MD
|
||||
end
|
||||
|
||||
it "adds leading and trailing linebreaks to code blocks" do
|
||||
xml = <<~XML
|
||||
<r>text before code block<br/>
|
||||
|
||||
<CODE><s>[code]</s><i>
|
||||
</i>foo
|
||||
|
||||
bar
|
||||
<e>[/code]</e></CODE>
|
||||
|
||||
text after code block</r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
text before code block
|
||||
|
||||
```text
|
||||
foo
|
||||
|
||||
bar
|
||||
```
|
||||
|
||||
text after code block
|
||||
MD
|
||||
end
|
||||
end
|
||||
|
||||
context "lists" do
|
||||
it "converts unordered lists" do
|
||||
xml = <<~XML
|
||||
<r><LIST><s>[list]</s>
|
||||
<LI><s>[*]</s>Red</LI>
|
||||
<LI><s>[*]</s>Blue</LI>
|
||||
<LI><s>[*]</s>Yellow</LI>
|
||||
<e>[/list]</e></LIST></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
* Red
|
||||
* Blue
|
||||
* Yellow
|
||||
MD
|
||||
end
|
||||
|
||||
it "converts ordered lists" do
|
||||
xml = <<~XML
|
||||
<r><LIST type="decimal"><s>[list=1]</s>
|
||||
<LI><s>[*]</s>Go to the shops</LI>
|
||||
<LI><s>[*]</s>Buy a new computer</LI>
|
||||
<LI><s>[*]</s>Swear at computer when it crashes</LI>
|
||||
<e>[/list]</e></LIST></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
1. Go to the shops
|
||||
2. Buy a new computer
|
||||
3. Swear at computer when it crashes
|
||||
MD
|
||||
end
|
||||
|
||||
it "converts all types of ordered lists into regular ordered lists" do
|
||||
xml = <<~XML
|
||||
<r><LIST type="upper-alpha"><s>[list=A]</s>
|
||||
<LI><s>[*]</s>The first possible answer</LI>
|
||||
<LI><s>[*]</s>The second possible answer</LI>
|
||||
<LI><s>[*]</s>The third possible answer</LI>
|
||||
<e>[/list]</e></LIST></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
1. The first possible answer
|
||||
2. The second possible answer
|
||||
3. The third possible answer
|
||||
MD
|
||||
end
|
||||
|
||||
it "adds leading and trailing linebreaks to lists if needed" do
|
||||
xml = <<~XML
|
||||
<r>foo
|
||||
<LIST><s>[list]</s>
|
||||
<LI><s>[*]</s>Red</LI>
|
||||
<LI><s>[*]</s>Blue</LI>
|
||||
<LI><s>[*]</s>Yellow</LI>
|
||||
<e>[/list]</e></LIST>
|
||||
bar</r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
foo
|
||||
|
||||
* Red
|
||||
* Blue
|
||||
* Yellow
|
||||
|
||||
bar
|
||||
MD
|
||||
end
|
||||
|
||||
it "converts nested lists" do
|
||||
xml = <<~XML
|
||||
<r><LIST><s>[list]</s>
|
||||
<LI><s>[*]</s>Option 1
|
||||
<LIST><s>[list]</s>
|
||||
<LI><s>[*]</s>Option 1.1</LI>
|
||||
<LI><s>[*]</s>Option 1.2</LI>
|
||||
<e>[/list]</e></LIST></LI>
|
||||
<LI><s>[*]</s>Option 2
|
||||
<LIST><s>[list]</s>
|
||||
<LI><s>[*]</s>Option 2.1
|
||||
<LIST type="decimal"><s>[list=1]</s>
|
||||
<LI><s>[*]</s> Red</LI>
|
||||
<LI><s>[*]</s> Blue</LI>
|
||||
<e>[/list]</e></LIST></LI>
|
||||
<LI><s>[*]</s>Option 2.2</LI>
|
||||
<e>[/list]</e></LIST></LI>
|
||||
<e>[/list]</e></LIST></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
* Option 1
|
||||
* Option 1.1
|
||||
* Option 1.2
|
||||
* Option 2
|
||||
* Option 2.1
|
||||
1. Red
|
||||
2. Blue
|
||||
* Option 2.2
|
||||
MD
|
||||
end
|
||||
|
||||
it "handles nested elements and linebreaks in list items" do
|
||||
xml = <<~XML
|
||||
<r><LIST><s>[list]</s><LI><s>[*]</s>some text <B><s>[b]</s><I><s>[i]</s>foo<e>[/i]</e></I><e>[/b]</e></B><br/>
|
||||
or <B><s>[b]</s><I><s>[i]</s>bar<e>[/i]</e></I><e>[/b]</e></B> more text</LI><e>[/list]</e></LIST></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
* some text **_foo_**
|
||||
or **_bar_** more text
|
||||
MD
|
||||
end
|
||||
end
|
||||
|
||||
context "images" do
|
||||
it "converts image" do
|
||||
xml = <<~XML
|
||||
<r><IMG src="https://example.com/foo.png"><s>[img]</s>
|
||||
<URL url="https://example.com/foo.png">
|
||||
<LINK_TEXT text="https://example.com/foo.png">https://example.com/foo.png</LINK_TEXT>
|
||||
</URL><e>[/img]</e></IMG></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq('![](https://example.com/foo.png)')
|
||||
end
|
||||
|
||||
it "converts image with link" do
|
||||
xml = <<~XML
|
||||
<r><URL url="https://example.com/"><s>[url=https://example.com/]</s>
|
||||
<IMG src="https://example.com/foo.png"><s>[img]</s>
|
||||
<LINK_TEXT text="https://example.com/foo.png">https://example.com/foo.png</LINK_TEXT>
|
||||
<e>[/img]</e></IMG><e>[/url]</e></URL></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq('[![](https://example.com/foo.png)](https://example.com/)')
|
||||
end
|
||||
end
|
||||
|
||||
context "links" do
|
||||
it "converts links created without BBCode" do
|
||||
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara">https://en.wikipedia.org/wiki/Capybara</URL></r>'
|
||||
expect(convert(xml)).to eq('https://en.wikipedia.org/wiki/Capybara')
|
||||
end
|
||||
|
||||
it "converts links created with BBCode" do
|
||||
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara"><s>[url]</s>https://en.wikipedia.org/wiki/Capybara<e>[/url]</e></URL></r>'
|
||||
expect(convert(xml)).to eq('https://en.wikipedia.org/wiki/Capybara')
|
||||
end
|
||||
|
||||
it "converts links with link text" do
|
||||
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara"><s>[url=https://en.wikipedia.org/wiki/Capybara]</s>Capybara<e>[/url]</e></URL></r>'
|
||||
expect(convert(xml)).to eq('[Capybara](https://en.wikipedia.org/wiki/Capybara)')
|
||||
end
|
||||
|
||||
it "converts internal links" do
|
||||
opts = {
|
||||
url_replacement: lambda do |url|
|
||||
if url == 'http://forum.example.com/viewtopic.php?f=2&t=2'
|
||||
'https://discuss.example.com/t/welcome-topic/18'
|
||||
end
|
||||
end
|
||||
}
|
||||
|
||||
xml = '<r><URL url="http://forum.example.com/viewtopic.php?f=2&t=2"><LINK_TEXT text="viewtopic.php?f=2&t=2">http://forum.example.com/viewtopic.php?f=2&t=2</LINK_TEXT></URL></r>'
|
||||
expect(convert(xml, opts)).to eq('https://discuss.example.com/t/welcome-topic/18')
|
||||
end
|
||||
|
||||
it "converts email links created without BBCode" do
|
||||
xml = '<r><EMAIL email="foo.bar@example.com">foo.bar@example.com</EMAIL></r>'
|
||||
expect(convert(xml)).to eq('<foo.bar@example.com>')
|
||||
end
|
||||
|
||||
it "converts email links created with BBCode" do
|
||||
xml = '<r><EMAIL email="foo.bar@example.com"><s>[email]</s>foo.bar@example.com<e>[/email]</e></EMAIL></r>'
|
||||
expect(convert(xml)).to eq('<foo.bar@example.com>')
|
||||
end
|
||||
|
||||
it "converts truncated, long links" do
|
||||
xml = <<~XML
|
||||
<r><URL url="http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli">
|
||||
<s>[url]</s><LINK_TEXT text="http://answers.yahoo.com/question/index ... 223AAkkPli">
|
||||
http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli</LINK_TEXT>
|
||||
<e>[/url]</e></URL></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq('http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli')
|
||||
end
|
||||
|
||||
it "converts BBCodes inside link text" do
|
||||
xml = <<~XML
|
||||
<r><URL url="http://example.com"><s>[url=http://example.com]</s>
|
||||
<B><s>[b]</s>Hello <I><s>[i]</s>world<e>[/i]</e></I>!<e>[/b]</e></B>
|
||||
<e>[/url]</e></URL></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq('[**Hello _world_!**](http://example.com)')
|
||||
end
|
||||
end
|
||||
|
||||
context "quotes" do
|
||||
it "converts simple quote" do
|
||||
xml = <<~XML
|
||||
<r><QUOTE><s>[quote]</s>Lorem<br/>
|
||||
ipsum<e>[/quote]</e></QUOTE></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
> Lorem
|
||||
> ipsum
|
||||
MD
|
||||
end
|
||||
|
||||
it "converts quote with line breaks" do
|
||||
xml = <<~XML
|
||||
<r><QUOTE><s>[quote]</s>First paragraph<br/>
|
||||
<br/>
|
||||
Second paragraph<br/>
|
||||
<br/>
|
||||
<br/>
|
||||
Third paragraph<e>[/quote]</e></QUOTE></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
> First paragraph
|
||||
>
|
||||
> Second paragraph
|
||||
> \\
|
||||
> \\
|
||||
> Third paragraph
|
||||
MD
|
||||
end
|
||||
|
||||
it "converts quote with line breaks and nested formatting" do
|
||||
xml = <<~XML
|
||||
<r><QUOTE><s>[quote]</s>
|
||||
<I><s>[i]</s>this is italic<br/>
|
||||
<B><s>[b]</s>and bold<br/>
|
||||
text<br/>
|
||||
<e>[/b]</e></B> on multiple<br/>
|
||||
<br/>
|
||||
<br/>
|
||||
lines<e>[/i]</e></I>
|
||||
<e>[/quote]</e></QUOTE></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
> _this is italic
|
||||
> **and bold
|
||||
> text**
|
||||
> on multiple\\
|
||||
> \\
|
||||
> \\
|
||||
> lines_
|
||||
MD
|
||||
end
|
||||
|
||||
it "converts quote with author attribute" do
|
||||
xml = '<r><QUOTE author="Mr. Blobby"><s>[quote="Mr. Blobby"]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
[quote="Mr. Blobby"]
|
||||
Lorem ipsum
|
||||
[/quote]
|
||||
MD
|
||||
end
|
||||
|
||||
it "converts quote with author attribute and line breaks" do
|
||||
xml = <<~XML
|
||||
<r><QUOTE author="Mr. Blobby"><s>[quote="Mr. Blobby"]</s>First paragraph<br/>
|
||||
<br/>
|
||||
Second paragraph<br/>
|
||||
<br/>
|
||||
Third paragraph<e>[/quote]</e></QUOTE></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml)).to eq(<<~MD.chomp)
|
||||
[quote="Mr. Blobby"]
|
||||
First paragraph
|
||||
|
||||
Second paragraph
|
||||
|
||||
Third paragraph
|
||||
[/quote]
|
||||
MD
|
||||
end
|
||||
|
||||
context "with user_id attribute" do
|
||||
let(:opts) { { username_from_user_id: lambda { |user_id| user_id == 48 ? "mr_blobby" : nil } } }
|
||||
|
||||
it "uses the correct username when the user exists" do
|
||||
xml = '<r><QUOTE author="Mr. Blobby" user_id="48"><s>[quote="Mr. Blobby" user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
|
||||
|
||||
expect(convert(xml, opts)).to eq(<<~MD.chomp)
|
||||
[quote="mr_blobby"]
|
||||
Lorem ipsum
|
||||
[/quote]
|
||||
MD
|
||||
end
|
||||
|
||||
it "uses the author name when the user does not exist" do
|
||||
xml = '<r><QUOTE author="Mr. Blobby" user_id="49"><s>[quote="Mr. Blobby" user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
|
||||
|
||||
expect(convert(xml, opts)).to eq(<<~MD.chomp)
|
||||
[quote="Mr. Blobby"]
|
||||
Lorem ipsum
|
||||
[/quote]
|
||||
MD
|
||||
end
|
||||
|
||||
it "creates a blockquote when the user does not exist and the author is missing" do
|
||||
xml = '<r><QUOTE user_id="49"><s>[quote=user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
|
||||
expect(convert(xml, opts)).to eq("> Lorem ipsum")
|
||||
end
|
||||
end
|
||||
|
||||
context "with post_id attribute" do
|
||||
let(:opts) do
|
||||
{ quoted_post_from_post_id: lambda { |post_id| { username: 'mr_blobby', post_number: 3, topic_id: 951 } if post_id == 43 } }
|
||||
end
|
||||
|
||||
it "uses information from the quoted post if the post exists" do
|
||||
xml = <<~XML
|
||||
<r><QUOTE author="Mr. Blobby" post_id="43" time="1534626128" user_id="48">
|
||||
<s>[quote="Mr. Blobby" post_id=43 time=1534626128 user_id=48]</s>Lorem ipsum<e>[/quote]</e>
|
||||
</QUOTE></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml, opts)).to eq(<<~MD.chomp)
|
||||
[quote="mr_blobby, post:3, topic:951"]
|
||||
Lorem ipsum
|
||||
[/quote]
|
||||
MD
|
||||
end
|
||||
|
||||
it "uses other attributes when post doesn't exist" do
|
||||
xml = <<~XML
|
||||
<r><QUOTE author="Mr. Blobby" post_id="44" time="1534626128" user_id="48">
|
||||
<s>[quote="Mr. Blobby" post_id=44 time=1534626128 user_id=48]</s>Lorem ipsum<e>[/quote]</e>
|
||||
</QUOTE></r>
|
||||
XML
|
||||
|
||||
expect(convert(xml, opts)).to eq(<<~MD.chomp)
|
||||
[quote="Mr. Blobby"]
|
||||
Lorem ipsum
|
||||
[/quote]
|
||||
MD
|
||||
end
|
||||
end
|
||||
|
||||
# Three levels of nested <QUOTE> elements, with inline formatting in the
# innermost one, must come out as three nested [quote] blocks.
it "converts nested quotes" do
  expected_markdown = <<~MD.chomp
    Multiple nested quotes:

    [quote="user3"]
    [quote="user2"]
    [quote="user1"]
    **foo _and_ bar**
    [/quote]

    Lorem ipsum
    [/quote]

    nested quotes
    [/quote]

    Text after quotes.
  MD

  source = <<~XML
    <r>Multiple nested quotes:<br/>

    <QUOTE author="user3">
    <s>[quote=user3]</s>
    <QUOTE author="user2">
    <s>[quote=user2]</s>
    <QUOTE author="user1">
    <s>[quote=user1]</s>
    <B><s>[b]</s>foo <I><s>[i]</s>and<e>[/i]</e></I> bar<e>[/b]</e></B>
    <e>[/quote]</e>
    </QUOTE>

    Lorem ipsum
    <e>[/quote]</e>
    </QUOTE>

    nested quotes
    <e>[/quote]</e>
    </QUOTE>

    Text after quotes.
    </r>
  XML

  expect(convert(source)).to eq(expected_markdown)
end
|
||||
end
|
||||
|
||||
# The smilie_to_emoji callback supplies the emoji for each <E> smilie used
# in this example; smilies it does not know map to nil.
it "converts smilies" do
  emoji_by_smilie = { ':D' => ':smiley:', ':eek:' => ':astonished:' }
  converter_options = { smilie_to_emoji: ->(smilie) { emoji_by_smilie[smilie] } }

  source = '<r><E>:D</E> <E>:eek:</E></r>'
  result = convert(source, converter_options)

  expect(result).to eq(":smiley: :astonished:")
end
|
||||
|
||||
context "attachments" do
  # The attachment indexes deliberately differ from document order: index 1
  # appears first in the markup and index 0 second. The callback stub maps
  # each index to a distinct upload URL so the ordering can be verified.
  it "converts attachments" do
    build_upload_markdown = lambda do |filename, index|
      url =
        case index
        when 0 then "upload://hash2.png"
        when 1 then "upload://hash1.png"
        end

      "![#{filename}|231x231](#{url})"
    end

    source = <<~XML
      <r>Multiple attachments:
      <ATTACHMENT filename="image1.png" index="1"><s>[attachment=1]</s>image1.png<e>[/attachment]</e></ATTACHMENT>
      This is an inline image.<br/>
      <br/>
      And another one:
      <ATTACHMENT filename="image2.png" index="0"><s>[attachment=0]</s>image2.png<e>[/attachment]</e></ATTACHMENT></r>
    XML

    expected_markdown = <<~MD.chomp
      Multiple attachments:
      ![image1.png|231x231](upload://hash1.png)
      This is an inline image.

      And another one:
      ![image2.png|231x231](upload://hash2.png)
    MD

    expect(convert(source, upload_md_from_file: build_upload_markdown)).to eq(expected_markdown)
  end
end
|
||||
|
||||
# Examples covering how runs of <br/> elements are rendered.
# Note: inside the (interpolating) heredocs below, `\\` denotes a single
# backslash in the expected Markdown.
context "line breaks" do
  # With default options, the expected output shows the first empty line as a
  # blank line and each additional empty line as a lone backslash.
  it "converts line breaks" do
    source = <<~XML
      <t>Lorem ipsum dolor sit amet.<br/>
      <br/>
      Consetetur sadipscing elitr.<br/>
      <br/>
      <br/>
      Sed diam nonumy eirmod tempor.<br/>
      <br/>
      <br/>
      <br/>
      <br/>
      Invidunt ut labore et dolore.</t>
    XML

    expected_markdown = <<~MD.chomp
      Lorem ipsum dolor sit amet.

      Consetetur sadipscing elitr.
      \\
      \\
      Sed diam nonumy eirmod tempor.
      \\
      \\
      \\
      \\
      Invidunt ut labore et dolore.
    MD

    expect(convert(source)).to eq(expected_markdown)
  end

  # With `traditional_linebreaks: true`, every <br/> is expected to become a
  # trailing (or lone) backslash instead of a plain newline.
  it "uses hard linebreaks when traditional line breaks are enabled" do
    source = <<~XML
      <t>Lorem ipsum dolor sit amet.<br/>
      Consetetur sadipscing elitr.<br/>
      <br/>
      Sed diam nonumy eirmod tempor.<br/>
      <br/>
      <br/>
      <br/>
      Invidunt ut labore et dolore.</t>
    XML

    expected_markdown = <<~MD.chomp
      Lorem ipsum dolor sit amet.\\
      Consetetur sadipscing elitr.\\
      \\
      Sed diam nonumy eirmod tempor.\\
      \\
      \\
      \\
      Invidunt ut labore et dolore.
    MD

    expect(convert(source, traditional_linebreaks: true)).to eq(expected_markdown)
  end

  # Directly before a code block or list, the last empty line is expected to
  # be rendered as a literal <br> rather than a backslash.
  it "uses <br> in front of block elements" do
    source = <<~XML
      <r>text before 4 empty lines<br/>
      <br/>
      <br/>
      <br/>

      <CODE><s>[code]</s>some code<e>[/code]</e></CODE>
      text before 3 empty lines<br/>
      <br/>
      <br/>

      <LIST><s>[list]</s>
      <LI><s>[*]</s> item 1</LI>
      <LI><s>[*]</s> item 2</LI>
      <e>[/list]</e></LIST>
      text before 2 empty lines<br/>
      <br/>

      <LIST><s>[list]</s>
      <LI><s>[*]</s> item 1</LI>
      <LI><s>[*]</s> item 2</LI>
      <e>[/list]</e></LIST></r>
    XML

    expected_markdown = <<~MD.chomp
      text before 4 empty lines
      \\
      \\
      \\
      <br>
      ```text
      some code
      ```

      text before 3 empty lines
      \\
      \\
      <br>
      * item 1
      * item 2

      text before 2 empty lines
      \\
      <br>
      * item 1
      * item 2
    MD

    expect(convert(source)).to eq(expected_markdown)
  end
end
|
||||
|
||||
context "whitespace" do
  # Whitespace sitting just inside <B>/<I> tags is expected to end up outside
  # the Markdown emphasis markers instead of being dropped.
  it "doesn't strip whitespaces from inline tags" do
    source = <<~XML
      <r>Lorem<B><s>[b]</s> ipsum <e>[/b]</e></B>dolor<br/>
      <I><s>[i]</s> sit <e>[/i]</e></I>amet,<br/>
      consetetur<B><s>[b]</s> sadipscing <e>[/b]</e></B></r>
    XML

    expected_markdown = <<~MD.rstrip
      Lorem **ipsum** dolor
      _sit_ amet,
      consetetur **sadipscing**
    MD

    expect(convert(source)).to eq(expected_markdown)
  end

  # Single spaces separating adjacent inline elements must survive.
  it "preserves whitespace between tags" do
    source = "<r>foo <B><s>[b]</s>bold<e>[/b]</e></B> <I><s>[i]</s>italic<e>[/i]</e></I> <U><s>[u]</s>underlined<e>[/u]</e></U> bar</r>"
    expected_markdown = "foo **bold** _italic_ [u]underlined[/u] bar"

    expect(convert(source)).to eq(expected_markdown)
  end
end
|
||||
|
||||
context "unknown element" do
  # An unrecognised tag contributes only its text content.
  it "converts an unknown element right below the root element" do
    source = '<r><UNKNOWN><s>[unknown]</s>foo<e>[/unknown]</e></UNKNOWN></r>'
    result = convert(source)

    expect(result).to eq('foo')
  end

  # The surrounding known element (<B>) is still converted as usual.
  it "converts an unknown element inside a known element" do
    source = '<r><B><s>[b]</s><UNKNOWN><s>[unknown]</s>bar<e>[/unknown]</e></UNKNOWN><e>[/b]</e></B></r>'
    result = convert(source)

    expect(result).to eq('**bar**')
  end
end
|
||||
|
||||
context "font size" do
  # In this example sizes 50 and 85 are expected as <small>, and sizes 150
  # and 200 as <big>.
  it "converts sizes to either <small> or <big>" do
    source = <<~XML
      <r><SIZE size="50"><s>[size=50]</s>very small<e>[/size]</e></SIZE><br/>
      <SIZE size="85"><s>[size=85]</s>small<e>[/size]</e></SIZE><br/>
      <SIZE size="150"><s>[size=150]</s>large<e>[/size]</e></SIZE><br/>
      <SIZE size="200"><s>[size=200]</s>very large<e>[/size]</e></SIZE></r>
    XML

    expected_markdown = <<~MD.rstrip
      <small>very small</small>
      <small>small</small>
      <big>large</big>
      <big>very large</big>
    MD

    expect(convert(source)).to eq(expected_markdown)
  end

  # Negative, zero, 300, non-numeric and missing sizes are all expected to
  # leave the text without any size markup.
  it "ignores invalid sizes" do
    source = <<~XML
      <r><SIZE size="-50"><s>[size=-50]</s>negative number<e>[/size]</e></SIZE><br/>
      <SIZE size="0"><s>[size=0]</s>zero<e>[/size]</e></SIZE><br/>
      <SIZE size="300"><s>[size=300]</s>too large<e>[/size]</e></SIZE><br/>
      <SIZE size="abc"><s>[size=abc]</s>not a number<e>[/size]</e></SIZE><br/>
      <SIZE><s>[size]</s>no size<e>[/size]</e></SIZE></r>
    XML

    expected_markdown = <<~MD.rstrip
      negative number
      zero
      too large
      not a number
      no size
    MD

    expect(convert(source)).to eq(expected_markdown)
  end
end
|
||||
end
|
Loading…
Reference in New Issue
Block a user