From f490ed3bbc93b47e17592e433ed8c273b677a7cd Mon Sep 17 00:00:00 2001 From: Edmond Lepedus Date: Fri, 17 May 2019 07:18:28 +0100 Subject: [PATCH] FEATURE: Add attachment support to xenforo importer (#7548) * FEATURE: Add attachment support to XenForo importer If `ATTACHMENT_DIR` is provided, importer will scan each imported post for `[GALLERY]` and `[ATTACH]` tags, attempt to import the referenced files as Discourse uploads and replace the tags with Discourse markup. References to files which cannot be imported are stripped. NOTE: This only imports attachments which are referenced in imported posts. Any XenForo media or files which are not referenced in any post using `[ATTACH]` or `[GALLERY]` tags will not be imported. The goal is to ensure that we don't have posts with missing images and unsightly markup, NOT to ensure that all attachments are migrated. * FEATURE: Add attachment support to XenForo importer If `ATTACHMENT_DIR` is provided, importer will scan each imported post for `[GALLERY]` and `[ATTACH]` tags, attempt to import the referenced files as Discourse uploads and replace the tags with Discourse markup. References to files which cannot be imported are stripped. NOTE: This only imports attachments which are referenced in imported posts. Any XenForo media or files which are not referenced in any post using `[ATTACH]` or `[GALLERY]` tags will not be imported. The goal is to ensure that we don't have posts with missing images and unsightly markup, NOT to ensure that all attachments are migrated. * FEATURE: Add attachment support to XenForo importer If `ATTACHMENT_DIR` is provided, importer will scan each imported post for `[GALLERY]` and `[ATTACH]` tags, attempt to import the referenced files as Discourse uploads and replace the tags with Discourse markup. References to files which cannot be imported are stripped. NOTE: This only imports attachments which are referenced in imported posts. 
Any XenForo media or files which are not referenced in any post using `[ATTACH]` or `[GALLERY]` tags will not be imported.
The goal is to ensure that we don't have posts with missing images and unsightly markup, NOT to ensure that all attachments are migrated.
---
 script/import_scripts/xenforo.rb | 125 +++++++++++++++++++++++++++++++-
 1 file changed, 124 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 script/import_scripts/xenforo.rb

diff --git a/script/import_scripts/xenforo.rb b/script/import_scripts/xenforo.rb
old mode 100644
new mode 100755
index 78114d73209..ff07c26c391
--- a/script/import_scripts/xenforo.rb
+++ b/script/import_scripts/xenforo.rb
@@ -11,10 +11,10 @@ class ImportScripts::XenForo < ImportScripts::Base
   XENFORO_DB = "xenforo_db"
   TABLE_PREFIX = "xf_"
   BATCH_SIZE = 1000
+  ATTACHMENT_DIR = '/tmp/attachments'
 
   def initialize
     super
-
     @client = Mysql2::Client.new(
       host: "localhost",
       username: "root",
@@ -307,9 +307,132 @@ class ImportScripts::XenForo < ImportScripts::Base
     s.gsub!(/\[color=[#a-z0-9]+\]/i, "")
     s.gsub!(/\[\/color\]/i, "")
 
+    if Dir.exist? ATTACHMENT_DIR
+      s = process_xf_attachments(:gallery, s)
+      s = process_xf_attachments(:attachment, s)
+    end
+
     s
   end
 
+  # Replaces every XenForo tag of the given kind in a post with Discourse
+  # upload markup.
+  #
+  # xf_type - :gallery for [GALLERY=media, <id>] tags or :attachment for
+  #           [ATTACH]<id>[/ATTACH] tags.
+  # s       - raw post text; it is modified in place.
+  #
+  # Tags whose id has no row in the source database are stripped. Tags whose
+  # file cannot be imported are left untouched and a message is logged.
+  #
+  # Returns s.
+  def process_xf_attachments(xf_type, s)
+    ids = s.scan(get_xf_regexp(xf_type)).map { |match| match[0].to_i }.uniq
+
+    ids.each do |id|
+      tag_regexp = get_xf_regexp(xf_type, id)
+      # Read the row once and reuse it rather than walking the result set twice.
+      result = mysql_query(get_xf_sql(xf_type, id).squish).first
+
+      if result.nil?
+        # Strip attachment
+        s.gsub!(tag_regexp, '')
+        STDERR.puts "#{xf_type.capitalize} id #{id} not found in source database. Stripping."
+        next
+      end
+
+      original_filename = result['filename']
+      upload = import_xf_attachment(result['data_id'], result['file_hash'], result['user_id'], original_filename)
+      # nil means the file was missing; import_xf_attachment has already logged it.
+      next unless upload
+
+      if upload.persisted?
+        html = @uploader.html_for_upload(upload, original_filename)
+        # Block form inserts the markup literally; a string replacement would
+        # expand backslash sequences (e.g. "\1") coming from the file name.
+        s.gsub!(tag_regexp) { html }
+      else
+        STDERR.puts "Could not process upload: #{original_filename}. Skipping attachment id #{id}"
+      end
+    end
+
+    s
+  end
+
+  # Imports the file behind one XenForo attachment as a Discourse upload.
+  #
+  # The file is read from
+  # ATTACHMENT_DIR/<data_id / 1000>/<data_id>-<file_hash>.data and copied next
+  # to itself under its original name for the duration of the upload.
+  #
+  # Returns whatever @uploader.create_upload returns, or nil when the source
+  # file does not exist.
+  def import_xf_attachment(data_id, file_hash, owner_id, original_filename)
+    current_filename = "#{data_id}-#{file_hash}.data"
+    path = Pathname.new(ATTACHMENT_DIR + "/#{data_id / 1000}/#{current_filename}")
+
+    unless File.exist? path
+      STDERR.puts "Could not find file #{path}. Skipping attachment id #{data_id}"
+      return nil
+    end
+
+    # The name comes from the source forum's database: keep only its last
+    # component so the temporary copy cannot be written outside path.dirname.
+    new_path = path.dirname + File.basename(original_filename)
+    begin
+      FileUtils.cp path, new_path
+      @uploader.create_upload owner_id, new_path, original_filename
+    ensure
+      # Remove the temporary copy even when the upload raises.
+      FileUtils.rm_f new_path
+    end
+  end
+
+  # Builds the regexp that matches a XenForo tag of the given type.
+  #
+  # Without an id, the numeric id is captured in group 1; with an id, only the
+  # tag carrying exactly that id matches.
+  #
+  # Raises ArgumentError for a type other than :gallery or :attachment.
+  def get_xf_regexp(type, id = nil)
+    id_pattern = id || '(\d+)'
+
+    case type
+    when :gallery
+      /\[GALLERY=media,\s#{id_pattern}\].+?\]/i
+    when :attachment
+      /\[ATTACH(?>=\w+)?\]#{id_pattern}\[\/ATTACH\]/i
+    else
+      raise ArgumentError, "Unknown XenForo attachment type: #{type.inspect}"
+    end
+  end
+
+  # Returns the SQL that looks up the attachment data (data_id, filename,
+  # file_hash, user_id) for a gallery media id or an attachment id.
+  #
+  # Raises ArgumentError for a type other than :gallery or :attachment.
+  def get_xf_sql(type, id)
+    case type
+    when :gallery
+      <<-SQL
+        SELECT m.media_id, m.media_title, a.attachment_id, a.data_id, d.filename, d.file_hash,d.user_id
+        FROM xengallery_media as m
+        INNER JOIN #{TABLE_PREFIX}attachment a on m.attachment_id = a.attachment_id
+        INNER JOIN #{TABLE_PREFIX}attachment_data d on a.data_id = d.data_id
+        WHERE media_id = #{id}
+      SQL
+    when :attachment
+      <<-SQL
+        SELECT a.attachment_id, a.data_id, d.filename, d.file_hash, d.user_id
+        FROM #{TABLE_PREFIX}attachment AS a
+        INNER JOIN #{TABLE_PREFIX}attachment_data d ON a.data_id = d.data_id
+        WHERE attachment_id = #{id}
+      SQL
+    else
+      raise ArgumentError, "Unknown XenForo attachment type: #{type.inspect}"
+    end
+  end
+
   def mysql_query(sql)
     @client.query(sql, cache_rows: false)
   end