diff --git a/script/import_scripts/smf2.rb b/script/import_scripts/smf2.rb index 37994719c59..8eec5f210fb 100644 --- a/script/import_scripts/smf2.rb +++ b/script/import_scripts/smf2.rb @@ -54,16 +54,19 @@ class ImportScripts::Smf2 < ImportScripts::Base options.password = HighLine.new.ask('') {|q| q.echo = false } end - @db = Mysql2::Client.new(host: options.host, username: options.username, - password: options.password, database: options.database) + @default_db_connection = create_db_connection end def execute + authorized_extensions = SiteSetting.authorized_extensions + SiteSetting.authorized_extensions = "*" import_groups import_users import_categories import_posts postprocess_posts + ensure + SiteSetting.authorized_extensions = authorized_extensions end def import_groups @@ -227,12 +230,16 @@ class ImportScripts::Smf2 < ImportScripts::Base print "\r#{spinner.next}" end + db2 = create_db_connection + create_posts(query(<<-SQL), total: total) do |message| SELECT m.id_msg, m.id_topic, m.id_member, m.poster_time, m.body, o.ignore_quotes, - m.subject, t.id_board, t.id_first_msg + m.subject, t.id_board, t.id_first_msg, COUNT(a.id_attach) AS attachment_count FROM {prefix}messages AS m LEFT JOIN {prefix}import_message_order AS o ON o.message_id = m.id_msg LEFT JOIN {prefix}topics AS t ON t.id_topic = m.id_topic + LEFT JOIN {prefix}attachments AS a ON a.id_msg = m.id_msg AND a.attachment_type = 0 + GROUP BY m.id_msg ORDER BY o.message_order ASC SQL skip = false @@ -240,7 +247,6 @@ class ImportScripts::Smf2 < ImportScripts::Base post = { id: message[:id_msg], user_id: user_id_from_imported_user_id(message[:id_member]) || -1, - raw: convert_message_body(message[:body], ignore_quotes: ignore_quotes), created_at: Time.zone.at(message[:poster_time]), post_create_action: ignore_quotes && proc do |post| post.custom_fields['import_rebake'] = 't' @@ -259,10 +265,29 @@ class ImportScripts::Smf2 < ImportScripts::Base skip = true end end - skip ? 
nil : post + next nil if skip + + attachments = message[:attachment_count] == 0 ? [] : query(<<-SQL, connection: db2, as: :array) + SELECT id_attach, file_hash, filename FROM {prefix}attachments + WHERE attachment_type = 0 AND id_msg = #{message[:id_msg]} + ORDER BY id_attach ASC + SQL + attachments.map! {|a| import_attachment(post, a) rescue (puts $! ; nil) } + post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes) + next post end end + def import_attachment(post, attachment) + path = find_smf_attachment_path(attachment[:id_attach], attachment[:file_hash], attachment[:filename]) + raise "Attachment for post #{post[:id]} failed: #{attachment[:filename]}" unless path.present? + upload = create_upload(post[:user_id], path, attachment[:filename]) + raise "Attachment for post #{post[:id]} failed: #{upload.errors.full_messages.join(', ')}" unless upload.persisted? + return upload + rescue SystemCallError => err + raise "Attachment for post #{post[:id]} failed: #{err.message}" + end + def postprocess_posts puts '', 'rebaking posts' @@ -284,20 +309,38 @@ class ImportScripts::Smf2 < ImportScripts::Base private - def query(sql, **opts, &block) - return __query(sql).to_a if opts[:as] == :array - return __query(sql, as: :array).first[0] if opts[:as] == :single - return __query(sql, stream: true).each(&block) if block_given? - return __query(sql, stream: true) + def create_db_connection + Mysql2::Client.new(host: options.host, username: options.username, + password: options.password, database: options.database) end - def __query(sql, **opts) - @db.query(sql.gsub('{prefix}', options.prefix), + def query(sql, **opts, &block) + db = opts[:connection] || @default_db_connection + return __query(db, sql).to_a if opts[:as] == :array + return __query(db, sql, as: :array).first[0] if opts[:as] == :single + return __query(db, sql, stream: true).each(&block) if block_given? 
+ return __query(db, sql, stream: true) + end + + def __query(db, sql, **opts) + db.query(sql.gsub('{prefix}', options.prefix), {symbolize_keys: true, cache_rows: false}.merge(opts)) end + TRTR_TABLE = begin + from = "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ" + to = "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy" + from.chars.zip(to.chars) + end + def find_smf_attachment_path(attachment_id, file_hash, filename) - [ filename, "#{attachment_id}_#{file_hash}" ] + cleaned_name = filename.dup + TRTR_TABLE.each {|from,to| cleaned_name.gsub!(from, to) } + cleaned_name.gsub!(/\s/, '_') + cleaned_name.gsub!(/[^\w_\.\-]/, '') + legacy_name = "#{attachment_id}_#{cleaned_name.gsub('.', '_')}#{Digest::MD5.hexdigest(cleaned_name)}" + + [ filename, "#{attachment_id}_#{file_hash}", legacy_name ] .map {|name| File.join(options.smfroot, 'attachments', name) } .detect {|file| File.exists?(file) } end @@ -319,10 +362,57 @@ class ImportScripts::Smf2 < ImportScripts::Base s.lines.each {|l| r << '[li]' << l.strip.sub(/^\[x\]\s*/, '') << '[/li]' } r << "[/ul]\n" end - # TODO: attachments + + if attachments.present? + use_count = Hash.new(0) + AttachmentPatterns.each do |p| + pattern, emitter = *p + body.gsub!(pattern) do |s| + next s unless (num = $~[:num].to_i - 1) >= 0 + next s unless (upload = attachments[num]).present? + use_count[num] += 1 + instance_exec(upload, &emitter) + end + end + if use_count.keys.length < attachments.select(&:present?).length + body << "\n\n---" + attachments.each_with_index do |upload, num| + if upload.present? and use_count[num] == 0 + body << ( "\n\n" + get_upload_markdown(upload) ) + end + end + end + end + return opts[:ignore_quotes] ? 
body : convert_quotes(body) end + def v8 + @ctx ||= begin + ctx = PrettyText.create_new_context + PrettyText.decorate_context(ctx) + # provides toHumanSize but restores I18n.t which we need to fix again + ctx.load(Rails.root + "app/assets/javascripts/locales/i18n.js") + helper = PrettyText::Helpers.new + ctx['I18n']['t'] = proc {|_,key,opts| helper.t(key, opts) } + # from i18n_helpers.js -- can't load it directly because Ember is missing + ctx.eval(<<-'end') + var oldI18ntoHumanSize = I18n.toHumanSize; + I18n.toHumanSize = function(number, options) { + options = options || {}; + options.format = I18n.t("number.human.storage_units.format"); + return oldI18ntoHumanSize.apply(this, [number, options]); + }; + end + ctx + end + end + + def get_upload_markdown(upload) + @func ||= v8.eval("Discourse.Utilities.getUploadMarkdown") + return @func.call(upload).to_s + end + def convert_quotes(body) body.to_s.gsub(QuotePattern) do |s| inner = $~[:inner].strip @@ -398,6 +488,11 @@ class ImportScripts::Smf2 < ImportScripts::Base QuotePattern = build_nested_tag_regex('quote') ColorPattern = build_nested_tag_regex('color') ListPattern = build_nested_tag_regex('list') + AttachmentPatterns = [ + [/^\[attach(?:|img|url|mini)=(?<num>\d+)\]$/, ->(u) { "\n"+get_upload_markdown(u)+"\n" }], + [/\[attach(?:|img|url|mini)=(?<num>\d+)\]/, ->(u) { get_upload_markdown(u) }] + ] + # Provides command line options and parses the SMF settings file. class Options @@ -405,12 +500,6 @@ class ImportScripts::Smf2 < ImportScripts::Base class Error < StandardError ; end class SettingsError < Error ; end - def initialize - self.host = 'localhost' - self.username = Etc.getlogin - self.prefix = 'smf_' - end - def parse!(args = ARGV) raise Error, 'not enough arguments' if ARGV.empty?
begin @@ -421,6 +510,10 @@ class ImportScripts::Smf2 < ImportScripts::Base raise Error, 'too many arguments' if args.length > 1 self.smfroot = args.first read_smf_settings if self.smfroot + + self.host ||= 'localhost' + self.username ||= Etc.getlogin + self.prefix ||= 'smf_' end def usage