# frozen_string_literal: true

# getsatisfaction importer
#
# pre-req: You will either get an Excel or a bunch of CSV files. Be sure to rename them all so that
#
# - users.csv is the users table export
# - replies.csv is the reply table export
# - topics.csv is the topics table export
# - categories.csv is the categories table export
# - topics_categories.csv is the mapping between the topics and categories table
#
# Make sure that the CSV files use UTF-8 encoding, have consistent line endings and use comma as column separator.
# That's usually the case when you export Excel sheets as CSV.
# When you get MalformedCSVError during the import, try converting the line endings of the CSV into the Unix format.
# Mixed line endings in CSV files can create weird errors!
#
# You need to call fix_quotes_in_csv() for CSV files that use \" to escape quotes within quoted fields.
# The import script expects quotes to be escaped with "".
#
# It's likely that some posts in replies.csv aren't in the correct order. Currently the import script doesn't handle
# that correctly and will import the replies in the wrong order.
# You should run `rake posts:reorder_posts` after the import.

require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require "reverse_markdown" # gem 'reverse_markdown'

# Call it like this:
#   RAILS_ENV=production bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME
class ImportScripts::GetSatisfaction < ImportScripts::Base
  # Topics with a non-blank "archived_at" column are skipped unless this is true.
  IMPORT_ARCHIVED_TOPICS = false

  # The script classifies each topic as private when at least one associated category
  # in "topics_categories.csv" is unknown (not included in "categories.csv").
  IMPORT_PRIVATE_TOPICS = false

  # Should permalinks be created for the imported topics?
  # Make sure you configure OLD_DOMAIN if you enable this.
  CREATE_PERMALINKS = true

  # Replace "http://community.example.com/" with the URL of your community for permalinks
  OLD_DOMAIN = "http://community.example.com/"

  # Number of CSV rows collected before a batch is handed to the importer.
  BATCH_SIZE = 1000

  # @param path [String] directory that contains the exported CSV files
  def initialize(path)
    @path = path
    super()
    @bbcode_to_md = true

    @topic_slug = {} # import topic id => value of the "url" column
    @topic_categories = {} # CSV topic id => { category_ids:, has_unknown_category: }
    @skipped_topics = Set.new # import topic ids whose replies must be skipped too
  end

  # Runs the import steps in order.
  # NOTE(review): presumably invoked by ImportScripts::Base#perform - confirm in base.rb.
  def execute
    # TODO Remove the call to fix_quotes_in_csv() if your replies.csv uses the double quotes ("").
    #      That's usually the case when you exported the file from Excel.
    fix_quotes_in_csv("replies")

    import_users
    import_categories
    import_topics
    import_posts

    create_permalinks if CREATE_PERMALINKS
  end

  # Builds the path of the CSV file for a table.
  #
  # @param table_name [String] name of the table, without file extension
  # @param use_fixed [Boolean] when true, "<table_name>_fixed.csv" is preferred if that file exists
  # @return [String] path inside the import directory
  def csv_filename(table_name, use_fixed: true)
    if use_fixed
      fixed_filename = File.join(@path, "#{table_name}_fixed.csv")
      return fixed_filename if File.exist?(fixed_filename)
    end

    File.join(@path, "#{table_name}.csv")
  end

  # Writes "<table_name>_fixed.csv" for each given table, converting backslash-escaped
  # quotes into doubled quotes.
  #
  # @param table_names [Array<String>] tables whose CSV files should be rewritten
  def fix_quotes_in_csv(*table_names)
    puts "", "fixing CSV files"

    table_names.each do |table_name|
      source_filename = csv_filename(table_name, use_fixed: false)
      target_filename = csv_filename("#{table_name}_fixed", use_fixed: false)

      previous_line = nil

      File.open(target_filename, "w") do |file|
        # File.foreach closes the source file when it is done; the previous
        # File.open(...).each_line left the handle open.
        File.foreach(source_filename) do |line|
          # \" becomes "" unless the backslash itself is escaped by a preceding single backslash
          line.gsub!(/(?<![^\\]\\)\\"/, '""')
          # an escaped backslash becomes a single backslash
          line.gsub!(/\\\\/, '\\')

          if previous_line
            # re-join a record that was held back; the line break is restored
            # unless the continuation starts with a comma
            previous_line << "\n" unless line.starts_with?(",")
            line = "#{previous_line}#{line}"
            previous_line = nil
          end

          # A line ending in `,+1\` has that field quoted and its line break removed;
          # it is held back so that it gets merged with the following line.
          if line.gsub!(/,\+1\\\R$/m, ',"+1"').present?
            previous_line = line
          else
            file.puts(line)
          end
        end

        file.puts(previous_line) if previous_line
      end
    end
  end

  # Yields every row of a table's CSV file. Headers are converted to symbols
  # and blank lines are skipped.
  #
  # @param table_name [String]
  # @yieldparam row [CSV::Row]
  def csv_parse(table_name)
    options = { headers: true, header_converters: :symbol, skip_blanks: true, encoding: "bom|utf-8" }

    CSV.foreach(csv_filename(table_name), **options) { |row| yield row }
  end

  # @param table_name [String]
  # @return [Integer] number of data rows (header and blank lines excluded) in the table's CSV file
  def total_rows(table_name)
    CSV.foreach(
      csv_filename(table_name),
      headers: true,
      skip_blanks: true,
      encoding: "bom|utf-8",
    ).count
  end

  # Reads users.csv and imports the users in batches of BATCH_SIZE rows.
  def import_users
    puts "", "creating users"

    count = 0
    users = []
    total = total_rows("users")

    csv_parse("users") do |row|
      users << {
        id: row[:user_id],
        email: row[:email],
        name: row[:realname],
        username: row[:nickname],
        created_at: DateTime.parse(row[:joined_date]),
        active: true,
      }

      count += 1
      import_users_batch!(users, count - users.length, total) if count % BATCH_SIZE == 0
    end

    # import whatever is left over after the last full batch
    import_users_batch!(users, count - users.length, total)
  end

  # Hands the collected users to create_users and empties the array afterwards.
  #
  # @param users [Array<Hash>] collected user attributes; cleared by this method
  # @param offset [Integer] number of rows handled before this batch
  # @param total [Integer] total number of rows
  def import_users_batch!(users, offset, total)
    return if users.empty?

    create_users(users, offset: offset, total: total) { |user| user }
    users.clear
  end

  # Reads categories.csv and imports all categories in one go.
  def import_categories
    puts "", "creating categories"

    rows = []

    csv_parse("categories") do |row|
      description = row[:description].present? ? normalize_raw!(row[:description]) : nil
      rows << { id: row[:category_id], name: row[:name], description: description }
    end

    create_categories(rows) { |row| row }
  end

  # Topics get a "T" prefix as import id; replies use their plain reply id.
  #
  # @param topic_id [String] topic id as found in the CSV files
  # @return [String]
  def import_topic_id(topic_id)
    "T#{topic_id}"
  end

  # Reads topics.csv and imports the topics (as first posts) in batches of BATCH_SIZE rows.
  # Skipped topics are remembered in @skipped_topics.
  def import_topics
    read_topic_categories

    puts "", "creating topics"

    count = 0
    topics = []
    total = total_rows("topics")

    csv_parse("topics") do |row|
      topic = nil
      topic_id = import_topic_id(row[:topic_id])

      if skip_topic?(row)
        @skipped_topics.add(topic_id)
      else
        topic = map_post(row)
        topic[:id] = topic_id
        topic[:title] = row[:subject].present? ? row[:subject].strip[0...255] : "Topic title missing"
        topic[:category] = category_id(row)
        topic[:archived] = row[:archived_at].present?

        @topic_slug[topic[:id]] = row[:url] if CREATE_PERMALINKS
      end

      # skipped topics are added as nil so that every CSV row is counted in the batch
      topics << topic
      count += 1
      import_topics_batch!(topics, count - topics.length, total) if count % BATCH_SIZE == 0
    end

    import_topics_batch!(topics, count - topics.length, total)
  end

  # @param row [CSV::Row] row of topics.csv
  # @return [Boolean] true when the topic is removed, archived (and archived topics aren't
  #   imported) or private (and private topics aren't imported)
  def skip_topic?(row)
    return true if row[:removed] == "1"
    return true if !IMPORT_ARCHIVED_TOPICS && row[:archived_at].present?

    unless IMPORT_PRIVATE_TOPICS
      categories = @topic_categories[row[:topic_id]]
      return true if categories && categories[:has_unknown_category]
    end

    false
  end

  # @param row [CSV::Row] row of topics.csv
  # @return [Integer] the last known category mapped to the topic, or the uncategorized
  #   category when the topic has no known category
  def category_id(row)
    categories = @topic_categories[row[:topic_id]]

    # category_ids is empty when every mapped category is unknown; fall back
    # instead of returning nil in that case
    (categories && categories[:category_ids].last) || SiteSetting.uncategorized_category_id
  end

  # Reads topics_categories.csv into @topic_categories. A mapping whose category
  # wasn't imported marks the topic as having an unknown category.
  def read_topic_categories
    puts "", "reading topic_categories"

    count = 0
    total = total_rows("topics_categories")

    csv_parse("topics_categories") do |row|
      topic_id = row[:topic_id]
      imported_category_id = category_id_from_imported_category_id(row[:category_id])

      entry = (@topic_categories[topic_id] ||= { category_ids: [], has_unknown_category: false })

      if imported_category_id.nil?
        entry[:has_unknown_category] = true
      else
        entry[:category_ids] << imported_category_id
      end

      count += 1
      print_status(count, total)
    end
  end

  # Hands the collected topics to create_posts and empties the array afterwards.
  #
  # @param topics [Array<Hash, nil>] collected topics (nil for skipped rows); cleared by this method
  # @param offset [Integer] number of rows handled before this batch
  # @param total [Integer] total number of rows
  def import_topics_batch!(topics, offset, total)
    return if topics.empty?

    create_posts(topics, total: total, offset: offset) { |topic| topic }
    topics.clear
  end

  # Reads replies.csv and imports the replies in batches of BATCH_SIZE rows.
  def import_posts
    puts "", "creating posts"

    count = 0
    posts = []
    total = total_rows("replies")

    csv_parse("replies") do |row|
      post = nil

      if row[:removed] != "1"
        parent = topic_lookup_from_imported_post_id(row[:parent_id]) if row[:parent_id] != "NULL"

        post = map_post(row)
        post[:id] = row[:reply_id]
        post[:topic_id] = import_topic_id(row[:topic_id])
        post[:reply_to_post_number] = parent[:post_number] if parent
      end

      # removed replies are added as nil so that every CSV row is counted in the batch
      posts << post
      count += 1
      import_posts_batch!(posts, count - posts.length, total) if count % BATCH_SIZE == 0
    end

    import_posts_batch!(posts, count - posts.length, total)
  end

  # Hands the collected replies to create_posts and empties the array afterwards.
  # Replies that are nil, belong to a skipped topic or whose topic can't be found
  # are not imported.
  #
  # @param posts [Array<Hash, nil>] collected replies; cleared by this method
  # @param offset [Integer] number of rows handled before this batch
  # @param total [Integer] total number of rows
  def import_posts_batch!(posts, offset, total)
    return if posts.empty?

    create_posts(posts, total: total, offset: offset) do |post|
      next if post.nil? || @skipped_topics.include?(post[:topic_id])

      topic = topic_lookup_from_imported_post_id(post[:topic_id])

      if topic
        # replace the import id of the topic with the id returned by the lookup
        post[:topic_id] = topic[:topic_id]
      else
        p "MISSING TOPIC #{post[:topic_id]}"
        p post
        next
      end

      post
    end

    posts.clear
  end

  # Builds the attributes shared by topics and replies.
  #
  # @param row [CSV::Row] row of topics.csv or replies.csv
  # @return [Hash] :user_id (the system user when the author is unknown), :created_at and :raw
  def map_post(row)
    {
      user_id: user_id_from_imported_user_id(row[:user_id]) || Discourse.system_user.id,
      created_at: DateTime.parse(row[:created_at]),
      raw: normalize_raw!(row[:formatted_content]),
    }
  end

  # Converts exported HTML content into Markdown.
  #
  # @param raw [String, nil] exported content; the argument itself is not modified
  # @return [String] converted content, or "<missing>" when the content is blank
  def normalize_raw!(raw)
    return "<missing>" if raw.blank?

    raw = raw.dup

    # hoist code: every <code> element is replaced by a random placeholder so that
    # the whitespace handling below doesn't touch it
    hoisted = {}
    raw.gsub!(%r{(<pre>\s*)?<code>(.*?)</code>(\s*</pre>)?}mi) do
      code = Regexp.last_match(2)
      hoist = SecureRandom.hex

      # tidy code, wow, this is impressively crazy
      # NOTE(review): this pattern matches a single space followed by optional whitespace.
      # Verify that the number of spaces is intended and wasn't collapsed at some point.
      code.gsub!(/ (\s*)/, "\n\\1")
      code.gsub!(/^\s*\n$/, "\n")
      code.gsub!(/\n+/m, "\n")
      code.strip!

      hoisted[hoist] = code
      hoist
    end

    # impressive seems to be using triple space as a <p> unless hoisted
    # in this case double space works best ... so odd
    # NOTE(review): the literal below is a single space, which doesn't match the comment
    # above. Verify the intended number of spaces.
    raw.gsub!(" ", "\n\n")

    # put the hoisted code back as fenced code blocks
    hoisted.each { |hoist, code| raw.gsub!(hoist, "\n```\n#{code}\n```\n") }

    raw = CGI.unescapeHTML(raw)
    ReverseMarkdown.convert(raw)
  end

  # Creates a permalink for every listable topic whose first post has an import id
  # with a known URL. OLD_DOMAIN is removed from the URL.
  def create_permalinks
    puts "", "Creating Permalinks...", ""

    Topic.listable_topics.find_each do |topic|
      # a topic without first post can't be matched to an imported topic
      custom_fields = topic.first_post&.custom_fields
      import_id = custom_fields && custom_fields["import_id"]
      next unless import_id

      old_url = @topic_slug[import_id]
      next unless old_url

      slug = old_url.gsub(OLD_DOMAIN, "")
      Permalink.find_or_create_by(url: slug, topic_id: topic.id) if slug.present?
    end
  end
end

unless ARGV[0] && Dir.exist?(ARGV[0])
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME", ""
  exit 1
end

ImportScripts::GetSatisfaction.new(ARGV[0]).perform