From 08e10c2a9d41947e60886e974b00021a33843c93 Mon Sep 17 00:00:00 2001
From: Neil Lalonde
Date: Thu, 3 Dec 2015 10:12:06 -0500
Subject: [PATCH] FEATURE: Zoho importer

---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit message):
 * Line structure restored; the patch had been collapsed onto a few lines.
   Blank context lines and context indentation in the base.rb hunks were
   placed to satisfy the hunk line counts -- verify against the target tree
   before applying.
 * base.rb: hunks are unchanged. create_category now registers the new
   category in @lookup itself, and update_user_signup_date_based_on_first_post
   sets each user's created_at to the created_at of their earliest post.
 * csv_helper.rb: requires 'csv' itself, reads with File.foreach so the file
   is closed, skips blank physical lines, and ends with a newline.
 * zoho.rb: cleanup_zoho_username accepts nil (CSV.parse returns nil for an
   empty unquoted field), requires 'digest/sha1', unused local removed,
   stale TODO replaced.
 * The "index" blob ids of the two new files are those of the original
   commit and no longer match the contents below.

 script/import_scripts/base.rb            |  20 ++-
 script/import_scripts/base/csv_helper.rb |  95 ++++++++++++
 script/import_scripts/zoho.rb            | 183 ++++++++++++++++++++++++
 3 files changed, 297 insertions(+), 1 deletion(-)
 create mode 100644 script/import_scripts/base/csv_helper.rb
 create mode 100644 script/import_scripts/zoho.rb

diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb
index 1632757cac4..b91296edafa 100644
--- a/script/import_scripts/base.rb
+++ b/script/import_scripts/base.rb
@@ -366,7 +366,6 @@ class ImportScripts::Base
         end
 
         new_category = create_category(params, params[:id])
-        @lookup.add_category(params[:id], new_category)
 
         created += 1
       end
@@ -396,6 +395,8 @@ class ImportScripts::Base
     new_category.custom_fields["import_id"] = import_id if import_id
     new_category.save!
 
+    @lookup.add_category(import_id, new_category)
+
     post_create_action.try(:call, new_category)
 
     new_category
@@ -645,6 +646,23 @@ class ImportScripts::Base
     end
   end
 
+  def update_user_signup_date_based_on_first_post
+    puts "", "setting users' signup date based on the date of their first post"
+
+    total_count = User.count
+    progress_count = 0
+
+    User.find_each do |user|
+      first = user.posts.order('created_at ASC').first
+      if first
+        user.created_at = first.created_at
+        user.save!
+      end
+      progress_count += 1
+      print_status(progress_count, total_count)
+    end
+  end
+
   def html_for_upload(upload, display_filename)
     @uploader.html_for_upload(upload, display_filename)
   end
diff --git a/script/import_scripts/base/csv_helper.rb b/script/import_scripts/base/csv_helper.rb
new file mode 100644
index 00000000000..c505ce71015
--- /dev/null
+++ b/script/import_scripts/base/csv_helper.rb
@@ -0,0 +1,95 @@
+require 'csv'
+
+module ImportScripts
+  # Helpers for reading CSV exports whose quoted fields may span several
+  # physical lines.
+  module CsvHelper
+    # Exposes the cells of one parsed CSV row through reader methods named
+    # after the header columns: each name is lower-cased and every run of
+    # non-word characters becomes one underscore ("Forum Name" -> forum_name).
+    class RowResolver
+      # Replaces the row whose cells the column readers return.
+      def load(row)
+        @row = row
+      end
+
+      # Instantiates an anonymous subclass, so the readers are defined on
+      # that subclass rather than on RowResolver itself.
+      def self.create(cols)
+        Class.new(RowResolver).new(cols)
+      end
+
+      # cols is the header row; one reader is defined per column, returning
+      # the cell at that column's index in the currently loaded row.
+      def initialize(cols)
+        cols.each_with_index do |col,idx|
+          self.class.send(:define_method, col.downcase.gsub(/[\W]/, '_').squeeze('_')) do
+            @row[idx]
+          end
+        end
+      end
+    end
+
+    # Reads filename and yields one RowResolver per data row. The first
+    # parsed row is used as the header and is not yielded. The same resolver
+    # object is reloaded and yielded for every row.
+    def csv_parse(filename, col_sep = ',')
+      first = true
+      row = nil
+
+      current_row = ""
+      double_quote_count = 0
+
+      # File.foreach closes the file when iteration ends;
+      # File.open(filename).each_line left the handle open.
+      File.foreach(filename) do |line|
+
+        line.strip!
+
+        current_row << "\n" unless current_row.empty?
+        current_row << line
+
+        double_quote_count += line.scan('"').count
+
+        next if double_quote_count % 2 == 1 # this row continues on a new line. don't parse until we have the whole row.
+
+        raw = begin
+          CSV.parse(current_row, col_sep: col_sep)
+        rescue CSV::MalformedCSVError => e
+          puts e.message
+          puts "*" * 100
+          puts "Bad row skipped, line is: #{line}"
+          puts
+          puts current_row
+          puts
+          puts "double quote count is : #{double_quote_count}"
+          puts "*" * 100
+
+          current_row = ""
+          double_quote_count = 0
+
+          next
+        end[0]
+
+        # A blank physical line between rows parses to no row at all; skip it.
+        next if raw.nil?
+
+        if first
+          row = RowResolver.create(raw)
+
+          current_row = ""
+          double_quote_count = 0
+          first = false
+          next
+        end
+
+        row.load(raw)
+
+        yield row
+
+        current_row = ""
+        double_quote_count = 0
+      end
+    end
+  end
+end
diff --git a/script/import_scripts/zoho.rb b/script/import_scripts/zoho.rb
new file mode 100644
index 00000000000..9c92637747f
--- /dev/null
+++ b/script/import_scripts/zoho.rb
@@ -0,0 +1,183 @@
+# Import from Zoho.
+# Be sure to get the posts CSV file, AND the user list csv file with people's email addresses.
+# You may need to contact Zoho support for the user list.
+#
+# * Zoho data doesn't indicate which users are admins or moderators, so you'll need to grant
+#   those privileges manually after the import finishes.
+# * The posts and users csv files don't seem to have consistent usernames, and sometimes use
+#   full names instead of usernames. This may cause duplicate users with slightly different
+#   usernames to be created.
+
+require 'csv'
+require 'digest/sha1'
+require File.expand_path(File.dirname(__FILE__) + "/base.rb")
+require File.expand_path(File.dirname(__FILE__) + "/base/csv_helper.rb")
+
+# Call it like this, where DIRNAME is the directory holding users.csv and posts.csv:
+#   bundle exec ruby script/import_scripts/zoho.rb DIRNAME
+class ImportScripts::Zoho < ImportScripts::Base
+
+  include ImportScripts::CsvHelper
+
+  BATCH_SIZE = 1000
+
+  # path is the directory that contains users.csv and posts.csv.
+  def initialize(path)
+    @path = path
+    @all_posts = []
+    @categories = {} # key is the parent category, value is an array of sub-categories
+    @topic_mapping = {} # key is a topic permalink, value is the id of the topic created for it
+    @current_row = nil # row currently being imported; read by created_post
+    super()
+  end
+
+  # Runs the import steps in order.
+  def execute
+    import_users
+    import_posts
+    update_tl0
+    update_user_signup_date_based_on_first_post
+  end
+
+  # Strips surrounding whitespace and removes every character other than
+  # ASCII letters, digits, "_", "." and "-". nil is treated as an empty
+  # string, because CSV.parse returns nil for an empty unquoted field.
+  def cleanup_zoho_username(s)
+    s.to_s.strip.gsub(/[^A-Za-z0-9_\.\-]/, '')
+  end
+
+  # Creates one user per row of users.csv (column 0: username, column 1: email).
+  # NOTE(review): every parsed row is imported, including a header row if the
+  # export has one -- confirm against a real users.csv.
+  def import_users
+    puts "", "Importing users"
+    create_users( CSV.parse(File.read(File.join(@path, 'users.csv'))) ) do |u|
+      username = cleanup_zoho_username(u[0])
+      {
+        id: username,
+        username: username,
+        email: u[1],
+        # Placeholder: execute later runs update_user_signup_date_based_on_first_post.
+        created_at: Time.zone.now
+      }
+    end
+  end
+
+  # Reads posts.csv, creates one category per forum plus its sub-categories,
+  # then builds a topic for the first post seen with each permalink and a
+  # post carrying that topic's id for every later post with the same permalink.
+  def import_posts
+    # 0 Forum Name
+    # 1 Category Name
+    # 2 Topic Title
+    # 3 Permalink
+    # 4 Posted Time
+    # 5 Content
+    # 6 Author
+    # 7 Attachments
+    # 8 Votes
+
+    puts "", "Parsing posts CSV"
+
+    csv_parse(File.join(@path, "posts.csv")) do |row|
+      # csv_parse reloads and yields the same row object, so store a copy.
+      @all_posts << row.dup
+
+      names = (@categories[row.forum_name] ||= [])
+      names << row.category_name unless names.include?(row.category_name)
+    end
+
+    puts "", "Creating categories"
+
+    # Create categories
+    @categories.each do |parent, subcats|
+      c = create_category({name: parent}, parent)
+      subcats.each do |subcat|
+        next if subcat == "Uncategorized" || subcat == "Uncategorised"
+        create_category({name: subcat, parent_category_id: c.id}, "#{parent}:#{subcat}")
+      end
+    end
+
+    puts "", "Creating topics and posts"
+
+    created, skipped = create_posts(@all_posts, total: @all_posts.size) do |row|
+      @current_row = row
+
+      # fetch user
+      username = cleanup_zoho_username(row.author)
+
+      next if username.blank? # no author for this post, so skip
+
+      user_id = user_id_from_imported_user_id(username)
+
+      if user_id.nil?
+        # user CSV file didn't have a user with this username. create it now with an invalid email address.
+        u = create_user(
+          { id: username,
+            username: username,
+            email: "#{username}@example.com",
+            created_at: Time.zone.parse(row.posted_time) },
+          username
+        )
+        user_id = u.id
+      end
+
+      if @topic_mapping[row.permalink].nil?
+        category_id = nil
+        if row.category_name != "Uncategorized" && row.category_name != "Uncategorised"
+          category_id = category_id_from_imported_category_id("#{row.forum_name}:#{row.category_name}")
+        else
+          category_id = category_id_from_imported_category_id(row.forum_name)
+        end
+
+        # create topic
+        {
+          id: import_post_id(row),
+          user_id: user_id,
+          category: category_id,
+          title: row.topic_title,
+          raw: row.content,
+          created_at: Time.zone.parse(row.posted_time)
+        }
+        # created_post callback will be called
+      else
+        {
+          id: import_post_id(row),
+          user_id: user_id,
+          raw: row.content,
+          created_at: Time.zone.parse(row.posted_time),
+          topic_id: @topic_mapping[row.permalink]
+        }
+      end
+    end
+
+    puts ""
+    puts "Created: #{created}"
+    puts "Skipped: #{skipped}"
+    puts ""
+  end
+
+  # Remembers the topic created for the current row's permalink; import_posts
+  # passes that id as topic_id for later posts with the same permalink.
+  def created_post(post)
+    @topic_mapping[@current_row.permalink] ||= post.topic_id
+  end
+
+  def import_post_id(row)
+    # Try to make up a unique id based on the data Zoho gives us.
+    # The posted_time seems to be the same for all posts in a topic, so we can't use that.
+    Digest::SHA1.hexdigest "#{row.permalink}:#{row.content}"
+  end
+
+end
+
+unless ARGV[0] && Dir.exist?(ARGV[0])
+  if ARGV[0] && !Dir.exist?(ARGV[0])
+    puts "", "ERROR! Dir #{ARGV[0]} not found.", ""
+  end
+
+  puts "", "Usage:", "", " bundle exec ruby script/import_scripts/zoho.rb DIRNAME", ""
+  exit 1
+end
+
+ImportScripts::Zoho.new(ARGV[0]).perform