# coding: utf-8 # frozen_string_literal: true require "mysql2" require File.expand_path(File.dirname(__FILE__) + "/base.rb") require "htmlentities" begin require "reverse_markdown" # https://github.com/jqr/php-serialize rescue LoadError puts puts "reverse_markdown not found." puts "Add to Gemfile, like this: " puts puts "echo gem \\'reverse_markdown\\' >> Gemfile" puts "bundle install" exit end # Before running this script, paste these lines into your shell, # then use arrow keys to edit the values =begin export DB_HOST="localhost" export DB_NAME="ipboard" export DB_PW="ipboard" export DB_USER="ipboard" export TABLE_PREFIX="ipb_" export IMPORT_AFTER="1970-01-01" export UPLOADS="http://example.com/uploads" export URL="http://example.com/" export AVATARS_DIR="/imports/avatars/" export USERDIR="user" =end class ImportScripts::IpboardSQL < ImportScripts::Base DB_HOST = ENV["DB_HOST"] || "localhost" DB_NAME = ENV["DB_NAME"] || "ipboard" DB_PW = ENV["DB_PW"] || "ipboard" DB_USER = ENV["DB_USER"] || "ipboard" TABLE_PREFIX = ENV["TABLE_PREFIX"] || "ipb_" IMPORT_AFTER = ENV["IMPORT_AFTER"] || "1970-01-01" UPLOADS = ENV["UPLOADS"] || "http://UPLOADS+LOCATION+IS+NOT+SET/uploads" USERDIR = ENV["USERDIR"] || "user" URL = ENV["URL"] || "https://forum.example.com" AVATARS_DIR = ENV["AVATARS_DIR"] || "/home/pfaffman/data/example.com/avatars/" BATCH_SIZE = 1000 ID_FIRST = true QUIET = true DEBUG = false GALLERY_CAT_ID = 1_234_567 GALLERY_CAT_NAME = "galeria" EMO_DIR = ENV["EMO_DIR"] || "default" OLD_FORMAT = false if OLD_FORMAT MEMBERS_TABLE = "#{TABLE_PREFIX}core_members" FORUMS_TABLE = "#{TABLE_PREFIX}forums_forums" POSTS_TABLE = "#{TABLE_PREFIX}forums_posts" TOPICS_TABLE = "#{TABLE_PREFIX}forums_topics" else MEMBERS_TABLE = "#{TABLE_PREFIX}members" FORUMS_TABLE = "#{TABLE_PREFIX}forums" POSTS_TABLE = "#{TABLE_PREFIX}posts" TOPICS_TABLE = "#{TABLE_PREFIX}topics" GROUPS_TABLE = "#{TABLE_PREFIX}groups" PROFILE_TABLE = "#{TABLE_PREFIX}profile_portal" ATTACHMENT_TABLE = "#{TABLE_PREFIX}attachments" end # TODO: replace ipb_ with TABLE_PREFIX ################# # Site settings # ################# # don't send any emails SiteSetting.disable_emails = "non-staff" # don't send digests (so you can enable email without users noticing) SiteSetting.disable_digest_emails = true # keep site and users private SiteSetting.login_required = true SiteSetting.hide_user_profiles_from_public = true # if site is made available, don't let it get indexed SiteSetting.allow_index_in_robots_txt = false # don't notify users when images in their posts get downloaded SiteSetting.disable_system_edit_notifications = true # SiteSetting.force_hostname='forum.dev1dev.com' SiteSetting.title = "IPB Import" if ID_FIRST # TODO figure this out puts "WARNING: permalink_normalizations not set!!!" sleep 1 #raw = "[ORIGINAL POST](#{URL}/topic/#{id}-#{slug})\n\n" + raw #SiteSetting.permalink_normalizations='/topic/(.*t)\?.*/\1' else # remove stuff after a "?" and work for urls that end in .html SiteSetting.permalink_normalizations = '/(.*t)[?.].*/\1' #raw = "[ORIGINAL POST](#{URL}/#{slug}-#{id}t)\n\n" + raw end def initialize print_warning("Importing data after #{IMPORT_AFTER}") if IMPORT_AFTER > "1970-01-01" super @htmlentities = HTMLEntities.new begin @client = Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME) rescue Exception => e puts "=" * 50 puts e.message puts <<~TEXT Cannot log in to database. Hostname: #{DB_HOST} Username: #{DB_USER} Password: #{DB_PW} database: #{DB_NAME} You should set these variables: export DB_HOST="localhost" export DB_NAME="ipboard" export DB_PW="ipboard" export DB_USER="ipboard" export TABLE_PREFIX="ipb_" export IMPORT_AFTER="1970-01-01" export URL="http://example.com" export UPLOADS= export USERDIR="user" Exiting. TEXT exit end end def execute import_users import_categories import_topics import_posts import_private_messages # not supported import_image_categories # NOT SUPPORTED import_gallery_topics update_tl0 create_permalinks end def import_users puts "", "creating users" total_count = mysql_query( "SELECT count(*) count FROM #{MEMBERS_TABLE} WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'));", ).first[ "count" ] batches(BATCH_SIZE) do |offset| #notes: no location, url, results = mysql_query( " SELECT member_id id, name username, member_group_id usergroup, email, pp_thumb_photo avatar_url, # pp_main_photo avatar_url, # avatar_location avatar_url, # TODO consider joining ibf_profile_portal.avatar_location and avatar_type FROM_UNIXTIME(joined) created_at, FROM_UNIXTIME(last_activity) last_seen_at, ip_address registration_ip_address, member_banned banned, bday_year, bday_month, bday_day, g_title member_type, last_visit last_seen_at FROM #{MEMBERS_TABLE}, #{PROFILE_TABLE}, #{GROUPS_TABLE} WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) AND member_id=pp_member_id AND member_group_id = g_id order by member_id ASC LIMIT #{BATCH_SIZE} OFFSET #{offset};", ) break if results.size < 1 next if all_records_exist? :users, results.map { |u| u["id"].to_i } create_users(results, total: total_count, offset: offset) do |user| next if user["email"].blank? next if user["username"].blank? next if @lookup.user_id_from_imported_user_id(user["id"]) birthday = begin Date.parse("#{user["bday_year"]}-#{user["bday_month"]}-#{user["bday_day"]}") rescue StandardError nil end # TODO: what about timezones? next if user["id"] == 0 { id: user["id"], email: user["email"], username: user["username"], avatar_url: user["avatar_url"], title: user["member_type"], created_at: user["created_at"] == nil ? 0 : Time.zone.at(user["created_at"]), # bio_raw: user['bio_raw'], registration_ip_address: user["registration_ip_address"], # birthday: birthday, last_seen_at: user["last_seen_at"] == nil ? 0 : Time.zone.at(user["last_seen_at"]), admin: /^Admin/.match(user["member_type"]) ? true : false, moderator: /^MOD/.match(user["member_type"]) ? true : false, post_create_action: proc do |newuser| if user["avatar_url"] && user["avatar_url"].length > 0 photo_path = AVATARS_DIR + user["avatar_url"] if File.exist?(photo_path) begin upload = create_upload(newuser.id, photo_path, File.basename(photo_path)) if upload && upload.persisted? newuser.import_mode = false newuser.create_user_avatar newuser.import_mode = true newuser.user_avatar.update(custom_upload_id: upload.id) newuser.update(uploaded_avatar_id: upload.id) else puts "Error: Upload did not persist for #{photo_path}!" end rescue SystemCallError => err puts "Could not import avatar #{photo_path}: #{err.message}" end else puts "avatar file not found at #{photo_path}" end end suspend_user(newuser) if user["banned"] != 0 end, } end end end def suspend_user(user) user.suspended_at = Time.now user.suspended_till = 200.years.from_now ban_reason = "Account deactivated by administrator" user_option = user.user_option user_option.email_digests = false user_option.email_level = UserOption.email_level_types[:never] user_option.email_messages_level = UserOption.email_level_types[:never] user_option.save! if user.save StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason) else puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}" end end def file_full_path(relpath) File.join JSON_FILES_DIR, relpath.split("?").first end def import_image_categories puts "", "importing image categories..." categories = mysql_query( " SELECT category_id id, category_name_seo name, category_parent_id as parent_id FROM #{TABLE_PREFIX}gallery_categories ORDER BY id ASC ", ).to_a category_names = mysql_query( " SELECT DISTINCT word_key, word_default title FROM #{TABLE_PREFIX}core_sys_lang_words where word_app='gallery' AND word_key REGEXP 'gallery_category_[0-9]+$' ORDER BY word_key ASC ", ).to_a cat_map = {} puts "Creating gallery_cat_map" category_names.each do |name| title = name["title"] word_key = name["word_key"] puts "Processing #{word_key}: #{title}" id = word_key.gsub("gallery_category_", "") next if cat_map[id] cat_map[id] = cat_map.has_value?(title) ? title + " " + id : title puts "#{id} => #{cat_map[id]}" end params = { id: GALLERY_CAT_ID, name: GALLERY_CAT_NAME } create_category(params, params[:id]) create_categories(categories) do |category| id = (category["id"]).to_s name = CGI.unescapeHTML(cat_map[id]) { id: id + "gal", name: name, parent_category_id: @lookup.category_id_from_imported_category_id(GALLERY_CAT_ID), color: random_category_color, } end end def import_categories puts "", "importing categories..." categories = mysql_query( " SELECT id, name name, parent_id as parent_id FROM #{FORUMS_TABLE} ORDER BY parent_id ASC ", ).to_a top_level_categories = categories.select { |c| c["parent.id"] == -1 } create_categories(top_level_categories) do |category| id = category["id"].to_s name = category["name"] { id: id, name: name } end children_categories = categories.select { |c| c["parent.id"] != -1 } create_categories(children_categories) do |category| id = category["id"].to_s name = category["name"] { id: id, name: name, parent_category_id: @lookup.category_id_from_imported_category_id(category["parent_id"]), color: random_category_color, } end end def import_topics puts "", "importing topics..." total_count = mysql_query( "SELECT count(*) count FROM #{POSTS_TABLE} WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) AND new_topic=1;", ).first[ "count" ] batches(BATCH_SIZE) do |offset| discussions = mysql_query(<<-SQL) SELECT #{TOPICS_TABLE}.tid tid, #{TOPICS_TABLE}.forum_id category, #{POSTS_TABLE}.pid pid, #{TOPICS_TABLE}.title title, #{TOPICS_TABLE}.pinned pinned, #{POSTS_TABLE}.post raw, #{TOPICS_TABLE}.title_seo as slug, FROM_UNIXTIME(#{POSTS_TABLE}.post_date) created_at, #{POSTS_TABLE}.author_id user_id FROM #{POSTS_TABLE}, #{TOPICS_TABLE} WHERE #{POSTS_TABLE}.topic_id = #{TOPICS_TABLE}.tid AND #{POSTS_TABLE}.new_topic = 1 AND #{POSTS_TABLE}.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) ORDER BY #{POSTS_TABLE}.post_date ASC LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if discussions.size < 1 next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t["tid"].to_s } create_posts(discussions, total: total_count, offset: offset) do |discussion| slug = discussion["slug"] id = discussion["tid"] raw = clean_up(discussion["raw"]) { id: "discussion#" + discussion["tid"].to_s, user_id: user_id_from_imported_user_id(discussion["user_id"]) || Discourse::SYSTEM_USER_ID, title: CGI.unescapeHTML(discussion["title"]), category: category_id_from_imported_category_id(discussion["category"].to_s), raw: raw, pinned_at: discussion["pinned"].to_i == 1 ? Time.zone.at(discussion["created_at"]) : nil, created_at: Time.zone.at(discussion["created_at"]), } end end end def array_from_members_string(invited_members = "a:3:{i:0;i:22629;i:1;i:21837;i:2;i:22234;}") out = [] count_regex = /a:(\d)+:/ count = count_regex.match(invited_members)[1] rest = invited_members.sub(count_regex, "") i_regex = /i:\d+;i:(\d+);/ while m = i_regex.match(rest) i = m[1] rest.sub!(i_regex, "") puts "i: #{i}, #{rest}" out += [i.to_i] end out end def import_private_messages puts "", "importing private messages..." topic_count = mysql_query("SELECT COUNT(msg_id) count FROM #{TABLE_PREFIX}message_posts").first["count"] last_private_message_topic_id = -1 batches(BATCH_SIZE) do |offset| private_messages = mysql_query(<<-SQL) SELECT msg_id pmtextid, msg_topic_id topic_id, msg_author_id fromuserid, mt_title title, msg_post message, mt_invited_members touserarray, mt_to_member_id to_user_id, msg_is_first_post first_post, msg_date dateline FROM #{TABLE_PREFIX}message_topics, #{TABLE_PREFIX}message_posts WHERE msg_topic_id = mt_id AND msg_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) ORDER BY msg_topic_id, msg_id LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL puts "Processing #{private_messages.count} messages" break if private_messages.count < 1 puts "Processing . . . " private_messages = private_messages.reject { |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") } title_username_of_pm_first_post = {} create_posts(private_messages, total: topic_count, offset: offset) do |m| skip = false mapped = {} mapped[:id] = "pm-#{m["pmtextid"]}" mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) || Discourse::SYSTEM_USER_ID mapped[:raw] = begin clean_up(m["message"]) rescue StandardError nil end mapped[:created_at] = Time.zone.at(m["dateline"]) title = @htmlentities.decode(m["title"]).strip[0...255] topic_id = nil next if mapped[:raw].blank? # users who are part of this private message. target_usernames = [] target_userids = [] begin to_user_array = [m["to_user_id"]] + array_from_members_string(m["touserarray"]) rescue StandardError puts "#{m["pmtextid"]} -- #{m["touserarray"]}" skip = true end begin to_user_array.each do |to_user| user_id = user_id_from_imported_user_id(to_user) username = User.find_by(id: user_id).try(:username) target_userids << user_id || Discourse::SYSTEM_USER_ID target_usernames << username if username if user_id puts "Found user: #{to_user} -- #{user_id} -- #{username}" else puts "Can't find user: #{to_user}" end end rescue StandardError puts "skipping pm-#{m["pmtextid"]} `to_user_array` is broken -- #{to_user_array.inspect}" skip = true end participants = target_userids participants << mapped[:user_id] begin participants.sort! rescue StandardError puts "one of the participant's id is nil -- #{participants.inspect}" end if last_private_message_topic_id != m["topic_id"] last_private_message_topic_id = m["topic_id"] unless QUIET puts "New message: #{m["topic_id"]}: #{title} from #{m["fromuserid"]} (#{mapped[:user_id]})" end # topic post message topic_id = m["topic_id"] mapped[:title] = title mapped[:archetype] = Archetype.private_message mapped[:target_usernames] = target_usernames.join(",") if mapped[:target_usernames].size < 1 # pm with yourself? # skip = true mapped[:target_usernames] = "system" puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})" end else # reply topic_id = topic_lookup_from_imported_post_id("pm-#{topic_id}") skip = true if !topic_id mapped[:topic_id] = topic_id unless QUIET puts "Reply message #{topic_id}: #{m["topic_id"]}: from #{m["fromuserid"]} (#{mapped[:user_id]})" end end # puts "#{target_usernames} -- #{mapped[:target_usernames]}" # puts "Adding #{mapped}" skip ? nil : mapped # puts "#{'-'*50}> added" end end end def import_gallery_topics # pfaffman: I'm not clear whether this is an IPBoard thing or from some other system puts "", "importing gallery albums..." gallery_count = 0 total_count = mysql_query( "SELECT count(*) count FROM #{TABLE_PREFIX}gallery_images ;", ).first[ "count" ] # NOTE: for imports with huge numbers of galleries, this needs to use limits batches(BATCH_SIZE) do |offset| # galleries = mysql_query(<<-SQL # SELECT #{TABLE_PREFIX}gallery_albums.album_id tid, # #{TABLE_PREFIX}gallery_albums.album_category_id category, # #{TABLE_PREFIX}gallery_albums.album_owner_id user_id, # #{TABLE_PREFIX}gallery_albums.album_name title, # #{TABLE_PREFIX}gallery_albums.album_description raw, # #{TABLE_PREFIX}gallery_albums.album_type, # FROM_UNIXTIME(#{TABLE_PREFIX}gallery_albums.album_last_img_date) created_at # FROM #{TABLE_PREFIX}gallery_albums # ORDER BY #{TABLE_PREFIX}gallery_albums.album_id ASC # SQL # ) images = mysql_query(<<-SQL) SELECT #{TABLE_PREFIX}gallery_albums.album_id tid, #{TABLE_PREFIX}gallery_albums.album_category_id category, #{TABLE_PREFIX}gallery_albums.album_owner_id user_id, #{TABLE_PREFIX}gallery_albums.album_name title, #{TABLE_PREFIX}gallery_albums.album_description raw, #{TABLE_PREFIX}gallery_albums.album_type, #{TABLE_PREFIX}gallery_images.image_caption caption, #{TABLE_PREFIX}gallery_images.image_description description, #{TABLE_PREFIX}gallery_images.image_masked_file_name masked, #{TABLE_PREFIX}gallery_images.image_id image_id, #{TABLE_PREFIX}gallery_images.image_medium_file_name medium, #{TABLE_PREFIX}gallery_images.image_original_file_name orig, FROM_UNIXTIME(#{TABLE_PREFIX}gallery_albums.album_last_img_date) created_at, #{TABLE_PREFIX}gallery_images.image_file_name filename FROM #{TABLE_PREFIX}gallery_albums, #{TABLE_PREFIX}gallery_images WHERE #{TABLE_PREFIX}gallery_images.image_album_id=#{TABLE_PREFIX}gallery_albums.album_id ORDER BY #{TABLE_PREFIX}gallery_albums.album_id, image_date DESC LIMIT #{BATCH_SIZE} OFFSET #{offset}; SQL break if images.size < 1 if all_records_exist? :posts, images.map { |t| "gallery#" + t["tid"].to_s + t["image_id"].to_s } next end last_id = images.first["tid"] raw = "Gallery ID: #{last_id}\n" + clean_up(images.first["raw"]) raw += "#{clean_up(images.first["description"])}\n" last_gallery = images.first.dup create_posts(images, total: total_count, offset: offset) do |gallery| id = gallery["tid"].to_i #puts "ID: #{id}, last_id: #{last_id}, image: #{gallery['image_id']}" if id == last_id raw += "### #{gallery["caption"]}\n" raw += "#{UPLOADS}/#{gallery["orig"]}\n" last_gallery = gallery.dup next else insert_raw = raw.dup last_id = gallery["tid"] if DEBUG raw = "Gallery ID: #{last_id}\n" + clean_up(gallery["raw"]) raw += "Cat: #{last_gallery["category"]} - #{category_id_from_imported_category_id(last_gallery["category"].to_s + "gal")}" end raw += "#{clean_up(images.first["description"])}\n" raw += "### #{gallery["caption"]}\n" raw += "User #{gallery["user_id"]}, image_id: #{gallery["image_id"]}\n" if DEBUG raw += "#{UPLOADS}/#{gallery["orig"]}\n" gallery_count += 1 unless QUIET puts "#{gallery_count}--Cat: #{last_gallery["category"]} ==> #{category_id_from_imported_category_id(last_gallery["category"].to_s + "gal")}" end { id: "gallery#" + last_gallery["tid"].to_s + last_gallery["image_id"].to_s, user_id: user_id_from_imported_user_id(last_gallery["user_id"]) || Discourse::SYSTEM_USER_ID, title: CGI.unescapeHTML(last_gallery["title"]), category: category_id_from_imported_category_id(last_gallery["category"].to_s + "gal"), raw: insert_raw, } end end end end # TODO: use this to figure out to pin posts def map_first_post(row, mapped) mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id]) mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255] mapped[:pinned_at] = mapped[:created_at] unless row[:topic_type] == Constants::POST_NORMAL mapped[:pinned_globally] = row[:topic_type] == Constants::POST_GLOBAL mapped[:post_create_action] = proc do |post| @permalink_importer.create_for_topic(post.topic, row[:topic_id]) end mapped end def import_comments puts "", "importing gallery comments..." total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}gallery_comments;").first["count"] batches(BATCH_SIZE) do |offset| comments = mysql_query(<<-SQL) SELECT #{TABLE_PREFIX}gallery_comments.tid tid, #{TABLE_PREFIX}gallery_topics.forum_id category, #{TABLE_PREFIX}gallery_posts.pid pid, #{TABLE_PREFIX}gallery_topics.title title, #{TABLE_PREFIX}gallery_posts.post raw, FROM_UNIXTIME(#{TABLE_PREFIX}gallery_posts.post_date) created_at, #{TABLE_PREFIX}gallery_posts.author_id user_id FROM #{TABLE_PREFIX}gallery_posts, #{TABLE_PREFIX}gallery_topics WHERE #{TABLE_PREFIX}gallery_posts.topic_id = #{TABLE_PREFIX}gallery_topics.tid AND #{TABLE_PREFIX}gallery_posts.new_topic = 0 AND #{TABLE_PREFIX}gallery_posts.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) ORDER BY #{TABLE_PREFIX}gallery_posts.post_date ASC LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if comments.size < 1 next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment["pid"].to_s } create_posts(comments, total: total_count, offset: offset) do |comment| next unless t = topic_lookup_from_imported_post_id("discussion#" + comment["tid"].to_s) next if comment["raw"].blank? { id: "comment#" + comment["pid"].to_s, user_id: user_id_from_imported_user_id(comment["user_id"]) || Discourse::SYSTEM_USER_ID, topic_id: t[:topic_id], raw: clean_up(comment["raw"]), created_at: Time.zone.at(comment["created_at"]), } end end end def import_posts puts "", "importing posts..." total_count = mysql_query( "SELECT count(*) count FROM #{POSTS_TABLE} WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) AND new_topic=0;", ).first[ "count" ] batches(BATCH_SIZE) do |offset| comments = mysql_query(<<-SQL) SELECT #{TOPICS_TABLE}.tid tid, #{TOPICS_TABLE}.forum_id category, #{POSTS_TABLE}.pid pid, #{TOPICS_TABLE}.title title, #{POSTS_TABLE}.post raw, FROM_UNIXTIME(#{POSTS_TABLE}.post_date) created_at, #{POSTS_TABLE}.author_id user_id FROM #{POSTS_TABLE}, #{TOPICS_TABLE} WHERE #{POSTS_TABLE}.topic_id = #{TOPICS_TABLE}.tid AND #{POSTS_TABLE}.new_topic = 0 AND #{POSTS_TABLE}.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d')) ORDER BY #{POSTS_TABLE}.post_date ASC LIMIT #{BATCH_SIZE} OFFSET #{offset} SQL break if comments.size < 1 next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment["pid"].to_s } create_posts(comments, total: total_count, offset: offset) do |comment| next unless t = topic_lookup_from_imported_post_id("discussion#" + comment["tid"].to_s) next if comment["raw"].blank? { id: "comment#" + comment["pid"].to_s, user_id: user_id_from_imported_user_id(comment["user_id"]) || Discourse::SYSTEM_USER_ID, topic_id: t[:topic_id], raw: clean_up(comment["raw"]), created_at: Time.zone.at(comment["created_at"]), } end end end def nokogiri_fix_blockquotes(raw) # this makes proper quotes with user/topic/post references. # I'm not clear if it is for just some bizarre imported data, or it might ever be useful # It should be integrated into the Nokogiri section of clean_up, though. @doc = Nokogiri.XML("" + raw + "") # handle
s with links to original post @doc .css("blockquote[class=ipsQuote]") .each do |b| # puts "\n#{'#'*50}\n#{b}\n\nCONTENT: #{b['data-ipsquote-contentid']}" # b.options = Nokogiri::XML::ParseOptions::STRICT imported_post_id = b["data-ipsquote-contentcommentid"].to_s content_type = b["data-ipsquote-contenttype"].to_s content_class = b["data-ipsquote-contentclass"].to_s content_id = b["data-ipsquote-contentid"].to_s || b["data-cid"].to_s topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id) post = topic_lookup ? topic_lookup[:post_number] : nil topic = topic_lookup ? topic_lookup[:topic_id] : nil post ||= post_lookup ? post_lookup[:post_number] : nil topic ||= post_lookup ? post_lookup[:topic_id] : nil # TODO: consider:# consider:# TODO make sure it's the imported username # TODO: do _s still get \-escaped? ips_username = b["data-ipsquote-username"] || b["data-author"] username = ips_username new_text = "" if DEBUG # new_text += "post: #{imported_post_id} --> #{post_lookup} --> |#{post}|
\n" # new_text += "topic: #{content_id} --> #{topic_lookup} --> |#{topic}|
\n" # new_text += "user: #{ips_username} --> |#{username}|
\n" # new_text += "class: #{content_class}
\n" # new_text += "type: #{content_type}
\n" if content_class.length > 0 && content_class != "forums_Topic" new_text += "UNEXPECTED CONTENT CLASS! #{content_class}
\n" end if content_type.length > 0 && content_type != "forums" new_text += "UNEXPECTED CONTENT TYPE! #{content_type}
\n" end # puts "#{'-'*20} and NOWWWWW!!!! \n #{new_text}" end if post && topic && username quote = "\n[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n" else if username && username.length > 1 quote = "\n[quote=\"#{username}\"]\n\n" else quote = "\n[quote]\n" end # new_doc = Nokogiri::XML("#{new_text}") end puts "QUOTE: #{quote}" sleep 1 b.content = quote + b.content + "\n[/quote]\n" b.name = "div" end raw = @doc.to_html end def clean_up(raw) return "" if raw.blank? raw.gsub!(/<#EMO_DIR#>/, EMO_DIR) # TODO what about uploads? # raw.gsub!(//,UPLOADS) raw.gsub!(/
/, "\n\n") raw.gsub!(%r{
}, "\n\n") raw.gsub!(%r{}, "\n\n") raw.gsub!(/\[hr\]/, "\n***\n") raw.gsub!(/'/, "'") raw.gsub!(%r{\[url="(.+?)"\]http.+?\[/url\]}, "\\1\n") raw.gsub!(%r{\[media\](.+?)\[/media\]}, "\n\\1\n\n") raw.gsub!(%r{\[php\](.+?)\[/php\]}m) do |m| "\n\n```php\n\n" + @htmlentities.decode($1.gsub(/\n\n/, "\n")) + "\n\n```\n\n" end raw.gsub!(%r{\[code\](.+?)\[/code\]}m) do |m| "\n\n```\n\n" + @htmlentities.decode($1.gsub(/\n\n/, "\n")) + "\n\n```\n\n" end raw.gsub!(%r{\[list\](.+?)\[/list\]}m) { |m| "\n" + $1.gsub(/\[\*\]/, "\n- ") + "\n\n" } raw.gsub!(/\[quote\]/, "\n[quote]\n") raw.gsub!(%r{\[/quote\]}, "\n[/quote]\n") raw.gsub!(/date=\'(.+?)\'/, "") raw.gsub!(/timestamp=\'(.+?)\' /, "") quote_regex = /\[quote name=\'(.+?)\'\s+post=\'(\d+?)\'\s*\]/ while quote = quote_regex.match(raw) # get IPB post number and find Discourse post and topic number unless QUIET puts "----------------------------------------\nName: #{quote[1]}, post: #{quote[2]}" end imported_post_id = quote[2].to_s topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) post_lookup = topic_lookup_from_imported_post_id("discussion#" + imported_post_id) puts "topic_lookup: #{topic_lookup}, post: #{post_lookup}" unless QUIET post_num = topic_lookup ? topic_lookup[:post_number] : nil topic_num = topic_lookup ? topic_lookup[:topic_id] : nil post_num ||= post_lookup ? post_lookup[:post_number] : nil topic_num ||= post_lookup ? post_lookup[:topic_id] : nil # Fix or leave bogus username? username = find_user_by_import_id(quote[1]) || quote[1] puts "username: #{username}, post_id: #{post_num}, topic_id: #{topic_num}" unless QUIET puts "Before fixing a quote: #{raw}\n**************************************** " unless QUIET post_string = post_num ? ", post:#{post_num}" : "" topic_string = topic_num ? ", topic:#{topic_num}" : "" raw.gsub!(quote_regex, "\n[quote=\"#{username}#{post_string}#{topic_string}\"]\n\n") puts "AFTER!!!!!!!!!!!!1: #{raw}" unless QUIET sleep 1 raw end attach_regex = /\[attachment=(\d+?):.+\]/ while attach = attach_regex.match(raw) attach_id = attach[1] attachments = mysql_query( "SELECT attach_location as loc, attach_file as filename FROM #{ATTACHMENT_TABLE} WHERE attach_id=#{attach_id}", ) if attachments.count < 1 puts "Attachment #{attach_id} not found." attach_string = "Attachment #{attach_id} not found." else attach_url = "#{UPLOADS}/#{attachments.first["loc"].gsub(" ", "%20")}" if attachments.first["filename"].match(/(png|jpg|jpeg|gif)$/) # images are rendered as a link that contains the image attach_string = "#{attach_id}\n\n[![#{attachments.first["filename"]}](#{attach_url})](#{attach_url})\n" else # other attachments are simple download links attach_string = "#{attach_id}\n\n[#{attachments.first["filename"]}](#{attach_url})\n" end end raw.sub!(attach_regex, attach_string) end raw end def random_category_color colors = SiteSetting.category_colors.split("|") colors[rand(colors.count)] end def old_clean_up(raw) # This was for a forum that appeared to have lots of customization's. # IT did a good job of handling quotes and whatnot, but I don't know # what version if IPBoard it was for. return "" if raw.blank? raw.gsub!(/<___base_url___>/, URL) raw.gsub!(/
/, UPLOADS) raw.gsub!(/ /, UPLOADS) raw.gsub!(/
/, "\n") @doc = Nokogiri.XML("" + raw + "") # handles with links to original post @doc .css("blockquote[class=ipsQuote]") .each do |b| imported_post_id = b["data-ipsquote-contentcommentid"].to_s content_type = b["data-ipsquote-contenttype"].to_s content_class = b["data-ipsquote-contentclass"].to_s content_id = b["data-ipsquote-contentid"].to_s || b["data-cid"].to_s topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id) post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id) post = topic_lookup ? topic_lookup[:post_number] : nil topic = topic_lookup ? topic_lookup[:topic_id] : nil post ||= post_lookup ? post_lookup[:post_number] : nil topic ||= post_lookup ? post_lookup[:topic_id] : nil # TODO: consider:# consider:ips_username = b["data-ipsquote-username"] || b["data-author"] username = ips_username new_text = "" if DEBUG if content_class.length > 0 && content_class != "forums_Topic" new_text += "UNEXPECTED CONTENT CLASS! #{content_class}
\n" end if content_type.length > 0 && content_type != "forums" new_text += "UNEXPECTED CONTENT TYPE! #{content_type}
\n" end end if post && topic && username quote = "[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n" else if username && username.length > 1 quote = "[quote=\"#{username}\"]\n\n" else quote = "[quote]\n" end end b.content = quote + b.content + "\n[/quote]\n" b.name = "div" end @doc.css("object param embed").each { |embed| embed.replace("\n#{embed["src"]}\n") } # handle