2014-07-03 04:50:44 +08:00
# Optional dependency, loaded only when the script is started with the
# 'bbcode-to-md' argument.
#
# Replace (most) bbcode with markdown before creating posts.
# This will dramatically clean up the final posts in Discourse.
#
# To install the gem, in a temp dir:
#
#   git clone https://github.com/nlalonde/ruby-bbcode-to-md.git
#   cd ruby-bbcode-to-md
#   gem build ruby-bbcode-to-md.gemspec
#   gem install ruby-bbcode-to-md-0.0.13.gem
require 'ruby-bbcode-to-md' if ARGV.include?('bbcode-to-md')
2014-05-31 03:09:58 +08:00
# Empty namespace module; declared here so that ImportScripts::Base can be
# opened with the compact `class ImportScripts::Base` form.
module ImportScripts ; end
class ImportScripts :: Base
# Loads the application environment and builds the in-memory lookup tables
# used while importing:
#
#   @existing_groups   import id => group id
#   @existing_users    import id => user id
#   @categories_lookup import id => Category record
#   @existing_posts    import id => post id
#   @topic_lookup      post id   => { topic_id:, post_number:, url: }
#
# Import ids read back from the custom field tables are used as keys exactly
# as `pluck` returns them; the lookup helpers also try `import_id.to_s`.
# @failed_groups / @failed_users collect the records that could not be created.
def initialize
  # Resolve the environment file relative to this script. The path must not
  # carry stray whitespace, otherwise `require` looks for a file whose name
  # ends in a space and fails.
  require File.expand_path("../../config/environment", File.dirname(__FILE__))
  preload_i18n

  @bbcode_to_md = true if ARGV.include?('bbcode-to-md')

  @existing_groups = {}
  @failed_groups = []
  @existing_users = {}
  @failed_users = []
  @categories_lookup = {}
  @existing_posts = {}
  @topic_lookup = {}

  GroupCustomField.where(name: 'import_id').pluck(:group_id, :value).each do |group_id, import_id|
    @existing_groups[import_id] = group_id
  end

  UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id|
    @existing_users[import_id] = user_id
  end

  CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id|
    @categories_lookup[import_id] = Category.find(category_id.to_i)
  end

  PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id|
    @existing_posts[import_id] = post_id
  end

  # Only the columns listed here are selected for each post.
  Post.joins(:topic).select(" posts.id, posts.topic_id, posts.post_number, topics.slug ").each do |post|
    @topic_lookup[post.id] = {
      topic_id: post.topic_id,
      post_number: post.post_number,
      url: post.url,
    }
  end
end
2014-08-14 04:17:16 +08:00
# Called from the constructor. Performs one throwaway translation lookup and
# one throwaway transliteration; presumably this forces both subsystems to
# load before the import starts (the results themselves are not used).
def preload_i18n
  sample = " test "
  I18n.t(sample)
  ActiveSupport::Inflector.transliterate(sample)
end
2014-05-31 03:09:58 +08:00
# Runs a complete import: lowers the log level, overrides a set of site
# settings, disables the rate limiter, calls #execute and then the
# post-import update steps. The rate limiter is re-enabled in all cases,
# including when an exception is raised.
def perform
  Rails.logger.level = 3 # :error, so that we don't create log files that are many GB

  # Site settings overridden for the import, applied in this order.
  import_settings = {
    email_domains_blacklist: '',
    min_topic_title_length: 1,
    min_post_length: 1,
    min_private_message_post_length: 1,
    min_private_message_title_length: 1,
    allow_duplicate_topic_titles: true,
    default_digest_email_frequency: ''
  }
  import_settings.each do |setting, value|
    SiteSetting.public_send("#{setting}=", value)
  end

  RateLimiter.disable

  execute

  puts " "

  update_bumped_at
  update_feature_topic_users
  update_category_featured_topics
  update_topic_count_replies

  puts " ", " Done "
ensure
  RateLimiter.enable
end
# Template method invoked by #perform. Concrete importers override it and do
# most of their work here, calling create_users, create_categories and
# create_posts as needed. The base version always raises NotImplementedError.
def execute
  fail NotImplementedError
end
# Returns the Discourse post id recorded for the given source post id.
# The id is looked up as given first and, failing that, as a string.
def post_id_from_imported_post_id(import_id)
  direct_hit = @existing_posts[import_id]
  return direct_hit if direct_hit

  @existing_posts[import_id.to_s]
end
# Returns the topic info hash stored in @topic_lookup for the post that was
# imported from the given source id, or nil when that post is not known.
def topic_lookup_from_imported_post_id(import_id)
  post_id = post_id_from_imported_post_id(import_id)
  return nil unless post_id

  @topic_lookup[post_id]
end
2014-07-17 01:59:30 +08:00
# Returns the Discourse group id for the given source group id.
# The in-memory table is consulted first (id as given, then as a string);
# only when both miss is find_group_by_import_id called.
def group_id_from_imported_group_id(import_id)
  cached = @existing_groups[import_id] || @existing_groups[import_id.to_s]
  return cached if cached

  find_group_by_import_id(import_id).try(:id)
end
# Looks up the first GroupCustomField named 'import_id' whose value equals the
# stringified import id and returns its group, or nil when there is none.
def find_group_by_import_id(import_id)
  field = GroupCustomField.where(name: 'import_id', value: import_id.to_s).first
  field.try(:group)
end
2014-05-31 03:09:58 +08:00
# Returns the Discourse user id for the given source user id.
# The in-memory table is consulted first (id as given, then as a string);
# only when both miss is find_user_by_import_id called.
def user_id_from_imported_user_id(import_id)
  cached = @existing_users[import_id] || @existing_users[import_id.to_s]
  return cached if cached

  find_user_by_import_id(import_id).try(:id)
end
# Looks up the first UserCustomField named 'import_id' whose value equals the
# stringified import id and returns its user, or nil when there is none.
def find_user_by_import_id(import_id)
  field = UserCustomField.where(name: 'import_id', value: import_id.to_s).first
  field.try(:user)
end
# Returns the entry stored in @categories_lookup for the given source
# category id. The id is looked up as given first and, failing that, as a
# string.
def category_from_imported_category_id(import_id)
  direct_hit = @categories_lookup[import_id]
  return direct_hit if direct_hit

  @categories_lookup[import_id.to_s]
end
# Creates a user with a random password, grants it admin rights, sets its
# trust level to :regular and marks all of its email tokens as confirmed.
#
# opts - Hash; :email and :username override the built-in defaults.
#
# Returns the new User. Raises whatever `save!` raises when it fails.
def create_admin(opts = {})
  User.new.tap do |admin|
    admin.email = opts[:email] || " sam.saffron@gmail.com "
    admin.username = opts[:username] || " sam "
    admin.password = SecureRandom.uuid
    admin.save!
    admin.grant_admin!
    admin.change_trust_level!(:regular)
    admin.email_tokens.update_all(confirmed: true)
  end
end
2014-07-17 01:59:30 +08:00
# Iterate through a list of groups to be imported.
# Takes a collection and yields to the block for each element.
# Block should return a hash with the attributes for each element, or nil to
# skip the element (counted as skipped, as create_users and create_posts do).
# Required fields are :id and :name, where :id is the id of the
# group in the original datasource. The given id will not be used
# to create the Discourse group record.
#
# opts - :total  overrides results.size in the progress display
#        :offset is added to the progress counter
#
# Groups that fail validation are appended to @failed_groups.
# Returns [groups_created, groups_skipped].
def create_groups(results, opts = {})
  groups_created = 0
  groups_skipped = 0
  total = opts[:total] || results.size

  results.each do |result|
    g = yield(result)

    # Without this guard a nil result would crash on g[:id] below.
    if g.nil?
      groups_skipped += 1
      next
    end

    if group_id_from_imported_group_id(g[:id])
      groups_skipped += 1
    else
      new_group = create_group(g, g[:id])

      if new_group.valid?
        @existing_groups[g[:id].to_s] = new_group.id
        groups_created += 1
      else
        @failed_groups << g
        puts " Failed to create group id #{g[:id]} #{new_group.name} : #{new_group.errors.full_messages} "
      end
    end

    print_status groups_created + groups_skipped + @failed_groups.length + (opts[:offset] || 0), total
  end

  [groups_created, groups_skipped]
end
# Creates (or re-tags) the Discourse group for one imported group.
#
# opts      - attribute hash for Group.new; :id is ignored and the caller's
#             hash is not modified. :name is replaced by the value returned
#             from UserNameSuggester.suggest.
# import_id - id of the group in the source system.
#
# If a group with the suggested name already carries this import id it is
# returned untouched. Otherwise the existing group with that name, or a new
# one, gets the "import_id" and "import_name" custom fields and is saved.
# Returns the group; callers check `valid?` because `save` does not raise.
def create_group(opts, import_id)
  opts = opts.dup.tap { |o| o.delete(:id) }
  import_name = opts[:name]
  opts[:name] = UserNameSuggester.suggest(import_name)

  existing = Group.where(name: opts[:name]).first
  # Compare as strings: with to_i every non-numeric id collapses to 0, so two
  # different ids such as "abc" and "xyz" would be treated as the same group.
  return existing if existing && existing.custom_fields["import_id"].to_s == import_id.to_s

  g = existing || Group.new(opts)
  g.custom_fields["import_id"] = import_id
  g.custom_fields["import_name"] = import_name
  g.tap(&:save)
end
2014-05-31 03:09:58 +08:00
# Iterate through a list of user records to be imported.
# Takes a collection, and yields to the block for each element.
# Block should return a hash with the attributes for the User model, or nil
# to skip the element.
# Required fields are :id and :email, where :id is the id of the
# user in the original datasource. The given id will not be used to
# create the Discourse user record.
#
# opts - :total  overrides results.size in the progress display
#        :offset is added to the progress counter
#
# Records without an email, and records whose user fails validation, are
# appended to @failed_users. Returns [users_created, users_skipped].
def create_users(results, opts = {})
  users_created = 0
  users_skipped = 0
  total = opts[:total] || results.size

  results.each do |result|
    u = yield(result)

    # block returns nil to skip a user
    if u.nil?
      users_skipped += 1
      next
    end

    import_id = u[:id]

    if user_id_from_imported_user_id(import_id)
      users_skipped += 1
    elsif u[:email].present?
      new_user = create_user(u, import_id)

      if new_user.valid?
        @existing_users[import_id.to_s] = new_user.id
        users_created += 1
      else
        @failed_users << u
        puts " Failed to create user id: #{import_id} , username: #{new_user.username} , email: #{new_user.email} : #{new_user.errors.full_messages} "
      end
    else
      @failed_users << u
      puts " Skipping user id #{import_id} because email is blank "
    end

    print_status users_created + users_skipped + @failed_users.length + (opts[:offset] || 0), total
  end

  [users_created, users_skipped]
end
# Creates the Discourse user for one imported user record.
#
# opts      - attribute hash for User.new. :id is ignored; :post_create_action
#             (called with the user once it is persisted), :bio_raw, :website
#             and :avatar_url are handled separately and not passed to
#             User.new. The caller's hash is left unmodified.
# import_id - id of the user in the source system, stored in the "import_id"
#             custom field.
#
# A missing :name is derived from the email. The :username is replaced by a
# suggestion when it is blank, outside User.username_length, contains a
# character other than letters, digits or underscore, does not start with a
# letter or digit, or is not available.
#
# Returns the user. If saving fails, the user with the same email (if any) is
# tagged with the import id and returned instead; otherwise the unsaved user
# is returned and its `errors` hold the messages.
def create_user(opts, import_id)
  # Work on a copy: create_users keeps the original hash for its failure list.
  opts = opts.dup
  opts.delete(:id)
  post_create_action = opts.delete(:post_create_action)

  existing = User.where(email: opts[:email].downcase, username: opts[:username]).first
  # Compare as strings: with to_i every non-numeric id collapses to 0.
  return existing if existing && existing.custom_fields["import_id"].to_s == import_id.to_s

  bio_raw = opts.delete(:bio_raw)
  website = opts.delete(:website)
  avatar_url = opts.delete(:avatar_url)

  opts[:name] = User.suggest_name(opts[:email]) unless opts[:name]

  username = opts[:username]
  if username.blank? ||
     username.length < User.username_length.begin ||
     username.length > User.username_length.end ||
     username =~ /[^A-Za-z0-9_]/ ||
     username[0] =~ /[^A-Za-z0-9]/ ||
     !User.username_available?(username)
    opts[:username] = UserNameSuggester.suggest(username || opts[:name] || opts[:email])
  end

  opts[:email] = opts[:email].downcase
  opts[:trust_level] = TrustLevel.levels[:basic] unless opts[:trust_level]
  opts[:active] = true
  opts[:import_mode] = true

  u = User.new(opts)
  u.custom_fields["import_id"] = import_id
  u.custom_fields["import_username"] = opts[:username] if opts[:username].present?
  u.custom_fields["import_avatar_url"] = avatar_url if avatar_url.present?

  begin
    User.transaction do
      u.save!

      if bio_raw.present? || website.present?
        u.user_profile.bio_raw = bio_raw if bio_raw.present?
        u.user_profile.website = website if website.present?
        u.user_profile.save!
      end
    end
  rescue
    # try based on email
    existing = User.find_by(email: opts[:email].downcase)
    if existing
      existing.custom_fields["import_id"] = import_id
      existing.save!
      u = existing
    end
  end

  post_create_action.try(:call, u) if u.persisted?

  u # If there was an error creating the user, u.errors has the messages
end
# Creates one category per element of the collection.
# The block is called with each element and returns the attribute hash for
# the new category. :id (the category's id in the original datasource, never
# used as the Discourse id) and :name are required; position, description
# and parent_category_id are optional.
#
# Each resulting category is stored in @categories_lookup under the given :id.
def create_categories(results)
  results.each do |c|
    params = yield(c)
    puts " \t #{params[:name]} "

    # make sure categories don't go more than 2 levels deep:
    # re-parent onto the topmost ancestor of the requested parent
    parent_id = params[:parent_category_id]
    if parent_id
      ancestor = Category.find_by_id(parent_id)
      ancestor = ancestor.parent_category until ancestor.nil? || ancestor.parent_category.nil?
      params[:parent_category_id] = ancestor.id if ancestor
    end

    import_id = params[:id]
    @categories_lookup[import_id] = create_category(params, import_id)
  end
end
# Returns the category for one imported category, creating it when needed.
#
# opts      - Hash with :name plus optional :user_id / :user, :position,
#             :description, :parent_category_id and :post_create_action
#             (removed from opts; called with the category after saving).
# import_id - id of the category in the source system; when present it is
#             stored in the "import_id" custom field.
#
# A category already known under this import id, or one whose name matches
# case-insensitively, is returned as is. The owner falls back to user id -1
# when neither :user_id nor :user is given.
def create_category(opts, import_id)
  already_there = category_from_imported_category_id(import_id) ||
                  Category.where(" LOWER(name) = ? ", opts[:name].downcase).first
  return already_there if already_there

  post_create_action = opts.delete(:post_create_action)

  attributes = {
    name: opts[:name],
    user_id: opts[:user_id] || opts[:user].try(:id) || -1,
    position: opts[:position],
    description: opts[:description],
    parent_category_id: opts[:parent_category_id]
  }

  Category.new(attributes).tap do |new_category|
    new_category.custom_fields["import_id"] = import_id if import_id
    new_category.save!
    post_create_action.try(:call, new_category)
  end
end
# Imports a collection of posts; both topics and replies are handled.
# The block is called with each element and returns the attributes that are
# handed to the PostCreator, or nil to skip the element.
# Topics should give attributes title and category.
# Replies should provide topic_id. Use topic_lookup_from_imported_post_id to find the topic.
#
# opts - :total  overrides results.size in the progress display
#        :offset is added to the progress counter
#
# Posts that already exist, fail to be created, or raise are counted as
# skipped. Returns [created, skipped].
def create_posts(results, opts = {})
  created = 0
  skipped = 0
  total = opts[:total] || results.size

  results.each do |r|
    params = yield(r)

    # block returns nil to skip a post
    if params.nil?
      skipped += 1
      next
    end

    import_id = params.delete(:id).to_s

    if post_id_from_imported_post_id(import_id)
      skipped += 1 # already imported this post
    else
      begin
        outcome = create_post(params, import_id)

        case outcome
        when Post
          @existing_posts[import_id] = outcome.id
          @topic_lookup[outcome.id] = {
            post_number: outcome.post_number,
            topic_id: outcome.topic_id,
            url: outcome.url,
          }
          created += 1
        else
          # create_post hands back the error messages instead of a Post
          skipped += 1
          puts " Error creating post #{import_id} . Skipping. "
          puts outcome.inspect
        end
      rescue Discourse::InvalidAccess => e
        skipped += 1
        puts " InvalidAccess creating post #{import_id} . Topic is closed? #{e.message} "
      rescue => e
        skipped += 1
        puts " Exception while creating post #{import_id} . Skipping. "
        puts e.message
      end
    end

    print_status skipped + created + (opts[:offset] || 0), total
  end

  [created, skipped]
end
2014-06-26 07:11:52 +08:00
# Creates a single post through PostCreator.
#
# opts      - attributes for PostCreator; :user_id selects the author and
#             :post_create_action (removed from opts) is called with the post
#             after a successful create. skip_validations and import_mode are
#             forced on and the import id is recorded in the post's custom
#             fields.
# import_id - id of the post in the source system.
#
# When bbcode conversion is enabled, :raw is converted with bbcode_to_md; if
# the conversion raises, the raw text is used unchanged.
#
# Returns the created post, or the creator's error messages when no post was
# created.
def create_post(opts, import_id)
  author = User.find(opts[:user_id])
  post_create_action = opts.delete(:post_create_action)

  opts = opts.merge(skip_validations: true, import_mode: true)
  (opts[:custom_fields] ||= {})['import_id'] = import_id

  if @bbcode_to_md
    begin
      opts[:raw] = opts[:raw].bbcode_to_md(false)
    rescue StandardError
      # conversion failed: keep the raw text as it is
    end
  end

  post_creator = PostCreator.new(author, opts)
  post = post_creator.create
  post_create_action.try(:call, post) if post

  post || post_creator.errors.full_messages
end
2014-07-17 01:59:30 +08:00
# Creates an upload.
# Expects path to be the full path and filename of the source file.
#
# The source file is copied into a temporary file, which is rewound and
# passed to Upload.create_for together with the user id, the source filename
# and the temp file's size. The temporary file is closed and removed
# afterwards, whether or not the upload succeeded.
#
# Returns whatever Upload.create_for returns.
def create_upload(user_id, path, source_filename)
  tmp = Tempfile.new('discourse-upload')
  # Block form: the source handle is closed even when the copy raises.
  File.open(path) { |src| FileUtils.copy_stream(src, tmp) }
  tmp.rewind
  Upload.create_for(user_id, tmp, source_filename, File.size(tmp))
ensure
  # Best-effort cleanup; tmp is nil if Tempfile.new itself failed.
  tmp.close rescue nil
  tmp.unlink rescue nil
end
2014-06-04 22:37:43 +08:00
# Closes every open topic whose last_posted_at is older than the given number
# of days, acting as the system user, and prints progress as it goes.
#
# opts - :days  age threshold in days (default 30)
def close_inactive_topics(opts = {})
  # Must be assigned before the message below interpolates it; referencing it
  # first raises NameError because the local does not exist yet.
  num_days = opts[:days] || 30
  puts " ", " Closing topics that have been inactive for more than #{num_days} days. "

  query = Topic.where('last_posted_at < ?', num_days.days.ago).where(closed: false)
  total_count = query.count
  closed_count = 0

  query.find_each do |topic|
    topic.update_status('closed', true, Discourse.system_user)
    closed_count += 1
    print_status(closed_count, total_count)
  end
end
2014-06-05 06:21:45 +08:00
# Sets every topic's bumped_at to the newest created_at among its posts,
# leaving out posts of the moderator_action type. Done in one SQL statement.
def update_bumped_at
  puts " updating bumped_at on topics "

  excluded_type = Post.types[:moderator_action]
  statement = " update topics t set bumped_at = (select max(created_at) from posts where topic_id = t.id and post_type != #{excluded_type} ) "
  Post.exec_sql(statement)
end
2014-06-06 03:30:29 +08:00
# Calls feature_topic_users on every topic, printing progress against the
# total topic count taken before the loop starts.
def update_feature_topic_users
  puts " updating featured topic users "

  total = Topic.count
  done = 0

  Topic.find_each do |topic|
    topic.feature_topic_users
    print_status(done += 1, total)
  end
end
2014-07-04 02:43:24 +08:00
# Calls CategoryFeaturedTopic.feature_topics_for once for every category.
def update_category_featured_topics
  puts " updating featured topics in categories "

  Category.find_each { |category| CategoryFeaturedTopic.feature_topics_for(category) }
end
# For every user in the User.real scope, calls update_topic_reply_count on the
# user's stat record and saves it, printing progress against the number of
# such users counted before the loop starts.
def update_topic_count_replies
  puts " updating user topic reply counts "

  total = User.real.count
  done = 0

  User.real.find_each do |u|
    stat = u.user_stat
    stat.update_topic_reply_count
    stat.save!

    done += 1
    print_status(done, total)
  end
end
2014-05-31 03:09:58 +08:00
# Prints a progress line of the form "<current> / <max> (<percent>%)".
# The text contains a carriage return and no newline, so successive calls
# rewrite the same terminal line. The percentage is rounded to one decimal.
def print_status(current, max)
  percent = (current.to_f / max.to_f * 100).round(1)
  print format(" \r %9d / %d (%5.1f%%) ", current, max, percent)
end
# Yields 0, batch_size, 2 * batch_size, ... without end.
# The loop never terminates on its own: the caller's block has to `break`
# (or raise) once there is nothing left to fetch.
def batches(batch_size)
  current_offset = 0

  loop do
    yield(current_offset)
    current_offset += batch_size
  end
end
end