discourse/script/import_scripts/smf2.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

916 lines
26 KiB
Ruby
Raw Normal View History

# coding: utf-8
# frozen_string_literal: true
2015-03-15 08:57:46 +08:00
require "mysql2"
2014-07-17 01:59:30 +08:00
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require "htmlentities"
require "tsort"
require "optparse"
2014-07-17 05:18:52 +08:00
require "etc"
2015-01-13 23:43:09 +08:00
require "open3"
2014-07-17 01:59:30 +08:00
class ImportScripts::Smf2 < ImportScripts::Base
BATCH_SIZE = 5000
2014-07-17 01:59:30 +08:00
# Command-line entry point: parses the options, then builds an importer and
# calls +perform+ on it.
#
# Prints a diagnostic to stderr and exits with status 1 when the SMF settings
# cannot be loaded (Options::SettingsError) or when the command line is
# invalid (any other Options::Error; the usage text is printed as well).
def self.run
  options = Options.new

  begin
    options.parse!
  rescue Options::SettingsError => err
    # Must come before Options::Error: SettingsError is a subclass of it.
    $stderr.puts "Cannot load SMF settings: #{err.message}"
    exit 1
  rescue Options::Error => err
    $stderr.puts err.to_s.capitalize
    $stderr.puts options.usage
    exit 1
  end

  new(options).perform
end
attr_reader :options
# Validates the parsed options, sets the source timezone and opens the default
# MySQL connection.
#
# Prints a message to stderr and exits with status 1 when no timezone is
# available, when the timezone name is rejected with ArgumentError, or when no
# database name was given. When the password option is +:ask+ the password is
# read interactively with echo disabled.
#
# @param options [Options] parsed command-line / settings-file options
def initialize(options)
  if options.timezone.nil?
    $stderr.puts "No source timezone given and autodetection from PHP failed."
    $stderr.puts "Use -t option to specify correct source timezone:"
    $stderr.puts options.usage
    exit 1
  end

  super()
  @options = options

  begin
    Time.zone = options.timezone
  rescue ArgumentError
    $stderr.puts "Timezone name '#{options.timezone}' is invalid."
    exit 1
  end

  if options.database.blank?
    $stderr.puts "No database name given."
    $stderr.puts options.usage
    exit 1
  end

  if options.password == :ask
    # Loaded lazily: only needed for the interactive prompt.
    require "highline"
    $stderr.print "Enter password for MySQL database `#{options.database}`: "
    options.password = HighLine.new.ask("") { |q| q.echo = false }
  end

  @default_db_connection = create_db_connection
end
# Runs every import step in order: groups, users, categories, posts, personal
# messages, the BBCode post-processing pass and finally the permalinks for
# the prettyurl plugin (created under the "/forum" URL prefix).
def execute
  import_groups
  import_users
  import_categories
  import_posts
  import_personal_posts
  postprocess_posts
  make_prettyurl_permalinks("/forum")
end
# Imports the SMF member groups that are not post-count based
# (min_posts = -1) and whose group_type is 1 or 2.
# Each row is handed to create_groups unchanged.
def import_groups
  puts "", "creating groups"

  group_count = query(<<~SQL, as: :single)
    SELECT COUNT(*) FROM {prefix}membergroups
    WHERE min_posts = -1 AND group_type IN (1, 2)
  SQL

  group_rows = query(<<~SQL)
    SELECT id_group AS id, group_name AS name
    FROM {prefix}membergroups
    WHERE min_posts = -1 AND group_type IN (1, 2)
  SQL

  create_groups(group_rows, total: group_count) { |row| row }
end
# SMF group ids referenced by this importer:
# GUEST_GROUP and MEMBER_GROUP decide whether a board is read-restricted
# (see import_categories); ADMIN_GROUP and MODERATORS_GROUP decide the
# admin / moderator flags of imported users (see import_users).
GUEST_GROUP = -1
MEMBER_GROUP = 0
ADMIN_GROUP = 1
MODERATORS_GROUP = 2
# Imports every SMF member as a user.
#
# The password attribute is "<lowercased member_name>:<passwd>" as built by
# the SQL below. A member is marked active/approved only when is_activated
# is 1, and admin/moderator when one of its groups is ADMIN_GROUP /
# MODERATORS_GROUP.
#
# After a user is created, the post_create_action:
# * moves created_at back when the SMF registration date is older,
# * adds the user to every imported group it belonged to,
# * imports the avatar attachment when an SMF root directory was given and the
#   user has no uploaded avatar yet.
def import_users
  puts "", "creating users"
  total = query("SELECT COUNT(*) FROM {prefix}members", as: :single)

  create_users(query(<<~SQL), total: total) do |member|
    SELECT a.id_member, a.member_name, a.date_registered, a.real_name, a.email_address,
    CONCAT(LCASE(a.member_name),':', a.passwd) AS password,
    a.is_activated, a.last_login, a.birthdate, a.member_ip, a.id_group, a.additional_groups,
    b.id_attach, b.file_hash, b.filename
    FROM {prefix}members AS a
    LEFT JOIN {prefix}attachments AS b ON a.id_member = b.id_member
  SQL
    # additional_groups is a comma-separated id list; to_s guards against NULL.
    group_ids = [member[:id_group], *member[:additional_groups].to_s.split(",").map(&:to_i)]

    # Unparseable values fall back to "now" / nil instead of aborting the row.
    create_time =
      begin
        Time.zone.at(member[:date_registered])
      rescue StandardError
        Time.now
      end
    last_seen_time =
      begin
        Time.zone.at(member[:last_login])
      rescue StandardError
        nil
      end
    ip_addr =
      begin
        IPAddr.new(member[:member_ip])
      rescue StandardError
        nil
      end

    {
      id: member[:id_member],
      username: member[:member_name],
      password: member[:password],
      created_at: create_time,
      name: member[:real_name],
      email: member[:email_address],
      active: member[:is_activated] == 1,
      approved: member[:is_activated] == 1,
      last_seen_at: last_seen_time,
      date_of_birth: member[:birthdate],
      ip_address: ip_addr,
      admin: group_ids.include?(ADMIN_GROUP),
      moderator: group_ids.include?(MODERATORS_GROUP),
      post_create_action:
        proc do |user|
          user.update(created_at: create_time) if create_time < user.created_at
          user.save

          GroupUser.transaction do
            group_ids.each do |gid|
              group_id = group_id_from_imported_group_id(gid)
              # Groups that were not imported have no mapping; skip them.
              GroupUser.find_or_create_by(user: user, group_id: group_id) if group_id
            end
          end

          if options.smfroot && member[:id_attach].present? && user.uploaded_avatar_id.blank?
            path =
              find_smf_attachment_path(member[:id_attach], member[:file_hash], member[:filename])
            if path
              begin
                upload = create_upload(user.id, path, member[:filename])
                user.update(uploaded_avatar_id: upload.id) if upload.persisted?
              rescue SystemCallError => err
                puts "Could not import avatar: #{err.message}"
              end
            end
          end
        end,
    }
  end
end
# Imports every SMF board as a category, parents before children
# (rows are ordered by id_parent, then id_board).
#
# A board is read-restricted when its member_groups list contains neither
# GUEST_GROUP nor MEMBER_GROUP. For restricted boards the post_create_action
# marks the category read_restricted and grants full permission to each
# listed group that was imported.
def import_categories
  create_categories(query(<<~SQL)) do |board|
    SELECT id_board, id_parent, name, description, member_groups
    FROM {prefix}boards
    ORDER BY id_parent ASC, id_board ASC
  SQL
    parent_id = category_id_from_imported_category_id(board[:id_parent]) if board[:id_parent] > 0
    groups = (board[:member_groups] || "").split(/,/).map(&:to_i)
    restricted = !groups.include?(GUEST_GROUP) && !groups.include?(MEMBER_GROUP)

    # Append the board id when a category with this name already exists.
    board[:name] += board[:id_board].to_s if Category.find_by_name(board[:name])

    {
      id: board[:id_board],
      name: board[:name],
      description: board[:description],
      parent_category_id: parent_id,
      # Evaluates to false (no action) for unrestricted boards.
      post_create_action:
        restricted &&
          proc do |category|
            category.update(read_restricted: true)
            groups.each do |imported_group_id|
              group_id = group_id_from_imported_group_id(imported_group_id)
              next unless group_id
              CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg|
                cg.permission_type = CategoryGroup.permission_types[:full]
              end
            end
          end,
    }
  end
end
# Imports every SMF message as a post, ordered by topic and message id so that
# a topic's first message is created before its replies.
#
# The first message of a topic creates the topic (category and title are
# set); later messages are attached to the topic of the first message and are
# skipped, with a log line, when that topic was not imported. Attachments of
# a message are uploaded and handed to convert_message_body. A message whose
# body cannot be converted is imported as "-- MESSAGE SKIPPED --".
def import_posts
  puts "", "creating posts"
  spinner = %w[/ - \\ |].cycle
  total = query("SELECT COUNT(*) FROM {prefix}messages", as: :single)

  # Overrides PostCreator#guardian so that posts created with
  # opts[:import_mode] use a guardian for the system user.
  PostCreator.class_eval do
    def guardian
      @guardian ||=
        if opts[:import_mode]
          @@system_guardian ||= Guardian.new(Discourse.system_user)
        else
          Guardian.new(@user)
        end
    end
  end

  # Second connection: the main query is streamed, so attachment lookups
  # issued while iterating need a connection of their own.
  db2 = create_db_connection

  create_posts(query(<<~SQL), total: total) do |message|
    SELECT m.id_msg, m.id_topic, m.id_member, m.poster_time, m.body,
    m.subject, t.id_board, t.id_first_msg, COUNT(a.id_attach) AS attachment_count
    FROM {prefix}messages AS m
    LEFT JOIN {prefix}topics AS t ON t.id_topic = m.id_topic
    LEFT JOIN {prefix}attachments AS a ON a.id_msg = m.id_msg AND a.attachment_type = 0
    GROUP BY m.id_msg
    ORDER BY m.id_topic ASC, m.id_msg ASC
  SQL
    # Never set to true here, so post_create_action below is always false and
    # quotes are always converted inline.
    ignore_quotes = false

    post = {
      id: message[:id_msg],
      user_id: user_id_from_imported_user_id(message[:id_member]) || -1,
      created_at: Time.zone.at(message[:poster_time]),
      post_create_action:
        ignore_quotes &&
          proc do |p|
            p.custom_fields["import_rebake"] = "t"
            p.save
          end,
    }

    if message[:id_msg] == message[:id_first_msg]
      post[:category] = category_id_from_imported_category_id(message[:id_board])
      post[:title] = decode_entities(message[:subject])
    else
      parent = topic_lookup_from_imported_post_id(message[:id_first_msg])
      unless parent
        puts "Parent post #{message[:id_first_msg]} doesn't exist. Skipping #{message[:id_msg]}: #{message[:subject][0..40]}"
        next nil
      end
      post[:topic_id] = parent[:topic_id]
    end

    attachments =
      if message[:attachment_count] == 0
        []
      else
        query(<<~SQL, connection: db2, as: :array)
          SELECT id_attach, file_hash, filename FROM {prefix}attachments
          WHERE attachment_type = 0 AND id_msg = #{message[:id_msg]}
          ORDER BY id_attach ASC
        SQL
      end

    # A failed attachment becomes nil so the positions of the others are kept.
    attachments.map! do |a|
      begin
        import_attachment(post, a)
      rescue StandardError => e
        puts e
        nil
      end
    end

    begin
      post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes)
    rescue => e
      puts "Failed to import message with ID #{post[:id]}"
      puts e.message
      puts e.backtrace.join("\n")
      post[:raw] = "-- MESSAGE SKIPPED --"
    end

    next post
  end
end
# Imports SMF personal messages as private-message posts.
#
# First builds @pm_mapping ("sorted,user,ids" => { title => [topic ids] })
# from the private-message topics that already exist and whose title does not
# start with "Re:". Messages are then read in batches of BATCH_SIZE by
# ascending id_pm. A message is skipped when its sender or all of its
# recipients were not imported, or when it was imported before. Replies are
# attached to an existing topic found by find_pm_topic_id; everything else
# starts a new private-message topic that is then added to @pm_mapping.
def import_personal_posts
  puts "Loading pm mapping..."

  @pm_mapping = {}

  Topic
    .joins(:topic_allowed_users)
    .where(archetype: Archetype.private_message)
    .where("title NOT ILIKE 'Re:%'")
    .group(:id)
    .order(:id)
    .pluck(
      "string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id",
    )
    .each do |users, title, topic_id|
      @pm_mapping[users] ||= {}
      @pm_mapping[users][title] ||= []
      @pm_mapping[users][title] << topic_id
    end

  puts "", "Importing personal posts..."

  last_post_id = -1
  # Table names go through the {prefix} placeholder, like every other query,
  # so a non-default table prefix (-f option) works here too.
  total =
    query(
      "SELECT COUNT(*) count FROM {prefix}personal_messages WHERE deleted_by_sender = 0",
      as: :single,
    )

  batches(BATCH_SIZE) do |offset|
    posts = query(<<~SQL, as: :array)
      SELECT id_pm
      , id_member_from
      , msgtime
      , subject
      , body
      , (SELECT GROUP_CONCAT(id_member) FROM {prefix}pm_recipients r WHERE r.id_pm = pm.id_pm) recipients
      FROM {prefix}personal_messages pm
      WHERE deleted_by_sender = 0
      AND id_pm > #{last_post_id}
      ORDER BY id_pm
      LIMIT #{BATCH_SIZE}
    SQL

    break if posts.empty?

    last_post_id = posts[-1][:id_pm]
    post_ids = posts.map { |p| "pm-#{p[:id_pm]}" }

    next if all_records_exist?(:post, post_ids)

    create_posts(posts, total: total, offset: offset) do |p|
      next unless user_id = user_id_from_imported_user_id(p[:id_member_from])
      next if p[:recipients].blank?

      recipients =
        p[:recipients].split(",").map { |id| user_id_from_imported_user_id(id) }.compact.uniq
      next if recipients.empty?

      id = "pm-#{p[:id_pm]}"
      next if post_id_from_imported_post_id(id)

      post = { id: id, created_at: Time.at(p[:msgtime]), user_id: user_id }

      begin
        post[:raw] = convert_message_body(p[:body])
      rescue => e
        puts "Failed to import personal message with ID #{post[:id]}"
        puts e.message
        puts e.backtrace.join("\n")
        post[:raw] = "-- MESSAGE SKIPPED --"
      end

      # Same key format as the string_agg above: sorted, comma-separated ids.
      users = (recipients + [user_id]).sort.uniq.join(",")
      title = decode_entities(p[:subject])

      if topic_id = find_pm_topic_id(users, title)
        post[:topic_id] = topic_id
      else
        post[:archetype] = Archetype.private_message
        post[:title] = title
        post[:target_usernames] = User.where(id: recipients).pluck(:username)
        post[:post_create_action] = proc do |action_post|
          @pm_mapping[users] ||= {}
          @pm_mapping[users][title] ||= []
          @pm_mapping[users][title] << action_post.topic_id
        end
      end

      post
    end
  end
end
# Finds the existing private-message topic that a reply belongs to.
#
# Only titles starting with "Re:" are treated as replies. All leading "Re:"
# prefixes, together with the whitespace after them, are removed before the
# lookup, so "Re: Re: Hello" is matched against the stored title "Hello".
#
# NOTE: this title-based matching is lifted straight from smf1.rb. With SMFv2
# it could use id_pm_head instead, which holds the id of the message being
# replied to (or the message's own id_pm for the first message of a thread).
#
# @param users [String] sorted, comma-separated user ids of all participants
# @param title [String] decoded subject of the personal message
# @return [Integer, nil] id of the most recently mapped matching topic, or nil
def find_pm_topic_id(users, title)
  return unless title.start_with?("Re:")

  topics_by_title = @pm_mapping[users]
  return unless topics_by_title

  original_title = title.sub(/\A(?:Re:\s*)+/i, "")
  topics_by_title[original_title]&.last
end
# Uploads one SMF attachment on behalf of the post's author.
#
# @param post [Hash] post being built; :id and :user_id are read
# @param attachment [Hash] row with :id_attach, :file_hash and :filename
# @return the persisted upload
# @raise [RuntimeError] when the file cannot be located, the upload is not
#   persisted, or a SystemCallError occurs
def import_attachment(post, attachment)
  failure_prefix = "Attachment for post #{post[:id]} failed: "
  original_name = attachment[:filename]

  file_path =
    find_smf_attachment_path(attachment[:id_attach], attachment[:file_hash], original_name)
  raise "#{failure_prefix}#{original_name}" if file_path.blank?

  created = create_upload(post[:user_id], file_path, original_name)
  return created if created.persisted?

  raise "#{failure_prefix}#{created.errors.full_messages.join(", ")}"
rescue SystemCallError => err
  raise "Attachment for post #{post[:id]} failed: #{err.message}"
end
# Converts the BBCode of every post tagged with the "import_rebake" custom
# field, rebakes the post and removes the tag. Each post is handled in its
# own transaction and progress is reported after every post.
def postprocess_posts
  puts "", "rebaking posts"

  tags = PostCustomField.where(name: "import_rebake", value: "t")
  tags_total = tags.count
  tags_done = 0

  tags.each do |tag|
    post = tag.post

    Post.transaction do
      post.raw = convert_bbcode(post.raw)
      post.rebake!
      post.save
      tag.destroy!
    end

    print_status(tags_done += 1, tags_total)
  end
end
private
2014-07-30 04:55:38 +08:00
# Opens a new MySQL connection using the host, username, password and
# database taken from the options.
def create_db_connection
  connection_settings = {
    host: options.host,
    username: options.username,
    password: options.password,
    database: options.database,
  }

  Mysql2::Client.new(connection_settings)
end
2014-07-17 01:59:30 +08:00
# Runs +sql+ against the SMF database.
#
# @param sql [String] statement; "{prefix}" is expanded by __query
# @param opts [Hash] :connection selects a connection other than the default
#   one; :as selects the result form:
#   * +:array+  - all rows loaded into an Array
#   * +:single+ - first column of the first row, or nil when there is no row
#   * omitted   - a streamed result; when a block is given every row is
#     yielded to it
def query(sql, **opts, &block)
  db = opts[:connection] || @default_db_connection

  case opts[:as]
  when :array
    __query(db, sql).to_a
  when :single
    # &. keeps an empty result from raising NoMethodError on nil.
    __query(db, sql, as: :array).first&.first
  else
    result = __query(db, sql, stream: true)
    block_given? ? result.each(&block) : result
  end
end
2014-07-30 04:55:38 +08:00
# Executes +sql+ on +db+ after replacing every "{prefix}" placeholder with the
# configured table prefix. Rows use symbol keys and are not cached unless
# +opts+ overrides those defaults.
def __query(db, sql, **opts)
  query_options = { symbolize_keys: true, cache_rows: false }.merge(opts)
  db.query(sql.gsub("{prefix}", options.prefix), query_options)
end
# Pairs of [accented character, ASCII replacement] applied to attachment
# filenames by find_smf_attachment_path.
TRTR_TABLE =
  "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ".chars.zip(
    "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy".chars,
  )
2014-07-17 01:59:30 +08:00
# Locates an attachment file inside "<smfroot>/attachments".
#
# Three candidate names are tried in order and the first existing file wins:
# 1. the original filename,
# 2. "<attachment_id>_<file_hash>",
# 3. the legacy name: "<attachment_id>_" + the cleaned filename with dots
#    turned into underscores + the MD5 hex digest of the cleaned filename.
#
# The cleaned filename has the characters of TRTR_TABLE transliterated,
# whitespace replaced by "_" and everything but ASCII word characters,
# "." and "-" removed.
#
# @return [String, nil] path of the first existing candidate, or nil
def find_smf_attachment_path(attachment_id, file_hash, filename)
  cleaned_name = filename.dup
  TRTR_TABLE.each { |from, to| cleaned_name.gsub!(from, to) }
  cleaned_name.gsub!(/\s/, "_")
  cleaned_name.gsub!(/[^\w.\-]/, "")

  legacy_name =
    "#{attachment_id}_#{cleaned_name.gsub(".", "_")}#{Digest::MD5.hexdigest(cleaned_name)}"

  [filename, "#{attachment_id}_#{file_hash}", legacy_name]
    .map { |name| File.join(options.smfroot, "attachments", name) }
    .find { |file| File.exist?(file) }
end
# Decodes HTML entities with a lazily created, memoized HTMLEntities decoder.
# All arguments are forwarded to HTMLEntities#decode.
def decode_entities(*args)
  @html_entities ||= HTMLEntities.new
  @html_entities.decode(*args)
end
# Converts the body of an SMF message into the raw text of a post.
#
# Steps, in order:
# 1. "<br />" becomes a newline and HTML entities are decoded,
# 2. [color] tags are dropped, keeping their content,
# 3. [list] becomes [ol] (type=decimal) or [ul],
# 4. runs of lines starting with "[x]" become a [ul] of [li] items,
# 5. [attach=N]-style tags are replaced by the markup of the N-th upload
#    (1-based); uploads that are never referenced are appended after a "---"
#    separator,
# 6. the remaining BBCode is converted, unless opts[:ignore_quotes] is set.
#
# @param body [String] message body as stored by SMF
# @param attachments [Array] uploads in attachment order; entries may be nil
# @param opts [Hash] :ignore_quotes skips the final convert_bbcode pass
# @return [String]
def convert_message_body(body, attachments = [], **opts)
  body = decode_entities(body.gsub(%r{<br\s*/>}, "\n"))

  body.gsub!(ColorPattern, '\k<inner>')

  body.gsub!(ListPattern) do
    match = $~
    params = parse_tag_params(match[:params])
    tag = params["type"] == "decimal" ? "ol" : "ul"
    "\n[#{tag}]#{match[:inner].strip}[/#{tag}]\n"
  end

  body.gsub!(XListPattern) do |block|
    items = block.lines.map { |line| "[li]#{line.strip.sub(/^\[x\]\s*/, "")}[/li]" }
    "\n[ul]#{items.join}[/ul]\n"
  end

  if attachments.present?
    use_count = Hash.new(0)

    AttachmentPatterns.each do |pattern, emitter|
      body.gsub!(pattern) do |tag|
        # Leave tags untouched when they reference no usable upload.
        next tag if (num = $~[:num].to_i - 1) < 0
        next tag if (upload = attachments[num]).blank?
        use_count[num] += 1
        instance_exec(upload, &emitter)
      end
    end

    if use_count.keys.length < attachments.select(&:present?).length
      body = "#{body}\n\n---"
      attachments.each_with_index do |upload, num|
        next unless upload.present? && use_count[num] == 0
        # The result must be assigned: the string is rebuilt, not mutated.
        body = "#{body}\n\n#{get_upload_markdown(upload)}"
      end
    end
  end

  opts[:ignore_quotes] ? body : convert_bbcode(body)
end
# Returns the markup used to embed +upload+ in a post body. Delegates to
# html_for_upload (defined outside this file), passing the upload and its
# original filename.
def get_upload_markdown(upload)
html_for_upload(upload, upload.original_filename)
end
2014-07-17 01:59:30 +08:00
# Rewrites SMF [quote] tags, innermost quotes included (the method recurses
# into the quoted text).
#
# A quote with an author becomes [quote="<author>"]; when its link parameter
# points at a message that was imported, " post:<n>, topic:<id>" is added
# inside the quotes. A quote without an author becomes a <blockquote>.
#
# @param body [#to_s] text that may contain [quote] tags
# @return [String]
def convert_quotes(body)
  body.to_s.gsub(QuotePattern) do
    # Capture the match first: the nested calls below run their own matches.
    match = $~
    inner = match[:inner].strip
    params = parse_tag_params(match[:params])

    if params["author"].present?
      quote = +"\n[quote=\"#{params["author"]}"
      if (link = QuoteParamsPattern.match(params["link"].to_s))
        tl = topic_lookup_from_imported_post_id(link[:msg].to_i)
        quote = "#{quote} post:#{tl[:post_number]}, topic:#{tl[:topic_id]}" if tl
      end
      "#{quote}\"]\n#{convert_quotes(inner)}\n[/quote]"
    else
      "<blockquote>#{convert_quotes(inner)}</blockquote>"
    end
  end
end
2024-11-06 06:27:49 +08:00
# BBCode tags that convert_bbcode removes while keeping their content.
# ("shadow" was previously misspelled "shadown", so [shadow] tags survived.)
IGNORED_BBCODE = %w[
  black
  blue
  center
  color
  email
  flash
  font
  glow
  green
  iurl
  left
  list
  move
  red
  right
  shadow
  size
  table
  time
  white
].freeze
# Converts the BBCode left in +raw+ into Markdown / HTML.
# Returns "" for blank input. Quotes are converted first (convert_quotes
# returns a new string, which the gsub! calls below then modify in place).
# The substitutions are order-dependent: each one sees the output of the
# previous ones.
def convert_bbcode(raw)
return "" if raw.blank?
raw = convert_quotes(raw)
# [acronym]
raw.gsub!(%r{\[acronym=([^\]]+)\](.*?)\[/acronym\]}im) { %{<abbr title="#{$1}">#{$2}</abbr>} }
# [br]
raw.gsub!(/\[br\]/i, "\n")
raw.gsub!(%r{<br\s*/?>}i, "\n")
# [hr]
raw.gsub!(/\[hr\]/i, "<hr/>")
# [sub]
raw.gsub!(%r{\[sub\](.*?)\[/sub\]}im) { "<sub>#{$1}</sub>" }
# [sup]
raw.gsub!(%r{\[sup\](.*?)\[/sup\]}im) { "<sup>#{$1}</sup>" }
# [html]
raw.gsub!(/\[html\]/i, "\n```html\n")
raw.gsub!(%r{\[/html\]}i, "\n```\n")
# [php]
raw.gsub!(/\[php\]/i, "\n```php\n")
raw.gsub!(%r{\[/php\]}i, "\n```\n")
# [code]
raw.gsub!(%r{\[/?code\]}i, "\n```\n")
# [pre]
raw.gsub!(%r{\[/?pre\]}i, "\n```\n")
# [tt]
raw.gsub!(%r{\[/?tt\]}i, "`")
# [ftp] is rewritten to [url]
raw.gsub!(/\[ftp/i, "[url")
raw.gsub!(%r{\[/ftp\]}i, "[/url]")
# [me]
raw.gsub!(%r{\[me=([^\]]*)\](.*?)\[/me\]}im) { "_\\* #{$1} #{$2}_" }
# [ul] markers are dropped; the items below become "- " list lines
raw.gsub!(/\[ul\]/i, "")
raw.gsub!(%r{\[/ul\]}i, "")
# [li]
raw.gsub!(%r{\[li\](.*?)\[/li\]}im) { "- #{$1}" }
# puts [img] on their own line
raw.gsub!(%r{\[img[^\]]*\](.*?)\[/img\]}im) { "\n#{$1}\n" }
# puts [youtube] on their own line
raw.gsub!(%r{\[youtube\](.*?)\[/youtube\]}im) { "\n#{$1}\n" }
# strip the tags listed in IGNORED_BBCODE, keeping their content
IGNORED_BBCODE.each { |code| raw.gsub!(%r{\[#{code}[^\]]*\](.*?)\[/#{code}\]}im, '\1') }
# ensure [/quote] are on their own line
raw.gsub!(%r{\s*\[/quote\]\s*}im, "\n[/quote]\n")
# remove tapatalk mess
raw.gsub!(%r{Sent from .+? using \[url=.*?\].+?\[/url\]}i, "")
raw.gsub!(/Sent from .+? using .+?\z/i, "")
# clean URLs: [url=X]X[/url] becomes X
raw.gsub!(%r{\[url=(.+?)\]\1\[/url\]}i, '\1')
raw
end
2014-07-17 01:59:30 +08:00
# Collects the ids of the messages quoted in +body+.
#
# Only [quote ...] tags whose "link" parameter matches QuoteParamsPattern
# contribute an id.
#
# @param body [String] message body containing SMF BBCode
# @return [Set<Integer>] ids of the quoted messages
def extract_quoted_message_ids(body)
  Set.new.tap do |quoted|
    # scan yields one Array per match; destructure it to get the capture.
    body.scan(/\[quote\s+([^\]]+)\s*\]/) do |(raw_params)|
      params = parse_tag_params(raw_params)
      next unless params.key?("link")

      match = QuoteParamsPattern.match(params["link"])
      quoted << match[:msg].to_i if match
    end
  end
end
# Parses the parameter string of a BBCode tag into a Hash.
#
#   param1=value1=still1 value1 param2=value2 ...
#   => {'param1' => 'value1=still1 value1', 'param2' => 'value2 ...'}
#
# A value runs until the next "word=" token, so it may contain spaces and
# "=" characters.
#
# @param params [#to_s, nil] raw parameter string; nil gives an empty Hash
# @return [Hash{String => String}]
def parse_tag_params(params)
  params
    .to_s
    .strip
    .scan(/(?<param>\w+)=(?<value>(?:(?>\S+)|\s+(?!\w+=))*)/)
    .each_with_object({}) { |(key, value), parsed| parsed[key] = value }
end
class << self
  private

  # Builds a regex that matches one BBCode tag pair, including correctly
  # nested tags of the same kind.
  #
  #   [tag param=value param2=value2]
  #     text
  #     [tag nested=true]text[/tag]
  #   [/tag]
  #   => match[:params] == 'param=value param2=value2'
  #      match[:inner]  == "\n  text\n  [tag nested=true]text[/tag]\n"
  #
  # @param ltag [String] name of the opening tag
  # @param rtag [String, nil] name of the closing tag; defaults to "/" + ltag
  # @return [Regexp]
  def build_nested_tag_regex(ltag, rtag = nil)
    rtag ||= "/" + ltag
    /
      \[#{ltag}(?-x:[ =](?<params>[^\]]*))?\] # consume open tag, followed by...
      (?<inner>(?:
        (?> [^\[]+ ) # non-tags, or...
        |
        \[(?! #{ltag}(?-x:[ =][^\]]*)?\] | #{rtag}\]) # different tags, or ...
        |
        (?<re> # recursively matched tags of the same kind
          \[#{ltag}(?-x:[ =][^\]]*)?\]
          (?:
            (?> [^\[]+ )
            |
            \[(?! #{ltag}(?-x:[ =][^\]]*)?\] | #{rtag}\])
            |
            \g<re> # recursion here
          )*
          \[#{rtag}\]
        )
      )*)
      \[#{rtag}\]
    /x
  end
end
# Matches the "link" parameter of a quote: "topic=<digits>", one arbitrary
# character, "msg<digits>", then "#msg" followed by the same message digits.
QuoteParamsPattern = /^topic=(?<topic>\d+).msg(?<msg>\d+)#msg\k<msg>$/
# Matches a run of one or more consecutive lines that start with "[x]".
XListPattern = /(?<xblock>(?>^\[x\]\s*(?<line>.*)$\n?)+)/
# Nested-aware patterns for [quote], [color] and [list] tag pairs.
QuotePattern = build_nested_tag_regex("quote")
ColorPattern = build_nested_tag_regex("color")
ListPattern = build_nested_tag_regex("list")
# [pattern, emitter] pairs for attachment tags, tried in this order.
# The first matches a tag that is alone on its line and surrounds the upload
# markup with newlines; the second matches a tag anywhere. Each emitter is
# run with instance_exec (see convert_message_body), so it can call
# get_upload_markdown.
AttachmentPatterns = [
[/^\[attach(?:|img|url|mini)=(?<num>\d+)\]$/, ->(u) { "\n" + get_upload_markdown(u) + "\n" }],
[/\[attach(?:|img|url|mini)=(?<num>\d+)\]/, ->(u) { get_upload_markdown(u) }],
]
2014-07-17 01:59:30 +08:00
# Provides command line options and parses the SMF settings file.
class Options
class Error < StandardError
end
2014-07-17 05:18:52 +08:00
class SettingsError < Error
end
2014-07-17 01:59:30 +08:00
# Parses the command line and fills in every option.
#
# The single positional argument, if any, is the SMF root directory; when it
# is given, Settings.php is read for the values not set on the command line.
# Remaining gaps are filled with defaults: host "localhost", the current
# login name, prefix "smf_" and the timezone reported by PHP.
#
# @param args [Array<String>] arguments to parse; option switches are removed
#   from it in place
# @raise [Error] when +args+ is empty, an option is invalid, or more than one
#   positional argument is left
# @raise [SettingsError] when Settings.php cannot be read and no database
#   name is known
def parse!(args = ARGV)
  # Check the arguments that were actually passed, not the global ARGV.
  raise Error, "not enough arguments" if args.empty?

  begin
    parser.parse!(args)
  rescue OptionParser::ParseError => err
    raise Error, err.message
  end

  raise Error, "too many arguments" if args.length > 1

  self.smfroot = args.first
  read_smf_settings if smfroot

  self.host ||= "localhost"
  self.username ||= Etc.getlogin
  self.prefix ||= "smf_"
  self.timezone ||= get_php_timezone
end
# Returns the usage/help text generated by the option parser.
def usage
parser.to_s
end
# Connection settings (host, username, password, database, prefix), the SMF
# installation directory and the source timezone. password may be the symbol
# :ask, meaning "prompt for it".
attr_accessor :host, :username, :password, :database, :prefix, :smfroot, :timezone
2014-07-17 01:59:30 +08:00
private
2015-01-13 23:43:09 +08:00
# Asks the PHP command-line binary for its default timezone by scanning the
# output of `php -i` for the "Default timezone => <name>" line.
#
# @return [String, nil] the timezone name; nil when the line is missing or
#   the php executable cannot be found (a message is printed to stderr then)
def get_php_timezone
  phpinfo, _status = Open3.capture2("php", "-i")

  phpinfo.each_line do |line|
    key, value = line.split(" => ").map(&:strip)
    return value if key == "Default timezone"
  end

  # Without this, the value of the loop itself would be returned.
  nil
rescue Errno::ENOENT
  $stderr.puts "Error: PHP CLI executable not found"
  nil
end
2014-07-17 01:59:30 +08:00
# Reads the database settings from "<smfroot>/Settings.php".
#
# Only lines of the form `$name = 'value';` (single or double quotes, with an
# optional trailing # or // comment) are considered. Values already set, for
# example by command-line options, are kept.
#
# @raise [SettingsError] when reading fails and no database name is known;
#   a failure is ignored when the database name is already set
def read_smf_settings
  settings = File.join(smfroot, "Settings.php")

  File.foreach(settings) do |line|
    m = %r{\$([a-z_]+)\s*=\s*['"](.+?)['"]\s*;\s*((#|//).*)?$}.match(line)
    next unless m

    case m[1]
    when "db_server"
      self.host ||= m[2]
    when "db_user"
      self.username ||= m[2]
    when "db_passwd"
      self.password ||= m[2]
    when "db_name"
      self.database ||= m[2]
    when "db_prefix"
      self.prefix ||= m[2]
    end
  end
rescue => err
  raise SettingsError, err.message unless database
end
# Lazily builds and memoizes the OptionParser for the import script.
# Each option handler stores its value in the matching accessor; -p without
# an argument stores :ask. The bracketed defaults in the descriptions are the
# accessor values at the time the parser is first built.
def parser
  @parser ||=
    OptionParser.new(nil, 12) do |o|
      script = File.basename($0)
      # Two usage lines: import via the SMF root, or via explicit DB options.
      o.banner = "Usage:\t#{script} <SMFROOT> [options]\n\t#{script} -d <DATABASE> [options]"

      o.on("-h HOST", :REQUIRED, "MySQL server hostname [\"#{host}\"]") { |s| self.host = s }
      o.on("-u USER", :REQUIRED, "MySQL username [\"#{username}\"]") { |s| self.username = s }
      o.on(
        "-p [PASS]",
        :OPTIONAL,
        "MySQL password. Without argument, reads password from STDIN.",
      ) { |s| self.password = s || :ask }
      o.on("-d DBNAME", :REQUIRED, "Name of SMF database") { |s| self.database = s }
      o.on("-f PREFIX", :REQUIRED, "Table names prefix [\"#{prefix}\"]") { |s| self.prefix = s }
      o.on("-t TIMEZONE", :REQUIRED, "Timezone used by SMF2 [auto-detected from PHP]") do |s|
        self.timezone = s
      end
    end
end
end #Options
# Framework around TSort, used to build a dependency graph over messages
# to find and solve cyclic quotations.
#
# A message depends on the message before it (+prev+) and on the messages it
# quotes. Quote dependencies of a node can be switched off with
# Node#ignore_quotes=, which is how a cycle is broken.
class MessageDependencyGraph
  include TSort

  def initialize
    @nodes = {}
  end

  # @return [Node, nil] the node registered under +key+
  def [](key)
    @nodes[key]
  end

  # Registers (or replaces) the node for message +id+.
  #
  # @param id message id
  # @param prev id of the preceding message, if any
  # @param quoted [Array] ids of the messages quoted by this one
  # @return [Node] the new node
  def add_message(id, prev = nil, quoted = [])
    @nodes[id] = Node.new(self, id, prev, quoted)
  end

  # TSort hook: iterates over all nodes.
  def tsort_each_node(&block)
    @nodes.each_value(&block)
  end

  # TSort hook: iterates over the nodes that +node+ depends on.
  def tsort_each_child(node, &block)
    node.dependencies.each(&block)
  end

  # @return [Array<Array<Node>>] strongly connected components with more than
  #   one node, i.e. groups of messages that depend on each other in a cycle
  def cycles
    strongly_connected_components.select { |component| component.length > 1 }.to_a
  end

  # One message in the graph. Related nodes are stored by id and resolved
  # through the graph on access, so they may be registered later.
  class Node
    attr_reader :id

    def initialize(graph, id, prev = nil, quoted = [])
      @graph = graph
      @id = id
      @prev = prev
      @quoted = quoted
    end

    # @return [Node, nil] node of the preceding message, if registered
    def prev
      @graph[@prev]
    end

    # @return [Array<Node>] nodes of the quoted messages that are registered
    def quoted
      @quoted.map { |id| @graph[id] }.compact
    end

    def ignore_quotes?
      !!@ignore_quotes
    end

    # Changing the flag invalidates the memoized dependency list.
    def ignore_quotes=(value)
      @ignore_quotes = !!value
      @dependencies = nil
    end

    # @return [Array<Node>] unique nodes this node depends on: the quoted
    #   nodes (unless quotes are ignored) and the previous node
    def dependencies
      @dependencies ||=
        Set
          .new
          .tap do |deps|
            deps.merge(quoted) unless ignore_quotes?
            previous = prev
            deps << previous if previous
          end
          .to_a
    end

    def hash
      @id.hash
    end

    # Two nodes are equal when they carry the same id. The id must be compared
    # with the other node's id, not with the node itself, to stay consistent
    # with #hash.
    def eql?(other)
      other.is_a?(Node) && @id.eql?(other.id)
    end
    alias == eql?

    def inspect
      "#<#{self.class.name}: id=#{id.inspect}, prev=#{safe_id(@prev)}, quoted=[#{@quoted.map(&method(:safe_id)).join(", ")}]>"
    end

    private

    # Shows the id of a registered node, or "(id)" when it is not registered.
    def safe_id(id)
      node = @graph[id]
      node ? node.id.inspect : "(#{id})"
    end
  end #Node
end #MessageDependencyGraph
# Creates permalinks for the URLs generated by the SMF "pretty URLs" plugin,
# in the form "<prefix>/<board slug>/<topic slug>".
#
# Board slugs come from the serialized 'pretty_board_urls' setting, topic
# slugs from the pretty_topic_urls table. This step is best-effort: any
# error (for example when the plugin tables do not exist) is reported and
# the step is abandoned without failing the import.
#
# @param prefix [String] URL path prefix of the old forum, e.g. "/forum"
def make_prettyurl_permalinks(prefix)
  puts "creating permalinks for prettyurl plugin"

  serialized = query(<<~SQL, as: :single)
    SELECT value FROM {prefix}settings
    WHERE variable='pretty_board_urls';
  SQL

  # The setting is a PHP-serialized array: a:N:{i:<board id>;s:<len>:"<slug>";...}
  board_slugs = {}
  entries = /\{(.*)\}/.match(serialized)[1]
  entries.scan(/i:(\d+);s:\d+:"(.*?)";/) { |board_id, slug| board_slugs[board_id.to_i] = slug }

  # Table names go through {prefix} so a non-default table prefix works.
  topic_urls = query(<<~SQL, as: :array)
    SELECT t.id_first_msg, t.id_board,u.pretty_url
    FROM {prefix}topics t
    LEFT JOIN {prefix}pretty_topic_urls u ON u.id_topic = t.id_topic ;
  SQL

  topic_urls.each do |url|
    # LEFT JOIN: topics without a pretty URL have nothing to link.
    next if url[:pretty_url].nil?

    topic = topic_lookup_from_imported_post_id(url[:id_first_msg])
    # Skip topics that were not imported instead of aborting the whole step.
    next unless topic

    Permalink.create(
      url: "#{prefix}/#{board_slugs[url[:id_board]]}/#{url[:pretty_url]}",
      topic_id: topic[:topic_id],
    )
  end
rescue StandardError => err
  puts "Skipping prettyurl permalinks: #{err.message}"
end
2014-07-17 01:59:30 +08:00
end
ImportScripts::Smf2.run