discourse/app/jobs/regular/export_user_archive.rb

# frozen_string_literal: true
require "csv"
module Jobs
class ExportUserArchive < ::Jobs::Base
sidekiq_options retry: false
attr_accessor :current_user
# note: contents provided entirely by user
attr_accessor :extra
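# Each entry becomes one file in the archive: the data comes from "<name>_export",
# the format defaults to CSV unless a "<name>_filetype" method says otherwise, and an
# optional "include_<name>?" method can skip the component entirely.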
COMPONENTS ||= %w[
user_archive
preferences
auth_tokens
auth_token_logs
badges
bookmarks
category_preferences
flags
likes
post_actions
queued_posts
visits
]
HEADER_ATTRS_FOR ||=
HashWithIndifferentAccess.new(
user_archive: %w[
topic_title
categories
is_pm
post_raw
post_cooked
like_count
reply_count
url
created_at
],
user_archive_profile: %w[location website bio views],
auth_tokens: %w[
id
auth_token_hash
prev_auth_token_hash
auth_token_seen
client_ip
user_agent
seen_at
rotated_at
created_at
updated_at
],
auth_token_logs: %w[
id
action
user_auth_token_id
client_ip
auth_token_hash
created_at
path
user_agent
],
badges: %w[
badge_id
badge_name
granted_at
post_id
seq
granted_manually
notification_id
featured_rank
],
bookmarks: %w[
bookmarkable_id
bookmarkable_type
link
name
created_at
updated_at
reminder_at
reminder_last_sent_at
reminder_set_at
auto_delete_preference
],
category_preferences: %w[
category_id
category_names
notification_level
dismiss_new_timestamp
],
flags: %w[
id
post_id
flag_type
created_at
updated_at
deleted_at
deleted_by
related_post_id
targets_topic
was_take_action
],
likes: %w[id post_id topic_id post_number created_at updated_at deleted_at deleted_by],
post_actions: %w[
id
post_id
post_action_type
created_at
updated_at
deleted_at
deleted_by
related_post_id
],
queued_posts: %w[id verdict category_id topic_id post_raw other_json],
visits: %w[visited_at posts_read mobile time_read],
)
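# Builds the list of components, writes each one to a temporary directory as CSV or
# JSON, compresses the directory into a zip, uploads it, and finally PMs the user a
# download link (the notification topic is then closed).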
def execute(args)
@current_user = User.find_by(id: args[:user_id])
@extra = HashWithIndifferentAccess.new(args[:args]) if args[:args]
@timestamp ||= Time.now.strftime("%y%m%d-%H%M%S")
components = []
COMPONENTS.each do |name|
export_method = "#{name}_export"
h = { name: name, method: :"#{export_method}" }
h[:filetype] = :csv
filetype_method = :"#{name}_filetype"
h[:filetype] = public_send(filetype_method) if respond_to? filetype_method
condition_method = :"include_#{name}?"
h[:skip] = !public_send(condition_method) if respond_to? condition_method
h[:filename] = name
components.push(h)
end
export_title = "user_archive".titleize
filename = "user_archive-#{@current_user.username}-#{@timestamp}"
user_export = UserExport.create(file_name: filename, user_id: @current_user.id)
filename = "#{filename}-#{user_export.id}"
dirname = "#{UserExport.base_directory}/#{filename}"
# ensure directory exists
FileUtils.mkdir_p(dirname) unless Dir.exist?(dirname)
# Generate a compressed CSV file
zip_filename = nil
begin
components.each do |component|
next if component[:skip]
case component[:filetype]
when :csv
CSV.open("#{dirname}/#{component[:filename]}.csv", "w") do |csv|
csv << get_header(component[:name])
public_send(component[:method]) { |d| csv << d }
end
when :json
File.open("#{dirname}/#{component[:filename]}.json", "w") do |file|
file.write MultiJson.dump(public_send(component[:method]), indent: 4)
end
else
raise "unknown export filetype"
end
end
zip_filename = Compression::Zip.new.compress(UserExport.base_directory, filename)
ensure
FileUtils.rm_rf(dirname)
end
# create upload
upload = nil
if File.exist?(zip_filename)
File.open(zip_filename) do |file|
upload =
UploadCreator.new(
file,
File.basename(zip_filename),
type: "csv_export",
for_export: "true",
).create_for(@current_user.id)
if upload.persisted?
user_export.update_columns(upload_id: upload.id)
else
Rails.logger.warn(
"Failed to upload the file #{zip_filename}: #{upload.errors.full_messages}",
)
end
end
File.delete(zip_filename)
end
ensure
post = notify_user(upload, export_title)
if user_export.present? && post.present?
topic = post.topic
user_export.update_columns(topic_id: topic.id)
topic.update_status("closed", true, Discourse.system_user)
end
end
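# One CSV row per post authored by the user, including deleted posts; topic title,
# categories, PM flag and URL are resolved in get_user_archive_fields.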
def user_archive_export
return enum_for(:user_archive_export) unless block_given?
Post
.includes(topic: :category)
.where(user_id: @current_user.id)
.select(:topic_id, :post_number, :raw, :cooked, :like_count, :reply_count, :created_at)
.order(:created_at)
.with_deleted
.each { |user_archive| yield get_user_archive_fields(user_archive) }
end
def user_archive_profile_export
return enum_for(:user_archive_profile_export) unless block_given?
UserProfile
.where(user_id: @current_user.id)
.select(:location, :website, :bio_raw, :views)
.each { |user_profile| yield get_user_archive_profile_fields(user_profile) }
end
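# Exported as JSON (see preferences_filetype) via UserSerializer rather than CSV.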
def preferences_export
UserSerializer.new(@current_user, scope: guardian)
end
def preferences_filetype
:json
end
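# Token hashes are truncated to their first few characters so the export cannot be
# used to impersonate a session.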
def auth_tokens_export
return enum_for(:auth_tokens_export) unless block_given?
UserAuthToken
.where(user_id: @current_user.id)
.each do |token|
yield(
[
token.id,
token.auth_token.to_s[0..4] + "...", # hashed and truncated
token.prev_auth_token[0..4] + "...",
token.auth_token_seen,
token.client_ip,
token.user_agent,
token.seen_at,
token.rotated_at,
token.created_at,
token.updated_at,
]
)
end
end
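# Only included when auth token log rows exist for the user (see the setting
# referenced below).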
def include_auth_token_logs?
# SiteSetting.verbose_auth_token_logging
UserAuthTokenLog.where(user_id: @current_user.id).exists?
end
def auth_token_logs_export
return enum_for(:auth_token_logs_export) unless block_given?
UserAuthTokenLog
.where(user_id: @current_user.id)
.each do |log|
yield(
[
log.id,
log.action,
log.user_auth_token_id,
log.client_ip,
log.auth_token.to_s[0..4] + "...", # hashed and truncated
log.created_at,
log.path,
log.user_agent,
]
)
end
end
def badges_export
return enum_for(:badges_export) unless block_given?
UserBadge
.where(user_id: @current_user.id)
.joins(:badge)
.select(
:badge_id,
:granted_at,
:post_id,
:seq,
:granted_by_id,
:notification_id,
:featured_rank,
)
.order(:granted_at)
.each do |ub|
yield(
[
ub.badge_id,
ub.badge.display_name,
ub.granted_at,
ub.post_id,
ub.seq,
# Hide the admin's identity, simply indicate human or system
User.human_user_id?(ub.granted_by_id),
ub.notification_id,
ub.featured_rank,
]
)
end
end
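# The link column is left blank when the user can no longer see the bookmarked item.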
def bookmarks_export
return enum_for(:bookmarks_export) unless block_given?
@current_user
.bookmarks
.where.not(bookmarkable_type: nil)
.order(:id)
.each do |bookmark|
link = ""
if guardian.can_see_bookmarkable?(bookmark)
if bookmark.bookmarkable.respond_to?(:full_url)
link = bookmark.bookmarkable.full_url
else
link = bookmark.bookmarkable.url
end
end
yield(
[
bookmark.bookmarkable_id,
bookmark.bookmarkable_type,
link,
bookmark.name,
bookmark.created_at,
bookmark.updated_at,
bookmark.reminder_at,
bookmark.reminder_last_sent_at,
bookmark.reminder_set_at,
Bookmark.auto_delete_preferences[bookmark.auto_delete_preference],
]
)
end
end
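# Restricted to categories the user can currently see (Category.secured with the
# user's guardian).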
def category_preferences_export
return enum_for(:category_preferences_export) unless block_given?
CategoryUser
.where(user_id: @current_user.id)
.includes(:category)
.merge(Category.secured(guardian))
.each do |cu|
yield(
[
cu.category_id,
piped_category_name(cu.category_id, cu.category),
NotificationLevels.all[cu.notification_level],
cu.last_seen_at,
]
)
end
end
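# Flags cast by the user, including soft-deleted ones; deleted_by is reduced to
# "self"/"other" so no other user is identified.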
def flags_export
return enum_for(:flags_export) unless block_given?
PostAction
.with_deleted
.where(user_id: @current_user.id)
.where(post_action_type_id: PostActionType.flag_types.values)
.order(:created_at)
.each do |pa|
yield(
[
pa.id,
pa.post_id,
PostActionType.flag_types[pa.post_action_type_id],
pa.created_at,
pa.updated_at,
pa.deleted_at,
self_or_other(pa.deleted_by_id),
pa.related_post_id,
pa.targets_topic,
# exported as 'was_take_action' so it is not mistaken for a synonym of agreed_at
pa.staff_took_action,
]
)
end
end
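# Likes given by the user; topic_id and post_number are looked up from the (possibly
# deleted) post, so they may be blank if the post no longer exists.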
def likes_export
return enum_for(:likes_export) unless block_given?
PostAction
.with_deleted
.where(user_id: @current_user.id)
.where(post_action_type_id: PostActionType.types[:like])
.order(:created_at)
.each do |pa|
post = Post.with_deleted.find_by(id: pa.post_id)
yield(
[
pa.id,
pa.post_id,
post&.topic_id,
post&.post_number,
pa.created_at,
pa.updated_at,
pa.deleted_at,
self_or_other(pa.deleted_by_id),
]
)
end
end
def include_post_actions?
# Most forums should not have post_action records other than flags and likes, but they are possible in historical oddities.
PostAction
.where(user_id: @current_user.id)
.where.not(
post_action_type_id: PostActionType.flag_types.values + [PostActionType.types[:like]],
)
.exists?
end
def post_actions_export
return enum_for(:post_actions_export) unless block_given?
PostAction
.with_deleted
.where(user_id: @current_user.id)
.where.not(
post_action_type_id: PostActionType.flag_types.values + [PostActionType.types[:like]],
)
.order(:created_at)
.each do |pa|
yield(
[
pa.id,
pa.post_id,
PostActionType.types[pa.post_action_type_id] || pa.post_action_type_id,
pa.created_at,
pa.updated_at,
pa.deleted_at,
self_or_other(pa.deleted_by_id),
pa.related_post_id,
]
)
end
end
def queued_posts_export
return enum_for(:queued_posts_export) unless block_given?
# Most Reviewable fields are staff-private, but the post content itself needs to be exported.
ReviewableQueuedPost
.where(target_created_by_id: @current_user.id)
.order(:created_at)
.each do |rev|
yield(
[
rev.id,
rev.status,
rev.category_id,
rev.topic_id,
rev.payload["raw"],
MultiJson.dump(rev.payload.slice(*queued_posts_payload_permitted_keys)),
]
)
end
end
def visits_export
return enum_for(:visits_export) unless block_given?
UserVisit
.where(user_id: @current_user.id)
.order(visited_at: :asc)
.each { |uv| yield [uv.visited_at, uv.posts_read, uv.mobile, uv.time_read] }
end
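# Every component above resolves its header through HEADER_ATTRS_FOR; none of them is
# named "user_list", so that branch appears to be unused by this job.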
def get_header(entity)
if entity == "user_list"
header_array =
HEADER_ATTRS_FOR["user_list"] + HEADER_ATTRS_FOR["user_stats"] +
HEADER_ATTRS_FOR["user_profile"]
header_array.concat(HEADER_ATTRS_FOR["user_sso"]) if SiteSetting.enable_discourse_connect
user_custom_fields = UserField.all
if user_custom_fields.present?
user_custom_fields.each do |custom_field|
header_array.push("#{custom_field.name} (custom user field)")
end
end
header_array.push("group_names")
else
header_array = HEADER_ATTRS_FOR[entity]
end
header_array
end
private
def guardian
@guardian ||= Guardian.new(@current_user)
end
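# Renders the category ancestry as "grandparent|parent|child". Falls back to the raw
# id when the category record is missing and to "-" when the user cannot see it.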
def piped_category_name(category_id, category)
return "#{category_id}" if category_id && !category
return "-" if !guardian.can_see_category?(category)
categories = [category.name]
while category.parent_category_id && category = category.parent_category
categories << category.name
end
categories.reverse.join("|")
end
def self_or_other(user_id)
if user_id.nil?
nil
elsif user_id == @current_user.id
"self"
else
"other"
end
end
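# Builds a single CSV row for a post. When the topic association is gone the topic is
# looked up with_deleted; if it still cannot be found an empty row is returned.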
def get_user_archive_fields(user_archive)
user_archive_array = []
topic_data = user_archive.topic
user_archive = user_archive.as_json
topic_data =
Topic
.with_deleted
.includes(:category)
.find_by(id: user_archive["topic_id"]) if topic_data.nil?
return user_archive_array if topic_data.nil?
categories = piped_category_name(topic_data.category_id, topic_data.category)
is_pm =
(
if topic_data.archetype == "private_message"
I18n.t("csv_export.boolean_yes")
else
I18n.t("csv_export.boolean_no")
end
)
url =
"#{Discourse.base_url}/t/#{topic_data.slug}/#{topic_data.id}/#{user_archive["post_number"]}"
topic_hash = {
"post_raw" => user_archive["raw"],
"post_cooked" => user_archive["cooked"],
"topic_title" => topic_data.title,
"categories" => categories,
"is_pm" => is_pm,
"url" => url,
}
user_archive.merge!(topic_hash)
HEADER_ATTRS_FOR["user_archive"].each { |attr| user_archive_array.push(user_archive[attr]) }
user_archive_array
end
def get_user_archive_profile_fields(user_profile)
user_archive_profile = []
HEADER_ATTRS_FOR["user_archive_profile"].each do |attr|
data =
if attr == "bio"
user_profile.attributes["bio_raw"]
else
user_profile.attributes[attr]
end
user_archive_profile.push(data)
end
user_archive_profile
end
def queued_posts_payload_permitted_keys
# Generated with:
#
# SELECT distinct json_object_keys(payload) from reviewables
# where type = 'ReviewableQueuedPost' and (payload->'old_queued_post_id') IS NULL
#
# except raw, created_topic_id, created_post_id
%w[composer_open_duration_msecs is_poll reply_to_post_number tags title typing_duration_msecs]
end
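# Sends the user a system message with the download link, or a generic failure message
# when the upload did not persist.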
def notify_user(upload, export_title)
post = nil
if @current_user
post =
if upload.persisted?
SystemMessage.create_from_system_user(
@current_user,
:csv_export_succeeded,
download_link: UploadMarkdown.new(upload).attachment_markdown,
export_title: export_title,
)
else
SystemMessage.create_from_system_user(@current_user, :csv_export_failed)
end
end
post
end
end
end