diff --git a/app/assets/javascripts/discourse/app/instance-initializers/message-bus.js b/app/assets/javascripts/discourse/app/instance-initializers/message-bus.js index be651bd64cc..819db4c3387 100644 --- a/app/assets/javascripts/discourse/app/instance-initializers/message-bus.js +++ b/app/assets/javascripts/discourse/app/instance-initializers/message-bus.js @@ -7,6 +7,12 @@ import getURL from "discourse-common/lib/get-url"; const LONG_POLL_AFTER_UNSEEN_TIME = 1200000; // 20 minutes +let _sendDeferredPageview = false; + +export function sendDeferredPageview() { + _sendDeferredPageview = true; +} + function mbAjax(messageBus, opts) { opts.headers ||= {}; @@ -22,6 +28,11 @@ function mbAjax(messageBus, opts) { opts.headers["Discourse-Present"] = "true"; } + if (_sendDeferredPageview) { + opts.headers["Discourse-Deferred-Track-View"] = "true"; + _sendDeferredPageview = false; + } + const oldComplete = opts.complete; opts.complete = function (xhr, stat) { handleLogoff(xhr); diff --git a/app/assets/javascripts/discourse/app/instance-initializers/page-tracking.js b/app/assets/javascripts/discourse/app/instance-initializers/page-tracking.js index 704493c7388..e3f226cb72e 100644 --- a/app/assets/javascripts/discourse/app/instance-initializers/page-tracking.js +++ b/app/assets/javascripts/discourse/app/instance-initializers/page-tracking.js @@ -4,11 +4,20 @@ import { resetPageTracking, startPageTracking, } from "discourse/lib/page-tracker"; +import { sendDeferredPageview } from "./message-bus"; export default { after: "inject-objects", + before: "message-bus", initialize(owner) { + const isErrorPage = + document.querySelector("meta#discourse-error")?.dataset.discourseError === + "true"; + if (!isErrorPage) { + sendDeferredPageview(); + } + // Tell our AJAX system to track a page transition // eslint-disable-next-line ember/no-private-routing-service const router = owner.lookup("router:main"); diff --git a/app/assets/javascripts/discourse/scripts/pageview.js b/app/assets/javascripts/discourse/scripts/pageview.js new file mode 100644 index 00000000000..6bb83f55443 --- /dev/null +++ b/app/assets/javascripts/discourse/scripts/pageview.js @@ -0,0 +1,17 @@ +document.addEventListener("DOMContentLoaded", function () { + const isErrorPage = + document.querySelector("meta#discourse-error")?.dataset.discourseError === + "true"; + + if (!isErrorPage) { + const root = + document.querySelector("meta[name=discourse-base-uri]")?.content || ""; + + fetch(`${root}/pageview`, { + method: "POST", + headers: { + "Discourse-Deferred-Track-View": "true", + }, + }); + } +}); diff --git a/app/controllers/pageview_controller.rb b/app/controllers/pageview_controller.rb new file mode 100644 index 00000000000..2b669eaf515 --- /dev/null +++ b/app/controllers/pageview_controller.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class PageviewController < ApplicationController + skip_before_action :check_xhr, + :redirect_to_login_if_required, + :preload_json, + :verify_authenticity_token + + def index + # pageview tracking is handled by middleware + render plain: "ok" + end +end diff --git a/app/models/application_request.rb b/app/models/application_request.rb index a91c67571ed..8b7f966a56f 100644 --- a/app/models/application_request.rb +++ b/app/models/application_request.rb @@ -14,6 +14,10 @@ class ApplicationRequest < ActiveRecord::Base page_view_anon_mobile api user_api + page_view_anon_browser + page_view_anon_browser_mobile + page_view_logged_in_browser + page_view_logged_in_browser_mobile ] include CachedCounting diff --git a/app/models/concerns/reports/consolidated_page_views_browser_detection.rb b/app/models/concerns/reports/consolidated_page_views_browser_detection.rb new file mode 100644 index 00000000000..3b252722b1b --- /dev/null +++ b/app/models/concerns/reports/consolidated_page_views_browser_detection.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module Reports::ConsolidatedPageViewsBrowserDetection + extend ActiveSupport::Concern + + class_methods do + def report_consolidated_page_views_browser_detection(report) + report.modes = [:stacked_chart] + + data = + DB.query( + <<~SQL, + SELECT + date, + SUM(CASE WHEN req_type = :page_view_logged_in_browser THEN count ELSE 0 END) AS page_view_logged_in_browser, + SUM(CASE WHEN req_type = :page_view_anon_browser THEN count ELSE 0 END) AS page_view_anon_browser, + SUM(CASE WHEN req_type = :page_view_crawler THEN count ELSE 0 END) AS page_view_crawler, + SUM( + CASE WHEN req_type = :page_view_anon THEN count + WHEN req_type = :page_view_logged_in THEN count + WHEN req_type = :page_view_anon_browser THEN -count + WHEN req_type = :page_view_logged_in_browser THEN -count + ELSE 0 + END + ) AS page_view_other + FROM application_requests + WHERE date >= :start_date AND date <= :end_date + GROUP BY date + ORDER BY date ASC + SQL + start_date: report.start_date, + end_date: report.end_date, + page_view_anon: ApplicationRequest.req_types[:page_view_anon], + page_view_crawler: ApplicationRequest.req_types[:page_view_crawler], + page_view_logged_in: ApplicationRequest.req_types[:page_view_logged_in], + page_view_anon_browser: ApplicationRequest.req_types[:page_view_anon_browser], + page_view_logged_in_browser: ApplicationRequest.req_types[:page_view_logged_in_browser], + ) + + report.data = [ + { + req: "page_view_logged_in_browser", + label: + I18n.t( + "reports.consolidated_page_views_browser_detection.xaxis.page_view_logged_in_browser", + ), + color: report.colors[0], + data: data.map { |row| { x: row.date, y: row.page_view_logged_in_browser } }, + }, + { + req: "page_view_anon_browser", + label: + I18n.t( + "reports.consolidated_page_views_browser_detection.xaxis.page_view_anon_browser", + ), + color: report.colors[1], + data: data.map { |row| { x: row.date, y: row.page_view_anon_browser } }, + }, + { + req: "page_view_crawler", + label: + I18n.t("reports.consolidated_page_views_browser_detection.xaxis.page_view_crawler"), + color: report.colors[2], + data: data.map { |row| { x: row.date, y: row.page_view_crawler } }, + }, + { + req: "page_view_other", + label: I18n.t("reports.consolidated_page_views_browser_detection.xaxis.page_view_other"), + color: report.colors[2], + data: data.map { |row| { x: row.date, y: row.page_view_other } }, + }, + ] + end + end +end diff --git a/app/models/report.rb b/app/models/report.rb index 0daa3e86f87..dbf576a5ce6 100644 --- a/app/models/report.rb +++ b/app/models/report.rb @@ -49,6 +49,7 @@ class Report include Reports::UserFlaggingRatio include Reports::TrustLevelGrowth include Reports::ConsolidatedPageViews + include Reports::ConsolidatedPageViewsBrowserDetection include Reports::ConsolidatedApiRequests include Reports::Visits include Reports::TimeToFirstResponse diff --git a/app/views/exceptions/not_found.html.erb b/app/views/exceptions/not_found.html.erb index 373a6c18059..ff8918d9f85 100644 --- a/app/views/exceptions/not_found.html.erb +++ b/app/views/exceptions/not_found.html.erb @@ -1,5 +1,13 @@ <% content_for :title do %><%= @page_title %> - <%= SiteSetting.title %><% end %> +<% content_for :head do %> + +<% end %> + +<% content_for :no_ember_head do %> + +<% end %> +

<%= @title %>

diff --git a/app/views/layouts/no_ember.html.erb b/app/views/layouts/no_ember.html.erb index 51d1f319e94..6a7c6a3986f 100644 --- a/app/views/layouts/no_ember.html.erb +++ b/app/views/layouts/no_ember.html.erb @@ -17,6 +17,8 @@ <%- if allow_plugins? %> <%= build_plugin_html 'server:before-head-close' %> <%- end -%> + + <%= preload_script "pageview" %> <%- if allow_plugins? %> diff --git a/app/views/layouts/publish.html.erb b/app/views/layouts/publish.html.erb index 04d91c9653f..c934229dc1e 100644 --- a/app/views/layouts/publish.html.erb +++ b/app/views/layouts/publish.html.erb @@ -8,6 +8,7 @@ <%= render_google_tag_manager_head_code %> <%= render_google_universal_analytics_code %> <%= preload_script 'publish' %> + <%= preload_script 'pageview' %> <%= theme_lookup("header") %> diff --git a/config/initializers/004-message_bus.rb b/config/initializers/004-message_bus.rb index 383d8a01134..d9a1959a3be 100644 --- a/config/initializers/004-message_bus.rb +++ b/config/initializers/004-message_bus.rb @@ -31,7 +31,7 @@ def setup_message_bus_env(env) "Access-Control-Allow-Origin" => Discourse.base_url_no_prefix, "Access-Control-Allow-Methods" => "GET, POST", "Access-Control-Allow-Headers" => - "X-SILENCE-LOGGER, X-Shared-Session-Key, Dont-Chunk, Discourse-Present", + "X-SILENCE-LOGGER, X-Shared-Session-Key, Dont-Chunk, Discourse-Present, Discourse-Deferred-Track-View", "Access-Control-Max-Age" => "7200", } diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index e30316ca322..2fa9a55ad68 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -1338,6 +1338,15 @@ en: user_api: "User API" yaxis: "Day" description: "API requests for regular API keys and user API keys." + consolidated_page_views_browser_detection: + title: "Consolidated Pageviews with Browser Detection (Experimental)" + xaxis: + page_view_anon_browser: "Anonymous Browser" + page_view_logged_in_browser: "Logged In Browser" + page_view_crawler: "Known Crawler" + page_view_other: "Other pageviews" + yaxis: "Day" + description: "Pageviews for logged in users, anonymous users, known crawlers and other. This experimental report ensures logged-in/anon requests are coming from real browsers before counting them." dau_by_mau: title: "DAU/MAU" xaxis: "Day" @@ -1497,6 +1506,16 @@ en: xaxis: "Day" yaxis: "Mobile Anon Pageviews" description: "Number of new pageviews from visitors on a mobile device who are not logged in." + page_view_anon_browser_reqs: + title: "Anonymous Browser Pageviews" + xaxis: "Day" + yaxis: "Anonymous Browser Pageviews" + description: "Number of pageviews by anonymous visitors using real browsers." + page_view_logged_in_browser_reqs: + title: "Logged In Browser Pageviews" + xaxis: "Day" + yaxis: "Logged In Browser Pageviews" + description: "Number of pageviews by logged-in visitors using real browsers." http_background_reqs: title: "Background" xaxis: "Day" diff --git a/config/routes.rb b/config/routes.rb index d784afeb440..34dc52d7445 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1629,6 +1629,8 @@ Discourse::Application.routes.draw do resources :sidebar_sections, only: %i[index create update destroy] put "/sidebar_sections/reset/:id" => "sidebar_sections#reset" + post "/pageview" => "pageview#index" + get "*url", to: "permalinks#show", constraints: PermalinkConstraint.new get "/form-templates/:id" => "form_templates#show" diff --git a/lib/middleware/request_tracker.rb b/lib/middleware/request_tracker.rb index 042b946ba22..1147a25a617 100644 --- a/lib/middleware/request_tracker.rb +++ b/lib/middleware/request_tracker.rb @@ -74,9 +74,31 @@ class Middleware::RequestTracker elsif data[:has_auth_cookie] ApplicationRequest.increment!(:page_view_logged_in) ApplicationRequest.increment!(:page_view_logged_in_mobile) if data[:is_mobile] + if data[:explicit_track_view] + # Must be a browser if it had this header from our ajax implementation + ApplicationRequest.increment!(:page_view_logged_in_browser) + ApplicationRequest.increment!(:page_view_logged_in_browser_mobile) if data[:is_mobile] + end elsif !SiteSetting.login_required ApplicationRequest.increment!(:page_view_anon) ApplicationRequest.increment!(:page_view_anon_mobile) if data[:is_mobile] + if data[:explicit_track_view] + # Must be a browser if it had this header from our ajax implementation + ApplicationRequest.increment!(:page_view_anon_browser) + ApplicationRequest.increment!(:page_view_anon_browser_mobile) if data[:is_mobile] + end + end + end + + # Message-bus requests may include this 'deferred track' header which we use to detect + # 'real browser' views. + if data[:deferred_track] && !data[:is_crawler] + if data[:has_auth_cookie] + ApplicationRequest.increment!(:page_view_logged_in_browser) + ApplicationRequest.increment!(:page_view_logged_in_browser_mobile) if data[:is_mobile] + elsif !SiteSetting.login_required + ApplicationRequest.increment!(:page_view_anon_browser) + ApplicationRequest.increment!(:page_view_anon_browser_mobile) if data[:is_mobile] end end @@ -103,12 +125,20 @@ class Middleware::RequestTracker request ||= Rack::Request.new(env) helper = Middleware::AnonymousCache::Helper.new(env, request) + # Value of the discourse-track-view request header env_track_view = env["HTTP_DISCOURSE_TRACK_VIEW"] - track_view = status == 200 - track_view &&= env_track_view != "0" && env_track_view != "false" - track_view &&= - env_track_view || (request.get? && !request.xhr? && headers["Content-Type"] =~ %r{text/html}) - track_view = !!track_view + + # Was the discourse-track-view request header set to true? Likely + # set by our ajax library to indicate a page view. + explicit_track_view = status == 200 && %w[1 true].include?(env_track_view) + + # An HTML response to a GET request is tracked implicitly + implicit_track_view = + status == 200 && !%w[0 false].include?(env_track_view) && request.get? && !request.xhr? && + headers["Content-Type"] =~ %r{text/html} + + track_view = !!(explicit_track_view || implicit_track_view) + has_auth_cookie = Auth::DefaultCurrentUserProvider.find_v0_auth_cookie(request).present? has_auth_cookie ||= Auth::DefaultCurrentUserProvider.find_v1_auth_cookie(env).present? @@ -118,6 +148,10 @@ class Middleware::RequestTracker is_message_bus = request.path.start_with?("#{Discourse.base_path}/message-bus/") is_topic_timings = request.path.start_with?("#{Discourse.base_path}/topics/timings") + # This header is sent on a follow-up request after a real browser loads up a page + # see `scripts/pageview.js` and `instance-initializers/page-tracking.js` + has_deferred_track_header = %w[1 true].include?(env["HTTP_DISCOURSE_DEFERRED_TRACK_VIEW"]) + h = { status: status, is_crawler: helper.is_crawler?, @@ -129,6 +163,8 @@ class Middleware::RequestTracker track_view: track_view, timing: timing, queue_seconds: env["REQUEST_QUEUE_SECONDS"], + explicit_track_view: explicit_track_view, + deferred_track: has_deferred_track_header, } if h[:is_background] @@ -166,7 +202,8 @@ class Middleware::RequestTracker data = begin self.class.get_data(env, result, info, request) - rescue StandardError + rescue StandardError => e + Rails.logger.warn("RequestTracker.get_data failed: #{e}") nil end diff --git a/spec/lib/middleware/request_tracker_spec.rb b/spec/lib/middleware/request_tracker_spec.rb index 7757e093d7f..059f25f63d2 100644 --- a/spec/lib/middleware/request_tracker_spec.rb +++ b/spec/lib/middleware/request_tracker_spec.rb @@ -66,6 +66,7 @@ RSpec.describe Middleware::RequestTracker do CachedCounting.flush expect(ApplicationRequest.page_view_anon.first.count).to eq(2) + expect(ApplicationRequest.page_view_anon_browser.first.count).to eq(2) end it "can log requests correctly" do @@ -119,6 +120,23 @@ RSpec.describe Middleware::RequestTracker do expect(ApplicationRequest.page_view_anon_mobile.first.count).to eq(1) expect(ApplicationRequest.page_view_crawler.first.count).to eq(1) + + expect(ApplicationRequest.page_view_anon_browser.first.count).to eq(1) + end + + it "logs deferred pageviews correctly" do + data = + Middleware::RequestTracker.get_data( + env(:path => "/message-bus/abcde/poll", "HTTP_DISCOURSE_DEFERRED_TRACK_VIEW" => "1"), + ["200", { "Content-Type" => "text/html" }], + 0.1, + ) + Middleware::RequestTracker.log_request(data) + + expect(data[:deferred_track]).to eq(true) + CachedCounting.flush + + expect(ApplicationRequest.page_view_anon_browser.first.count).to eq(1) end it "logs API requests correctly" do diff --git a/spec/system/request_tracker_spec.rb b/spec/system/request_tracker_spec.rb new file mode 100644 index 00000000000..43df17b7296 --- /dev/null +++ b/spec/system/request_tracker_spec.rb @@ -0,0 +1,188 @@ +# frozen_string_literal: true + +describe "request tracking", type: :system do + before do + ApplicationRequest.enable + CachedCounting.reset + CachedCounting.enable + end + + after do + CachedCounting.reset + ApplicationRequest.disable + CachedCounting.disable + end + + it "tracks an anonymous visit correctly" do + visit "/" + + try_until_success do + CachedCounting.flush + expect(ApplicationRequest.stats).to include( + "page_view_anon_total" => 1, + "page_view_anon_browser_total" => 1, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + + find(".nav-item_categories a").click + + try_until_success do + CachedCounting.flush + expect(ApplicationRequest.stats).to include( + "page_view_anon_total" => 2, + "page_view_anon_browser_total" => 2, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + end + + it "tracks a crawler visit correctly" do + # Can't change Selenium's user agent... so change site settings to make Discourse detect chrome as a crawler + SiteSetting.crawler_user_agents += "|chrome" + + visit "/" + + try_until_success do + CachedCounting.flush + expect(ApplicationRequest.stats).to include( + "page_view_anon_total" => 0, + "page_view_anon_browser_total" => 0, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 1, + ) + end + end + + it "tracks a logged-in session correctly" do + sign_in Fabricate(:user) + + visit "/" + + try_until_success do + CachedCounting.flush + expect(ApplicationRequest.stats).to include( + "page_view_anon_total" => 0, + "page_view_anon_browser_total" => 0, + "page_view_logged_in_total" => 1, + "page_view_crawler_total" => 0, + "page_view_logged_in_browser_total" => 1, + ) + end + + find(".nav-item_categories a").click + + try_until_success do + CachedCounting.flush + expect(ApplicationRequest.stats).to include( + "page_view_anon_total" => 0, + "page_view_anon_browser_total" => 0, + "page_view_logged_in_total" => 2, + "page_view_crawler_total" => 0, + "page_view_logged_in_browser_total" => 2, + ) + end + end + + it "tracks normal error pages correctly" do + SiteSetting.bootstrap_error_pages = false + + visit "/foobar" + + try_until_success do + CachedCounting.flush + + # Does not count error as a pageview + expect(ApplicationRequest.stats).to include( + "http_4xx_total" => 1, + "page_view_anon_total" => 0, + "page_view_anon_browser_total" => 0, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + + find("#site-logo").click + + try_until_success do + CachedCounting.flush + expect(ApplicationRequest.stats).to include( + "http_4xx_total" => 1, + "page_view_anon_total" => 1, + "page_view_anon_browser_total" => 1, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + end + + it "tracks non-ember pages correctly" do + visit "/safe-mode" + + try_until_success do + CachedCounting.flush + + # Does not count error as a pageview + expect(ApplicationRequest.stats).to include( + "page_view_anon_total" => 1, + "page_view_anon_browser_total" => 1, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + end + + it "tracks bootstrapped error pages correctly" do + SiteSetting.bootstrap_error_pages = true + + visit "/foobar" + + try_until_success do + CachedCounting.flush + + # Does not count error as a pageview + expect(ApplicationRequest.stats).to include( + "http_4xx_total" => 1, + "page_view_anon_total" => 0, + "page_view_anon_browser_total" => 0, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + + find("#site-logo").click + + try_until_success do + CachedCounting.flush + expect(ApplicationRequest.stats).to include( + "http_4xx_total" => 1, + "page_view_anon_total" => 1, + "page_view_anon_browser_total" => 1, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + end + + it "tracks published pages correctly" do + SiteSetting.enable_page_publishing = true + page = + Fabricate(:published_page, public: true, slug: "some-page", topic: Fabricate(:post).topic) + + visit "/pub/some-page" + + try_until_success do + CachedCounting.flush + + # Does not count error as a pageview + expect(ApplicationRequest.stats).to include( + "page_view_anon_total" => 1, + "page_view_anon_browser_total" => 1, + "page_view_logged_in_total" => 0, + "page_view_crawler_total" => 0, + ) + end + end +end