From c034dca844929e7fd698262a95e78c0744c33e8c Mon Sep 17 00:00:00 2001
From: Sam <sam.saffron@gmail.com>
Date: Thu, 18 Jun 2015 12:29:06 +1000
Subject: [PATCH] improve nginx report to include counts and break down user
 traffic

---
 script/nginx_analyze.rb | 149 +++++++++++++++++++++++++++++++++-------
 1 file changed, 124 insertions(+), 25 deletions(-)

diff --git a/script/nginx_analyze.rb b/script/nginx_analyze.rb
index f83d6425d22..177d45c4806 100644
--- a/script/nginx_analyze.rb
+++ b/script/nginx_analyze.rb
@@ -1,3 +1,5 @@
+require 'date'
+
 class LogAnalyzer
 
   class LineParser
@@ -10,6 +12,8 @@ class LogAnalyzer
 
     PATTERN = /\[(.*)\] (\S+) \"(.*)\" \"(.*)\" \"(.*)\" ([0-9]+) ([0-9]+) \"(.*)\" ([0-9.]+) ([0-9.]+) "(.*)"/
 
+    TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %Z"
+
     def self.parse(line)
       result = new
       _, result.time, result.ip_address, result.url, result.user_agent,
@@ -21,24 +25,61 @@ class LogAnalyzer
 
       result
     end
+
+    def parsed_time # converts the raw `time` string into a DateTime using TIME_FORMAT
+      DateTime.strptime(time, TIME_FORMAT)
+    end
   end
 
   attr_reader :total_requests, :message_bus_requests, :filename,
               :ip_to_rails_duration, :username_to_rails_duration,
               :route_to_rails_duration, :url_to_rails_duration,
-              :status_404_to_count
+              :status_404_to_count, :from_time, :to_time
 
   def self.analyze(filename)
     new(filename).analyze
   end
 
+  class Aggeregator
+    # Per-id accumulator: @data maps id => [summed duration, number of add calls, optional Hash of aggregate => summed duration].
+    def initialize
+      @data = {}
+    end
+    # Adds duration to id's total and counts the call; when aggregate is given, also sums duration under that key.
+    def add(id, duration, aggregate=nil)
+      ary = (@data[id] ||= [0,0]) # slot 0: running total, slot 1: call count
+      ary[0] += duration
+      ary[1] += 1
+      if aggregate
+        ary[2] ||= Hash.new(0) # created lazily, so ids never given an aggregate keep only two slots
+        ary[2][aggregate] += duration
+      end
+    end
+    # Returns up to n rows [label, total, count], largest total first; a breakdown string is appended as a 4th element when aggregates exist.
+    def top(n)
+      @data.sort{|a,b| b[1][0] <=> a[1][0]}.first(n).map do |metric, ary|
+        metric = metric.to_s
+        metric = "[empty]" if metric.length == 0 # placeholder label for blank ids
+        result = [metric, ary[0], ary[1]]
+        # breakdown: the five largest aggregates as "key(value)" joined by spaces, Float values shown with 2 decimals
+        if ary[2]
+          result.push ary[2].sort{|a,b| b[1] <=> a[1]}.first(5).map{|k,v|
+            v = "%.2f" % v if Float === v
+            "#{k}(#{v})"}.join(" ")
+        end
+
+        result
+      end
+    end
+  end
+
   def initialize(filename)
     @filename = filename
-    @ip_to_rails_duration = Hash.new(0)
-    @username_to_rails_duration = Hash.new(0)
-    @route_to_rails_duration = Hash.new(0)
-    @url_to_rails_duration = Hash.new(0)
-    @status_404_to_count = Hash.new(0)
+    @ip_to_rails_duration = Aggeregator.new # fed parsed.ip_address with parsed.rails_duration
+    @username_to_rails_duration = Aggeregator.new # fed the username ("[Anonymous]" when "-"), broken down by parsed.route
+    @route_to_rails_duration = Aggeregator.new # fed parsed.route with parsed.rails_duration
+    @url_to_rails_duration = Aggeregator.new # fed parsed.url with parsed.rails_duration
+    @status_404_to_count = Aggeregator.new # fed parsed.url with a weight of 1 for each "404" status
   end
 
   def analyze
@@ -48,21 +89,24 @@ class LogAnalyzer
       @total_requests += 1
       parsed = LineParser.parse(line)
 
+      @from_time ||= parsed.time
+      @to_time = parsed.time
+
       if parsed.url =~ /(POST|GET) \/message-bus/
         @message_bus_requests += 1
         next
       end
 
-      @ip_to_rails_duration[parsed.ip_address] += parsed.rails_duration
+      @ip_to_rails_duration.add(parsed.ip_address, parsed.rails_duration)
 
       username = parsed.username == "-" ? "[Anonymous]" : parsed.username
-      @username_to_rails_duration[username] += parsed.rails_duration
+      @username_to_rails_duration.add(username, parsed.rails_duration, parsed.route)
 
-      @route_to_rails_duration[parsed.route] += parsed.rails_duration
+      @route_to_rails_duration.add(parsed.route, parsed.rails_duration)
 
-      @url_to_rails_duration[parsed.url] += parsed.rails_duration
+      @url_to_rails_duration.add(parsed.url, parsed.rails_duration)
 
-      @status_404_to_count[parsed.url] += 1 if parsed.status == "404"
+      @status_404_to_count.add(parsed.url,1) if parsed.status == "404"
     end
     self
   end
@@ -72,46 +116,101 @@ end
 filename = ARGV[0] || "/var/log/nginx/access.log"
 analyzer = LogAnalyzer.analyze(filename)
 
-SPACER = "-" * 80
+SPACER = "-" * 100 # rule line printed between report sections
 
-def top(cols, hash, count)
-  sorted = hash.sort{|a,b| b[1] <=> a[1]}.first(30)
+# map that also passes each item's zero-based index to the block (hand-rolled: don't feel like pulling in active support)
+def map_with_index(ary, &block)
+  idx = 0
+  ary.map do |item|
+    v = block.call(item, idx)
+    idx += 1 # bumped after the call, so the first item is seen with index 0
+    v # the block's result becomes the mapped element
+  end
+end
 
-  longest_0 = [cols[0].length, sorted.map{|a,b| a.to_s.length}.max ].max
+def top(cols, aggregator, count)
+  sorted = aggregator.top(30)
 
-  puts "#{cols[0].ljust(longest_0)} #{cols[1]}"
-  puts "#{("-"*(cols[0].length)).ljust(longest_0)} #{"-"*cols[1].length}"
+  col_just = []
+
+  col_widths = map_with_index(cols) do |name,idx|
+    max_width = name.length
+    col_just[idx] = :ljust
+    sorted.each do |row|
+      col_just[idx] = :rjust unless String === row[idx] || row[idx].nil?
+      row[idx] = '%.2f' % row[idx] if Float === row[idx]
+      row[idx] = row[idx].to_s
+      max_width = row[idx].length if row[idx].length > max_width
+    end
+    [max_width,80].min
+  end
+
+  puts(map_with_index(cols) do |name,idx|
+    name.ljust(col_widths[idx])
+  end.join(" "))
+
+  puts(map_with_index(cols) do |name,idx|
+    ("-" * name.length).ljust(col_widths[idx])
+  end.join(" "))
+
+  sorted.each do |raw_row|
+
+    rows = []
+    idx = 0
+    raw_row.each do |col|
+      j = 0
+      col.to_s.scan(/(.{1,80}($|\s)|.{1,80})/).each do |r|
+        rows[j] ||= []
+        rows[j][idx] = r[0]
+        j += 1
+      end
+      idx += 1
+    end
+
+    if rows.length > 1
+      puts
+    end
+
+    rows.each do |row|
+      cols.length.times do |i|
+        print row[i].to_s.send(col_just[i], col_widths[i])
+        print " "
+      end
+      puts
+    end
+
+    if rows.length > 1
+      puts
+    end
 
-  sorted.each do |val, duration|
-    next unless val && val.length > 1
-    n = Fixnum === duration ? duration : '%.2f' % duration
-    puts "#{val.to_s.ljust(longest_0)} #{n.to_s.rjust(cols[1].length)}"
   end
 end
 
 puts
 puts "Analyzed: #{analyzer.filename}"
 puts SPACER
+puts "#{analyzer.from_time} - #{analyzer.to_time}" # raw time fields of the first and last parsed log lines
+puts SPACER
 puts "Total Requests: #{analyzer.total_requests} ( MessageBus: #{analyzer.message_bus_requests} )"
 puts SPACER
 puts "Top 30 IPs by Server Load"
 puts
-top(["IP Address", "Duration"], analyzer.ip_to_rails_duration, 30)
+top(["IP Address", "Duration", "Reqs"], analyzer.ip_to_rails_duration, 30) # columns: id, summed duration, request count
 puts SPACER
 puts
 puts "Top 30 users by Server Load"
 puts
-top(["Username", "Duration"], analyzer.username_to_rails_duration, 30)
+top(["Username", "Duration", "Reqs", "Routes"], analyzer.username_to_rails_duration, 30) # 4th column: per-user route breakdown
 puts SPACER
 puts
 puts "Top 30 routes by Server Load"
 puts
-top(["Route", "Duration"], analyzer.route_to_rails_duration, 30)
+top(["Route", "Duration", "Reqs"], analyzer.route_to_rails_duration, 30)
 puts SPACER
 puts
 puts "Top 30 urls by Server Load"
 puts
-top(["Url", "Duration"], analyzer.url_to_rails_duration, 30)
+top(["Url", "Duration", "Reqs"], analyzer.url_to_rails_duration, 30)
 
 puts "(all durations in seconds)"
 puts SPACER