From 551f3cbe420dcb72bcb784205451d5b01b811041 Mon Sep 17 00:00:00 2001
From: Duncan Ogilvie <mr.exodia.tpodt@gmail.com>
Date: Wed, 29 Nov 2017 02:50:39 +0100
Subject: [PATCH] Memory usage improvements (#3013)

* govendor update code.gitea.io/git

Signed-off-by: Duncan Ogilvie <mr.exodia.tpodt@gmail.com>

* Greatly improve memory usage

Signed-off-by: Duncan Ogilvie <mr.exodia.tpodt@gmail.com>
---
 modules/context/repo.go            |  3 ++
 routers/repo/download.go           |  3 +-
 routers/repo/editor.go             |  9 ++++--
 routers/repo/issue.go              |  3 ++
 routers/repo/view.go               | 29 +++++++++++------
 vendor/code.gitea.io/git/blob.go   | 50 +++++++++++++++++++++++++++---
 vendor/code.gitea.io/git/commit.go |  3 +-
 vendor/code.gitea.io/git/git.go    |  2 +-
 vendor/vendor.json                 |  6 ++--
 9 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/modules/context/repo.go b/modules/context/repo.go
index 38baa423061..e8d084c342a 100644
--- a/modules/context/repo.go
+++ b/modules/context/repo.go
@@ -143,6 +143,9 @@ func (r *Repository) GetEditorconfig() (*editorconfig.Editorconfig, error) {
 	if err != nil {
 		return nil, err
 	}
+	if treeEntry.Blob().Size() >= setting.UI.MaxDisplayFileSize {
+		return nil, git.ErrNotExist{ID: "", RelPath: ".editorconfig"}
+	}
 	reader, err := treeEntry.Blob().Data()
 	if err != nil {
 		return nil, err
diff --git a/routers/repo/download.go b/routers/repo/download.go
index 78c60886078..78c4b519bec 100644
--- a/routers/repo/download.go
+++ b/routers/repo/download.go
@@ -45,10 +45,11 @@ func ServeData(ctx *context.Context, name string, reader io.Reader) error {
 
 // ServeBlob download a git.Blob
 func ServeBlob(ctx *context.Context, blob *git.Blob) error {
-	dataRc, err := blob.Data()
+	dataRc, err := blob.DataAsync()
 	if err != nil {
 		return err
 	}
+	defer dataRc.Close()
 
 	return ServeData(ctx, ctx.Repo.TreePath, dataRc)
 }
diff --git a/routers/repo/editor.go b/routers/repo/editor.go
index a6cc9223647..82b04a84d20 100644
--- a/routers/repo/editor.go
+++ b/routers/repo/editor.go
@@ -73,11 +73,16 @@ func editFile(ctx *context.Context, isNewFile bool) {
 
 		// No way to edit a directory online.
 		if entry.IsDir() {
-			ctx.Handle(404, "", nil)
+			ctx.Handle(404, "entry.IsDir", nil)
 			return
 		}
 
 		blob := entry.Blob()
+		if blob.Size() >= setting.UI.MaxDisplayFileSize {
+			ctx.Handle(404, "blob.Size", err)
+			return
+		}
+
 		dataRc, err := blob.Data()
 		if err != nil {
 			ctx.Handle(404, "blob.Data", err)
@@ -93,7 +98,7 @@ func editFile(ctx *context.Context, isNewFile bool) {
 
 		// Only text file are editable online.
 		if !base.IsTextFile(buf) {
-			ctx.Handle(404, "", nil)
+			ctx.Handle(404, "base.IsTextFile", nil)
 			return
 		}
 
diff --git a/routers/repo/issue.go b/routers/repo/issue.go
index c24a4e4360d..b45d521e5b1 100644
--- a/routers/repo/issue.go
+++ b/routers/repo/issue.go
@@ -319,6 +319,9 @@ func getFileContentFromDefaultBranch(ctx *context.Context, filename string) (str
 	if err != nil {
 		return "", false
 	}
+	if entry.Blob().Size() >= setting.UI.MaxDisplayFileSize {
+		return "", false
+	}
 	r, err = entry.Blob().Data()
 	if err != nil {
 		return "", false
diff --git a/routers/repo/view.go b/routers/repo/view.go
index d43b4d7f78b..a02acb0d6c1 100644
--- a/routers/repo/view.go
+++ b/routers/repo/view.go
@@ -76,11 +76,12 @@ func renderDirectory(ctx *context.Context, treeLink string) {
 		ctx.Data["ReadmeInList"] = true
 		ctx.Data["ReadmeExist"] = true
 
-		dataRc, err := readmeFile.Data()
+		dataRc, err := readmeFile.DataAsync()
 		if err != nil {
 			ctx.Handle(500, "Data", err)
 			return
 		}
+		defer dataRc.Close()
 
 		buf := make([]byte, 1024)
 		n, _ := dataRc.Read(buf)
@@ -91,14 +92,21 @@ func renderDirectory(ctx *context.Context, treeLink string) {
 		ctx.Data["FileName"] = readmeFile.Name()
 		// FIXME: what happens when README file is an image?
 		if isTextFile {
-			d, _ := ioutil.ReadAll(dataRc)
-			buf = append(buf, d...)
-			if markup.Type(readmeFile.Name()) != "" {
-				ctx.Data["IsMarkup"] = true
-				ctx.Data["FileContent"] = string(markup.Render(readmeFile.Name(), buf, treeLink, ctx.Repo.Repository.ComposeMetas()))
+			if readmeFile.Size() >= setting.UI.MaxDisplayFileSize {
+				// Pretend that this is a normal text file to display 'This file is too large to be shown'
+				ctx.Data["IsFileTooLarge"] = true
+				ctx.Data["IsTextFile"] = true
+				ctx.Data["FileSize"] = readmeFile.Size()
 			} else {
-				ctx.Data["IsRenderedHTML"] = true
-				ctx.Data["FileContent"] = string(bytes.Replace(buf, []byte("\n"), []byte(`<br>`), -1))
+				d, _ := ioutil.ReadAll(dataRc)
+				buf = append(buf, d...)
+				if markup.Type(readmeFile.Name()) != "" {
+					ctx.Data["IsMarkup"] = true
+					ctx.Data["FileContent"] = string(markup.Render(readmeFile.Name(), buf, treeLink, ctx.Repo.Repository.ComposeMetas()))
+				} else {
+					ctx.Data["IsRenderedHTML"] = true
+					ctx.Data["FileContent"] = string(bytes.Replace(buf, []byte("\n"), []byte(`<br>`), -1))
+				}
 			}
 		}
 	}
@@ -135,11 +143,12 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
 	ctx.Data["IsViewFile"] = true
 
 	blob := entry.Blob()
-	dataRc, err := blob.Data()
+	dataRc, err := blob.DataAsync()
 	if err != nil {
-		ctx.Handle(500, "Data", err)
+		ctx.Handle(500, "DataAsync", err)
 		return
 	}
+	defer dataRc.Close()
 
 	ctx.Data["FileSize"] = blob.Size()
 	ctx.Data["FileName"] = blob.Name()
diff --git a/vendor/code.gitea.io/git/blob.go b/vendor/code.gitea.io/git/blob.go
index 10b8ea4c9f1..a6e392eeb50 100644
--- a/vendor/code.gitea.io/git/blob.go
+++ b/vendor/code.gitea.io/git/blob.go
@@ -6,7 +6,11 @@ package git
 
 import (
 	"bytes"
+	"fmt"
 	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
 )
 
 // Blob represents a Git object.
@@ -18,14 +22,52 @@ type Blob struct {
 // Data gets content of blob all at once and wrap it as io.Reader.
 // This can be very slow and memory consuming for huge content.
 func (b *Blob) Data() (io.Reader, error) {
-	stdout, err := NewCommand("show", b.ID.String()).RunInDirBytes(b.repo.Path)
-	if err != nil {
-		return nil, err
+	stdout := new(bytes.Buffer)
+	stderr := new(bytes.Buffer)
+
+	// Preallocate memory to save ~50% memory usage on big files.
+	stdout.Grow(int(b.Size() + 2048))
+
+	if err := b.DataPipeline(stdout, stderr); err != nil {
+		return nil, concatenateError(err, stderr.String())
 	}
-	return bytes.NewBuffer(stdout), nil
+	return stdout, nil
 }
 
 // DataPipeline gets content of blob and write the result or error to stdout or stderr
 func (b *Blob) DataPipeline(stdout, stderr io.Writer) error {
 	return NewCommand("show", b.ID.String()).RunInDirPipeline(b.repo.Path, stdout, stderr)
 }
+
+type cmdReadCloser struct {
+	cmd    *exec.Cmd
+	stdout io.Reader
+}
+
+func (c cmdReadCloser) Read(p []byte) (int, error) {
+	return c.stdout.Read(p)
+}
+
+func (c cmdReadCloser) Close() error {
+	io.Copy(ioutil.Discard, c.stdout)
+	return c.cmd.Wait()
+}
+
+// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
+// Calling the Close function on the result will discard all unread output.
+func (b *Blob) DataAsync() (io.ReadCloser, error) {
+	cmd := exec.Command("git", "show", b.ID.String())
+	cmd.Dir = b.repo.Path
+	cmd.Stderr = os.Stderr
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return nil, fmt.Errorf("StdoutPipe: %v", err)
+	}
+
+	if err = cmd.Start(); err != nil {
+		return nil, fmt.Errorf("Start: %v", err)
+	}
+
+	return cmdReadCloser{stdout: stdout, cmd: cmd}, nil
+}
diff --git a/vendor/code.gitea.io/git/commit.go b/vendor/code.gitea.io/git/commit.go
index c2954123816..299a2381b65 100644
--- a/vendor/code.gitea.io/git/commit.go
+++ b/vendor/code.gitea.io/git/commit.go
@@ -98,10 +98,11 @@ func (c *Commit) IsImageFile(name string) bool {
 		return false
 	}
 
-	dataRc, err := blob.Data()
+	dataRc, err := blob.DataAsync()
 	if err != nil {
 		return false
 	}
+	defer dataRc.Close()
 	buf := make([]byte, 1024)
 	n, _ := dataRc.Read(buf)
 	buf = buf[:n]
diff --git a/vendor/code.gitea.io/git/git.go b/vendor/code.gitea.io/git/git.go
index 9ec20c97e13..150b80fb076 100644
--- a/vendor/code.gitea.io/git/git.go
+++ b/vendor/code.gitea.io/git/git.go
@@ -25,7 +25,7 @@ var (
 	// Prefix the log prefix
 	Prefix = "[git-module] "
 	// GitVersionRequired is the minimum Git version required
-	GitVersionRequired = "1.8.1.6"
+	GitVersionRequired = "1.7.2"
 )
 
 func log(format string, args ...interface{}) {
diff --git a/vendor/vendor.json b/vendor/vendor.json
index 54cbbf330c8..e3084fd53de 100644
--- a/vendor/vendor.json
+++ b/vendor/vendor.json
@@ -3,10 +3,10 @@
 	"ignore": "test appengine",
 	"package": [
 		{
-			"checksumSHA1": "JN/re4+x/hCzMLGHmieUcykVDAg=",
+			"checksumSHA1": "vAVjAz7Wpjnu7GGba4JLIDTpQEw=",
 			"path": "code.gitea.io/git",
-			"revision": "d47b98c44c9a6472e44ab80efe65235e11c6da2a",
-			"revisionTime": "2017-10-23T00:52:09Z"
+			"revision": "f9dd6826bbb51c92c6964ce18176c304ea286e54",
+			"revisionTime": "2017-11-28T15:25:05Z"
 		},
 		{
 			"checksumSHA1": "QQ7g7B9+EIzGjO14KCGEs9TNEzM=",