// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"

	"code.gitea.io/gitea/modules/log"
)
// CheckAttributeOpts represents the possible options to CheckAttribute.
type CheckAttributeOpts struct {
	// CachedOnly adds "--cached" to the check-attr invocation.
	CachedOnly bool
	// AllAttributes adds "-a" to the invocation; Attributes is ignored when set.
	AllAttributes bool
	// Attributes lists the attribute names to query when AllAttributes is
	// false. Empty entries are skipped.
	Attributes []string
	// Filenames are the paths to check; they are handed to the command via
	// AddDashesAndList.
	Filenames []string
	// IndexFile, when non-empty, is exported to git as GIT_INDEX_FILE.
	IndexFile string
	// WorkTree, when non-empty, is exported to git as GIT_WORK_TREE.
	WorkTree string
}
// CheckAttribute return the Blame object of file
func ( repo * Repository ) CheckAttribute ( opts CheckAttributeOpts ) ( map [ string ] map [ string ] string , error ) {
2021-11-18 04:37:00 +08:00
env := [ ] string { }
2022-06-16 23:47:44 +08:00
if len ( opts . IndexFile ) > 0 {
2021-11-18 04:37:00 +08:00
env = append ( env , "GIT_INDEX_FILE=" + opts . IndexFile )
}
2022-06-16 23:47:44 +08:00
if len ( opts . WorkTree ) > 0 {
2021-11-18 04:37:00 +08:00
env = append ( env , "GIT_WORK_TREE=" + opts . WorkTree )
}
if len ( env ) > 0 {
env = append ( os . Environ ( ) , env ... )
}
2019-12-12 21:18:07 +08:00
stdOut := new ( bytes . Buffer )
stdErr := new ( bytes . Buffer )
2022-10-23 22:44:45 +08:00
cmd := NewCommand ( repo . Ctx , "check-attr" , "-z" )
2019-12-12 21:18:07 +08:00
if opts . AllAttributes {
2022-10-23 22:44:45 +08:00
cmd . AddArguments ( "-a" )
2019-12-12 21:18:07 +08:00
} else {
for _ , attribute := range opts . Attributes {
if attribute != "" {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
cmd . AddDynamicArguments ( attribute )
2019-12-12 21:18:07 +08:00
}
}
}
2022-06-16 23:47:44 +08:00
if opts . CachedOnly {
2022-10-23 22:44:45 +08:00
cmd . AddArguments ( "--cached" )
2019-12-12 21:18:07 +08:00
}
2022-10-23 22:44:45 +08:00
cmd . AddDashesAndList ( opts . Filenames ... )
2019-12-12 21:18:07 +08:00
2022-04-01 10:55:30 +08:00
if err := cmd . Run ( & RunOpts {
Env : env ,
Dir : repo . Path ,
Stdout : stdOut ,
Stderr : stdErr ,
2022-02-11 20:47:22 +08:00
} ) ; err != nil {
2022-10-25 03:29:17 +08:00
return nil , fmt . Errorf ( "failed to run check-attr: %w\n%s\n%s" , err , stdOut . String ( ) , stdErr . String ( ) )
2019-12-12 21:18:07 +08:00
}
2021-09-10 04:13:36 +08:00
// FIXME: This is incorrect on versions < 1.8.5
2019-12-12 21:18:07 +08:00
fields := bytes . Split ( stdOut . Bytes ( ) , [ ] byte { '\000' } )
if len ( fields ) % 3 != 1 {
2021-09-10 04:13:36 +08:00
return nil , fmt . Errorf ( "wrong number of fields in return from check-attr" )
2019-12-12 21:18:07 +08:00
}
2022-01-21 01:46:10 +08:00
name2attribute2info := make ( map [ string ] map [ string ] string )
2019-12-12 21:18:07 +08:00
for i := 0 ; i < ( len ( fields ) / 3 ) ; i ++ {
filename := string ( fields [ 3 * i ] )
attribute := string ( fields [ 3 * i + 1 ] )
info := string ( fields [ 3 * i + 2 ] )
attribute2info := name2attribute2info [ filename ]
if attribute2info == nil {
attribute2info = make ( map [ string ] string )
}
attribute2info [ attribute ] = info
name2attribute2info [ filename ] = attribute2info
}
return name2attribute2info , nil
}
2021-09-10 04:13:36 +08:00
// CheckAttributeReader provides a reader for check-attribute content that can be long running
type CheckAttributeReader struct {
// params
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
Attributes [ ] string
2021-09-10 04:13:36 +08:00
Repo * Repository
IndexFile string
WorkTree string
stdinReader io . ReadCloser
stdinWriter * os . File
stdOut attributeWriter
cmd * Command
env [ ] string
ctx context . Context
cancel context . CancelFunc
}
2022-06-10 09:57:49 +08:00
// Init initializes the CheckAttributeReader
2021-09-10 04:13:36 +08:00
func ( c * CheckAttributeReader ) Init ( ctx context . Context ) error {
2021-09-21 03:46:51 +08:00
if len ( c . Attributes ) == 0 {
2021-09-10 04:13:36 +08:00
lw := new ( nulSeparatedAttributeWriter )
lw . attributes = make ( chan attributeTriple )
2021-09-21 03:46:51 +08:00
lw . closed = make ( chan struct { } )
2021-09-10 04:13:36 +08:00
c . stdOut = lw
c . stdOut . Close ( )
return fmt . Errorf ( "no provided Attributes to check" )
}
c . ctx , c . cancel = context . WithCancel ( ctx )
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00
c . cmd = NewCommand ( c . ctx , "check-attr" , "--stdin" , "-z" )
if len ( c . IndexFile ) > 0 {
c . cmd . AddArguments ( "--cached" )
c . env = append ( c . env , "GIT_INDEX_FILE=" + c . IndexFile )
}
if len ( c . WorkTree ) > 0 {
c . env = append ( c . env , "GIT_WORK_TREE=" + c . WorkTree )
}
c . env = append ( c . env , "GIT_FLUSH=1" )
2023-02-06 10:23:17 +08:00
c . cmd . AddDynamicArguments ( c . Attributes ... )
2021-09-21 03:46:51 +08:00
2021-09-10 04:13:36 +08:00
var err error
2021-09-21 03:46:51 +08:00
2021-09-10 04:13:36 +08:00
c . stdinReader , c . stdinWriter , err = os . Pipe ( )
if err != nil {
2021-09-21 03:46:51 +08:00
c . cancel ( )
2021-09-10 04:13:36 +08:00
return err
}
2022-06-16 23:47:44 +08:00
lw := new ( nulSeparatedAttributeWriter )
lw . attributes = make ( chan attributeTriple , 5 )
lw . closed = make ( chan struct { } )
c . stdOut = lw
2021-09-10 04:13:36 +08:00
return nil
}
// Run run cmd
func ( c * CheckAttributeReader ) Run ( ) error {
2021-09-21 03:46:51 +08:00
defer func ( ) {
2022-02-15 01:03:56 +08:00
_ = c . stdinReader . Close ( )
_ = c . stdOut . Close ( )
2021-09-21 03:46:51 +08:00
} ( )
2021-09-10 04:13:36 +08:00
stdErr := new ( bytes . Buffer )
2022-04-01 10:55:30 +08:00
err := c . cmd . Run ( & RunOpts {
Env : c . env ,
Dir : c . Repo . Path ,
Stdin : c . stdinReader ,
Stdout : c . stdOut ,
Stderr : stdErr ,
2021-09-10 04:13:36 +08:00
} )
2022-03-08 16:30:14 +08:00
if err != nil && // If there is an error we need to return but:
c . ctx . Err ( ) != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded)
err . Error ( ) != "signal: killed" { // 2. We should not pass up errors due to the program being killed
2021-09-10 04:13:36 +08:00
return fmt . Errorf ( "failed to run attr-check. Error: %w\nStderr: %s" , err , stdErr . String ( ) )
}
return nil
}
// CheckPath check attr for given path
2021-09-21 03:46:51 +08:00
func ( c * CheckAttributeReader ) CheckPath ( path string ) ( rs map [ string ] string , err error ) {
defer func ( ) {
2022-10-11 04:54:30 +08:00
if err != nil && err != c . ctx . Err ( ) {
log . Error ( "Unexpected error when checking path %s in %s. Error: %v" , path , c . Repo . Path , err )
2021-09-21 03:46:51 +08:00
}
} ( )
2021-09-10 04:13:36 +08:00
select {
case <- c . ctx . Done ( ) :
return nil , c . ctx . Err ( )
2022-04-28 11:19:33 +08:00
default :
2021-09-10 04:13:36 +08:00
}
2021-09-21 03:46:51 +08:00
if _ , err = c . stdinWriter . Write ( [ ] byte ( path + "\x00" ) ) ; err != nil {
defer c . Close ( )
2021-09-10 04:13:36 +08:00
return nil , err
}
2021-09-21 03:46:51 +08:00
rs = make ( map [ string ] string )
2021-09-10 04:13:36 +08:00
for range c . Attributes {
select {
2021-09-21 03:46:51 +08:00
case attr , ok := <- c . stdOut . ReadAttribute ( ) :
if ! ok {
return nil , c . ctx . Err ( )
}
2021-09-10 04:13:36 +08:00
rs [ attr . Attribute ] = attr . Value
case <- c . ctx . Done ( ) :
return nil , c . ctx . Err ( )
}
}
return rs , nil
}
// Close close pip after use
func ( c * CheckAttributeReader ) Close ( ) error {
2021-09-21 03:46:51 +08:00
c . cancel ( )
2022-02-15 01:03:56 +08:00
err := c . stdinWriter . Close ( )
2021-09-21 03:46:51 +08:00
return err
2021-09-10 04:13:36 +08:00
}
// attributeWriter is the sink for check-attr output: raw bytes are written
// into it and parsed results are read back from a channel.
type attributeWriter interface {
	io.WriteCloser
	// ReadAttribute returns the receive-only channel of parsed results.
	ReadAttribute() <-chan attributeTriple
}
// attributeTriple is a single check-attr result: the value of one attribute
// for one file.
type attributeTriple struct {
	// Filename is the path the result belongs to.
	Filename string
	// Attribute is the name of the attribute.
	Attribute string
	// Value is the reported value of the attribute.
	Value string
}
type nulSeparatedAttributeWriter struct {
tmp [ ] byte
attributes chan attributeTriple
2021-09-21 03:46:51 +08:00
closed chan struct { }
2021-09-10 04:13:36 +08:00
working attributeTriple
pos int
}
// Write consumes a chunk of NUL-separated check-attr output.
//
// Each NUL terminates one field; fields arrive in the order filename,
// attribute, value. After every third field the assembled triple is sent on
// the attributes channel (this blocks until the consumer has room). Bytes
// after the last NUL are kept in wr.tmp and completed by a later Write.
//
// All of p is always consumed, so the returned count is the full length of
// the slice that was passed in, as required of an io.Writer that returns a
// nil error.
func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
	// Remember the caller's length: p is re-sliced below as fields are
	// consumed, so len(p) at the end only covers the unterminated tail.
	total := len(p)

	for {
		nulIdx := bytes.IndexByte(p, '\x00')
		if nulIdx < 0 {
			break
		}

		// Complete the current field with the bytes up to the NUL.
		wr.tmp = append(wr.tmp, p[:nulIdx]...)
		switch wr.pos {
		case 0:
			// A filename starts a fresh triple.
			wr.working = attributeTriple{
				Filename: string(wr.tmp),
			}
		case 1:
			wr.working.Attribute = string(wr.tmp)
		case 2:
			wr.working.Value = string(wr.tmp)
		}
		wr.tmp = wr.tmp[:0]

		wr.pos++
		if wr.pos > 2 {
			wr.attributes <- wr.working
			wr.pos = 0
		}

		p = p[nulIdx+1:]
	}

	// Whatever is left has no terminator yet; keep it for the next call.
	wr.tmp = append(wr.tmp, p...)
	return total, nil
}
// ReadAttribute exposes the channel of completed triples as receive-only, so
// consumers can read results but cannot send on or close the channel.
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
	var results <-chan attributeTriple = wr.attributes
	return results
}
func ( wr * nulSeparatedAttributeWriter ) Close ( ) error {
2021-09-21 03:46:51 +08:00
select {
case <- wr . closed :
return nil
default :
}
2021-09-10 04:13:36 +08:00
close ( wr . attributes )
2021-09-21 03:46:51 +08:00
close ( wr . closed )
2021-09-10 04:13:36 +08:00
return nil
}
2022-06-16 23:47:44 +08:00
// Create a check attribute reader for the current repository and provided commit ID
func ( repo * Repository ) CheckAttributeReader ( commitID string ) ( * CheckAttributeReader , context . CancelFunc ) {
indexFilename , worktree , deleteTemporaryFile , err := repo . ReadTreeToTemporaryIndex ( commitID )
if err != nil {
return nil , func ( ) { }
}
checker := & CheckAttributeReader {
2024-02-25 02:46:49 +08:00
Attributes : [ ] string {
AttributeLinguistVendored ,
AttributeLinguistGenerated ,
AttributeLinguistDocumentation ,
AttributeLinguistDetectable ,
AttributeLinguistLanguage ,
AttributeGitlabLanguage ,
} ,
Repo : repo ,
IndexFile : indexFilename ,
WorkTree : worktree ,
2022-06-16 23:47:44 +08:00
}
ctx , cancel := context . WithCancel ( repo . Ctx )
if err := checker . Init ( ctx ) ; err != nil {
log . Error ( "Unable to open checker for %s. Error: %v" , commitID , err )
} else {
go func ( ) {
err := checker . Run ( )
if err != nil && err != ctx . Err ( ) {
log . Error ( "Unable to open checker for %s. Error: %v" , commitID , err )
}
cancel ( )
} ( )
}
deferable := func ( ) {
_ = checker . Close ( )
cancel ( )
deleteTemporaryFile ( )
}
return checker , deferable
}