2021-06-15 01:20:43 +08:00
// Copyright 2021 The Gitea Authors. All rights reserved.
2022-11-28 02:20:29 +08:00
// SPDX-License-Identifier: MIT
2017-12-04 09:48:03 +08:00
package util
import (
2022-03-31 10:25:40 +08:00
"bytes"
"unicode"
2022-01-21 01:46:10 +08:00
)
2021-06-15 01:20:43 +08:00
// sanitizedError is an error wrapper whose message is passed through
// SanitizeCredentialURLs before it is returned, so credentials embedded in
// URLs are not exposed via Error. The wrapped error stays reachable through
// Unwrap.
type sanitizedError struct {
	// err is the wrapped error; its message is sanitized lazily, on each Error call.
	err error
}
2021-06-15 01:20:43 +08:00
func ( err sanitizedError ) Error ( ) string {
2022-03-31 10:25:40 +08:00
return SanitizeCredentialURLs ( err . err . Error ( ) )
2017-12-04 09:48:03 +08:00
}
2022-03-31 10:25:40 +08:00
func ( err sanitizedError ) Unwrap ( ) error {
return err . err
2017-12-04 09:48:03 +08:00
}
2022-03-31 10:25:40 +08:00
// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
func SanitizeErrorCredentialURLs ( err error ) error {
return sanitizedError { err : err }
2017-12-04 09:48:03 +08:00
}
2022-03-31 10:25:40 +08:00
// userPlaceholder is the text substituted for the userinfo part
// ("user" or "user:password") of a sanitized URL.
const userPlaceholder = "sanitized-credential"

// schemeSep is the "://" separator that follows a URL scheme; it is kept as a
// byte slice so it can be searched for with the bytes package.
var schemeSep = []byte{':', '/', '/'}
2021-06-15 01:20:43 +08:00
2022-03-31 10:25:40 +08:00
// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
func SanitizeCredentialURLs ( s string ) string {
2024-06-14 09:26:33 +08:00
bs := UnsafeStringToBytes ( s )
2022-03-31 10:25:40 +08:00
schemeSepPos := bytes . Index ( bs , schemeSep )
if schemeSepPos == - 1 || bytes . IndexByte ( bs [ schemeSepPos : ] , '@' ) == - 1 {
return s // fast return if there is no URL scheme or no userinfo
2017-12-04 09:48:03 +08:00
}
2022-03-31 10:25:40 +08:00
out := make ( [ ] byte , 0 , len ( bs ) + len ( userPlaceholder ) )
for schemeSepPos != - 1 {
schemeSepPos += 3 // skip the "://"
sepAtPos := - 1 // the possible '@' position: "https://foo@[^here]host"
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
sepLoop :
for ; sepEndPos < len ( bs ) ; sepEndPos ++ {
c := bs [ sepEndPos ]
if ( 'A' <= c && c <= 'Z' ) || ( 'a' <= c && c <= 'z' ) || ( '0' <= c && c <= '9' ) {
continue
}
switch c {
case '@' :
sepAtPos = sepEndPos
case '-' , '.' , '_' , '~' , '!' , '$' , '&' , '\'' , '(' , ')' , '*' , '+' , ',' , ';' , '=' , ':' , '%' :
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
default :
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
}
}
// if there is '@', and the string is like "s://u@h", then hide the "u" part
if sepAtPos != - 1 && ( schemeSepPos >= 4 && unicode . IsLetter ( rune ( bs [ schemeSepPos - 4 ] ) ) ) && sepAtPos - schemeSepPos > 0 && sepEndPos - sepAtPos > 0 {
out = append ( out , bs [ : schemeSepPos ] ... )
out = append ( out , userPlaceholder ... )
out = append ( out , bs [ sepAtPos : sepEndPos ] ... )
} else {
out = append ( out , bs [ : sepEndPos ] ... )
}
bs = bs [ sepEndPos : ]
schemeSepPos = bytes . Index ( bs , schemeSep )
2021-06-15 01:20:43 +08:00
}
2022-03-31 10:25:40 +08:00
out = append ( out , bs ... )
2024-06-14 09:26:33 +08:00
return UnsafeBytesToString ( out )
2017-12-04 09:48:03 +08:00
}