Better mime type probing

This commit is contained in:
Philipp Heckel 2022-01-10 13:38:51 -05:00
parent b5183612be
commit e8cb9e7fde
7 changed files with 53 additions and 39 deletions

View file

@ -11,13 +11,14 @@ import (
// It will always set a Content-Type based on http.DetectContentType, but will never send the "text/html"
// content type.
type ContentTypeWriter struct {
w http.ResponseWriter
sniffed bool
w http.ResponseWriter
filename string
sniffed bool
}
// NewContentTypeWriter creates a new ContentTypeWriter
func NewContentTypeWriter(w http.ResponseWriter) *ContentTypeWriter {
return &ContentTypeWriter{w, false}
func NewContentTypeWriter(w http.ResponseWriter, filename string) *ContentTypeWriter {
return &ContentTypeWriter{w, filename, false}
}
func (w *ContentTypeWriter) Write(p []byte) (n int, err error) {
@ -27,7 +28,7 @@ func (w *ContentTypeWriter) Write(p []byte) (n int, err error) {
// Detect and set Content-Type header
// Fix content types that we don't want to inline-render in the browser. In particular,
// we don't want to render HTML in the browser for security reasons.
contentType := http.DetectContentType(p)
contentType, _ := DetectContentType(p, w.filename)
if strings.HasPrefix(contentType, "text/html") {
contentType = strings.ReplaceAll(contentType, "text/html", "text/plain")
} else if contentType == "application/octet-stream" {

View file

@ -9,14 +9,14 @@ import (
func TestSniffWriter_WriteHTML(t *testing.T) {
rr := httptest.NewRecorder()
sw := NewContentTypeWriter(rr)
sw := NewContentTypeWriter(rr, "")
sw.Write([]byte("<script>alert('hi')</script>"))
require.Equal(t, "text/plain; charset=utf-8", rr.Header().Get("Content-Type"))
}
func TestSniffWriter_WriteTwoWriteCalls(t *testing.T) {
rr := httptest.NewRecorder()
sw := NewContentTypeWriter(rr)
sw := NewContentTypeWriter(rr, "")
sw.Write([]byte{0x25, 0x50, 0x44, 0x46, 0x2d, 0x11, 0x22, 0x33})
sw.Write([]byte("<script>alert('hi')</script>"))
require.Equal(t, "application/pdf", rr.Header().Get("Content-Type"))
@ -34,7 +34,7 @@ func TestSniffWriter_WriteHTMLSplitIntoTwoWrites(t *testing.T) {
// This test shows how splitting the HTML into two Write() calls will still yield text/plain
rr := httptest.NewRecorder()
sw := NewContentTypeWriter(rr)
sw := NewContentTypeWriter(rr, "")
sw.Write([]byte("<scr"))
sw.Write([]byte("ipt>alert('hi')</script>"))
require.Equal(t, "text/plain; charset=utf-8", rr.Header().Get("Content-Type"))
@ -42,9 +42,16 @@ func TestSniffWriter_WriteHTMLSplitIntoTwoWrites(t *testing.T) {
func TestSniffWriter_WriteUnknownMimeType(t *testing.T) {
rr := httptest.NewRecorder()
sw := NewContentTypeWriter(rr)
sw := NewContentTypeWriter(rr, "")
randomBytes := make([]byte, 199)
rand.Read(randomBytes)
sw.Write(randomBytes)
require.Equal(t, "application/octet-stream", rr.Header().Get("Content-Type"))
}
func TestSniffWriter_WriteWithFilenameAPK(t *testing.T) {
rr := httptest.NewRecorder()
sw := NewContentTypeWriter(rr, "https://example.com/ntfy.apk")
sw.Write([]byte{0x50, 0x4B, 0x03, 0x04})
require.Equal(t, "application/vnd.android.package-archive", rr.Header().Get("Content-Type"))
}

View file

@ -3,8 +3,8 @@ package util
import (
"errors"
"fmt"
"github.com/gabriel-vasile/mimetype"
"math/rand"
"mime"
"os"
"regexp"
"strconv"
@ -21,7 +21,6 @@ var (
random = rand.New(rand.NewSource(time.Now().UnixNano()))
randomMutex = sync.Mutex{}
sizeStrRegex = regexp.MustCompile(`(?i)^(\d+)([gmkb])?$`)
extRegex = regexp.MustCompile(`^\.[-_A-Za-z0-9]+$`)
errInvalidPriority = errors.New("invalid priority")
)
@ -168,20 +167,18 @@ func ShortTopicURL(s string) string {
return strings.TrimPrefix(strings.TrimPrefix(s, "https://"), "http://")
}
// ExtensionByType is a wrapper around mime.ExtensionByType with a few sensible corrections
func ExtensionByType(contentType string) string {
switch contentType {
case "image/jpeg":
return ".jpg"
case "video/mp4":
return ".mp4"
default:
exts, err := mime.ExtensionsByType(contentType)
if err == nil && len(exts) > 0 && extRegex.MatchString(exts[0]) {
return exts[0]
}
return ".bin"
// DetectContentType probes the byte array b and returns mime type and file extension.
// The filename is only used to override certain special cases.
func DetectContentType(b []byte, filename string) (mimeType string, ext string) {
if strings.HasSuffix(strings.ToLower(filename), ".apk") {
return "application/vnd.android.package-archive", ".apk"
}
m := mimetype.Detect(b)
mimeType, ext = m.String(), m.Extension()
if ext == "" {
ext = ".bin"
}
return
}
// ParseSize parses a size string like 2K or 2M into bytes. If no unit is found, e.g. 123, bytes is assumed.