diff --git a/extractor.go b/extractor.go index d9fc27c..0088c71 100644 --- a/extractor.go +++ b/extractor.go @@ -13,9 +13,28 @@ import ( type Extractor struct { Path string Progress func(int64) int64 + + // SanitizePathFunc can be provided if you wish to inspect and/or modify the source path + // returning an error from this function will abort extraction + SanitizePathFunc func(path string) (saferPath string, userDefined error) + + // LinkFunc can be provided for user specified handling of filesystem links + // returning an error from this function aborts extraction + LinkFunc func(Link) error +} + +// Link represents a filesystem link where Name is the link's destination path, +// Target is what the link actually points to, +// and Root is the extraction root +type Link struct { + Root, Name, Target string } func (te *Extractor) Extract(reader io.Reader) error { + if isNullDevice(te.Path) { + return nil + } + tarReader := tar.NewReader(reader) // Check if the output path already exists, so we know whether we should @@ -61,21 +80,51 @@ func (te *Extractor) Extract(reader io.Reader) error { return nil } -// outputPath returns the path at whicht o place tarPath -func (te *Extractor) outputPath(tarPath string) string { - elems := strings.Split(tarPath, "/") // break into elems - elems = elems[1:] // remove original root +// Sanitize sets up the extractor to use built in sanitation functions +// (Modify paths to be platform legal, symlinks may not escape extraction root) +// or unsets any previously set sanitation functions on the extractor +// (no special rules are applied when extracting) +func (te *Extractor) Sanitize(toggle bool) { + if toggle { + te.SanitizePathFunc = sanitizePath + te.LinkFunc = func(inLink Link) error { + if err := childrenOnly(inLink); err != nil { + return err + } + if err := platformLink(inLink); err != nil { + return err + } + return os.Symlink(inLink.Target, inLink.Name) + } + } else { + te.SanitizePathFunc = nil + te.LinkFunc = nil + } +} - path := fp.Join(elems...) // join elems - path = fp.Join(te.Path, path) // rebase on extractor root - return path +// outputPath returns the path at which to place tarPath +func (te *Extractor) outputPath(tarPath string) (outPath string, err error) { + elems := strings.Split(tarPath, "/") // break into elems + elems = elems[1:] // remove original root + outPath = strings.Join(elems, "/") // join elems + outPath = gopath.Join(te.Path, outPath) // rebase on to extraction target root + // sanitize path to be platform legal + if te.SanitizePathFunc != nil { + outPath, err = te.SanitizePathFunc(outPath) + } else { + outPath = fp.FromSlash(outPath) + } + return } func (te *Extractor) extractDir(h *tar.Header, depth int) error { - path := te.outputPath(h.Name) + path, err := te.outputPath(h.Name) + if err != nil { + return err + } if depth == 0 { - // if this is the root root directory, use it as the output path for remaining files + // if this is the root directory, use it as the output path for remaining files te.Path = path } @@ -83,13 +132,25 @@ func (te *Extractor) extractDir(h *tar.Header, depth int) error { } func (te *Extractor) extractSymlink(h *tar.Header) error { - return os.Symlink(h.Linkname, te.outputPath(h.Name)) + path, err := te.outputPath(h.Name) + if err != nil { + return err + } + + if te.LinkFunc != nil { + return te.LinkFunc(Link{Root: te.Path, Name: h.Name, Target: h.Linkname}) + } + + return os.Symlink(h.Linkname, path) } func (te *Extractor) extractFile(h *tar.Header, r *tar.Reader, depth int, rootExists bool, rootIsDir bool) error { - path := te.outputPath(h.Name) + path, err := te.outputPath(h.Name) + if err != nil { + return err + } - if depth == 0 { // if depth is 0, this is the only file (we aren't 'ipfs get'ing a directory) + if depth == 0 { // if depth is 0, this is the only file (we aren't extracting a directory) if rootExists && rootIsDir { // putting file inside of a root dir. fnameo := gopath.Base(h.Name) @@ -130,5 +191,27 @@ func copyWithProgress(to io.Writer, from io.Reader, cb func(int64) int64) error return err } } +} + +// childrenOnly will return an error if link targets escape their root +func childrenOnly(inLink Link) error { + if fp.IsAbs(inLink.Target) { + return fmt.Errorf("Link target %q is an absolute path (forbidden)", inLink.Target) + } + resolvedTarget := fp.Join(inLink.Name, inLink.Target) + rel, err := fp.Rel(inLink.Root, resolvedTarget) + if err != nil { + return err + } + //disallow symlinks from climbing out of the target root + if strings.HasPrefix(rel, "..") { + return fmt.Errorf("Symlink target %q escapes root %q", inLink.Target, inLink.Root) + } + //disallow pointing to your own root from above as well + if strings.HasPrefix(resolvedTarget, inLink.Root) { + return fmt.Errorf("Symlink target %q escapes and re-enters its own root %q (forbidden)", inLink.Target, inLink.Root) + } + + return nil } diff --git a/sanitize.go b/sanitize.go new file mode 100644 index 0000000..c54c7f3 --- /dev/null +++ b/sanitize.go @@ -0,0 +1,19 @@ +// +build !windows + +package tar + +import ( + "os" +) + +func isNullDevice(path string) bool { + return path == os.DevNull +} + +func sanitizePath(path string) (string, error) { + return path, nil +} + +func platformLink(inLink Link) error { + return nil +} diff --git a/sanitize_windows.go b/sanitize_windows.go new file mode 100644 index 0000000..36fbe52 --- /dev/null +++ b/sanitize_windows.go @@ -0,0 +1,83 @@ +package tar + +import ( + "fmt" + "net/url" + "os" + "path/filepath" + "regexp" + "strings" +) + +//https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx +var reservedNames = [...]string{"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"} +var reservedCharsRegex *regexp.Regexp + +const reservedCharsStr = `[<>:"\\|?*]` //NOTE: `/` is not included as it is our standard path separator +const reservedNamesRegexFmt = `(?i)^(%s)(?: *%s*)?[^\w ]` // $reservedName, $reservedCharsStr + +func init() { + reservedCharsRegex = regexp.MustCompile(reservedCharsStr) +} + +func isNullDevice(path string) bool { + if len(path) != 3 { + return false + } + if path[0]|0x20 != 'n' { + return false + } + if path[1]|0x20 != 'u' { + return false + } + if path[2]|0x20 != 'l' { + return false + } + return true +} + +func sanitizePath(path string) (string, error) { + pathElements := strings.Split(path, "/") + + //first pass: strip illegal tail & prefix reserved names `CON .` -> `_CON` + for pi := range pathElements { + pathElements[pi] = strings.TrimRight(pathElements[pi], ". ") //MSDN: Do not end a file or directory name with a space or a period + + for _, rn := range reservedNames { + re, _ := regexp.Compile(fmt.Sprintf(reservedNamesRegexFmt, rn, reservedCharsStr)) //no err, regex is a constant with guaranteed constant arguments + if matched := re.MatchString(pathElements[pi]); matched { + pathElements[pi] = "_" + pathElements[pi] + break + } + } + } + + //second pass: scan and encode reserved characters ? -> %3F + res := strings.Join(pathElements, `/`) //intentionally avoiding [file]path.Clean() being called with Join(); we do our own filtering first + illegalIndices := reservedCharsRegex.FindAllStringIndex(res, -1) + + if illegalIndices != nil { + var lastIndex int + var builder strings.Builder + allocAssist := (len(res) - len(illegalIndices)) + (len(illegalIndices) * 3) //3 = encoded length + builder.Grow(allocAssist) + + for _, si := range illegalIndices { + builder.WriteString(res[lastIndex:si[0]]) //append up to problem char + builder.WriteString(url.QueryEscape(res[si[0]:si[1]])) //escape and append problem char + lastIndex = si[1] + } + builder.WriteString(res[lastIndex:]) //append remainder + res = builder.String() + } + + return filepath.FromSlash(res), nil +} + +func platformLink(inLink Link) error { + if strings.HasPrefix(inLink.Target, string(os.PathSeparator)) || strings.HasPrefix(inLink.Target, "/") { + return fmt.Errorf("Link target %q is relative to drive root (forbidden)", inLink.Target) + } + + return nil +}