From 2d07f04d06105d2df8354a541017a8cc0a840a44 Mon Sep 17 00:00:00 2001
From: Eric Chlebek <echlebek@gmail.com>
Date: Fri, 8 Jun 2018 09:08:11 -0700
Subject: [PATCH] Refactor the asset manager install method (#1654)

* Refactor the asset manager, avoid fd leaks.

This commit refactors the install method of the asset manager so
that leaks do not occur if errors occur while assets are being
installed.

* file descriptors no longer leak
* lock files no longer persist past the method's invocation

Signed-off-by: Eric Chlebek <eric@sensu.io>

* Run dep ensure again (???)

Signed-off-by: Eric Chlebek <eric@sensu.io>

* Don't cache the vendor directory.

Signed-off-by: Eric Chlebek <eric@sensu.io>
---
 .travis.yml                                   |   1 -
 CHANGELOG.md                                  |   2 +
 Gopkg.lock                                    |  23 +-
 Gopkg.toml                                    |   2 +-
 agent/assetmanager/asset.go                   | 163 ++--
 vendor/github.com/golang/snappy/.gitignore    |  16 +
 vendor/github.com/golang/snappy/AUTHORS       |  15 +
 vendor/github.com/golang/snappy/CONTRIBUTORS  |  37 +
 vendor/github.com/golang/snappy/LICENSE       |  27 +
 vendor/github.com/golang/snappy/README        | 107 +++
 vendor/github.com/golang/snappy/decode.go     | 237 ++++++
 .../github.com/golang/snappy/decode_amd64.go  |  14 +
 .../github.com/golang/snappy/decode_amd64.s   | 490 ++++++++++++
 .../github.com/golang/snappy/decode_other.go  | 101 +++
 vendor/github.com/golang/snappy/encode.go     | 285 +++++++
 .../github.com/golang/snappy/encode_amd64.go  |  29 +
 .../github.com/golang/snappy/encode_amd64.s   | 730 ++++++++++++++++++
 .../github.com/golang/snappy/encode_other.go  | 238 ++++++
 vendor/github.com/golang/snappy/snappy.go     |  98 +++
 vendor/github.com/mholt/archiver/.travis.yml  |   2 +-
 vendor/github.com/mholt/archiver/README.md    |   2 +
 vendor/github.com/mholt/archiver/archiver.go  |  45 +-
 vendor/github.com/mholt/archiver/rar.go       |  68 +-
 vendor/github.com/mholt/archiver/tar.go       | 102 ++-
 vendor/github.com/mholt/archiver/tarbz2.go    |  72 +-
 vendor/github.com/mholt/archiver/targz.go     |  68 +-
 vendor/github.com/mholt/archiver/tarlz4.go    |  92 +++
 vendor/github.com/mholt/archiver/tarsz.go     |  92 +++
 vendor/github.com/mholt/archiver/tarxz.go     |  69 +-
 vendor/github.com/mholt/archiver/zip.go       |  84 +-
 vendor/github.com/pierrec/lz4/.gitignore      |  31 +
 vendor/github.com/pierrec/lz4/.travis.yml     |   8 +
 vendor/github.com/pierrec/lz4/LICENSE         |  28 +
 vendor/github.com/pierrec/lz4/README.md       |  31 +
 vendor/github.com/pierrec/lz4/block.go        | 454 +++++++++++
 vendor/github.com/pierrec/lz4/lz4.go          | 105 +++
 vendor/github.com/pierrec/lz4/reader.go       | 364 +++++++++
 vendor/github.com/pierrec/lz4/writer.go       | 377 +++++++++
 vendor/github.com/pierrec/xxHash/LICENSE      |  28 +
 .../pierrec/xxHash/xxHash32/xxHash32.go       | 205 +++++
 40 files changed, 4792 insertions(+), 150 deletions(-)
 create mode 100644 vendor/github.com/golang/snappy/.gitignore
 create mode 100644 vendor/github.com/golang/snappy/AUTHORS
 create mode 100644 vendor/github.com/golang/snappy/CONTRIBUTORS
 create mode 100644 vendor/github.com/golang/snappy/LICENSE
 create mode 100644 vendor/github.com/golang/snappy/README
 create mode 100644 vendor/github.com/golang/snappy/decode.go
 create mode 100644 vendor/github.com/golang/snappy/decode_amd64.go
 create mode 100644 vendor/github.com/golang/snappy/decode_amd64.s
 create mode 100644 vendor/github.com/golang/snappy/decode_other.go
 create mode 100644 vendor/github.com/golang/snappy/encode.go
 create mode 100644 vendor/github.com/golang/snappy/encode_amd64.go
 create mode 100644 vendor/github.com/golang/snappy/encode_amd64.s
 create mode 100644 vendor/github.com/golang/snappy/encode_other.go
 create mode 100644 vendor/github.com/golang/snappy/snappy.go
 create mode 100644 vendor/github.com/mholt/archiver/tarlz4.go
 create mode 100644 vendor/github.com/mholt/archiver/tarsz.go
 create mode 100644 vendor/github.com/pierrec/lz4/.gitignore
 create mode 100644 vendor/github.com/pierrec/lz4/.travis.yml
 create mode 100644 vendor/github.com/pierrec/lz4/LICENSE
 create mode 100644 vendor/github.com/pierrec/lz4/README.md
 create mode 100644 vendor/github.com/pierrec/lz4/block.go
 create mode 100644 vendor/github.com/pierrec/lz4/lz4.go
 create mode 100644 vendor/github.com/pierrec/lz4/reader.go
 create mode 100644 vendor/github.com/pierrec/lz4/writer.go
 create mode 100644 vendor/github.com/pierrec/xxHash/LICENSE
 create mode 100644 vendor/github.com/pierrec/xxHash/xxHash32/xxHash32.go

diff --git a/.travis.yml b/.travis.yml
index ee23fec5b6..f9edae92ad 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,7 +16,6 @@ cache:
   yarn: true
   directories:
   - dashboard/node_modules
-  - vendor
 before_install:
   - echo -e "machine github.com\n login $GITHUB_TOKEN" >> ~/.netrc
   - ulimit -s 1082768
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be47486c7b..aef5349e6c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -67,6 +67,8 @@ the organization they reside in.
 `omitempty` from protobufs).
 - The sensuctl create command no longer prints a spurious warning when
 non-default organizations or environments are configured.
+- When installing assets, errors no longer cause file descriptors to leak or
+lockfiles to be left behind.
 
 ### Removed
 - Removed Linux/386 & Windows/386 e2e jobs on Travis CI & AppVeyor
diff --git a/Gopkg.lock b/Gopkg.lock
index 52dc39f2dd..40c49d94d5 100644
--- a/Gopkg.lock
+++ b/Gopkg.lock
@@ -258,6 +258,12 @@
   ]
   revision = "130e6b02ab059e7b717a096f397c5b60111cae74"
 
+[[projects]]
+  branch = "master"
+  name = "github.com/golang/snappy"
+  packages = ["."]
+  revision = "2e65f85255dbc3072edf28d6b5b8efc472979f5a"
+
 [[projects]]
   branch = "master"
   name = "github.com/google/btree"
@@ -433,8 +439,7 @@
 [[projects]]
   name = "github.com/mholt/archiver"
   packages = ["."]
-  revision = "cdc68dd1f170b8dfc1a0d2231b5bb0967ed67006"
-  version = "v2.0"
+  revision = "e4ef56d48eb029648b0e895bb0b6a393ef0829c3"
 
 [[projects]]
   branch = "master"
@@ -494,6 +499,18 @@
   packages = ["llrb"]
   revision = "53be0d36a84c2a886ca057d34b6aa4468df9ccb4"
 
+[[projects]]
+  name = "github.com/pierrec/lz4"
+  packages = ["."]
+  revision = "2fcda4cb7018ce05a25959d2fe08c83e3329f169"
+  version = "v1.1"
+
+[[projects]]
+  name = "github.com/pierrec/xxHash"
+  packages = ["xxHash32"]
+  revision = "f051bb7f1d1aaf1b5a665d74fb6b0217712c69f7"
+  version = "v0.1.1"
+
 [[projects]]
   name = "github.com/pmezard/go-difflib"
   packages = ["difflib"]
@@ -804,6 +821,6 @@
 [solve-meta]
   analyzer-name = "dep"
   analyzer-version = 1
-  inputs-digest = "6797d51548bb5f6d1c8d7b602a4886c6c1b04a7f72e2d25f13744464c14ad47b"
+  inputs-digest = "c0c06482414272b2d813a147abb5712b018779a17fccd873a9591ffc24c969a5"
   solver-name = "gps-cdcl"
   solver-version = 1
diff --git a/Gopkg.toml b/Gopkg.toml
index cf4c6bbcf5..eb604a9b6d 100644
--- a/Gopkg.toml
+++ b/Gopkg.toml
@@ -63,7 +63,7 @@ required = [
 
 [[constraint]]
   name = "github.com/mholt/archiver"
-  version = "2.0.0"
+  revision = "e4ef56d48eb029648b0e895bb0b6a393ef0829c3"
 
 [[constraint]]
   branch = "master"
diff --git a/agent/assetmanager/asset.go b/agent/assetmanager/asset.go
index 4365e17446..52137d3980 100644
--- a/agent/assetmanager/asset.go
+++ b/agent/assetmanager/asset.go
@@ -16,7 +16,9 @@ import (
 	"github.com/sensu/sensu-go/types"
 	"github.com/sensu/sensu-go/util/eval"
 	"github.com/sensu/sensu-go/util/retry"
+	"github.com/sirupsen/logrus"
 	filetype "gopkg.in/h2non/filetype.v1"
+	filetype_types "gopkg.in/h2non/filetype.v1/types"
 )
 
 const (
@@ -25,6 +27,9 @@ const (
 
 	// dependencies cache path
 	depsCachePath = "deps"
+
+	// Size of file header for sniffing type
+	headerSize = 262
 )
 
 // A RuntimeAsset refers to an asset that is currently in use by the agent.
@@ -79,13 +84,16 @@ func (d *RuntimeAsset) markAsInstalled() error {
 		return err
 	}
 
-	_, err = file.Write([]byte{}) // empty file
-	return err
+	// empty file
+	return file.Close()
 }
 
 // Avoid competing installation of assets
 func (d *RuntimeAsset) awaitLock() (*lockfile.Lockfile, error) {
-	lockfile, _ := lockfile.New(filepath.Join(d.path, ".lock"))
+	lockfile, err := lockfile.New(filepath.Join(d.path, ".lock"))
+	if err != nil {
+		return nil, err
+	}
 
 	// Try to lock the asset directory for purpose of writing
 	if err := lockfile.TryLock(); err == nil {
@@ -130,15 +138,73 @@ func (d *RuntimeAsset) fetch() (*http.Response, error) {
 	return r, err
 }
 
-// Downloads the given depdencies asset to the cache directory.
-// TODO(james): ugly; too many responsibilities
-// nolint
-func (d *RuntimeAsset) install() error {
+// binDir creates the asset's bin directory and returns the path
+func (d *RuntimeAsset) binDir() (string, error) {
 	// Ensure that cache directory exists before we attempt to write the contents
 	// of our asset to it.
 	binDir := filepath.Join(d.path, "bin")
-	if err := os.MkdirAll(binDir, 0755); err != nil {
-		return fmt.Errorf("unable to create cache directory '%s': %s", d.path, err.Error())
+	err := os.MkdirAll(binDir, os.ModeDir|0700)
+	if err != nil {
+		err = fmt.Errorf("error creating directory %q: %s", binDir, err)
+	}
+	return binDir, err
+}
+
+func (d *RuntimeAsset) download() (*os.File, error) {
+	// Download the asset
+	r, err := d.fetch()
+	if err != nil {
+		return nil, err
+	}
+
+	// Write response to tmp
+	tmpFile, err := ioutil.TempFile(os.TempDir(), "sensu-asset")
+	if err != nil {
+		return nil, fmt.Errorf("can't open tmp file for asset %q", d.asset.Name)
+	}
+
+	if _, err = io.Copy(tmpFile, r.Body); err != nil {
+		return nil, fmt.Errorf("error downloading asset %q", d.asset.Name)
+	}
+
+	return tmpFile, resetFile(tmpFile)
+}
+
+func hashFile(f *os.File) (string, error) {
+	// Generate checksum for downloaded file
+	h := sha512.New()
+	if _, err := io.Copy(h, f); err != nil {
+		return "", fmt.Errorf("generating checksum for asset failed: %s", err)
+	}
+
+	return hex.EncodeToString(h.Sum(nil)), resetFile(f)
+}
+
+func sniffType(f *os.File) (filetype_types.Type, error) {
+	header := make([]byte, headerSize)
+	if _, err := f.Read(header); err != nil {
+		return filetype_types.Type{}, fmt.Errorf("unable to read asset header: %s", err)
+	}
+	ft, err := filetype.Match(header)
+	if err != nil {
+		return ft, err
+	}
+	return ft, resetFile(f)
+}
+
+func resetFile(f *os.File) error {
+	// Ensure file contents are synced and rewound
+	if err := f.Sync(); err != nil {
+		return err
+	}
+	_, err := f.Seek(0, 0)
+	return err
+}
+
+// Downloads the given dependency asset to the cache directory.
+func (d *RuntimeAsset) install() (err error) {
+	if _, err := d.binDir(); err != nil {
+		return err
 	}
 
 	// Obtain a lock to avoid clobbering competing installs
@@ -153,80 +219,59 @@ func (d *RuntimeAsset) install() error {
 		return err
 	}
 
-	// logger.WithFields(logrus.Fields{
-	//	"asset_name": d.asset.Name,
-	// }).Info("new dependency encountered; downloading")
+	logger.WithFields(logrus.Fields{
+		"asset": d.asset.Name,
+	}).Info("downloading asset")
 
 	// Download the asset
-	r, err := d.fetch()
+	tmpFile, err := d.download()
 	if err != nil {
 		return err
 	}
-
-	// Write response to tmp
-	tmpFile, err := ioutil.TempFile(os.TempDir(), "sensu-asset")
-	if err != nil {
-		return fmt.Errorf("unable to obtain tmp file for asset '%s'", d.asset.Name)
-	}
+	defer tmpFile.Close()
 	defer os.Remove(tmpFile.Name())
 
-	if _, err = io.Copy(tmpFile, r.Body); err != nil {
-		return fmt.Errorf("unable to write asset '%s' to tmp", d.asset.Name)
-	}
-
-	// Ensure file contents are synced and rewound
-	tmpFile.Sync()
-	tmpFile.Seek(0, 0)
-
 	// Generate checksum for downloaded file
-	h := sha512.New()
-	if _, err = io.Copy(h, tmpFile); err != nil {
-		return fmt.Errorf("generating checksum for asset failed: %s", err.Error())
+	checksum, err := hashFile(tmpFile)
+	if err != nil {
+		return err
 	}
 
-	// Check that fetched file's checksum matches given
-	responseBodySum := hex.EncodeToString(h.Sum(nil))
-	if d.asset.Sha512 != responseBodySum {
-		return fmt.Errorf(
-			"fetched asset checksum did not match '%s' '%s'",
-			d.asset.Sha512,
-			responseBodySum,
-		)
+	// validate checksum
+	if d.asset.Sha512 != checksum {
+		return fmt.Errorf("asset checksum does not match: %q != %q", d.asset.Sha512, checksum)
 	}
 
-	// Read header
-	header := make([]byte, 262)
-	tmpFile.Seek(0, 0)
-	if _, err = tmpFile.Read(header); err != nil {
-		return fmt.Errorf("unable to read asset header: %s", err)
+	// detect the type of archive the asset is
+	ft, err := sniffType(tmpFile)
+	if err != nil {
+		return err
 	}
 
-	// Close tempfile to avoid deadlock
-	tmpFile.Close()
+	var ar archiver.Archiver
 
-	// If file is an archive attempt to extract it
-	fileKind, _ := filetype.Match(header)
-	switch fileKind.MIME.Value {
+	// If the file is not an archive, exit with an error.
+	switch ft.MIME.Value {
 	case "application/x-tar":
-		if err = archiver.Tar.Open(tmpFile.Name(), d.path); err != nil {
-			return fmt.Errorf("unable to extract asset to cache directory w/ err %s", err)
-		}
+		ar = archiver.Tar
 	case "application/gzip":
-		if err = archiver.TarGz.Open(tmpFile.Name(), d.path); err != nil {
-			return fmt.Errorf("unable to extract asset to cache directory w/ err %s", err)
-		}
+		ar = archiver.TarGz
 	default:
 		return fmt.Errorf(
 			"given file of format '%s' does not appear valid",
-			fileKind.MIME.Value,
+			ft.MIME.Value,
 		)
 	}
 
-	// Write .completed file
-	d.markAsInstalled()
+	// Extract the archive to the desired path
+	if err := ar.Read(tmpFile, d.path); err != nil {
+		return fmt.Errorf("error extracting asset: %s", err)
+	}
 
-	// Unlock directory so we allow others others to write again
-	lockfile.Unlock()
+	// Write .completed file
+	if err := d.markAsInstalled(); err != nil {
+		return fmt.Errorf("error finalizing asset installation: %s", err)
+	}
 
 	return nil
 }
diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore
new file mode 100644
index 0000000000..042091d9b3
--- /dev/null
+++ b/vendor/github.com/golang/snappy/.gitignore
@@ -0,0 +1,16 @@
+cmd/snappytool/snappytool
+testdata/bench
+
+# These explicitly listed benchmark data files are for an obsolete version of
+# snappy_test.go.
+testdata/alice29.txt
+testdata/asyoulik.txt
+testdata/fireworks.jpeg
+testdata/geo.protodata
+testdata/html
+testdata/html_x_4
+testdata/kppkn.gtb
+testdata/lcet10.txt
+testdata/paper-100k.pdf
+testdata/plrabn12.txt
+testdata/urls.10K
diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS
new file mode 100644
index 0000000000..bcfa19520a
--- /dev/null
+++ b/vendor/github.com/golang/snappy/AUTHORS
@@ -0,0 +1,15 @@
+# This is the official list of Snappy-Go authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+
+# Names should be added to this file as
+#	Name or Organization <email address>
+# The email address is not required for organizations.
+
+# Please keep the list sorted.
+
+Damian Gryski <dgryski@gmail.com>
+Google Inc.
+Jan Mercl <0xjnml@gmail.com>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Sebastien Binet <seb.binet@gmail.com>
diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS
new file mode 100644
index 0000000000..931ae31606
--- /dev/null
+++ b/vendor/github.com/golang/snappy/CONTRIBUTORS
@@ -0,0 +1,37 @@
+# This is the official list of people who can contribute
+# (and typically have contributed) code to the Snappy-Go repository.
+# The AUTHORS file lists the copyright holders; this file
+# lists people.  For example, Google employees are listed here
+# but not in AUTHORS, because Google holds the copyright.
+#
+# The submission process automatically checks to make sure
+# that people submitting code are listed in this file (by email address).
+#
+# Names should be added to this file only after verifying that
+# the individual or the individual's organization has agreed to
+# the appropriate Contributor License Agreement, found here:
+#
+#     http://code.google.com/legal/individual-cla-v1.0.html
+#     http://code.google.com/legal/corporate-cla-v1.0.html
+#
+# The agreement for individuals can be filled out on the web.
+#
+# When adding J Random Contributor's name to this file,
+# either J's name or J's organization's name should be
+# added to the AUTHORS file, depending on whether the
+# individual or corporate CLA was used.
+
+# Names should be added to this file like so:
+#     Name <email address>
+
+# Please keep the list sorted.
+
+Damian Gryski <dgryski@gmail.com>
+Jan Mercl <0xjnml@gmail.com>
+Kai Backman <kaib@golang.org>
+Marc-Antoine Ruel <maruel@chromium.org>
+Nigel Tao <nigeltao@golang.org>
+Rob Pike <r@golang.org>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Russ Cox <rsc@golang.org>
+Sebastien Binet <seb.binet@gmail.com>
diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE
new file mode 100644
index 0000000000..6050c10f4c
--- /dev/null
+++ b/vendor/github.com/golang/snappy/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README
new file mode 100644
index 0000000000..cea12879a0
--- /dev/null
+++ b/vendor/github.com/golang/snappy/README
@@ -0,0 +1,107 @@
+The Snappy compression format in the Go programming language.
+
+To download and install from source:
+$ go get github.com/golang/snappy
+
+Unless otherwise noted, the Snappy-Go source files are distributed
+under the BSD-style license found in the LICENSE file.
+
+
+
+Benchmarks.
+
+The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
+or so files, the same set used by the C++ Snappy code (github.com/google/snappy
+and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
+3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
+
+"go test -test.bench=."
+
+_UFlat0-8         2.19GB/s ± 0%  html
+_UFlat1-8         1.41GB/s ± 0%  urls
+_UFlat2-8         23.5GB/s ± 2%  jpg
+_UFlat3-8         1.91GB/s ± 0%  jpg_200
+_UFlat4-8         14.0GB/s ± 1%  pdf
+_UFlat5-8         1.97GB/s ± 0%  html4
+_UFlat6-8          814MB/s ± 0%  txt1
+_UFlat7-8          785MB/s ± 0%  txt2
+_UFlat8-8          857MB/s ± 0%  txt3
+_UFlat9-8          719MB/s ± 1%  txt4
+_UFlat10-8        2.84GB/s ± 0%  pb
+_UFlat11-8        1.05GB/s ± 0%  gaviota
+
+_ZFlat0-8         1.04GB/s ± 0%  html
+_ZFlat1-8          534MB/s ± 0%  urls
+_ZFlat2-8         15.7GB/s ± 1%  jpg
+_ZFlat3-8          740MB/s ± 3%  jpg_200
+_ZFlat4-8         9.20GB/s ± 1%  pdf
+_ZFlat5-8          991MB/s ± 0%  html4
+_ZFlat6-8          379MB/s ± 0%  txt1
+_ZFlat7-8          352MB/s ± 0%  txt2
+_ZFlat8-8          396MB/s ± 1%  txt3
+_ZFlat9-8          327MB/s ± 1%  txt4
+_ZFlat10-8        1.33GB/s ± 1%  pb
+_ZFlat11-8         605MB/s ± 1%  gaviota
+
+
+
+"go test -test.bench=. -tags=noasm"
+
+_UFlat0-8          621MB/s ± 2%  html
+_UFlat1-8          494MB/s ± 1%  urls
+_UFlat2-8         23.2GB/s ± 1%  jpg
+_UFlat3-8         1.12GB/s ± 1%  jpg_200
+_UFlat4-8         4.35GB/s ± 1%  pdf
+_UFlat5-8          609MB/s ± 0%  html4
+_UFlat6-8          296MB/s ± 0%  txt1
+_UFlat7-8          288MB/s ± 0%  txt2
+_UFlat8-8          309MB/s ± 1%  txt3
+_UFlat9-8          280MB/s ± 1%  txt4
+_UFlat10-8         753MB/s ± 0%  pb
+_UFlat11-8         400MB/s ± 0%  gaviota
+
+_ZFlat0-8          409MB/s ± 1%  html
+_ZFlat1-8          250MB/s ± 1%  urls
+_ZFlat2-8         12.3GB/s ± 1%  jpg
+_ZFlat3-8          132MB/s ± 0%  jpg_200
+_ZFlat4-8         2.92GB/s ± 0%  pdf
+_ZFlat5-8          405MB/s ± 1%  html4
+_ZFlat6-8          179MB/s ± 1%  txt1
+_ZFlat7-8          170MB/s ± 1%  txt2
+_ZFlat8-8          189MB/s ± 1%  txt3
+_ZFlat9-8          164MB/s ± 1%  txt4
+_ZFlat10-8         479MB/s ± 1%  pb
+_ZFlat11-8         270MB/s ± 1%  gaviota
+
+
+
+For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
+are the numbers from C++ Snappy's
+
+make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
+
+BM_UFlat/0     2.4GB/s  html
+BM_UFlat/1     1.4GB/s  urls
+BM_UFlat/2    21.8GB/s  jpg
+BM_UFlat/3     1.5GB/s  jpg_200
+BM_UFlat/4    13.3GB/s  pdf
+BM_UFlat/5     2.1GB/s  html4
+BM_UFlat/6     1.0GB/s  txt1
+BM_UFlat/7   959.4MB/s  txt2
+BM_UFlat/8     1.0GB/s  txt3
+BM_UFlat/9   864.5MB/s  txt4
+BM_UFlat/10    2.9GB/s  pb
+BM_UFlat/11    1.2GB/s  gaviota
+
+BM_ZFlat/0   944.3MB/s  html (22.31 %)
+BM_ZFlat/1   501.6MB/s  urls (47.78 %)
+BM_ZFlat/2    14.3GB/s  jpg (99.95 %)
+BM_ZFlat/3   538.3MB/s  jpg_200 (73.00 %)
+BM_ZFlat/4     8.3GB/s  pdf (83.30 %)
+BM_ZFlat/5   903.5MB/s  html4 (22.52 %)
+BM_ZFlat/6   336.0MB/s  txt1 (57.88 %)
+BM_ZFlat/7   312.3MB/s  txt2 (61.91 %)
+BM_ZFlat/8   353.1MB/s  txt3 (54.99 %)
+BM_ZFlat/9   289.9MB/s  txt4 (66.26 %)
+BM_ZFlat/10    1.2GB/s  pb (19.68 %)
+BM_ZFlat/11  527.4MB/s  gaviota (37.72 %)
diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go
new file mode 100644
index 0000000000..72efb0353d
--- /dev/null
+++ b/vendor/github.com/golang/snappy/decode.go
@@ -0,0 +1,237 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+var (
+	// ErrCorrupt reports that the input is invalid.
+	ErrCorrupt = errors.New("snappy: corrupt input")
+	// ErrTooLarge reports that the uncompressed length is too large.
+	ErrTooLarge = errors.New("snappy: decoded block is too large")
+	// ErrUnsupported reports that the input isn't supported.
+	ErrUnsupported = errors.New("snappy: unsupported input")
+
+	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
+)
+
+// DecodedLen returns the length of the decoded block.
+func DecodedLen(src []byte) (int, error) {
+	v, _, err := decodedLen(src)
+	return v, err
+}
+
+// decodedLen returns the length of the decoded block and the number of bytes
+// that the length header occupied.
+func decodedLen(src []byte) (blockLen, headerLen int, err error) {
+	v, n := binary.Uvarint(src)
+	if n <= 0 || v > 0xffffffff {
+		return 0, 0, ErrCorrupt
+	}
+
+	const wordSize = 32 << (^uint(0) >> 32 & 1)
+	if wordSize == 32 && v > 0x7fffffff {
+		return 0, 0, ErrTooLarge
+	}
+	return int(v), n, nil
+}
+
+const (
+	decodeErrCodeCorrupt                  = 1
+	decodeErrCodeUnsupportedLiteralLength = 2
+)
+
+// Decode returns the decoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire decoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+func Decode(dst, src []byte) ([]byte, error) {
+	dLen, s, err := decodedLen(src)
+	if err != nil {
+		return nil, err
+	}
+	if dLen <= len(dst) {
+		dst = dst[:dLen]
+	} else {
+		dst = make([]byte, dLen)
+	}
+	switch decode(dst, src[s:]) {
+	case 0:
+		return dst, nil
+	case decodeErrCodeUnsupportedLiteralLength:
+		return nil, errUnsupportedLiteralLength
+	}
+	return nil, ErrCorrupt
+}
+
+// NewReader returns a new Reader that decompresses from r, using the framing
+// format described at
+// https://github.com/google/snappy/blob/master/framing_format.txt
+func NewReader(r io.Reader) *Reader {
+	return &Reader{
+		r:       r,
+		decoded: make([]byte, maxBlockSize),
+		buf:     make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
+	}
+}
+
+// Reader is an io.Reader that can read Snappy-compressed bytes.
+type Reader struct {
+	r       io.Reader
+	err     error
+	decoded []byte
+	buf     []byte
+	// decoded[i:j] contains decoded bytes that have not yet been passed on.
+	i, j       int
+	readHeader bool
+}
+
+// Reset discards any buffered data, resets all state, and switches the Snappy
+// reader to read from r. This permits reusing a Reader rather than allocating
+// a new one.
+func (r *Reader) Reset(reader io.Reader) {
+	r.r = reader
+	r.err = nil
+	r.i = 0
+	r.j = 0
+	r.readHeader = false
+}
+
+func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
+	if _, r.err = io.ReadFull(r.r, p); r.err != nil {
+		if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
+			r.err = ErrCorrupt
+		}
+		return false
+	}
+	return true
+}
+
+// Read satisfies the io.Reader interface.
+func (r *Reader) Read(p []byte) (int, error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for {
+		if r.i < r.j {
+			n := copy(p, r.decoded[r.i:r.j])
+			r.i += n
+			return n, nil
+		}
+		if !r.readFull(r.buf[:4], true) {
+			return 0, r.err
+		}
+		chunkType := r.buf[0]
+		if !r.readHeader {
+			if chunkType != chunkTypeStreamIdentifier {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.readHeader = true
+		}
+		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
+		if chunkLen > len(r.buf) {
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+
+		// The chunk types are specified at
+		// https://github.com/google/snappy/blob/master/framing_format.txt
+		switch chunkType {
+		case chunkTypeCompressedData:
+			// Section 4.2. Compressed data (chunk type 0x00).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:chunkLen]
+			if !r.readFull(buf, false) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			buf = buf[checksumSize:]
+
+			n, err := DecodedLen(buf)
+			if err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if n > len(r.decoded) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if _, err := Decode(r.decoded, buf); err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeUncompressedData:
+			// Section 4.3. Uncompressed data (chunk type 0x01).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:checksumSize]
+			if !r.readFull(buf, false) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			// Read directly into r.decoded instead of via r.buf.
+			n := chunkLen - checksumSize
+			if n > len(r.decoded) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.decoded[:n], false) {
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeStreamIdentifier:
+			// Section 4.1. Stream identifier (chunk type 0xff).
+			if chunkLen != len(magicBody) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.buf[:len(magicBody)], false) {
+				return 0, r.err
+			}
+			for i := 0; i < len(magicBody); i++ {
+				if r.buf[i] != magicBody[i] {
+					r.err = ErrCorrupt
+					return 0, r.err
+				}
+			}
+			continue
+		}
+
+		if chunkType <= 0x7f {
+			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+		// Section 4.4 Padding (chunk type 0xfe).
+		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
+		if !r.readFull(r.buf[:chunkLen], false) {
+			return 0, r.err
+		}
+	}
+}
diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_amd64.go
new file mode 100644
index 0000000000..fcd192b849
--- /dev/null
+++ b/vendor/github.com/golang/snappy/decode_amd64.go
@@ -0,0 +1,14 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+package snappy
+
+// decode has the same semantics as in decode_other.go.
+//
+//go:noescape
+func decode(dst, src []byte) int
diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s
new file mode 100644
index 0000000000..e6179f65e3
--- /dev/null
+++ b/vendor/github.com/golang/snappy/decode_amd64.s
@@ -0,0 +1,490 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The asm code generally follows the pure Go code in decode_other.go, except
+// where marked with a "!!!".
+
+// func decode(dst, src []byte) int: decodes src into dst; returns 0 on success or 1 (decodeErrCodeCorrupt) on corrupt input.
+//
+// All local variables fit into registers. The non-zero stack size is only to
+// spill registers and push args when issuing a CALL. The register allocation:
+//	- AX	scratch
+//	- BX	scratch
+//	- CX	length or x
+//	- DX	offset
+//	- SI	&src[s]
+//	- DI	&dst[d]
+//	+ R8	dst_base
+//	+ R9	dst_len
+//	+ R10	dst_base + dst_len
+//	+ R11	src_base
+//	+ R12	src_len
+//	+ R13	src_base + src_len
+//	- R14	used by doCopy
+//	- R15	used by doCopy
+//
+// The registers R8-R13 (marked with a "+") are set at the start of the
+// function, and after a CALL returns, and are not otherwise modified.
+//
+// The d variable is implicitly DI - R8,  and len(dst)-d is R10 - DI.
+// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
+TEXT ·decode(SB), NOSPLIT, $48-56
+	// Initialize SI, DI and R8-R13.
+	MOVQ dst_base+0(FP), R8
+	MOVQ dst_len+8(FP), R9
+	MOVQ R8, DI
+	MOVQ R8, R10
+	ADDQ R9, R10
+	MOVQ src_base+24(FP), R11
+	MOVQ src_len+32(FP), R12
+	MOVQ R11, SI
+	MOVQ R11, R13
+	ADDQ R12, R13
+
+loop:
+	// for s < len(src)
+	CMPQ SI, R13
+	JEQ  end
+
+	// CX = uint32(src[s])
+	//
+	// switch src[s] & 0x03
+	MOVBLZX (SI), CX
+	MOVL    CX, BX
+	ANDL    $3, BX
+	CMPL    BX, $1
+	JAE     tagCopy // low two bits are non-zero: a copy tag, not a literal tag
+
+	// ----------------------------------------
+	// The code below handles literal tags.
+
+	// case tagLiteral:
+	// x := uint32(src[s] >> 2)
+	// switch
+	SHRL $2, CX
+	CMPL CX, $60
+	JAE  tagLit60Plus // x >= 60: the real x is stored in the 1 to 4 bytes after the tag byte
+
+	// case x < 60:
+	// s++
+	INCQ SI
+
+doLit:
+	// This is the end of the inner "switch", when we have a literal tag.
+	//
+	// We assume that CX == x and x fits in a uint32, where x is the variable
+	// used in the pure Go decode_other.go code.
+
+	// length = int(x) + 1
+	//
+	// Unlike the pure Go code, we don't need to check if length <= 0 because
+	// CX can hold 64 bits, so the increment cannot overflow.
+	INCQ CX
+
+	// Prepare to check if copying length bytes will run past the end of dst or
+	// src.
+	//
+	// AX = len(dst) - d
+	// BX = len(src) - s
+	MOVQ R10, AX
+	SUBQ DI, AX
+	MOVQ R13, BX
+	SUBQ SI, BX
+
+	// !!! Try a faster technique for short (16 or fewer bytes) copies.
+	//
+	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
+	//   goto callMemmove // Fall back on calling runtime·memmove.
+	// }
+	//
+	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
+	// against 21 instead of 16, because it cannot assume that all of its input
+	// is contiguous in memory and so it needs to leave enough source bytes to
+	// read the next tag without refilling buffers, but Go's Decode assumes
+	// contiguousness (the src argument is a []byte).
+	CMPQ CX, $16
+	JGT  callMemmove
+	CMPQ AX, $16
+	JLT  callMemmove
+	CMPQ BX, $16
+	JLT  callMemmove
+
+	// !!! Implement the copy from src to dst as a 16-byte load and store.
+	// (Decode's documentation says that dst and src must not overlap.)
+	//
+	// This always copies 16 bytes, instead of only length bytes, but that's
+	// OK. If the input is a valid Snappy encoding then subsequent iterations
+	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
+	// non-nil error), so the overrun will be ignored.
+	//
+	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
+	// 16-byte loads and stores. This technique probably wouldn't be as
+	// effective on architectures that are fussier about alignment.
+	MOVOU 0(SI), X0
+	MOVOU X0, 0(DI)
+
+	// d += length
+	// s += length
+	ADDQ CX, DI
+	ADDQ CX, SI
+	JMP  loop
+
+callMemmove:
+	// if length > len(dst)-d || length > len(src)-s { etc }
+	CMPQ CX, AX
+	JGT  errCorrupt
+	CMPQ CX, BX
+	JGT  errCorrupt
+
+	// copy(dst[d:], src[s:s+length])
+	//
+	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
+	// DI, SI and CX as arguments. Coincidentally, we also need to spill those
+	// three registers to the stack, to save local variables across the CALL.
+	MOVQ DI, 0(SP) // memmove argument: &dst[d]
+	MOVQ SI, 8(SP) // memmove argument: &src[s]
+	MOVQ CX, 16(SP) // memmove argument: length
+	MOVQ DI, 24(SP) // spill DI; reloaded after the CALL
+	MOVQ SI, 32(SP) // spill SI; reloaded after the CALL
+	MOVQ CX, 40(SP) // spill CX; reloaded after the CALL
+	CALL runtime·memmove(SB)
+
+	// Restore local variables: unspill registers from the stack and
+	// re-calculate R8-R13.
+	MOVQ 24(SP), DI
+	MOVQ 32(SP), SI
+	MOVQ 40(SP), CX
+	MOVQ dst_base+0(FP), R8
+	MOVQ dst_len+8(FP), R9
+	MOVQ R8, R10
+	ADDQ R9, R10
+	MOVQ src_base+24(FP), R11
+	MOVQ src_len+32(FP), R12
+	MOVQ R11, R13
+	ADDQ R12, R13
+
+	// d += length
+	// s += length
+	ADDQ CX, DI
+	ADDQ CX, SI
+	JMP  loop
+
+tagLit60Plus:
+	// !!! This fragment does the
+	//
+	// s += x - 58; if uint(s) > uint(len(src)) { etc }
+	//
+	// checks. In the asm version, we code it once instead of once per switch case.
+	ADDQ CX, SI
+	SUBQ $58, SI
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// case x == 60:
+	CMPL CX, $61
+	JEQ  tagLit61
+	JA   tagLit62Plus // x is 62 or 63
+
+	// x = uint32(src[s-1])
+	MOVBLZX -1(SI), CX
+	JMP     doLit
+
+tagLit61:
+	// case x == 61:
+	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
+	MOVWLZX -2(SI), CX
+	JMP     doLit
+
+tagLit62Plus:
+	CMPL CX, $62
+	JA   tagLit63
+
+	// case x == 62:
+	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+	MOVWLZX -3(SI), CX
+	MOVBLZX -1(SI), BX
+	SHLL    $16, BX
+	ORL     BX, CX
+	JMP     doLit
+
+tagLit63:
+	// case x == 63:
+	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+	MOVL -4(SI), CX
+	JMP  doLit
+
+// The code above handles literal tags.
+// ----------------------------------------
+// The code below handles copy tags.
+
+tagCopy4:
+	// case tagCopy4:
+	// s += 5
+	ADDQ $5, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// length = 1 + int(src[s-5])>>2
+	SHRQ $2, CX
+	INCQ CX
+
+	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+	MOVLQZX -4(SI), DX
+	JMP     doCopy
+
+tagCopy2:
+	// case tagCopy2:
+	// s += 3
+	ADDQ $3, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// length = 1 + int(src[s-3])>>2
+	SHRQ $2, CX
+	INCQ CX
+
+	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
+	MOVWQZX -2(SI), DX
+	JMP     doCopy
+
+tagCopy:
+	// We have a copy tag. We assume that:
+	//	- BX == src[s] & 0x03
+	//	- CX == src[s]
+	CMPQ BX, $2
+	JEQ  tagCopy2
+	JA   tagCopy4 // BX == 3
+
+	// case tagCopy1:
+	// s += 2
+	ADDQ $2, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
+	MOVQ    CX, DX
+	ANDQ    $0xe0, DX
+	SHLQ    $3, DX
+	MOVBQZX -1(SI), BX
+	ORQ     BX, DX
+
+	// length = 4 + int(src[s-2])>>2&0x7
+	SHRQ $2, CX
+	ANDQ $7, CX
+	ADDQ $4, CX
+
+doCopy:
+	// This is the end of the outer "switch", when we have a copy tag.
+	//
+	// We assume that:
+	//	- CX == length && CX > 0
+	//	- DX == offset
+
+	// if offset <= 0 { etc }
+	CMPQ DX, $0
+	JLE  errCorrupt
+
+	// if d < offset { etc }
+	MOVQ DI, BX
+	SUBQ R8, BX
+	CMPQ BX, DX
+	JLT  errCorrupt
+
+	// if length > len(dst)-d { etc }
+	MOVQ R10, BX
+	SUBQ DI, BX
+	CMPQ CX, BX
+	JGT  errCorrupt
+
+	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
+	//
+	// Set:
+	//	- R14 = len(dst)-d
+	//	- R15 = &dst[d-offset]
+	MOVQ R10, R14
+	SUBQ DI, R14
+	MOVQ DI, R15
+	SUBQ DX, R15
+
+	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
+	//
+	// First, try using two 8-byte load/stores, similar to the doLit technique
+	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
+	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
+	// and not one 16-byte load/store, and the first store has to be before the
+	// second load, due to the overlap if offset is in the range [8, 16).
+	//
+	// if length > 16 || offset < 8 || len(dst)-d < 16 {
+	//   goto slowForwardCopy
+	// }
+	// copy 16 bytes
+	// d += length
+	CMPQ CX, $16
+	JGT  slowForwardCopy
+	CMPQ DX, $8
+	JLT  slowForwardCopy
+	CMPQ R14, $16
+	JLT  slowForwardCopy
+	MOVQ 0(R15), AX
+	MOVQ AX, 0(DI)
+	MOVQ 8(R15), BX
+	MOVQ BX, 8(DI)
+	ADDQ CX, DI
+	JMP  loop
+
+slowForwardCopy:
+	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
+	// can still try 8-byte load stores, provided we can overrun up to 10 extra
+	// bytes. As above, the overrun will be fixed up by subsequent iterations
+	// of the outermost loop.
+	//
+	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
+	// commentary says:
+	//
+	// ----
+	//
+	// The main part of this loop is a simple copy of eight bytes at a time
+	// until we've copied (at least) the requested amount of bytes.  However,
+	// if d and d-offset are less than eight bytes apart (indicating a
+	// repeating pattern of length < 8), we first need to expand the pattern in
+	// order to get the correct results. For instance, if the buffer looks like
+	// this, with the eight-byte <d-offset> and <d> patterns marked as
+	// intervals:
+	//
+	//    abxxxxxxxxxxxx
+	//    [------]           d-offset
+	//      [------]         d
+	//
+	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
+	// once, after which we can move <d> two bytes without moving <d-offset>:
+	//
+	//    ababxxxxxxxxxx
+	//    [------]           d-offset
+	//        [------]       d
+	//
+	// and repeat the exercise until the two no longer overlap.
+	//
+	// This allows us to do very well in the special case of one single byte
+	// repeated many times, without taking a big hit for more general cases.
+	//
+	// The worst case of extra writing past the end of the match occurs when
+	// offset == 1 and length == 1; the last copy will read from byte positions
+	// [0..7] and write to [4..11], whereas it was only supposed to write to
+	// position 1. Thus, ten excess bytes.
+	//
+	// ----
+	//
+	// That "10 byte overrun" worst case is confirmed by Go's
+	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
+	// and finishSlowForwardCopy algorithm.
+	//
+	// if length > len(dst)-d-10 {
+	//   goto verySlowForwardCopy
+	// }
+	SUBQ $10, R14 // R14 = len(dst)-d-10
+	CMPQ CX, R14
+	JGT  verySlowForwardCopy
+
+makeOffsetAtLeast8:
+	// !!! As above, expand the pattern so that offset >= 8 and we can use
+	// 8-byte load/stores.
+	//
+	// for offset < 8 {
+	//   copy 8 bytes from dst[d-offset:] to dst[d:]
+	//   length -= offset
+	//   d      += offset
+	//   offset += offset
+	//   // The two previous lines together means that d-offset, and therefore
+	//   // R15, is unchanged.
+	// }
+	CMPQ DX, $8
+	JGE  fixUpSlowForwardCopy
+	MOVQ (R15), BX
+	MOVQ BX, (DI)
+	SUBQ DX, CX
+	ADDQ DX, DI
+	ADDQ DX, DX
+	JMP  makeOffsetAtLeast8
+
+fixUpSlowForwardCopy:
+	// !!! Add length (which might be negative now) to d (implied by DI being
+	// &dst[d]) so that d ends up at the right place when we jump back to the
+	// top of the loop. Before we do that, though, we save DI to AX so that, if
+	// length is positive, copying the remaining length bytes will write to the
+	// right place.
+	MOVQ DI, AX
+	ADDQ CX, DI
+
+finishSlowForwardCopy:
+	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
+	// length means that we overrun, but as above, that will be fixed up by
+	// subsequent iterations of the outermost loop.
+	CMPQ CX, $0
+	JLE  loop
+	MOVQ (R15), BX
+	MOVQ BX, (AX)
+	ADDQ $8, R15
+	ADDQ $8, AX
+	SUBQ $8, CX
+	JMP  finishSlowForwardCopy
+
+verySlowForwardCopy:
+	// verySlowForwardCopy is a simple implementation of forward copy. In C
+	// parlance, this is a do/while loop instead of a while loop, since we know
+	// that length > 0. In Go syntax:
+	//
+	// for {
+	//   dst[d] = dst[d - offset]
+	//   d++
+	//   length--
+	//   if length == 0 {
+	//     break
+	//   }
+	// }
+	MOVB (R15), BX
+	MOVB BX, (DI)
+	INCQ R15
+	INCQ DI
+	DECQ CX
+	JNZ  verySlowForwardCopy
+	JMP  loop
+
+// The code above handles copy tags.
+// ----------------------------------------
+
+end:
+	// This is the end of the "for s < len(src)".
+	//
+	// if d != len(dst) { etc }
+	CMPQ DI, R10
+	JNE  errCorrupt
+
+	// return 0
+	MOVQ $0, ret+48(FP)
+	RET
+
+errCorrupt:
+	// return decodeErrCodeCorrupt
+	MOVQ $1, ret+48(FP)
+	RET
diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go
new file mode 100644
index 0000000000..8c9f2049bc
--- /dev/null
+++ b/vendor/github.com/golang/snappy/decode_other.go
@@ -0,0 +1,101 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 appengine !gc noasm
+
+package snappy
+
+// decode writes the decoding of src to dst. It assumes that the varint-encoded
+// length of the decompressed bytes has already been read, and that len(dst)
+// equals that length.
+//
+// It returns 0 on success or a decodeErrCodeXxx error code on failure.
+func decode(dst, src []byte) int {
+	var d, s, offset, length int // d and s index dst and src; offset and length describe the current copy
+	for s < len(src) {
+		switch src[s] & 0x03 { // the low two bits of the tag byte select the element type
+		case tagLiteral:
+			x := uint32(src[s] >> 2)
+			switch {
+			case x < 60: // x (the literal length minus one) is stored in the tag byte itself
+				s++
+			case x == 60: // x follows the tag in 1 byte
+				s += 2
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-1])
+			case x == 61: // x follows the tag in 2 bytes, little-endian
+				s += 3
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-2]) | uint32(src[s-1])<<8
+			case x == 62: // x follows the tag in 3 bytes, little-endian
+				s += 4
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+			case x == 63: // x follows the tag in 4 bytes, little-endian
+				s += 5
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+			}
+			length = int(x) + 1
+			if length <= 0 { // int(x)+1 wrapped around, which can happen where int is 32 bits wide
+				return decodeErrCodeUnsupportedLiteralLength
+			}
+			if length > len(dst)-d || length > len(src)-s {
+				return decodeErrCodeCorrupt
+			}
+			copy(dst[d:], src[s:s+length])
+			d += length
+			s += length
+			continue // a literal is complete here; only copy tags reach the code after the switch
+
+		case tagCopy1:
+			s += 2
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 4 + int(src[s-2])>>2&0x7
+			offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
+
+		case tagCopy2:
+			s += 3
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 1 + int(src[s-3])>>2
+			offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
+
+		case tagCopy4:
+			s += 5
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 1 + int(src[s-5])>>2
+			offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+		}
+
+		if offset <= 0 || d < offset || length > len(dst)-d { // the copy must start inside the bytes already decoded and must fit in dst
+			return decodeErrCodeCorrupt
+		}
+		// Copy from an earlier sub-slice of dst to a later sub-slice. Unlike
+		// the built-in copy function, this byte-by-byte copy always runs
+		// forwards, even if the slices overlap. Conceptually, this is:
+		//
+		// d += forwardCopy(dst[d:d+length], dst[d-offset:])
+		for end := d + length; d != end; d++ {
+			dst[d] = dst[d-offset]
+		}
+	}
+	if d != len(dst) { // the input must decode to exactly len(dst) bytes
+		return decodeErrCodeCorrupt
+	}
+	return 0
+}
diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go
new file mode 100644
index 0000000000..8d393e904b
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode.go
@@ -0,0 +1,285 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+// Encode returns src compressed as one snappy block. The result is a
+// sub-slice of dst when len(dst) >= MaxEncodedLen(len(src)); otherwise it is
+// newly allocated. Encode panics with ErrTooLarge if src cannot be encoded.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+func Encode(dst, src []byte) []byte {
+	maxLen := MaxEncodedLen(len(src))
+	if maxLen < 0 {
+		panic(ErrTooLarge)
+	}
+	if len(dst) < maxLen {
+		dst = make([]byte, maxLen)
+	}
+	// Header: the length of the decompressed bytes, as a uvarint.
+	pos := binary.PutUvarint(dst, uint64(len(src)))
+	for rest := src; len(rest) > 0; {
+		block := rest
+		if len(block) > maxBlockSize {
+			block = block[:maxBlockSize]
+		}
+		rest = rest[len(block):]
+		if len(block) >= minNonLiteralBlockSize {
+			pos += encodeBlock(dst[pos:], block)
+		} else {
+			pos += emitLiteral(dst[pos:], block)
+		}
+	}
+	return dst[:pos]
+}
+
+// inputMargin is the minimum number of extra input bytes to keep, inside
+// encodeBlock's inner loop. On some architectures, this margin lets us
+// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
+// literals can be implemented as a single load to and store from a 16-byte
+// register. That literal's actual length can be as short as 1 byte, so this
+// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
+// the encoding loop will fix up the copy overrun, and this inputMargin ensures
+// that we don't overrun the dst and src buffers.
+const inputMargin = 16 - 1
+
+// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
+// could be encoded with a copy tag. This is the minimum with respect to the
+// algorithm used by encodeBlock, not a minimum enforced by the file format.
+//
+// The encoded output must start with at least a 1 byte literal, as there are
+// no previous bytes to copy. A minimal (1 byte) copy after that, generated
+// from an emitCopy call in encodeBlock's main loop, would require at least
+// another inputMargin bytes, for the reason above: we want any emitLiteral
+// calls inside encodeBlock's main loop to use the fast path if possible, which
+// requires being able to overrun by inputMargin bytes. Thus,
+// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
+//
+// The C++ code doesn't use this exact threshold, but it could, as discussed at
+// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
+// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
+// optimization. It should not affect the encoded form. This is tested by
+// TestSameEncodingAsCppShortCopies.
+const minNonLiteralBlockSize = 1 + 1 + inputMargin
+
+// MaxEncodedLen reports an upper bound on the size of the snappy block that
+// Encode produces for srcLen bytes of uncompressed input.
+//
+// A negative result means that srcLen is too large to encode.
+func MaxEncodedLen(srcLen int) int {
+	const limit = 0xffffffff
+	size := uint64(srcLen)
+	if size > limit {
+		return -1
+	}
+	// Compressed data can be defined as:
+	//    compressed := item* literal*
+	//    item       := literal* copy
+	//
+	// Trailing literals: the blowup is at most 62/60, because a 60-byte
+	// literal costs one tag byte plus one extra length byte.
+	//
+	// Items are trickier. Take a "copy" op that copies 4 bytes. A special
+	// check in the encoder only emits a 4-byte copy when the offset is
+	// below 65536, so that copy op is encoded in 3 bytes, and such an item
+	// is still bounded by the 62/60 blowup of its literals.
+	//
+	// Now take a "copy" op that copies 5 bytes. With a big enough offset
+	// the copy op needs 5 bytes, so the worst case is a one-byte literal
+	// followed by a five-byte copy: 6 bytes of input become 7 bytes of
+	// "compressed" data.
+	//
+	// That 7/6 factor dominates, so the final estimate (with 32 bytes of
+	// slack) is:
+	size += 32 + size/6
+	if size > limit {
+		return -1
+	}
+	return int(size)
+}
+
+var errClosed = errors.New("snappy: Writer is closed")
+
+// NewWriter creates an unbuffered Writer whose compressed output goes to w.
+//
+// Every Write call is compressed and forwarded immediately, so such a Writer
+// needs neither Flush nor Close.
+//
+// Deprecated: the Writer returned is not suitable for many small writes, only
+// for few large writes. Use NewBufferedWriter instead, which is efficient
+// regardless of the frequency and shape of the writes, and remember to Close
+// that Writer when done.
+func NewWriter(w io.Writer) *Writer {
+	unbuffered := new(Writer)
+	unbuffered.w = w
+	unbuffered.obuf = make([]byte, obufLen)
+	return unbuffered
+}
+
+// NewBufferedWriter creates a Writer that compresses to w using the framing
+// format described at
+// https://github.com/google/snappy/blob/master/framing_format.txt
+//
+// Writes are buffered: callers must Close the Writer to be sure that all data
+// has reached the underlying io.Writer, and may call Flush any number of
+// times before that.
+func NewBufferedWriter(w io.Writer) *Writer {
+	buffered := new(Writer)
+	buffered.w = w
+	buffered.ibuf = make([]byte, 0, maxBlockSize)
+	buffered.obuf = make([]byte, obufLen)
+	return buffered
+}
+
+// Writer is an io.Writer that can write Snappy-compressed bytes.
+type Writer struct {
+	w   io.Writer // destination of the framed output
+	err error     // sticky: the first error from w, or errClosed once Close has succeeded
+
+	// ibuf is a buffer for the incoming (uncompressed) bytes.
+	//
+	// Its use is optional. For backwards compatibility, Writers created by the
+	// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
+	// therefore do not need to be Flush'ed or Close'd.
+	ibuf []byte
+
+	// obuf is a buffer for the outgoing (compressed) bytes.
+	obuf []byte
+
+	// wroteStreamHeader is whether we have written the stream header.
+	wroteStreamHeader bool
+}
+
+// Reset clears w's state (sticky error, buffered input, stream-header flag)
+// and points it at writer, so that a Writer can be reused, not reallocated.
+func (w *Writer) Reset(writer io.Writer) {
+	w.w, w.err, w.wroteStreamHeader = writer, nil, false
+	if w.ibuf == nil {
+		// Unbuffered Writer (as made by NewWriter): it stays unbuffered.
+		return
+	}
+	w.ibuf = w.ibuf[:0]
+}
+
+// Write satisfies the io.Writer interface. When w buffers (ibuf != nil), p is appended to ibuf, which is flushed whenever p does not fit in its free space.
+func (w *Writer) Write(p []byte) (nRet int, errRet error) {
+	if w.ibuf == nil {
+		// Do not buffer incoming bytes. This does not perform or compress well
+		// if the caller of Writer.Write writes many small slices. This
+		// behavior is therefore deprecated, but still supported for backwards
+		// compatibility with code that doesn't explicitly Flush or Close.
+		return w.write(p)
+	}
+
+	// The remainder of this method is based on bufio.Writer.Write from the
+	// standard library.
+
+	for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { // p does not fit in ibuf's free space
+		var n int
+		if len(w.ibuf) == 0 {
+			// Large write, empty buffer.
+			// Write directly from p to avoid copy.
+			n, _ = w.write(p) // a failure is also recorded in w.err, which ends the loop
+		} else {
+			n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
+			w.ibuf = w.ibuf[:len(w.ibuf)+n]
+			w.Flush() // a failure is recorded in w.err, which ends the loop
+		}
+		nRet += n
+		p = p[n:]
+	}
+	if w.err != nil {
+		return nRet, w.err
+	}
+	n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) // what is left of p now fits: keep it for a later Flush
+	w.ibuf = w.ibuf[:len(w.ibuf)+n]
+	nRet += n
+	return nRet, nil
+}
+
+func (w *Writer) write(p []byte) (nRet int, errRet error) { // write frames p as chunks on w.w, bypassing ibuf; nRet counts the bytes of p consumed
+	if w.err != nil {
+		return 0, w.err
+	}
+	for len(p) > 0 { // each iteration emits one chunk holding at most maxBlockSize bytes of p
+		obufStart := len(magicChunk) // by default, skip the stream-header slot at the front of obuf
+		if !w.wroteStreamHeader {
+			w.wroteStreamHeader = true
+			copy(w.obuf, magicChunk)
+			obufStart = 0 // first chunk: send the stream header along with it
+		}
+
+		var uncompressed []byte
+		if len(p) > maxBlockSize {
+			uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
+		} else {
+			uncompressed, p = p, nil
+		}
+		checksum := crc(uncompressed)
+
+		// Compress the buffer, discarding the result if the improvement
+		// isn't at least 12.5%.
+		compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
+		chunkType := uint8(chunkTypeCompressedData)
+		chunkLen := 4 + len(compressed) // the chunk length includes the 4 checksum bytes
+		obufEnd := obufHeaderLen + len(compressed)
+		if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
+			chunkType = chunkTypeUncompressedData
+			chunkLen = 4 + len(uncompressed)
+			obufEnd = obufHeaderLen // only headers are sent from obuf; the body is written from uncompressed below
+		}
+
+		// Fill in the per-chunk header that comes before the body: type, 3-byte length, 4-byte checksum (little-endian).
+		w.obuf[len(magicChunk)+0] = chunkType
+		w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
+		w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
+		w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
+		w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
+		w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
+		w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
+		w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
+
+		if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
+			w.err = err // sticky: every later call fails with this error
+			return nRet, err
+		}
+		if chunkType == chunkTypeUncompressedData {
+			if _, err := w.w.Write(uncompressed); err != nil {
+				w.err = err
+				return nRet, err
+			}
+		}
+		nRet += len(uncompressed)
+	}
+	return nRet, nil
+}
+
+// Flush compresses any buffered input and writes it to the underlying io.Writer.
+func (w *Writer) Flush() error {
+	switch {
+	case w.err != nil:
+		return w.err
+	case len(w.ibuf) == 0:
+		return nil
+	}
+	_, _ = w.write(w.ibuf) // a failure is recorded in w.err, returned below
+	w.ibuf = w.ibuf[:0]
+	return w.err
+}
+
+// Close flushes w and then marks it closed, so that later writes fail with errClosed.
+func (w *Writer) Close() error {
+	if err := w.Flush(); err != nil {
+		return err
+	}
+	// Flush returned w.err, so w.err is nil here: mark the Writer as closed.
+	w.err = errClosed
+	return nil
+}
diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_amd64.go
new file mode 100644
index 0000000000..150d91bc8b
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode_amd64.go
@@ -0,0 +1,29 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+package snappy
+
+// emitLiteral is implemented in assembly; it has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitLiteral(dst, lit []byte) int
+
+// emitCopy is implemented in assembly; it has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitCopy(dst []byte, offset, length int) int
+
+// extendMatch is implemented in assembly; it has the same semantics as in encode_other.go.
+//
+//go:noescape
+func extendMatch(src []byte, i, j int) int
+
+// encodeBlock is implemented in assembly; it has the same semantics as in encode_other.go.
+//
+//go:noescape
+func encodeBlock(dst, src []byte) (d int)
diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s
new file mode 100644
index 0000000000..adfd979fe2
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode_amd64.s
@@ -0,0 +1,730 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
+// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
+// https://github.com/golang/snappy/issues/29
+//
+// As a workaround, the package was built with a known good assembler, and
+// those instructions were disassembled by "objdump -d" to yield the
+//	4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
+// style comments, in AT&T asm syntax. Note that rsp here is a physical
+// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
+// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
+// fine on Go 1.6.
+
+// The asm code generally follows the pure Go code in encode_other.go, except
+// where marked with a "!!!".
+
+// ----------------------------------------------------------------------------
+
+// func emitLiteral(dst, lit []byte) int
+//
+// All local variables fit into registers. The register allocation:
+//	- AX	len(lit)
+//	- BX	n
+//	- DX	return value
+//	- DI	&dst[i]
+//	- R10	&lit[0]
+//
+// The 24 bytes of stack space is to call runtime·memmove.
+//
+// The unusual register allocation of local variables, such as R10 for the
+// source pointer, matches the allocation used at the call site in encodeBlock,
+// which makes it easier to manually inline this function.
+TEXT ·emitLiteral(SB), NOSPLIT, $24-56
+	MOVQ dst_base+0(FP), DI
+	MOVQ lit_base+24(FP), R10
+	MOVQ lit_len+32(FP), AX
+	MOVQ AX, DX
+	MOVL AX, BX
+	SUBL $1, BX
+
+	CMPL BX, $60
+	JLT  oneByte
+	CMPL BX, $256
+	JLT  twoBytes
+
+threeBytes:
+	MOVB $0xf4, 0(DI)
+	MOVW BX, 1(DI)
+	ADDQ $3, DI
+	ADDQ $3, DX
+	JMP  memmove
+
+twoBytes:
+	MOVB $0xf0, 0(DI)
+	MOVB BX, 1(DI)
+	ADDQ $2, DI
+	ADDQ $2, DX
+	JMP  memmove
+
+oneByte:
+	SHLB $2, BX
+	MOVB BX, 0(DI)
+	ADDQ $1, DI
+	ADDQ $1, DX
+
+memmove:
+	MOVQ DX, ret+48(FP)
+
+	// copy(dst[i:], lit)
+	//
+	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+	// DI, R10 and AX as arguments.
+	MOVQ DI, 0(SP)
+	MOVQ R10, 8(SP)
+	MOVQ AX, 16(SP)
+	CALL runtime·memmove(SB)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func emitCopy(dst []byte, offset, length int) int
+//
+// All local variables fit into registers. The register allocation:
+//	- AX	length
+//	- SI	&dst[0]
+//	- DI	&dst[i]
+//	- R11	offset
+//
+// The unusual register allocation of local variables, such as R11 for the
+// offset, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·emitCopy(SB), NOSPLIT, $0-48
+	MOVQ dst_base+0(FP), DI
+	MOVQ DI, SI
+	MOVQ offset+24(FP), R11
+	MOVQ length+32(FP), AX
+
+loop0:
+	// for length >= 68 { etc }
+	CMPL AX, $68
+	JLT  step1
+
+	// Emit a length 64 copy, encoded as 3 bytes.
+	MOVB $0xfe, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $64, AX
+	JMP  loop0
+
+step1:
+	// if length > 64 { etc }
+	CMPL AX, $64
+	JLE  step2
+
+	// Emit a length 60 copy, encoded as 3 bytes.
+	MOVB $0xee, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $60, AX
+
+step2:
+	// if length >= 12 || offset >= 2048 { goto step3 }
+	CMPL AX, $12
+	JGE  step3
+	CMPL R11, $2048
+	JGE  step3
+
+	// Emit the remaining copy, encoded as 2 bytes.
+	MOVB R11, 1(DI)
+	SHRL $8, R11
+	SHLB $5, R11
+	SUBB $4, AX
+	SHLB $2, AX
+	ORB  AX, R11
+	ORB  $1, R11
+	MOVB R11, 0(DI)
+	ADDQ $2, DI
+
+	// Return the number of bytes written.
+	SUBQ SI, DI
+	MOVQ DI, ret+40(FP)
+	RET
+
+step3:
+	// Emit the remaining copy, encoded as 3 bytes.
+	SUBL $1, AX
+	SHLB $2, AX
+	ORB  $2, AX
+	MOVB AX, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+
+	// Return the number of bytes written.
+	SUBQ SI, DI
+	MOVQ DI, ret+40(FP)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func extendMatch(src []byte, i, j int) int
+//
+// All local variables fit into registers. The register allocation:
+//	- DX	&src[0]
+//	- SI	&src[j]
+//	- R13	&src[len(src) - 8]
+//	- R14	&src[len(src)]
+//	- R15	&src[i]
+//
+// The unusual register allocation of local variables, such as R15 for a source
+// pointer, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·extendMatch(SB), NOSPLIT, $0-48
+	MOVQ src_base+0(FP), DX
+	MOVQ src_len+8(FP), R14
+	MOVQ i+24(FP), R15
+	MOVQ j+32(FP), SI
+	ADDQ DX, R14
+	ADDQ DX, R15
+	ADDQ DX, SI
+	MOVQ R14, R13
+	SUBQ $8, R13
+
+cmp8:
+	// As long as we are 8 or more bytes before the end of src, we can load and
+	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+	CMPQ SI, R13
+	JA   cmp1
+	MOVQ (R15), AX
+	MOVQ (SI), BX
+	CMPQ AX, BX
+	JNE  bsf
+	ADDQ $8, R15
+	ADDQ $8, SI
+	JMP  cmp8
+
+bsf:
+	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
+	// the index of the first byte that differs. The BSF instruction finds the
+	// least significant 1 bit, the amd64 architecture is little-endian, and
+	// the shift by 3 converts a bit index to a byte index.
+	XORQ AX, BX
+	BSFQ BX, BX
+	SHRQ $3, BX
+	ADDQ BX, SI
+
+	// Convert from &src[ret] to ret.
+	SUBQ DX, SI
+	MOVQ SI, ret+40(FP)
+	RET
+
+cmp1:
+	// In src's tail, compare 1 byte at a time.
+	CMPQ SI, R14
+	JAE  extendMatchEnd
+	MOVB (R15), AX
+	MOVB (SI), BX
+	CMPB AX, BX
+	JNE  extendMatchEnd
+	ADDQ $1, R15
+	ADDQ $1, SI
+	JMP  cmp1
+
+extendMatchEnd:
+	// Convert from &src[ret] to ret.
+	SUBQ DX, SI
+	MOVQ SI, ret+40(FP)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func encodeBlock(dst, src []byte) (d int)
+//
+// All local variables fit into registers, other than "var table". The register
+// allocation:
+//	- AX	.	.
+//	- BX	.	.
+//	- CX	56	shift (note that amd64 shifts by non-immediates must use CX).
+//	- DX	64	&src[0], tableSize
+//	- SI	72	&src[s]
+//	- DI	80	&dst[d]
+//	- R9	88	sLimit
+//	- R10	.	&src[nextEmit]
+//	- R11	96	prevHash, currHash, nextHash, offset
+//	- R12	104	&src[base], skip
+//	- R13	.	&src[nextS], &src[len(src) - 8]
+//	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
+//	- R15	112	candidate
+//
+// The second column (56, 64, etc) is the stack offset to spill the registers
+// when calling other functions. We could pack this slightly tighter, but it's
+// simpler to have a dedicated spill map independent of the function called.
+//
+// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
+// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
+// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
+TEXT ·encodeBlock(SB), 0, $32888-56
+	MOVQ dst_base+0(FP), DI
+	MOVQ src_base+24(FP), SI
+	MOVQ src_len+32(FP), R14
+
+	// shift, tableSize := uint32(32-8), 1<<8
+	MOVQ $24, CX
+	MOVQ $256, DX
+
+calcShift:
+	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+	//	shift--
+	// }
+	CMPQ DX, $16384
+	JGE  varTable
+	CMPQ DX, R14
+	JGE  varTable
+	SUBQ $1, CX
+	SHLQ $1, DX
+	JMP  calcShift
+
+varTable:
+	// var table [maxTableSize]uint16
+	//
+	// In the asm code, unlike the Go code, we can zero-initialize only the
+	// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
+	// writes 16 bytes, so we can do only tableSize/8 writes instead of the
+	// 2048 writes that would zero-initialize all of table's 32768 bytes.
+	SHRQ $3, DX
+	LEAQ table-32768(SP), BX
+	PXOR X0, X0
+
+memclr:
+	MOVOU X0, 0(BX)
+	ADDQ  $16, BX
+	SUBQ  $1, DX
+	JNZ   memclr
+
+	// !!! DX = &src[0]
+	MOVQ SI, DX
+
+	// sLimit := len(src) - inputMargin
+	MOVQ R14, R9
+	SUBQ $15, R9
+
+	// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
+	// change for the rest of the function.
+	MOVQ CX, 56(SP)
+	MOVQ DX, 64(SP)
+	MOVQ R9, 88(SP)
+
+	// nextEmit := 0
+	MOVQ DX, R10
+
+	// s := 1
+	ADDQ $1, SI
+
+	// nextHash := hash(load32(src, s), shift)
+	MOVL  0(SI), R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+outer:
+	// for { etc }
+
+	// skip := 32
+	MOVQ $32, R12
+
+	// nextS := s
+	MOVQ SI, R13
+
+	// candidate := 0
+	MOVQ $0, R15
+
+inner0:
+	// for { etc }
+
+	// s := nextS
+	MOVQ R13, SI
+
+	// bytesBetweenHashLookups := skip >> 5
+	MOVQ R12, R14
+	SHRQ $5, R14
+
+	// nextS = s + bytesBetweenHashLookups
+	ADDQ R14, R13
+
+	// skip += bytesBetweenHashLookups
+	ADDQ R14, R12
+
+	// if nextS > sLimit { goto emitRemainder }
+	MOVQ R13, AX
+	SUBQ DX, AX
+	CMPQ AX, R9
+	JA   emitRemainder
+
+	// candidate = int(table[nextHash])
+	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
+	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
+	BYTE $0x4e
+	BYTE $0x0f
+	BYTE $0xb7
+	BYTE $0x7c
+	BYTE $0x5c
+	BYTE $0x78
+
+	// table[nextHash] = uint16(s)
+	MOVQ SI, AX
+	SUBQ DX, AX
+
+	// XXX: MOVW AX, table-32768(SP)(R11*2)
+	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
+	BYTE $0x66
+	BYTE $0x42
+	BYTE $0x89
+	BYTE $0x44
+	BYTE $0x5c
+	BYTE $0x78
+
+	// nextHash = hash(load32(src, nextS), shift)
+	MOVL  0(R13), R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// if load32(src, s) != load32(src, candidate) { continue } break
+	MOVL 0(SI), AX
+	MOVL (DX)(R15*1), BX
+	CMPL AX, BX
+	JNE  inner0
+
+fourByteMatch:
+	// As per the encode_other.go code:
+	//
+	// A 4-byte match has been found. We'll later see etc.
+
+	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
+	// on inputMargin in encode.go.
+	MOVQ SI, AX
+	SUBQ R10, AX
+	CMPQ AX, $16
+	JLE  emitLiteralFastPath
+
+	// ----------------------------------------
+	// Begin inline of the emitLiteral call.
+	//
+	// d += emitLiteral(dst[d:], src[nextEmit:s])
+
+	MOVL AX, BX
+	SUBL $1, BX
+
+	CMPL BX, $60
+	JLT  inlineEmitLiteralOneByte
+	CMPL BX, $256
+	JLT  inlineEmitLiteralTwoBytes
+
+inlineEmitLiteralThreeBytes:
+	MOVB $0xf4, 0(DI)
+	MOVW BX, 1(DI)
+	ADDQ $3, DI
+	JMP  inlineEmitLiteralMemmove
+
+inlineEmitLiteralTwoBytes:
+	MOVB $0xf0, 0(DI)
+	MOVB BX, 1(DI)
+	ADDQ $2, DI
+	JMP  inlineEmitLiteralMemmove
+
+inlineEmitLiteralOneByte:
+	SHLB $2, BX
+	MOVB BX, 0(DI)
+	ADDQ $1, DI
+
+inlineEmitLiteralMemmove:
+	// Spill local variables (registers) onto the stack; call; unspill.
+	//
+	// copy(dst[i:], lit)
+	//
+	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+	// DI, R10 and AX as arguments.
+	MOVQ DI, 0(SP)
+	MOVQ R10, 8(SP)
+	MOVQ AX, 16(SP)
+	ADDQ AX, DI              // Finish the "d +=" part of "d += emitLiteral(etc)".
+	MOVQ SI, 72(SP)
+	MOVQ DI, 80(SP)
+	MOVQ R15, 112(SP)
+	CALL runtime·memmove(SB)
+	MOVQ 56(SP), CX
+	MOVQ 64(SP), DX
+	MOVQ 72(SP), SI
+	MOVQ 80(SP), DI
+	MOVQ 88(SP), R9
+	MOVQ 112(SP), R15
+	JMP  inner1
+
+inlineEmitLiteralEnd:
+	// End inline of the emitLiteral call.
+	// ----------------------------------------
+
+emitLiteralFastPath:
+	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
+	MOVB AX, BX
+	SUBB $1, BX
+	SHLB $2, BX
+	MOVB BX, (DI)
+	ADDQ $1, DI
+
+	// !!! Implement the copy from lit to dst as a 16-byte load and store.
+	// (Encode's documentation says that dst and src must not overlap.)
+	//
+	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
+	// OK. Subsequent iterations will fix up the overrun.
+	//
+	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
+	// 16-byte loads and stores. This technique probably wouldn't be as
+	// effective on architectures that are fussier about alignment.
+	MOVOU 0(R10), X0
+	MOVOU X0, 0(DI)
+	ADDQ  AX, DI
+
+inner1:
+	// for { etc }
+
+	// base := s
+	MOVQ SI, R12
+
+	// !!! offset := base - candidate
+	MOVQ R12, R11
+	SUBQ R15, R11
+	SUBQ DX, R11
+
+	// ----------------------------------------
+	// Begin inline of the extendMatch call.
+	//
+	// s = extendMatch(src, candidate+4, s+4)
+
+	// !!! R14 = &src[len(src)]
+	MOVQ src_len+32(FP), R14
+	ADDQ DX, R14
+
+	// !!! R13 = &src[len(src) - 8]
+	MOVQ R14, R13
+	SUBQ $8, R13
+
+	// !!! R15 = &src[candidate + 4]
+	ADDQ $4, R15
+	ADDQ DX, R15
+
+	// !!! s += 4
+	ADDQ $4, SI
+
+inlineExtendMatchCmp8:
+	// As long as we are 8 or more bytes before the end of src, we can load and
+	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+	CMPQ SI, R13
+	JA   inlineExtendMatchCmp1
+	MOVQ (R15), AX
+	MOVQ (SI), BX
+	CMPQ AX, BX
+	JNE  inlineExtendMatchBSF
+	ADDQ $8, R15
+	ADDQ $8, SI
+	JMP  inlineExtendMatchCmp8
+
+inlineExtendMatchBSF:
+	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
+	// the index of the first byte that differs. The BSF instruction finds the
+	// least significant 1 bit, the amd64 architecture is little-endian, and
+	// the shift by 3 converts a bit index to a byte index.
+	XORQ AX, BX
+	BSFQ BX, BX
+	SHRQ $3, BX
+	ADDQ BX, SI
+	JMP  inlineExtendMatchEnd
+
+inlineExtendMatchCmp1:
+	// In src's tail, compare 1 byte at a time.
+	CMPQ SI, R14
+	JAE  inlineExtendMatchEnd
+	MOVB (R15), AX
+	MOVB (SI), BX
+	CMPB AX, BX
+	JNE  inlineExtendMatchEnd
+	ADDQ $1, R15
+	ADDQ $1, SI
+	JMP  inlineExtendMatchCmp1
+
+inlineExtendMatchEnd:
+	// End inline of the extendMatch call.
+	// ----------------------------------------
+
+	// ----------------------------------------
+	// Begin inline of the emitCopy call.
+	//
+	// d += emitCopy(dst[d:], base-candidate, s-base)
+
+	// !!! length := s - base
+	MOVQ SI, AX
+	SUBQ R12, AX
+
+inlineEmitCopyLoop0:
+	// for length >= 68 { etc }
+	CMPL AX, $68
+	JLT  inlineEmitCopyStep1
+
+	// Emit a length 64 copy, encoded as 3 bytes.
+	MOVB $0xfe, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $64, AX
+	JMP  inlineEmitCopyLoop0
+
+inlineEmitCopyStep1:
+	// if length > 64 { etc }
+	CMPL AX, $64
+	JLE  inlineEmitCopyStep2
+
+	// Emit a length 60 copy, encoded as 3 bytes.
+	MOVB $0xee, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $60, AX
+
+inlineEmitCopyStep2:
+	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
+	CMPL AX, $12
+	JGE  inlineEmitCopyStep3
+	CMPL R11, $2048
+	JGE  inlineEmitCopyStep3
+
+	// Emit the remaining copy, encoded as 2 bytes.
+	MOVB R11, 1(DI)
+	SHRL $8, R11
+	SHLB $5, R11
+	SUBB $4, AX
+	SHLB $2, AX
+	ORB  AX, R11
+	ORB  $1, R11
+	MOVB R11, 0(DI)
+	ADDQ $2, DI
+	JMP  inlineEmitCopyEnd
+
+inlineEmitCopyStep3:
+	// Emit the remaining copy, encoded as 3 bytes.
+	SUBL $1, AX
+	SHLB $2, AX
+	ORB  $2, AX
+	MOVB AX, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+
+inlineEmitCopyEnd:
+	// End inline of the emitCopy call.
+	// ----------------------------------------
+
+	// nextEmit = s
+	MOVQ SI, R10
+
+	// if s >= sLimit { goto emitRemainder }
+	MOVQ SI, AX
+	SUBQ DX, AX
+	CMPQ AX, R9
+	JAE  emitRemainder
+
+	// As per the encode_other.go code:
+	//
+	// We could immediately etc.
+
+	// x := load64(src, s-1)
+	MOVQ -1(SI), R14
+
+	// prevHash := hash(uint32(x>>0), shift)
+	MOVL  R14, R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// table[prevHash] = uint16(s-1)
+	MOVQ SI, AX
+	SUBQ DX, AX
+	SUBQ $1, AX
+
+	// XXX: MOVW AX, table-32768(SP)(R11*2)
+	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
+	BYTE $0x66
+	BYTE $0x42
+	BYTE $0x89
+	BYTE $0x44
+	BYTE $0x5c
+	BYTE $0x78
+
+	// currHash := hash(uint32(x>>8), shift)
+	SHRQ  $8, R14
+	MOVL  R14, R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// candidate = int(table[currHash])
+	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
+	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
+	BYTE $0x4e
+	BYTE $0x0f
+	BYTE $0xb7
+	BYTE $0x7c
+	BYTE $0x5c
+	BYTE $0x78
+
+	// table[currHash] = uint16(s)
+	ADDQ $1, AX
+
+	// XXX: MOVW AX, table-32768(SP)(R11*2)
+	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
+	BYTE $0x66
+	BYTE $0x42
+	BYTE $0x89
+	BYTE $0x44
+	BYTE $0x5c
+	BYTE $0x78
+
+	// if uint32(x>>8) == load32(src, candidate) { continue }
+	MOVL (DX)(R15*1), BX
+	CMPL R14, BX
+	JEQ  inner1
+
+	// nextHash = hash(uint32(x>>16), shift)
+	SHRQ  $8, R14
+	MOVL  R14, R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// s++
+	ADDQ $1, SI
+
+	// break out of the inner1 for loop, i.e. continue the outer loop.
+	JMP outer
+
+emitRemainder:
+	// if nextEmit < len(src) { etc }
+	MOVQ src_len+32(FP), AX
+	ADDQ DX, AX
+	CMPQ R10, AX
+	JEQ  encodeBlockEnd
+
+	// d += emitLiteral(dst[d:], src[nextEmit:])
+	//
+	// Push args.
+	MOVQ DI, 0(SP)
+	MOVQ $0, 8(SP)   // Unnecessary, as the callee ignores it, but conservative.
+	MOVQ $0, 16(SP)  // Unnecessary, as the callee ignores it, but conservative.
+	MOVQ R10, 24(SP)
+	SUBQ R10, AX
+	MOVQ AX, 32(SP)
+	MOVQ AX, 40(SP)  // Unnecessary, as the callee ignores it, but conservative.
+
+	// Spill local variables (registers) onto the stack; call; unspill.
+	MOVQ DI, 80(SP)
+	CALL ·emitLiteral(SB)
+	MOVQ 80(SP), DI
+
+	// Finish the "d +=" part of "d += emitLiteral(etc)".
+	ADDQ 48(SP), DI
+
+encodeBlockEnd:
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, DI
+	MOVQ DI, d+48(FP)
+	RET
diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go
new file mode 100644
index 0000000000..dbcae905e6
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode_other.go
@@ -0,0 +1,238 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 appengine !gc noasm
+
+package snappy
+
+func load32(b []byte, i int) uint32 {
+	w := b[i : i+4 : len(b)] // One up-front slice lets the compiler drop the bounds checks below.
+	return uint32(w[3])<<24 | uint32(w[2])<<16 | uint32(w[1])<<8 | uint32(w[0])
+}
+
+func load64(b []byte, i int) uint64 {
+	w := b[i : i+8 : len(b)] // One up-front slice lets the compiler drop the bounds checks below.
+	return uint64(w[7])<<56 | uint64(w[6])<<48 | uint64(w[5])<<40 | uint64(w[4])<<32 |
+		uint64(w[3])<<24 | uint64(w[2])<<16 | uint64(w[1])<<8 | uint64(w[0])
+}
+
+// emitLiteral writes lit to dst as one literal chunk (a tag of one to three
+// bytes followed by the literal bytes) and returns the bytes written.
+//
+// It assumes that:
+//	dst is long enough to hold the encoded bytes
+//	1 <= len(lit) && len(lit) <= 65536
+func emitLiteral(dst, lit []byte) int {
+	// m is len(lit)-1; hdr is the number of tag bytes that precede the data.
+	hdr, m := 1, uint(len(lit)-1)
+	if m < 60 {
+		dst[0] = uint8(m)<<2 | tagLiteral
+	} else if m < 1<<8 {
+		dst[0] = 60<<2 | tagLiteral
+		dst[1] = uint8(m)
+		hdr = 2
+	} else {
+		dst[0] = 61<<2 | tagLiteral
+		dst[1] = uint8(m)
+		dst[2] = uint8(m >> 8)
+		hdr = 3
+	}
+	return hdr + copy(dst[hdr:], lit)
+}
+
+// emitCopy writes a copy chunk and returns the number of bytes written.
+//
+// It assumes that:
+//	dst is long enough to hold the encoded bytes
+//	1 <= offset && offset <= 65535
+//	4 <= length && length <= 65535
+func emitCopy(dst []byte, offset, length int) int {
+	i := 0
+	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
+	// threshold for this loop is a little higher (at 68 = 64 + 4), and the
+	// length emitted down below is a little lower (at 60 = 64 - 4), because
+	// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
+	// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
+	// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
+	// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
+	// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
+	// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
+	for length >= 68 {
+		// Emit a length 64 copy, encoded as 3 bytes.
+		dst[i+0] = 63<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= 64
+	}
+	if length > 64 {
+		// Emit a length 60 copy, encoded as 3 bytes.
+		dst[i+0] = 59<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= 60
+	}
+	if length >= 12 || offset >= 2048 {
+		// Emit the remaining copy as 3 bytes: tagCopy2 with a 16-bit offset.
+		dst[i+0] = uint8(length-1)<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		return i + 3
+	}
+	// Emit the remaining copy as 2 bytes: tagCopy1 with an 11-bit offset.
+	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
+	dst[i+1] = uint8(offset)
+	return i + 2
+}
+
+// extendMatch walks src[i:] and src[j:] in lockstep and returns the largest
+// k <= len(src) such that src[i:i+k-j] and src[j:k] have the same contents.
+//
+// It assumes that:
+//	0 <= i && i < j && j <= len(src)
+func extendMatch(src []byte, i, j int) int {
+	for gap := j - i; j < len(src) && src[j-gap] == src[j]; j++ {
+	}
+	return j
+}
+
+func hash(u, shift uint32) uint32 {
+	return (0x1e35a7bd * u) >> shift
+}
+
+// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
+// assumes that the varint-encoded length of the decompressed bytes has already
+// been written.
+//
+// It also assumes that:
+//	len(dst) >= MaxEncodedLen(len(src)) &&
+// 	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+func encodeBlock(dst, src []byte) (d int) {
+	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
+	// The table element type is uint16, as s < sLimit and sLimit < len(src)
+	// and len(src) <= maxBlockSize and maxBlockSize == 65536.
+	const (
+		maxTableSize = 1 << 14
+		// tableMask is redundant, but helps the compiler eliminate bounds
+		// checks.
+		tableMask = maxTableSize - 1
+	)
+	shift := uint32(32 - 8)
+	for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+		shift--
+	}
+	// In Go, all array elements are zero-initialized, so there is no advantage
+	// to a smaller tableSize per se. However, it matches the C++ algorithm,
+	// and in the asm versions of this code, we can get away with zeroing only
+	// the first tableSize elements.
+	var table [maxTableSize]uint16
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := len(src) - inputMargin
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := 0
+
+	// The encoded form must start with a literal, as there are no previous
+	// bytes to copy, so we start looking for hash matches at s == 1.
+	s := 1
+	nextHash := hash(load32(src, s), shift)
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc.. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := 32
+
+		nextS := s
+		candidate := 0
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = int(table[nextHash&tableMask])
+			table[nextHash&tableMask] = uint16(s)
+			nextHash = hash(load32(src, nextS), shift)
+			if load32(src, s) == load32(src, candidate) {
+				break
+			}
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		d += emitLiteral(dst[d:], src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+			base := s
+
+			// Extend the 4-byte match as long as possible.
+			//
+			// This is an inlined version of:
+			//	s = extendMatch(src, candidate+4, s+4)
+			s += 4
+			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
+			}
+
+			d += emitCopy(dst[d:], base-candidate, s-base)
+			nextEmit = s
+			if s >= sLimit {
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-1 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load64(src, s-1)
+			prevHash := hash(uint32(x>>0), shift)
+			table[prevHash&tableMask] = uint16(s - 1)
+			currHash := hash(uint32(x>>8), shift)
+			candidate = int(table[currHash&tableMask])
+			table[currHash&tableMask] = uint16(s)
+			if uint32(x>>8) != load32(src, candidate) {
+				nextHash = hash(uint32(x>>16), shift)
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if nextEmit < len(src) {
+		d += emitLiteral(dst[d:], src[nextEmit:])
+	}
+	return d
+}
diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go
new file mode 100644
index 0000000000..ece692ea46
--- /dev/null
+++ b/vendor/github.com/golang/snappy/snappy.go
@@ -0,0 +1,98 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package snappy implements the Snappy compression format. It aims for very
+// high speeds and reasonable compression.
+//
+// There are actually two Snappy formats: block and stream. They are related,
+// but different: trying to decompress block-compressed data as a Snappy stream
+// will fail, and vice versa. The block format is the Decode and Encode
+// functions and the stream format is the Reader and Writer types.
+//
+// The block format, the more common case, is used when the complete size (the
+// number of bytes) of the original data is known upfront, at the time
+// compression starts. The stream format, also known as the framing format, is
+// for when that isn't always true.
+//
+// The canonical, C++ implementation is at https://github.com/google/snappy and
+// it only implements the block format.
+package snappy // import "github.com/golang/snappy"
+
+import (
+	"hash/crc32"
+)
+
+/*
+Each encoded block begins with the varint-encoded length of the decoded data,
+followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
+first byte of each chunk is broken into its 2 least and 6 most significant bits
+called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
+Zero means a literal tag. All other values mean a copy tag.
+
+For literal tags:
+  - If m < 60, the next 1 + m bytes are literal bytes.
+  - Otherwise, let n be the little-endian unsigned integer denoted by the next
+    m - 59 bytes. The next 1 + n bytes after that are literal bytes.
+
+For copy tags, length bytes are copied from offset bytes ago, in the style of
+Lempel-Ziv compression algorithms. In particular:
+  - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
+    The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
+    of the offset. The next byte is bits 0-7 of the offset.
+  - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
+    The length is 1 + m. The offset is the little-endian unsigned integer
+    denoted by the next 2 bytes.
+  - For l == 3, this tag is a legacy format that is no longer issued by most
+    encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
+    [1, 65). The length is 1 + m. The offset is the little-endian unsigned
+    integer denoted by the next 4 bytes.
+*/
+const (
+	tagLiteral = 0x00
+	tagCopy1   = 0x01
+	tagCopy2   = 0x02
+	tagCopy4   = 0x03
+)
+
+const (
+	checksumSize    = 4
+	chunkHeaderSize = 4
+	magicChunk      = "\xff\x06\x00\x00" + magicBody
+	magicBody       = "sNaPpY"
+
+	// maxBlockSize is the maximum size of the input to encodeBlock. It is not
+	// part of the wire format per se, but some parts of the encoder assume
+	// that an offset fits into a uint16.
+	//
+	// Also, for the framing format (Writer type instead of Encode function),
+	// https://github.com/google/snappy/blob/master/framing_format.txt says
+	// that "the uncompressed data in a chunk must be no longer than 65536
+	// bytes".
+	maxBlockSize = 65536
+
+	// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
+	// hard coded to be a const instead of a variable, so that obufLen can also
+	// be a const. Their equivalence is confirmed by
+	// TestMaxEncodedLenOfMaxBlockSize.
+	maxEncodedLenOfMaxBlockSize = 76490
+
+	obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
+	obufLen       = obufHeaderLen + maxEncodedLenOfMaxBlockSize
+)
+
+const (
+	chunkTypeCompressedData   = 0x00
+	chunkTypeUncompressedData = 0x01
+	chunkTypePadding          = 0xfe
+	chunkTypeStreamIdentifier = 0xff
+)
+
+var crcTable = crc32.MakeTable(crc32.Castagnoli)
+
+// crc returns the rotated-and-offset CRC-32C of b, per section 3 of
+// https://github.com/google/snappy/blob/master/framing_format.txt
+func crc(b []byte) uint32 {
+	sum := crc32.Checksum(b, crcTable)
+	return (sum>>15 | sum<<17) + 0xa282ead8
+}
diff --git a/vendor/github.com/mholt/archiver/.travis.yml b/vendor/github.com/mholt/archiver/.travis.yml
index 9d5f79a73f..2b39a6e6e2 100644
--- a/vendor/github.com/mholt/archiver/.travis.yml
+++ b/vendor/github.com/mholt/archiver/.travis.yml
@@ -1,7 +1,7 @@
 language: go
 
 go:
-  - 1.7.1
+  - 1.x
 
 env:
   - CGO_ENABLED=0
diff --git a/vendor/github.com/mholt/archiver/README.md b/vendor/github.com/mholt/archiver/README.md
index 47eeb543ba..33d0a3eef1 100644
--- a/vendor/github.com/mholt/archiver/README.md
+++ b/vendor/github.com/mholt/archiver/README.md
@@ -14,6 +14,8 @@ Supported formats/extensions:
 - .tar.gz & .tgz
 - .tar.bz2 & .tbz2
 - .tar.xz & .txz
+- .tar.lz4 & .tlz4
+- .tar.sz & .tsz
 - .rar (open only)
 
 
diff --git a/vendor/github.com/mholt/archiver/archiver.go b/vendor/github.com/mholt/archiver/archiver.go
index 2561e0f60d..32baf3e0aa 100644
--- a/vendor/github.com/mholt/archiver/archiver.go
+++ b/vendor/github.com/mholt/archiver/archiver.go
@@ -7,16 +7,21 @@ import (
 	"os"
 	"path/filepath"
 	"runtime"
+	"strings"
 )
 
 // Archiver represent a archive format
 type Archiver interface {
 	// Match checks supported files
 	Match(filename string) bool
-	// Make makes an archive.
+	// Make makes an archive file on disk.
 	Make(destination string, sources []string) error
-	// Open extracts an archive.
+	// Open extracts an archive file on disk.
 	Open(source, destination string) error
+	// Write writes an archive to a Writer.
+	Write(output io.Writer, sources []string) error
+	// Read reads an archive from a Reader.
+	Read(input io.Reader, destination string) error
 }
 
 // SupportedFormats contains all supported archive formats
@@ -31,6 +36,17 @@ func RegisterFormat(name string, format Archiver) {
 	SupportedFormats[name] = format
 }
 
+// MatchingFormat returns a registered archive format whose Match accepts
+// fpath, or nil if none does. Ties are broken by map iteration order.
+func MatchingFormat(fpath string) Archiver {
+	for _, format := range SupportedFormats {
+		if format.Match(fpath) {
+			return format
+		}
+	}
+	return nil
+}
+
 func writeNewFile(fpath string, in io.Reader, fm os.FileMode) error {
 	err := os.MkdirAll(filepath.Dir(fpath), 0755)
 	if err != nil {
@@ -69,6 +85,20 @@ func writeNewSymbolicLink(fpath string, target string) error {
 	return nil
 }
 
+func writeNewHardLink(fpath string, target string) error {
+	err := os.MkdirAll(filepath.Dir(fpath), 0755)
+	if err != nil {
+		return fmt.Errorf("%s: making directory for file: %v", fpath, err)
+	}
+
+	err = os.Link(target, fpath)
+	if err != nil {
+		return fmt.Errorf("%s: making hard link for: %v", fpath, err)
+	}
+
+	return nil
+}
+
 func mkdir(dirPath string) error {
 	err := os.MkdirAll(dirPath, 0755)
 	if err != nil {
@@ -76,3 +106,14 @@ func mkdir(dirPath string) error {
 	}
 	return nil
 }
+
+// sanitizeExtractPath guards against zip slip (writing outside destination).
+func sanitizeExtractPath(filePath string, destination string) error {
+	// Compare path elements relative to the cleaned destination: a raw string
+	// prefix test accepts siblings ("out-x" for "out") and rejects "./out".
+	rel, err := filepath.Rel(filepath.Clean(destination), filepath.Join(destination, filePath))
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return fmt.Errorf("%s: illegal file path", filePath)
+	}
+	return nil
+}
diff --git a/vendor/github.com/mholt/archiver/rar.go b/vendor/github.com/mholt/archiver/rar.go
index a15a202bc9..86f8a63d58 100644
--- a/vendor/github.com/mholt/archiver/rar.go
+++ b/vendor/github.com/mholt/archiver/rar.go
@@ -1,6 +1,7 @@
 package archiver
 
 import (
+	"bytes"
 	"fmt"
 	"io"
 	"os"
@@ -20,8 +21,32 @@ func init() {
 type rarFormat struct{}
 
 func (rarFormat) Match(filename string) bool {
-	// TODO: read file header to identify the format
-	return strings.HasSuffix(strings.ToLower(filename), ".rar")
+	return strings.HasSuffix(strings.ToLower(filename), ".rar") || isRar(filename)
+}
+
+// isRar checks the file has the RAR 1.5 or 5.0 format signature by reading its
+// beginning bytes and matching it
+func isRar(rarPath string) bool {
+	f, err := os.Open(rarPath)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	buf := make([]byte, 8)
+	if n, err := f.Read(buf); err != nil || n < 8 {
+		return false
+	}
+
+	return bytes.Equal(buf[:7], []byte("Rar!\x1a\x07\x00")) || // ver 1.5
+		bytes.Equal(buf, []byte("Rar!\x1a\x07\x01\x00")) // ver 5.0
+}
+
+// Write outputs a .rar archive, but this is not implemented because
+// RAR is a proprietary format. It is here only for symmetry with
+// the other archive formats in this package.
+func (rarFormat) Write(output io.Writer, filePaths []string) error {
+	return fmt.Errorf("write: RAR not implemented (proprietary format)")
 }
 
 // Make makes a .rar archive, but this is not implemented because
@@ -31,18 +56,12 @@ func (rarFormat) Make(rarPath string, filePaths []string) error {
 	return fmt.Errorf("make %s: RAR not implemented (proprietary format)", rarPath)
 }
 
-// Open extracts the RAR file at source and puts the contents
+// Read extracts the RAR file read from input and puts the contents
 // into destination.
-func (rarFormat) Open(source, destination string) error {
-	f, err := os.Open(source)
+func (rarFormat) Read(input io.Reader, destination string) error {
+	rr, err := rardecode.NewReader(input, "")
 	if err != nil {
-		return fmt.Errorf("%s: failed to open archive: %v", source, err)
-	}
-	defer f.Close()
-
-	rr, err := rardecode.NewReader(f, "")
-	if err != nil {
-		return fmt.Errorf("%s: failed to create reader: %v", source, err)
+		return fmt.Errorf("read: failed to create reader: %v", err)
 	}
 
 	for {
@@ -53,8 +72,15 @@ func (rarFormat) Open(source, destination string) error {
 			return err
 		}
 
+		err = sanitizeExtractPath(header.Name, destination)
+		if err != nil {
+			return err
+		}
+
+		destpath := filepath.Join(destination, header.Name)
+
 		if header.IsDir {
-			err = mkdir(filepath.Join(destination, header.Name))
+			err = mkdir(destpath)
 			if err != nil {
 				return err
 			}
@@ -63,12 +89,12 @@ func (rarFormat) Open(source, destination string) error {
 
 		// if files come before their containing folders, then we must
 		// create their folders before writing the file
-		err = mkdir(filepath.Dir(filepath.Join(destination, header.Name)))
+		err = mkdir(filepath.Dir(destpath))
 		if err != nil {
 			return err
 		}
 
-		err = writeNewFile(filepath.Join(destination, header.Name), rr, header.Mode())
+		err = writeNewFile(destpath, rr, header.Mode())
 		if err != nil {
 			return err
 		}
@@ -76,3 +102,15 @@ func (rarFormat) Open(source, destination string) error {
 
 	return nil
 }
+
+// Open extracts the RAR file at source and puts the contents
+// into destination.
+func (rarFormat) Open(source, destination string) error {
+	rf, err := os.Open(source)
+	if err != nil {
+		return fmt.Errorf("%s: failed to open file: %v", source, err)
+	}
+	defer rf.Close()
+
+	return Rar.Read(rf, destination)
+}
diff --git a/vendor/github.com/mholt/archiver/tar.go b/vendor/github.com/mholt/archiver/tar.go
index c3231d42fd..caa9de22eb 100644
--- a/vendor/github.com/mholt/archiver/tar.go
+++ b/vendor/github.com/mholt/archiver/tar.go
@@ -2,10 +2,12 @@ package archiver
 
 import (
 	"archive/tar"
+	"bytes"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 )
 
@@ -19,8 +21,71 @@ func init() {
 type tarFormat struct{}
 
 func (tarFormat) Match(filename string) bool {
-	// TODO: read file header to identify the format
-	return strings.HasSuffix(strings.ToLower(filename), ".tar")
+	return strings.HasSuffix(strings.ToLower(filename), ".tar") || isTar(filename)
+}
+
+const tarBlockSize int = 512
+
+// isTar checks the file has the Tar format header by reading its beginning
+// block.
+func isTar(tarPath string) bool {
+	f, err := os.Open(tarPath)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	buf := make([]byte, tarBlockSize)
+	if _, err = io.ReadFull(f, buf); err != nil {
+		return false
+	}
+
+	return hasTarHeader(buf)
+}
+
+// hasTarHeader reports whether buf starts with a valid tar header block. buf
+// must contain at least 512 bytes; if it does not, false is returned.
+func hasTarHeader(buf []byte) bool {
+	if len(buf) < tarBlockSize {
+		return false
+	}
+
+	b := buf[148:156]
+	b = bytes.Trim(b, " \x00") // clean up all spaces and null bytes
+	if len(b) == 0 {
+		return false // unknown format
+	}
+	hdrSum, err := strconv.ParseUint(string(b), 8, 64)
+	if err != nil {
+		return false
+	}
+
+	// According to Go's archive/tar, Sun tar sums signed byte values, so
+	// compute both sums, over the header block only (not trailing data).
+	var usum uint64
+	var sum int64
+	for i, c := range buf[:tarBlockSize] {
+		if 148 <= i && i < 156 {
+			c = ' ' // checksum field itself is counted as blanks
+		}
+		usum += uint64(uint8(c))
+		sum += int64(int8(c))
+	}
+
+	if hdrSum != usum && int64(hdrSum) != sum {
+		return false // invalid checksum
+	}
+
+	return true
+}
+
+// Write outputs a .tar file to a Writer containing the
+// contents of files listed in filePaths. File paths can
+// be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+func (tarFormat) Write(output io.Writer, filePaths []string) error {
+	return writeTar(filePaths, output, "")
 }
 
 // Make creates a .tar file at tarPath containing the
@@ -35,10 +100,14 @@ func (tarFormat) Make(tarPath string, filePaths []string) error {
 	}
 	defer out.Close()
 
-	tarWriter := tar.NewWriter(out)
+	return writeTar(filePaths, out, tarPath)
+}
+
+func writeTar(filePaths []string, output io.Writer, dest string) error {
+	tarWriter := tar.NewWriter(output)
 	defer tarWriter.Close()
 
-	return tarball(filePaths, tarWriter, tarPath)
+	return tarball(filePaths, tarWriter, dest)
 }
 
 // tarball writes all files listed in filePaths into tarWriter, which is
@@ -114,6 +183,12 @@ func tarFile(tarWriter *tar.Writer, source, dest string) error {
 	})
 }
 
+// Read untars a .tar file read from a Reader and puts
+// the contents into destination.
+func (tarFormat) Read(input io.Reader, destination string) error {
+	return untar(tar.NewReader(input), destination)
+}
+
 // Open untars source and puts the contents into destination.
 func (tarFormat) Open(source, destination string) error {
 	f, err := os.Open(source)
@@ -122,7 +197,7 @@ func (tarFormat) Open(source, destination string) error {
 	}
 	defer f.Close()
 
-	return untar(tar.NewReader(f), destination)
+	return Tar.Read(f, destination)
 }
 
 // untar un-tarballs the contents of tr into destination.
@@ -144,13 +219,22 @@ func untar(tr *tar.Reader, destination string) error {
 
 // untarFile untars a single file from tr with header header into destination.
 func untarFile(tr *tar.Reader, header *tar.Header, destination string) error {
+	err := sanitizeExtractPath(header.Name, destination)
+	if err != nil {
+		return err
+	}
+
+	destpath := filepath.Join(destination, header.Name)
+
 	switch header.Typeflag {
 	case tar.TypeDir:
-		return mkdir(filepath.Join(destination, header.Name))
-	case tar.TypeReg, tar.TypeRegA:
-		return writeNewFile(filepath.Join(destination, header.Name), tr, header.FileInfo().Mode())
+		return mkdir(destpath)
+	case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo:
+		return writeNewFile(destpath, tr, header.FileInfo().Mode())
 	case tar.TypeSymlink:
-		return writeNewSymbolicLink(filepath.Join(destination, header.Name), header.Linkname)
+		return writeNewSymbolicLink(destpath, header.Linkname)
+	case tar.TypeLink:
+		return writeNewHardLink(destpath, filepath.Join(destination, header.Linkname))
 	default:
 		return fmt.Errorf("%s: unknown type flag: %c", header.Name, header.Typeflag)
 	}
diff --git a/vendor/github.com/mholt/archiver/tarbz2.go b/vendor/github.com/mholt/archiver/tarbz2.go
index 83e7fa58c0..e0051d3c3b 100644
--- a/vendor/github.com/mholt/archiver/tarbz2.go
+++ b/vendor/github.com/mholt/archiver/tarbz2.go
@@ -1,8 +1,8 @@
 package archiver
 
 import (
-	"archive/tar"
 	"fmt"
+	"io"
 	"os"
 	"strings"
 
@@ -19,9 +19,42 @@ func init() {
 type tarBz2Format struct{}
 
 func (tarBz2Format) Match(filename string) bool {
-	// TODO: read file header to identify the format
 	return strings.HasSuffix(strings.ToLower(filename), ".tar.bz2") ||
-		strings.HasSuffix(strings.ToLower(filename), ".tbz2")
+		strings.HasSuffix(strings.ToLower(filename), ".tbz2") ||
+		isTarBz2(filename)
+}
+
+// isTarBz2 reports whether the file at tarbz2Path is a bzip2 stream whose
+// first decompressed 512-byte block is a valid tar header.
+func isTarBz2(tarbz2Path string) bool {
+	f, err := os.Open(tarbz2Path)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	bz2r, err := bzip2.NewReader(f, nil)
+	if err != nil {
+		return false
+	}
+	defer bz2r.Close()
+
+	buf := make([]byte, tarBlockSize)
+	// One Read may legally return fewer bytes than asked; ReadFull loops.
+	if _, err = io.ReadFull(bz2r, buf); err != nil {
+		return false
+	}
+
+	return hasTarHeader(buf)
+}
+
+// Write outputs a .tar.bz2 file to a Writer containing
+// the contents of files listed in filePaths. File paths
+// can be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+func (tarBz2Format) Write(output io.Writer, filePaths []string) error {
+	return writeTarBz2(filePaths, output, "")
 }
 
 // Make creates a .tar.bz2 file at tarbz2Path containing
@@ -36,16 +69,29 @@ func (tarBz2Format) Make(tarbz2Path string, filePaths []string) error {
 	}
 	defer out.Close()
 
-	bz2Writer, err := bzip2.NewWriter(out, nil)
+	return writeTarBz2(filePaths, out, tarbz2Path)
+}
+
+func writeTarBz2(filePaths []string, output io.Writer, dest string) error {
+	bz2w, err := bzip2.NewWriter(output, nil)
 	if err != nil {
-		return fmt.Errorf("error compressing %s: %v", tarbz2Path, err)
+		return fmt.Errorf("error compressing bzip2: %v", err)
 	}
-	defer bz2Writer.Close()
+	defer bz2w.Close()
 
-	tarWriter := tar.NewWriter(bz2Writer)
-	defer tarWriter.Close()
+	return writeTar(filePaths, bz2w, dest)
+}
 
-	return tarball(filePaths, tarWriter, tarbz2Path)
+// Read untars a .tar.bz2 file read from a Reader and decompresses
+// the contents into destination.
+func (tarBz2Format) Read(input io.Reader, destination string) error {
+	bz2r, err := bzip2.NewReader(input, nil)
+	if err != nil {
+		return fmt.Errorf("error decompressing bzip2: %v", err)
+	}
+	defer bz2r.Close()
+
+	return Tar.Read(bz2r, destination)
 }
 
 // Open untars source and decompresses the contents into destination.
@@ -56,11 +102,5 @@ func (tarBz2Format) Open(source, destination string) error {
 	}
 	defer f.Close()
 
-	bz2r, err := bzip2.NewReader(f, nil)
-	if err != nil {
-		return fmt.Errorf("error decompressing %s: %v", source, err)
-	}
-	defer bz2r.Close()
-
-	return untar(tar.NewReader(bz2r), destination)
+	return TarBz2.Read(f, destination)
 }
diff --git a/vendor/github.com/mholt/archiver/targz.go b/vendor/github.com/mholt/archiver/targz.go
index 895b099ca9..6751d49db7 100644
--- a/vendor/github.com/mholt/archiver/targz.go
+++ b/vendor/github.com/mholt/archiver/targz.go
@@ -1,9 +1,9 @@
 package archiver
 
 import (
-	"archive/tar"
 	"compress/gzip"
 	"fmt"
+	"io"
 	"os"
 	"strings"
 )
@@ -18,9 +18,40 @@ func init() {
 type tarGzFormat struct{}
 
 func (tarGzFormat) Match(filename string) bool {
-	// TODO: read file header to identify the format
 	return strings.HasSuffix(strings.ToLower(filename), ".tar.gz") ||
-		strings.HasSuffix(strings.ToLower(filename), ".tgz")
+		strings.HasSuffix(strings.ToLower(filename), ".tgz") ||
+		isTarGz(filename)
+}
+
+// isTarGz reports whether the file at targzPath is a gzip stream whose first
+// decompressed 512-byte block is a valid tar header.
+func isTarGz(targzPath string) bool {
+	f, err := os.Open(targzPath)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	gzr, err := gzip.NewReader(f)
+	if err != nil {
+		return false
+	}
+	defer gzr.Close()
+
+	buf := make([]byte, tarBlockSize)
+	// One Read may legally return fewer bytes than asked; ReadFull loops.
+	if _, err = io.ReadFull(gzr, buf); err != nil {
+		return false
+	}
+
+	return hasTarHeader(buf)
+}
+
+// Write outputs a .tar.gz file to a Writer containing
+// the contents of files listed in filePaths. It works
+// the same way Tar does, but with gzip compression.
+func (tarGzFormat) Write(output io.Writer, filePaths []string) error {
+	return writeTarGz(filePaths, output, "")
 }
 
 // Make creates a .tar.gz file at targzPath containing
@@ -33,13 +64,26 @@ func (tarGzFormat) Make(targzPath string, filePaths []string) error {
 	}
 	defer out.Close()
 
-	gzWriter := gzip.NewWriter(out)
-	defer gzWriter.Close()
+	return writeTarGz(filePaths, out, targzPath)
+}
+
+func writeTarGz(filePaths []string, output io.Writer, dest string) error {
+	gzw := gzip.NewWriter(output)
+	defer gzw.Close()
 
-	tarWriter := tar.NewWriter(gzWriter)
-	defer tarWriter.Close()
+	return writeTar(filePaths, gzw, dest)
+}
+
+// Read untars a .tar.gz file read from a Reader and decompresses
+// the contents into destination.
+func (tarGzFormat) Read(input io.Reader, destination string) error {
+	gzr, err := gzip.NewReader(input)
+	if err != nil {
+		return fmt.Errorf("error decompressing: %v", err)
+	}
+	defer gzr.Close()
 
-	return tarball(filePaths, tarWriter, targzPath)
+	return Tar.Read(gzr, destination)
 }
 
 // Open untars source and decompresses the contents into destination.
@@ -50,11 +94,5 @@ func (tarGzFormat) Open(source, destination string) error {
 	}
 	defer f.Close()
 
-	gzr, err := gzip.NewReader(f)
-	if err != nil {
-		return fmt.Errorf("%s: create new gzip reader: %v", source, err)
-	}
-	defer gzr.Close()
-
-	return untar(tar.NewReader(gzr), destination)
+	return TarGz.Read(f, destination)
 }
diff --git a/vendor/github.com/mholt/archiver/tarlz4.go b/vendor/github.com/mholt/archiver/tarlz4.go
new file mode 100644
index 0000000000..1ddc881fa4
--- /dev/null
+++ b/vendor/github.com/mholt/archiver/tarlz4.go
@@ -0,0 +1,92 @@
+package archiver
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"github.com/pierrec/lz4"
+)
+
+// TarLz4 is for TarLz4 format
+var TarLz4 tarLz4Format
+
+func init() {
+	RegisterFormat("TarLz4", TarLz4)
+}
+
+type tarLz4Format struct{}
+
+func (tarLz4Format) Match(filename string) bool {
+	return strings.HasSuffix(strings.ToLower(filename), ".tar.lz4") || strings.HasSuffix(strings.ToLower(filename), ".tlz4") || isTarLz4(filename)
+}
+
+// isTarLz4 reports whether the file at tarlz4Path is an lz4 stream whose
+// first decompressed 512-byte block is a valid tar header.
+func isTarLz4(tarlz4Path string) bool {
+	f, err := os.Open(tarlz4Path)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	lz4r := lz4.NewReader(f)
+	buf := make([]byte, tarBlockSize)
+	// One Read may legally return fewer bytes than asked; ReadFull loops.
+	if _, err = io.ReadFull(lz4r, buf); err != nil {
+		return false
+	}
+
+	return hasTarHeader(buf)
+}
+
+// Write outputs a .tar.lz4 file to a Writer containing
+// the contents of files listed in filePaths. File paths
+// can be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+func (tarLz4Format) Write(output io.Writer, filePaths []string) error {
+	return writeTarLz4(filePaths, output, "")
+}
+
+// Make creates a .tar.lz4 file at tarlz4Path containing
+// the contents of files listed in filePaths. File paths
+// can be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+func (tarLz4Format) Make(tarlz4Path string, filePaths []string) error {
+	out, err := os.Create(tarlz4Path)
+	if err != nil {
+		return fmt.Errorf("error creating %s: %v", tarlz4Path, err)
+	}
+	defer out.Close()
+
+	return writeTarLz4(filePaths, out, tarlz4Path)
+}
+
+func writeTarLz4(filePaths []string, output io.Writer, dest string) error {
+	lz4w := lz4.NewWriter(output)
+	defer lz4w.Close()
+
+	return writeTar(filePaths, lz4w, dest)
+}
+
+// Read untars a .tar.lz4 file read from a Reader and decompresses
+// the contents into destination.
+func (tarLz4Format) Read(input io.Reader, destination string) error {
+	lz4r := lz4.NewReader(input)
+
+	return Tar.Read(lz4r, destination)
+}
+
+// Open untars source and decompresses the contents into destination.
+func (tarLz4Format) Open(source, destination string) error {
+	f, err := os.Open(source)
+	if err != nil {
+		return fmt.Errorf("%s: failed to open archive: %v", source, err)
+	}
+	defer f.Close()
+
+	return TarLz4.Read(f, destination)
+}
diff --git a/vendor/github.com/mholt/archiver/tarsz.go b/vendor/github.com/mholt/archiver/tarsz.go
new file mode 100644
index 0000000000..2e29019081
--- /dev/null
+++ b/vendor/github.com/mholt/archiver/tarsz.go
@@ -0,0 +1,92 @@
+package archiver
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"github.com/golang/snappy"
+)
+
+// TarSz is for TarSz format
+var TarSz tarSzFormat
+
+func init() {
+	RegisterFormat("TarSz", TarSz)
+}
+
+type tarSzFormat struct{}
+
+func (tarSzFormat) Match(filename string) bool {
+	return strings.HasSuffix(strings.ToLower(filename), ".tar.sz") || strings.HasSuffix(strings.ToLower(filename), ".tsz") || isTarSz(filename)
+}
+
+// isTarSz reports whether the file at tarszPath is a snappy (sz) stream whose
+// first decompressed 512-byte block is a valid tar header.
+func isTarSz(tarszPath string) bool {
+	f, err := os.Open(tarszPath)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	szr := snappy.NewReader(f)
+	buf := make([]byte, tarBlockSize)
+	// One Read may legally return fewer bytes than asked; ReadFull loops.
+	if _, err = io.ReadFull(szr, buf); err != nil {
+		return false
+	}
+
+	return hasTarHeader(buf)
+}
+
+// Write outputs a .tar.sz file to a Writer containing
+// the contents of files listed in filePaths. File paths
+// can be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+func (tarSzFormat) Write(output io.Writer, filePaths []string) error {
+	return writeTarSz(filePaths, output, "")
+}
+
+// Make creates a .tar.sz file at tarszPath containing
+// the contents of files listed in filePaths. File paths
+// can be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+func (tarSzFormat) Make(tarszPath string, filePaths []string) error {
+	out, err := os.Create(tarszPath)
+	if err != nil {
+		return fmt.Errorf("error creating %s: %v", tarszPath, err)
+	}
+	defer out.Close()
+
+	return writeTarSz(filePaths, out, tarszPath)
+}
+
+func writeTarSz(filePaths []string, output io.Writer, dest string) error {
+	szw := snappy.NewBufferedWriter(output)
+	defer szw.Close()
+
+	return writeTar(filePaths, szw, dest)
+}
+
+// Read untars a .tar.sz file read from a Reader and decompresses
+// the contents into destination.
+func (tarSzFormat) Read(input io.Reader, destination string) error {
+	szr := snappy.NewReader(input)
+
+	return Tar.Read(szr, destination)
+}
+
+// Open untars source and decompresses the contents into destination.
+func (tarSzFormat) Open(source, destination string) error {
+	f, err := os.Open(source)
+	if err != nil {
+		return fmt.Errorf("%s: failed to open archive: %v", source, err)
+	}
+	defer f.Close()
+
+	return TarSz.Read(f, destination)
+}
diff --git a/vendor/github.com/mholt/archiver/tarxz.go b/vendor/github.com/mholt/archiver/tarxz.go
index 9bffce73c3..e222fb4ad1 100644
--- a/vendor/github.com/mholt/archiver/tarxz.go
+++ b/vendor/github.com/mholt/archiver/tarxz.go
@@ -1,8 +1,8 @@
 package archiver
 
 import (
-	"archive/tar"
 	"fmt"
+	"io"
 	"os"
 	"strings"
 
@@ -20,9 +20,41 @@ type xzFormat struct{}
 
 // Match returns whether filename matches this format.
 func (xzFormat) Match(filename string) bool {
-	// TODO: read file header to identify the format
 	return strings.HasSuffix(strings.ToLower(filename), ".tar.xz") ||
-		strings.HasSuffix(strings.ToLower(filename), ".txz")
+		strings.HasSuffix(strings.ToLower(filename), ".txz") ||
+		isTarXz(filename)
+}
+
+// isTarXz reports whether the file at tarxzPath is an xz stream whose first
+// decompressed 512-byte block is a valid tar header.
+func isTarXz(tarxzPath string) bool {
+	f, err := os.Open(tarxzPath)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	xzr, err := xz.NewReader(f)
+	if err != nil {
+		return false
+	}
+
+	buf := make([]byte, tarBlockSize)
+	// One Read may legally return fewer bytes than asked; ReadFull loops.
+	if _, err = io.ReadFull(xzr, buf); err != nil {
+		return false
+	}
+
+	return hasTarHeader(buf)
+}
+
+// Write outputs a .tar.xz file to a Writer containing
+// the contents of files listed in filePaths. File paths
+// can be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+func (xzFormat) Write(output io.Writer, filePaths []string) error {
+	return writeTarXZ(filePaths, output, "")
 }
 
 // Make creates a .tar.xz file at xzPath containing
@@ -37,16 +69,28 @@ func (xzFormat) Make(xzPath string, filePaths []string) error {
 	}
 	defer out.Close()
 
-	xzWriter, err := xz.NewWriter(out)
+	return writeTarXZ(filePaths, out, xzPath)
+}
+
+func writeTarXZ(filePaths []string, output io.Writer, dest string) error {
+	xzw, err := xz.NewWriter(output)
 	if err != nil {
-		return fmt.Errorf("error compressing %s: %v", xzPath, err)
+		return fmt.Errorf("error compressing xz: %v", err)
 	}
-	defer xzWriter.Close()
+	defer xzw.Close()
 
-	tarWriter := tar.NewWriter(xzWriter)
-	defer tarWriter.Close()
+	return writeTar(filePaths, xzw, dest)
+}
 
-	return tarball(filePaths, tarWriter, xzPath)
+// Read untars a .tar.xz file read from a Reader and decompresses
+// the contents into destination.
+func (xzFormat) Read(input io.Reader, destination string) error {
+	xzr, err := xz.NewReader(input)
+	if err != nil {
+		return fmt.Errorf("error decompressing xz: %v", err)
+	}
+
+	return Tar.Read(xzr, destination)
 }
 
 // Open untars source and decompresses the contents into destination.
@@ -57,10 +101,5 @@ func (xzFormat) Open(source, destination string) error {
 	}
 	defer f.Close()
 
-	xzReader, err := xz.NewReader(f)
-	if err != nil {
-		return fmt.Errorf("error decompressing %s: %v", source, err)
-	}
-
-	return untar(tar.NewReader(xzReader), destination)
+	return TarXZ.Read(f, destination)
 }
diff --git a/vendor/github.com/mholt/archiver/zip.go b/vendor/github.com/mholt/archiver/zip.go
index 9b5353797f..9d20bc1b77 100644
--- a/vendor/github.com/mholt/archiver/zip.go
+++ b/vendor/github.com/mholt/archiver/zip.go
@@ -4,8 +4,10 @@ package archiver
 
 import (
 	"archive/zip"
+	"bytes"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"os"
 	"path"
 	"path/filepath"
@@ -22,8 +24,44 @@ func init() {
 type zipFormat struct{}
 
 func (zipFormat) Match(filename string) bool {
-	// TODO: read file header to identify the format
-	return strings.HasSuffix(strings.ToLower(filename), ".zip")
+	return strings.HasSuffix(strings.ToLower(filename), ".zip") || isZip(filename)
+}
+
+// isZip checks the file has the Zip format signature by reading its beginning
+// bytes and matching it against "PK\x03\x04"
+func isZip(zipPath string) bool {
+	f, err := os.Open(zipPath)
+	if err != nil {
+		return false
+	}
+	defer f.Close()
+
+	buf := make([]byte, 4)
+	if n, err := f.Read(buf); err != nil || n < 4 {
+		return false
+	}
+
+	return bytes.Equal(buf, []byte("PK\x03\x04"))
+}
+
+// Write outputs a .zip file to the given writer with
+// the contents of files listed in filePaths. File paths
+// can be those of regular files or directories. Regular
+// files are stored at the 'root' of the archive, and
+// directories are recursively added.
+//
+// Files with an extension for formats that are already
+// compressed will be stored only, not compressed.
+func (zipFormat) Write(output io.Writer, filePaths []string) error {
+	w := zip.NewWriter(output)
+	for _, fpath := range filePaths {
+		if err := zipFile(w, fpath); err != nil {
+			w.Close()
+			return err
+		}
+	}
+
+	return w.Close()
 }
 
 // Make creates a .zip file in the location zipPath containing
@@ -41,16 +79,7 @@ func (zipFormat) Make(zipPath string, filePaths []string) error {
 	}
 	defer out.Close()
 
-	w := zip.NewWriter(out)
-	for _, fpath := range filePaths {
-		err = zipFile(w, fpath)
-		if err != nil {
-			w.Close()
-			return err
-		}
-	}
-
-	return w.Close()
+	return Zip.Write(out, filePaths)
 }
 
 func zipFile(w *zip.Writer, source string) error {
@@ -75,7 +104,11 @@ func zipFile(w *zip.Writer, source string) error {
 		}
 
 		if baseDir != "" {
-			header.Name = path.Join(baseDir, strings.TrimPrefix(fpath, source))
+			name, err := filepath.Rel(source, fpath)
+			if err != nil {
+				return err
+			}
+			header.Name = path.Join(baseDir, filepath.ToSlash(name))
 		}
 
 		if info.IsDir() {
@@ -116,6 +149,22 @@ func zipFile(w *zip.Writer, source string) error {
 	})
 }
 
+// Read unzips the .zip file read from the input Reader into destination.
+func (zipFormat) Read(input io.Reader, destination string) error {
+	buf, err := ioutil.ReadAll(input)
+	if err != nil {
+		return err
+	}
+
+	rdr := bytes.NewReader(buf)
+	r, err := zip.NewReader(rdr, rdr.Size())
+	if err != nil {
+		return err
+	}
+
+	return unzipAll(r, destination)
+}
+
 // Open unzips the .zip file at source into destination.
 func (zipFormat) Open(source, destination string) error {
 	r, err := zip.OpenReader(source)
@@ -124,6 +173,10 @@ func (zipFormat) Open(source, destination string) error {
 	}
 	defer r.Close()
 
+	return unzipAll(&r.Reader, destination)
+}
+
+func unzipAll(r *zip.Reader, destination string) error {
 	for _, zf := range r.File {
 		if err := unzipFile(zf, destination); err != nil {
 			return err
@@ -134,6 +187,11 @@ func (zipFormat) Open(source, destination string) error {
 }
 
 func unzipFile(zf *zip.File, destination string) error {
+	err := sanitizeExtractPath(zf.Name, destination)
+	if err != nil {
+		return err
+	}
+
 	if strings.HasSuffix(zf.Name, "/") {
 		return mkdir(filepath.Join(destination, zf.Name))
 	}
diff --git a/vendor/github.com/pierrec/lz4/.gitignore b/vendor/github.com/pierrec/lz4/.gitignore
new file mode 100644
index 0000000000..c2bb6e4af1
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/.gitignore
@@ -0,0 +1,31 @@
+# Created by https://www.gitignore.io/api/macos
+
+### macOS ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# End of https://www.gitignore.io/api/macos
diff --git a/vendor/github.com/pierrec/lz4/.travis.yml b/vendor/github.com/pierrec/lz4/.travis.yml
new file mode 100644
index 0000000000..78be21cc82
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/.travis.yml
@@ -0,0 +1,8 @@
+language: go
+
+go:
+  - 1.x
+
+script: 
+ - go test -v -cpu=2
+ - go test -v -cpu=2 -race
\ No newline at end of file
diff --git a/vendor/github.com/pierrec/lz4/LICENSE b/vendor/github.com/pierrec/lz4/LICENSE
new file mode 100644
index 0000000000..bd899d8353
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2015, Pierre Curto
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of xxHash nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/vendor/github.com/pierrec/lz4/README.md b/vendor/github.com/pierrec/lz4/README.md
new file mode 100644
index 0000000000..dd3c9d47e1
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/README.md
@@ -0,0 +1,31 @@
+[![godoc](https://godoc.org/github.com/pierrec/lz4?status.png)](https://godoc.org/github.com/pierrec/lz4)
+[![Build Status](https://travis-ci.org/pierrec/lz4.svg?branch=master)](https://travis-ci.org/pierrec/lz4)
+
+# lz4
+LZ4 compression and decompression in pure Go
+
+## Usage
+
+```go
+import "github.com/pierrec/lz4"
+```
+
+## Description
+
+Package lz4 implements reading and writing lz4 compressed data (a frame),
+as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html,
+using an io.Reader (decompression) and io.Writer (compression).
+It is designed to minimize memory usage while maximizing throughput by being able to
+[de]compress data concurrently.
+
+The Reader and the Writer support concurrent processing provided the supplied buffers are
+large enough (in multiples of BlockMaxSize) and there is no block dependency.
+Reader.WriteTo and Writer.ReadFrom do leverage the concurrency transparently.
+The runtime.GOMAXPROCS() value is used to apply concurrency or not.
+
+Although the block level compression and decompression functions are exposed and are fully compatible
+with the lz4 block format definition, they are low level and should not be used directly.
+For a complete description of an lz4 compressed block, see:
+http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
+
+See https://github.com/Cyan4973/lz4 for the reference C implementation.
diff --git a/vendor/github.com/pierrec/lz4/block.go b/vendor/github.com/pierrec/lz4/block.go
new file mode 100644
index 0000000000..44e3eaaac7
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/block.go
@@ -0,0 +1,454 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"errors"
+)
+
+// block represents a frame data block.
+// Used when compressing or decompressing frame blocks concurrently.
+type block struct {
+	compressed bool
+	zdata      []byte // compressed data
+	data       []byte // decompressed data
+	offset     int    // offset within the data as with block dependency the 64Kb window is prepended to it
+	checksum   uint32 // compressed data checksum
+	err        error  // error while [de]compressing
+}
+
+var (
+	// ErrInvalidSource is returned by UncompressBlock when a compressed block is corrupted.
+	ErrInvalidSource = errors.New("lz4: invalid source")
+	// ErrShortBuffer is returned by UncompressBlock, CompressBlock or CompressBlockHC when
+	// the supplied buffer for [de]compression is too small.
+	ErrShortBuffer = errors.New("lz4: short buffer")
+)
+
+// CompressBlockBound returns the maximum compressed size of a buffer of size n (the worst case, when the data is not compressible).
+func CompressBlockBound(n int) int {
+	return n + n/255 + 16
+}
+
+// UncompressBlock decompresses the source buffer into the destination one,
+// starting at the di index and returning the decompressed size.
+//
+// The destination buffer must be sized appropriately.
+//
+// An error is returned if the source data is invalid or the destination buffer is too small.
+func UncompressBlock(src, dst []byte, di int) (int, error) {
+	si, sn, di0 := 0, len(src), di
+	if sn == 0 {
+		return 0, nil
+	}
+
+	for {
+		// literals and match lengths (token)
+		lLen := int(src[si] >> 4)
+		mLen := int(src[si] & 0xF)
+		if si++; si == sn {
+			return di, ErrInvalidSource
+		}
+
+		// literals
+		if lLen > 0 {
+			if lLen == 0xF {
+				for src[si] == 0xFF {
+					lLen += 0xFF
+					if si++; si == sn {
+						return di - di0, ErrInvalidSource
+					}
+				}
+				lLen += int(src[si])
+				if si++; si == sn {
+					return di - di0, ErrInvalidSource
+				}
+			}
+			if len(dst)-di < lLen || si+lLen > sn {
+				return di - di0, ErrShortBuffer
+			}
+			di += copy(dst[di:], src[si:si+lLen])
+
+			if si += lLen; si >= sn {
+				return di - di0, nil
+			}
+		}
+
+		if si += 2; si >= sn {
+			return di, ErrInvalidSource
+		}
+		offset := int(src[si-2]) | int(src[si-1])<<8
+		if di-offset < 0 || offset == 0 {
+			return di - di0, ErrInvalidSource
+		}
+
+		// match
+		if mLen == 0xF {
+			for src[si] == 0xFF {
+				mLen += 0xFF
+				if si++; si == sn {
+					return di - di0, ErrInvalidSource
+				}
+			}
+			mLen += int(src[si])
+			if si++; si == sn {
+				return di - di0, ErrInvalidSource
+			}
+		}
+		// minimum match length is 4
+		mLen += 4
+		if len(dst)-di <= mLen {
+			return di - di0, ErrShortBuffer
+		}
+
+		// copy the match (NB. match is at least 4 bytes long)
+		if mLen >= offset {
+			bytesToCopy := offset * (mLen / offset)
+			// Efficiently copy the match dst[di-offset:di] into the slice
+			// dst[di:di+bytesToCopy]
+			expanded := dst[di-offset : di+bytesToCopy]
+			n := offset
+			for n <= bytesToCopy+offset {
+				copy(expanded[n:], expanded[:n])
+				n *= 2
+			}
+			di += bytesToCopy
+			mLen -= bytesToCopy
+		}
+
+		di += copy(dst[di:], dst[di-offset:di-offset+mLen])
+	}
+}
+
+// CompressBlock compresses the source buffer starting at soffset into the destination one.
+// This is the fast version of LZ4 compression and also the default one.
+//
+// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlock(src, dst []byte, soffset int) (int, error) {
+	sn, dn := len(src)-mfLimit, len(dst)
+	if sn <= 0 || dn == 0 || soffset >= sn {
+		return 0, nil
+	}
+	var si, di int
+
+	// fast scan strategy:
+	// we only need a hash table to store the last sequences (4 bytes)
+	var hashTable [1 << hashLog]int
+	var hashShift = uint((minMatch * 8) - hashLog)
+
+	// Initialise the hash table with the first 64Kb of the input buffer
+	// (used when compressing dependent blocks)
+	for si < soffset {
+		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
+		si++
+		hashTable[h] = si
+	}
+
+	anchor := si
+	fma := 1 << skipStrength
+	for si < sn-minMatch {
+		// hash the next 4 bytes (sequence)...
+		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
+		// -1 to separate existing entries from new ones
+		ref := hashTable[h] - 1
+		// ...and store the position of the hash in the hash table (+1 to compensate the -1 upon saving)
+		hashTable[h] = si + 1
+		// no need to check the last 3 bytes in the first literal 4 bytes as
+		// this guarantees that the next match, if any, is compressed with
+		// a lower size, since to have some compression we must have:
+		// ll+ml-overlap > 1 + (ll-15)/255 + (ml-4-15)/255 + 2 (uncompressed size>compressed size)
+		// => ll+ml>3+2*overlap => ll+ml>= 4+2*overlap
+		// and by definition we do have:
+		// ll >= 1, ml >= 4
+		// => ll+ml >= 5
+		// => so overlap must be 0
+
+		// the sequence is new, out of bound (64kb) or not valid: try next sequence
+		if ref < 0 || fma&(1<<skipStrength-1) < 4 ||
+			(si-ref)>>winSizeLog > 0 ||
+			src[ref] != src[si] ||
+			src[ref+1] != src[si+1] ||
+			src[ref+2] != src[si+2] ||
+			src[ref+3] != src[si+3] {
+			// variable step: improves performance on non-compressible data
+			si += fma >> skipStrength
+			fma++
+			continue
+		}
+		// match found
+		fma = 1 << skipStrength
+		lLen := si - anchor
+		offset := si - ref
+
+		// encode match length part 1
+		si += minMatch
+		mLen := si // match length has minMatch already
+		for si <= sn && src[si] == src[si-offset] {
+			si++
+		}
+		mLen = si - mLen
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// encode literals length
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(l)
+		}
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+
+		// literals
+		if di+lLen >= dn {
+			return di, ErrShortBuffer
+		}
+		di += copy(dst[di:], src[anchor:anchor+lLen])
+		anchor = si
+
+		// encode offset
+		if di += 2; di >= dn {
+			return di, ErrShortBuffer
+		}
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// encode match length part 2
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(mLen)
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+	}
+
+	if anchor == 0 {
+		// incompressible
+		return 0, nil
+	}
+
+	// last literals
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+		lLen -= 0xF
+		for ; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+		dst[di] = byte(lLen)
+	}
+	if di++; di == dn {
+		return di, ErrShortBuffer
+	}
+
+	// write literals
+	src = src[anchor:]
+	switch n := di + len(src); {
+	case n > dn:
+		return di, ErrShortBuffer
+	case n >= sn:
+		// incompressible
+		return 0, nil
+	}
+	di += copy(dst[di:], src)
+	return di, nil
+}
+
+// CompressBlockHC compresses the source buffer starting at soffset into the destination one.
+// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
+//
+// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlockHC(src, dst []byte, soffset int) (int, error) {
+	sn, dn := len(src)-mfLimit, len(dst)
+	if sn <= 0 || dn == 0 || soffset >= sn {
+		return 0, nil
+	}
+	var si, di int
+
+	// Hash Chain strategy:
+	// we need a hash table and a chain table
+	// the chain table cannot contain more entries than the window size (64Kb entries)
+	var hashTable [1 << hashLog]int
+	var chainTable [winSize]int
+	var hashShift = uint((minMatch * 8) - hashLog)
+
+	// Initialise the hash table with the first 64Kb of the input buffer
+	// (used when compressing dependent blocks)
+	for si < soffset {
+		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
+		chainTable[si&winMask] = hashTable[h]
+		si++
+		hashTable[h] = si
+	}
+
+	anchor := si
+	for si < sn-minMatch {
+		// hash the next 4 bytes (sequence)...
+		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
+
+		// follow the chain until out of window and give the longest match
+		mLen := 0
+		offset := 0
+		for next := hashTable[h] - 1; next > 0 && next > si-winSize; next = chainTable[next&winMask] - 1 {
+			// the first (mLen==0) or next byte (mLen>=minMatch) at current match length must match to improve on the match length
+			if src[next+mLen] == src[si+mLen] {
+				for ml := 0; ; ml++ {
+					if src[next+ml] != src[si+ml] || si+ml > sn {
+						// found a longer match, keep its position and length
+						if mLen < ml && ml >= minMatch {
+							mLen = ml
+							offset = si - next
+						}
+						break
+					}
+				}
+			}
+		}
+		chainTable[si&winMask] = hashTable[h]
+		hashTable[h] = si + 1
+
+		// no match found
+		if mLen == 0 {
+			si++
+			continue
+		}
+
+		// match found
+		// update hash/chain tables with overlapping bytes:
+		// si already hashed, add everything from si+1 up to the match length
+		for si, ml := si+1, si+mLen; si < ml; {
+			h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
+			chainTable[si&winMask] = hashTable[h]
+			si++
+			hashTable[h] = si
+		}
+
+		lLen := si - anchor
+		si += mLen
+		mLen -= minMatch // match length does not include minMatch
+
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// encode literals length
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(l)
+		}
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+
+		// literals
+		if di+lLen >= dn {
+			return di, ErrShortBuffer
+		}
+		di += copy(dst[di:], src[anchor:anchor+lLen])
+		anchor = si
+
+		// encode offset
+		if di += 2; di >= dn {
+			return di, ErrShortBuffer
+		}
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// encode match length part 2
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(mLen)
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+	}
+
+	if anchor == 0 {
+		// incompressible
+		return 0, nil
+	}
+
+	// last literals
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+		lLen -= 0xF
+		for ; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+		dst[di] = byte(lLen)
+	}
+	if di++; di == dn {
+		return di, ErrShortBuffer
+	}
+
+	// write literals
+	src = src[anchor:]
+	switch n := di + len(src); {
+	case n > dn:
+		return di, ErrShortBuffer
+	case n >= sn:
+		// incompressible
+		return 0, nil
+	}
+	di += copy(dst[di:], src)
+	return di, nil
+}
diff --git a/vendor/github.com/pierrec/lz4/lz4.go b/vendor/github.com/pierrec/lz4/lz4.go
new file mode 100644
index 0000000000..ddb82f66f8
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/lz4.go
@@ -0,0 +1,105 @@
+// Package lz4 implements reading and writing lz4 compressed data (a frame),
+// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html,
+// using an io.Reader (decompression) and io.Writer (compression).
+// It is designed to minimize memory usage while maximizing throughput by being able to
+// [de]compress data concurrently.
+//
+// The Reader and the Writer support concurrent processing provided the supplied buffers are
+// large enough (in multiples of BlockMaxSize) and there is no block dependency.
+// Reader.WriteTo and Writer.ReadFrom do leverage the concurrency transparently.
+// The runtime.GOMAXPROCS() value is used to apply concurrency or not.
+//
+// Although the block level compression and decompression functions are exposed and are fully compatible
+// with the lz4 block format definition, they are low level and should not be used directly.
+// For a complete description of an lz4 compressed block, see:
+// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
+//
+// See https://github.com/Cyan4973/lz4 for the reference C implementation.
+package lz4
+
+import (
+	"hash"
+	"sync"
+
+	"github.com/pierrec/xxHash/xxHash32"
+)
+
+const (
+	// Extension is the LZ4 frame file name extension
+	Extension = ".lz4"
+	// Version is the LZ4 frame format version
+	Version = 1
+
+	frameMagic     = uint32(0x184D2204)
+	frameSkipMagic = uint32(0x184D2A50)
+
+	// The following constants are used to setup the compression algorithm.
+	minMatch   = 4  // the minimum size of the match sequence size (4 bytes)
+	winSizeLog = 16 // LZ4 64Kb window size limit
+	winSize    = 1 << winSizeLog
+	winMask    = winSize - 1 // 64Kb window of previous data for dependent blocks
+
+	// hashLog determines the size of the hash table used to quickly find a previous match position.
+	// Its value influences the compression speed and memory usage, the lower the faster,
+	// but at the expense of the compression ratio.
+	// 16 seems to be the best compromise.
+	hashLog       = 16
+	hashTableSize = 1 << hashLog
+	hashShift     = uint((minMatch * 8) - hashLog)
+
+	mfLimit      = 8 + minMatch // The last match cannot start within the last 12 bytes.
+	skipStrength = 6            // variable step for fast scan
+
+	hasher = uint32(2654435761) // prime number used to hash minMatch
+)
+
+// map the block max size id with its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb.
+var bsMapID = map[byte]int{4: 64 << 10, 5: 256 << 10, 6: 1 << 20, 7: 4 << 20}
+var bsMapValue = map[int]byte{}
+
+// init fills bsMapValue with the reverse of bsMapID (block max size in bytes -> id).
+func init() {
+	for i, v := range bsMapID {
+		bsMapValue[v] = i
+	}
+}
+
+// Header describes the various flags that can be set on a Writer or obtained from a Reader.
+// The default values match those of the LZ4 frame format definition (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
+//
+// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
+// It is the caller's responsibility to check them if necessary (typically when using the Reader concurrency).
+type Header struct {
+	BlockDependency bool   // compressed blocks are dependent (one block depends on the last 64Kb of the previous one)
+	BlockChecksum   bool   // compressed blocks are checksummed
+	NoChecksum      bool   // when true, the frame checksum is disabled
+	BlockMaxSize    int    // the size of the decompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
+	Size            uint64 // the frame total size. It is _not_ computed by the Writer.
+	HighCompression bool   // use high compression (only for the Writer)
+	done            bool   // whether the descriptor was processed (Read or Write and checked)
+	// Removed as not supported
+	// 	Dict            bool   // a dictionary id is to be used
+	// 	DictID          uint32 // the dictionary id read from the frame, if any.
+}
+
+// xxhPool wraps the standard pool for xxHash items.
+// Putting items back in the pool automatically resets them.
+type xxhPool struct {
+	sync.Pool
+}
+
+func (p *xxhPool) Get() hash.Hash32 {
+	return p.Pool.Get().(hash.Hash32)
+}
+
+func (p *xxhPool) Put(h hash.Hash32) {
+	h.Reset()
+	p.Pool.Put(h)
+}
+
+// hashPool is used by readers and writers and contains xxHash items.
+var hashPool = xxhPool{
+	Pool: sync.Pool{
+		New: func() interface{} { return xxHash32.New(0) },
+	},
+}
diff --git a/vendor/github.com/pierrec/lz4/reader.go b/vendor/github.com/pierrec/lz4/reader.go
new file mode 100644
index 0000000000..9f7fd60424
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/reader.go
@@ -0,0 +1,364 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash"
+	"io"
+	"io/ioutil"
+	"runtime"
+	"sync"
+	"sync/atomic"
+)
+
+// ErrInvalid is returned when the data being read is not an LZ4 archive
+// (LZ4 magic number detection failed).
+var ErrInvalid = errors.New("invalid lz4 data")
+
+// errEndOfBlock is returned by readBlock when it has reached the last block of the frame.
+// It is not an error.
+var errEndOfBlock = errors.New("end of block")
+
+// Reader implements the LZ4 frame decoder.
+// The Header is set after the first call to Read().
+// The Header may change between Read() calls in case of concatenated frames.
+type Reader struct {
+	Pos int64 // position within the source
+	Header
+	src      io.Reader
+	checksum hash.Hash32    // frame hash
+	wg       sync.WaitGroup // decompressing go routine wait group
+	data     []byte         // buffered decompressed data
+	window   []byte         // 64Kb decompressed data window
+}
+
+// NewReader returns a new LZ4 frame decoder.
+// No access to the underlying io.Reader is performed.
+func NewReader(src io.Reader) *Reader {
+	return &Reader{
+		src:      src,
+		checksum: hashPool.Get(),
+	}
+}
+
+// readHeader checks the frame magic number and parses the frame descriptor.
+// Skippable frames are supported even as a first frame although the LZ4
+// specification recommends that skippable frames not be used as first frames.
+func (z *Reader) readHeader(first bool) error {
+	defer z.checksum.Reset()
+
+	for {
+		var magic uint32
+		if err := binary.Read(z.src, binary.LittleEndian, &magic); err != nil {
+			if !first && err == io.ErrUnexpectedEOF {
+				return io.EOF
+			}
+			return err
+		}
+		z.Pos += 4
+		if magic>>8 == frameSkipMagic>>8 {
+			var skipSize uint32
+			if err := binary.Read(z.src, binary.LittleEndian, &skipSize); err != nil {
+				return err
+			}
+			z.Pos += 4
+			m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
+			z.Pos += m
+			if err != nil {
+				return err
+			}
+			continue
+		}
+		if magic != frameMagic {
+			return ErrInvalid
+		}
+		break
+	}
+
+	// header
+	var buf [8]byte
+	if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
+		return err
+	}
+	z.Pos += 2
+
+	b := buf[0]
+	if b>>6 != Version {
+		return fmt.Errorf("lz4.Read: invalid version: got %d expected %d", b>>6, Version)
+	}
+	z.BlockDependency = b>>5&1 == 0
+	z.BlockChecksum = b>>4&1 > 0
+	frameSize := b>>3&1 > 0
+	z.NoChecksum = b>>2&1 == 0
+	// 	z.Dict = b&1 > 0
+
+	bmsID := buf[1] >> 4 & 0x7
+	bSize, ok := bsMapID[bmsID]
+	if !ok {
+		return fmt.Errorf("lz4.Read: invalid block max size: %d", bmsID)
+	}
+	z.BlockMaxSize = bSize
+
+	z.checksum.Write(buf[0:2])
+
+	if frameSize {
+		if err := binary.Read(z.src, binary.LittleEndian, &z.Size); err != nil {
+			return err
+		}
+		z.Pos += 8
+		binary.LittleEndian.PutUint64(buf[:], z.Size)
+		z.checksum.Write(buf[0:8])
+	}
+
+	// 	if z.Dict {
+	// 		if err := binary.Read(z.src, binary.LittleEndian, &z.DictID); err != nil {
+	// 			return err
+	// 		}
+	// 		z.Pos += 4
+	// 		binary.LittleEndian.PutUint32(buf[:], z.DictID)
+	// 		z.checksum.Write(buf[0:4])
+	// 	}
+
+	// header checksum
+	if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
+		return err
+	}
+	z.Pos++
+	if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
+		return fmt.Errorf("lz4.Read: invalid header checksum: got %v expected %v", buf[0], h)
+	}
+
+	z.Header.done = true
+
+	return nil
+}
+
+// Read decompresses data from the underlying source into the supplied buffer.
+//
+// Since there can be multiple streams concatenated, Header values may
+// change between calls to Read(). If that is the case, no data is actually read from
+// the underlying io.Reader, to allow for potential input buffer resizing.
+//
+// Data is buffered if the input buffer is too small, and exhausted upon successive calls.
+//
+// If the buffer is large enough (typically in multiples of BlockMaxSize) and there is
+// no block dependency, then the data will be decompressed concurrently based on the GOMAXPROCS value.
+func (z *Reader) Read(buf []byte) (n int, err error) {
+	if !z.Header.done {
+		if err = z.readHeader(true); err != nil {
+			return
+		}
+	}
+
+	if len(buf) == 0 {
+		return
+	}
+
+	// exhaust remaining data from previous Read()
+	if len(z.data) > 0 {
+		n = copy(buf, z.data)
+		z.data = z.data[n:]
+		if len(z.data) == 0 {
+			z.data = nil
+		}
+		return
+	}
+
+	// Break up the input buffer into BlockMaxSize blocks with at least one block.
+	// Then decompress into each of them concurrently if possible (no dependency).
+	// In case of dependency, the first block will be missing the window (except on the
+	// very first call), the rest will have it already since it comes from the previous block.
+	wbuf := buf
+	zn := (len(wbuf) + z.BlockMaxSize - 1) / z.BlockMaxSize
+	zblocks := make([]block, zn)
+	for zi, abort := 0, uint32(0); zi < zn && atomic.LoadUint32(&abort) == 0; zi++ {
+		zb := &zblocks[zi]
+		// last block may be too small
+		if len(wbuf) < z.BlockMaxSize+len(z.window) {
+			wbuf = make([]byte, z.BlockMaxSize+len(z.window))
+		}
+		copy(wbuf, z.window)
+		if zb.err = z.readBlock(wbuf, zb); zb.err != nil {
+			break
+		}
+		wbuf = wbuf[z.BlockMaxSize:]
+		if !z.BlockDependency {
+			z.wg.Add(1)
+			go z.decompressBlock(zb, &abort)
+			continue
+		}
+		// cannot decompress concurrently when dealing with block dependency
+		z.decompressBlock(zb, nil)
+		// the last block may not contain enough data
+		if len(z.window) == 0 {
+			z.window = make([]byte, winSize)
+		}
+		if len(zb.data) >= winSize {
+			copy(z.window, zb.data[len(zb.data)-winSize:])
+		} else {
+			copy(z.window, z.window[len(zb.data):])
+			copy(z.window[len(zb.data)+1:], zb.data)
+		}
+	}
+	z.wg.Wait()
+
+	// since a block size may be less than BlockMaxSize, trim the decompressed buffers
+	for _, zb := range zblocks {
+		if zb.err != nil {
+			if zb.err == errEndOfBlock {
+				return n, z.close()
+			}
+			return n, zb.err
+		}
+		bLen := len(zb.data)
+		if !z.NoChecksum {
+			z.checksum.Write(zb.data)
+		}
+		m := copy(buf[n:], zb.data)
+		// buffer the remaining data (this is necessarily the last block)
+		if m < bLen {
+			z.data = zb.data[m:]
+		}
+		n += m
+	}
+
+	return
+}
+
+// readBlock reads an entire frame block from the frame.
+// The input buffer is the one that will receive the decompressed data.
+// If the end of the frame is detected, it returns the errEndOfBlock error.
+func (z *Reader) readBlock(buf []byte, b *block) error {
+	var bLen uint32
+	if err := binary.Read(z.src, binary.LittleEndian, &bLen); err != nil {
+		return err
+	}
+	atomic.AddInt64(&z.Pos, 4)
+
+	switch {
+	case bLen == 0:
+		return errEndOfBlock
+	case bLen&(1<<31) == 0:
+		b.compressed = true
+		b.data = buf
+		b.zdata = make([]byte, bLen)
+	default:
+		bLen = bLen & (1<<31 - 1)
+		if int(bLen) > len(buf) {
+			return fmt.Errorf("lz4.Read: invalid block size: %d", bLen)
+		}
+		b.data = buf[:bLen]
+		b.zdata = buf[:bLen]
+	}
+	if _, err := io.ReadFull(z.src, b.zdata); err != nil {
+		return err
+	}
+
+	if z.BlockChecksum {
+		if err := binary.Read(z.src, binary.LittleEndian, &b.checksum); err != nil {
+			return err
+		}
+		xxh := hashPool.Get()
+		defer hashPool.Put(xxh)
+		xxh.Write(b.zdata)
+		if h := xxh.Sum32(); h != b.checksum {
+			return fmt.Errorf("lz4.Read: invalid block checksum: got %x expected %x", h, b.checksum)
+		}
+	}
+
+	return nil
+}
+
+// decompressBlock decompresses a frame block.
+// In case of an error, the block err is set with it and abort is set to 1.
+func (z *Reader) decompressBlock(b *block, abort *uint32) {
+	if abort != nil {
+		defer z.wg.Done()
+	}
+	if b.compressed {
+		n := len(z.window)
+		m, err := UncompressBlock(b.zdata, b.data, n)
+		if err != nil {
+			if abort != nil {
+				atomic.StoreUint32(abort, 1)
+			}
+			b.err = err
+			return
+		}
+		b.data = b.data[n : n+m]
+	}
+	atomic.AddInt64(&z.Pos, int64(len(b.data)))
+}
+
+// close validates the frame checksum (if any) and checks the next frame (if any).
+func (z *Reader) close() error {
+	if !z.NoChecksum {
+		var checksum uint32
+		if err := binary.Read(z.src, binary.LittleEndian, &checksum); err != nil {
+			return err
+		}
+		if checksum != z.checksum.Sum32() {
+			return fmt.Errorf("lz4.Read: invalid frame checksum: got %x expected %x", z.checksum.Sum32(), checksum)
+		}
+	}
+
+	// get ready for the next concatenated frame, but do not change the position
+	pos := z.Pos
+	z.Reset(z.src)
+	z.Pos = pos
+
+	// since multiple frames can be concatenated, check for another one
+	return z.readHeader(false)
+}
+
+// Reset discards the Reader's state and makes it equivalent to the
+// result of its original state from NewReader, but reading from r instead.
+// This permits reusing a Reader rather than allocating a new one.
+func (z *Reader) Reset(r io.Reader) {
+	z.Header = Header{}
+	z.Pos = 0
+	z.src = r
+	z.checksum.Reset()
+	z.data = nil
+	z.window = nil
+}
+
+// WriteTo decompresses the data from the underlying io.Reader and writes it to the io.Writer.
+// Returns the number of bytes written.
+func (z *Reader) WriteTo(w io.Writer) (n int64, err error) {
+	cpus := runtime.GOMAXPROCS(0)
+	var buf []byte
+
+	// The initial buffer being nil, the first Read will only read the compressed frame options.
+	// The buffer can then be sized appropriately to support maximum concurrency decompression.
+	// If multiple frames are concatenated, Read() will return with no data decompressed but with
+	// potentially changed options. The buffer will be resized accordingly, always trying to
+	// maximize concurrency.
+	for {
+		nsize := 0
+		// the block max size can change if multiple streams are concatenated.
+		// Check it after every Read().
+		if z.BlockDependency {
+			// in case of dependency, we cannot decompress concurrently,
+			// so allocate the minimum buffer + window size
+			nsize = len(z.window) + z.BlockMaxSize
+		} else {
+			// if no dependency, allocate a buffer large enough for concurrent decompression
+			nsize = cpus * z.BlockMaxSize
+		}
+		if nsize != len(buf) {
+			buf = make([]byte, nsize)
+		}
+
+		m, er := z.Read(buf)
+		if er != nil && er != io.EOF {
+			return n, er
+		}
+		m, err = w.Write(buf[:m])
+		n += int64(m)
+		if err != nil || er == io.EOF {
+			return
+		}
+	}
+}
diff --git a/vendor/github.com/pierrec/lz4/writer.go b/vendor/github.com/pierrec/lz4/writer.go
new file mode 100644
index 0000000000..b1b712fe21
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/writer.go
@@ -0,0 +1,377 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"fmt"
+	"hash"
+	"io"
+	"runtime"
+)
+
+// Writer implements the LZ4 frame encoder.
+type Writer struct {
+	Header
+	dst      io.Writer
+	checksum hash.Hash32 // frame checksum
+	data     []byte      // data to be compressed, only used when dealing with block dependency as we need 64Kb to work with
+	window   []byte      // last 64KB of decompressed data (block dependency) + blockMaxSize buffer
+
+	zbCompressBuf []byte // buffer for compressing lz4 blocks
+	writeSizeBuf  []byte // four-byte slice for writing checksums and sizes in writeblock
+}
+
+// NewWriter returns a new LZ4 frame encoder.
+// No access to the underlying io.Writer is performed.
+// The supplied Header is checked at the first Write.
+// It is ok to change it before the first Write but then not until a Reset() is performed.
+func NewWriter(dst io.Writer) *Writer {
+	return &Writer{
+		dst:      dst,
+		checksum: hashPool.Get(),
+		Header: Header{
+			BlockMaxSize: 4 << 20,
+		},
+		writeSizeBuf: make([]byte, 4),
+	}
+}
+
+// writeHeader builds and writes the header (magic+header) to the underlying io.Writer.
+func (z *Writer) writeHeader() error {
+	// Default to 4Mb if BlockMaxSize is not set
+	if z.Header.BlockMaxSize == 0 {
+		z.Header.BlockMaxSize = 4 << 20
+	}
+	// the only option that needs to be validated
+	bSize, ok := bsMapValue[z.Header.BlockMaxSize]
+	if !ok {
+		return fmt.Errorf("lz4: invalid block max size: %d", z.Header.BlockMaxSize)
+	}
+
+	// magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
+	// Size and DictID are optional
+	var buf [19]byte
+
+	// set the fixed size data: magic number, block max size and flags
+	binary.LittleEndian.PutUint32(buf[0:], frameMagic)
+	flg := byte(Version << 6)
+	if !z.Header.BlockDependency {
+		flg |= 1 << 5
+	}
+	if z.Header.BlockChecksum {
+		flg |= 1 << 4
+	}
+	if z.Header.Size > 0 {
+		flg |= 1 << 3
+	}
+	if !z.Header.NoChecksum {
+		flg |= 1 << 2
+	}
+	//  if z.Header.Dict {
+	//      flg |= 1
+	//  }
+	buf[4] = flg
+	buf[5] = bSize << 4
+
+	// current buffer size: magic(4) + flags(1) + block max size (1)
+	n := 6
+	// optional items
+	if z.Header.Size > 0 {
+		binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
+		n += 8
+	}
+	//  if z.Header.Dict {
+	//      binary.LittleEndian.PutUint32(buf[n:], z.Header.DictID)
+	//      n += 4
+	//  }
+
+	// header checksum includes the flags, block max size and optional Size and DictID
+	z.checksum.Write(buf[4:n])
+	buf[n] = byte(z.checksum.Sum32() >> 8 & 0xFF)
+	z.checksum.Reset()
+
+	// header ready, write it out
+	if _, err := z.dst.Write(buf[0 : n+1]); err != nil {
+		return err
+	}
+	z.Header.done = true
+
+	// initialize buffers dependent on header info
+	z.zbCompressBuf = make([]byte, winSize+z.BlockMaxSize)
+
+	return nil
+}
+
+// Write compresses data from the supplied buffer into the underlying io.Writer.
+// Write does not return until the data has been written.
+//
+// If the input buffer is large enough (typically in multiples of BlockMaxSize)
+// the data will be compressed concurrently.
+//
+// Write never buffers any data unless in BlockDependency mode where it may
+// do so until it has 64Kb of data, after which it never buffers any.
+func (z *Writer) Write(buf []byte) (n int, err error) {
+	if !z.Header.done {
+		if err = z.writeHeader(); err != nil {
+			return
+		}
+	}
+
+	if len(buf) == 0 {
+		return
+	}
+
+	if !z.NoChecksum {
+		z.checksum.Write(buf) // running checksum of the uncompressed input, emitted by Close
+	}
+
+	// with block dependency, require at least 64Kb of data to work with
+	// not having 64Kb only matters initially to setup the first window
+	bl := 0 // bytes buffered by earlier calls, which were already reported to their caller
+	if z.BlockDependency && len(z.window) == 0 {
+		bl = len(z.data)
+		z.data = append(z.data, buf...)
+		if len(z.data) < winSize {
+			return len(buf), nil
+		}
+		buf = z.data
+		z.data = nil
+	}
+
+	// Break up the input buffer into BlockMaxSize blocks, provisioning the left over block.
+	// Each block is compressed and written at the write label, which jumps back to loop.
+	var (
+		zb       block
+		wbuf     = buf
+		zn       = len(wbuf) / z.BlockMaxSize
+		zi       = 0
+		leftover = len(buf) % z.BlockMaxSize
+	)
+
+loop:
+	for zi < zn {
+		if z.BlockDependency {
+			if zi == 0 {
+				// first block: prefix it with the window kept from the previous call (if any)
+				zb.data = append(z.window, wbuf[:z.BlockMaxSize]...)
+				zb.offset = len(z.window)
+				wbuf = wbuf[z.BlockMaxSize-winSize:]
+			} else {
+				// set the uncompressed data including the window from previous block
+				zb.data = wbuf[:z.BlockMaxSize+winSize]
+				zb.offset = winSize
+				wbuf = wbuf[z.BlockMaxSize:]
+			}
+		} else {
+			zb.data = wbuf[:z.BlockMaxSize]
+			wbuf = wbuf[z.BlockMaxSize:]
+		}
+
+		goto write // the write label increments zi and resumes at loop
+	}
+
+	// left over
+	if leftover > 0 {
+		zb = block{data: wbuf}
+		if z.BlockDependency {
+			if zn == 0 {
+				zb.data = append(z.window, zb.data...)
+				zb.offset = len(z.window)
+			} else {
+				zb.offset = winSize
+			}
+		}
+
+		leftover = 0
+		goto write
+	}
+
+	if z.BlockDependency {
+		if len(z.window) == 0 {
+			z.window = make([]byte, winSize)
+		}
+		// keep the last winSize bytes seen; a short buffer slides the window left first
+		if len(buf) >= winSize {
+			copy(z.window, buf[len(buf)-winSize:])
+		} else {
+			copy(z.window, z.window[len(buf):])
+			copy(z.window[len(z.window)-len(buf):], buf)
+		}
+	}
+
+	return
+
+write:
+	zb = z.compressBlock(zb)
+	_, err = z.writeBlock(zb)
+
+	written := len(zb.data)
+	if bl > 0 { // do not report bytes that an earlier call already counted
+		if written >= bl {
+			written -= bl
+			bl = 0
+		} else {
+			bl -= written
+			written = 0
+		}
+	}
+
+	n += written
+	// remove the window in zb.data
+	if z.BlockDependency {
+		if zi == 0 {
+			n -= len(z.window)
+		} else {
+			n -= winSize
+		}
+	}
+	if err != nil {
+		return
+	}
+	zi++
+	goto loop
+}
+
+// compressBlock fills in zb.zdata, zb.compressed and, with BlockChecksum set,
+// zb.checksum from zb.data, whose first zb.offset bytes are not payload.
+func (z *Writer) compressBlock(zb block) block {
+	var (
+		n    int
+		err  error
+		zbuf = z.zbCompressBuf
+	)
+	if z.HighCompression {
+		n, err = CompressBlockHC(zb.data, zbuf, zb.offset)
+	} else {
+		n, err = CompressBlock(zb.data, zbuf, zb.offset)
+	}
+
+	// Keep the compressed form only when it is smaller than the raw payload. n is
+	// tested, not len(zb.zdata), which is stale when the caller reuses zb.
+	if err == nil && n > 0 && n < len(zb.data)-zb.offset {
+		zb.compressed = true
+		zb.zdata = zbuf[:n]
+	} else {
+		zb.compressed = false
+		zb.zdata = zb.data[zb.offset:]
+	}
+
+	if z.BlockChecksum {
+		xxh := hashPool.Get()
+		xxh.Write(zb.zdata)
+		zb.checksum = xxh.Sum32()
+		hashPool.Put(xxh)
+	}
+	return zb
+}
+
+// writeBlock writes one frame block to the underlying io.Writer: the size
+// word, the block data and, with BlockChecksum set, the block checksum.
+// It returns the total number of bytes written.
+func (z *Writer) writeBlock(zb block) (int, error) {
+	// the top bit of the size word marks data stored uncompressed
+	size := uint32(len(zb.zdata))
+	if !zb.compressed {
+		size |= 1 << 31
+	}
+	binary.LittleEndian.PutUint32(z.writeSizeBuf, size)
+
+	// size word first, then the data
+	total, err := z.dst.Write(z.writeSizeBuf)
+	if err != nil {
+		return total, err
+	}
+
+	k, err := z.dst.Write(zb.zdata)
+	total += k
+	if err != nil {
+		return total, err
+	}
+
+	if !z.BlockChecksum {
+		return total, nil
+	}
+
+	// writeSizeBuf doubles as scratch space for the checksum word
+	binary.LittleEndian.PutUint32(z.writeSizeBuf, zb.checksum)
+	k, err = z.dst.Write(z.writeSizeBuf)
+	total += k
+	return total, err
+}
+
+// Flush compresses the data buffered by Write and writes it to the underlying
+// writer as one block; it does nothing when no data is buffered.
+// If the underlying writer returns an error, Flush returns that error.
+//
+// Flush is only required when in BlockDependency mode and the total of
+// data written is less than 64Kb.
+func (z *Writer) Flush() error {
+	if len(z.data) == 0 {
+		return nil
+	}
+	if _, err := z.writeBlock(z.compressBlock(block{data: z.data})); err != nil {
+		return err
+	}
+	// the block is out: drop the buffer so that Close does not emit it again
+	z.data = nil
+	return nil
+}
+
+// Close ends the frame without closing the underlying io.Writer: it writes the
+// header if still missing, any data buffered by Write, a zero uint32 and, unless
+// NoChecksum is set, the checksum of the data passed to Write.
+func (z *Writer) Close() error {
+	var err error
+	if !z.Header.done {
+		if err = z.writeHeader(); err != nil {
+			return err
+		}
+	}
+
+	// data held back while waiting for the first block dependency window
+	if pending := z.data; z.BlockDependency && len(pending) > 0 {
+		if _, err = z.writeBlock(z.compressBlock(block{data: pending})); err != nil {
+			return err
+		}
+	}
+
+	if err = binary.Write(z.dst, binary.LittleEndian, uint32(0)); err != nil {
+		return err
+	}
+	if z.NoChecksum {
+		return nil
+	}
+	return binary.Write(z.dst, binary.LittleEndian, z.checksum.Sum32())
+}
+
+// Reset points the Writer z at w and clears its header, checksum, buffered
+// data and window, making it equivalent to its initial state from NewWriter.
+// No access to the underlying io.Writer is performed.
+func (z *Writer) Reset(w io.Writer) {
+	z.dst = w
+	z.Header = Header{}
+	z.data = nil
+	z.window = nil
+	z.checksum.Reset()
+}
+
+// ReadFrom compresses the data read from r until EOF and writes it to the underlying
+// io.Writer. It returns the number of bytes read and does not close the Writer.
+func (z *Writer) ReadFrom(r io.Reader) (n int64, err error) {
+	// read buffer sized to one maximum-size block per GOMAXPROCS
+	chunk := make([]byte, runtime.GOMAXPROCS(0)*z.BlockMaxSize)
+	for {
+		got, rerr := io.ReadFull(r, chunk)
+		n += int64(got)
+		switch rerr {
+		case nil, io.ErrUnexpectedEOF, io.EOF: // a full or final chunk: compress it
+		default:
+			return n, rerr
+		}
+		if _, err = z.Write(chunk[:got]); err != nil {
+			return n, err
+		}
+		if rerr != nil {
+			return n, nil
+		}
+	}
+}
diff --git a/vendor/github.com/pierrec/xxHash/LICENSE b/vendor/github.com/pierrec/xxHash/LICENSE
new file mode 100644
index 0000000000..c1418f3f67
--- /dev/null
+++ b/vendor/github.com/pierrec/xxHash/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2014, Pierre Curto
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of xxHash nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/vendor/github.com/pierrec/xxHash/xxHash32/xxHash32.go b/vendor/github.com/pierrec/xxHash/xxHash32/xxHash32.go
new file mode 100644
index 0000000000..411504e4bb
--- /dev/null
+++ b/vendor/github.com/pierrec/xxHash/xxHash32/xxHash32.go
@@ -0,0 +1,205 @@
+// Package xxHash32 implements the very fast xxHash hashing algorithm (32 bits version).
+// (/~https://github.com/Cyan4973/xxHash/)
+package xxHash32
+
+import "hash"
+
+const (
+	prime32_1 = 2654435761 // stripe-round and byte-tail multiplier; also seeds v1 and v4
+	prime32_2 = 2246822519 // scales each stripe lane; seeds v1 and v2; avalanche multiplier
+	prime32_3 = 3266489917 // scales 4-byte tail words; avalanche multiplier
+	prime32_4 = 668265263  // multiplier applied after rotating a 4-byte tail word
+	prime32_5 = 374761393  // short-input offset and single-byte tail multiplier
+)
+
+type xxHash struct {
+	seed     uint32   // value given to New; Reset derives v1..v4 from it
+	v1       uint32   // accumulator for bytes 0-3 of each 16-byte stripe
+	v2       uint32   // accumulator for bytes 4-7 of each 16-byte stripe
+	v3       uint32   // accumulator for bytes 8-11 of each 16-byte stripe
+	v4       uint32   // accumulator for bytes 12-15 of each 16-byte stripe
+	totalLen uint64   // total number of bytes passed to Write
+	buf      [16]byte // input not yet folded into v1..v4
+	bufused  int      // number of valid bytes in buf
+}
+
+// New returns a hash.Hash32 that computes the hash with the given seed.
+func New(seed uint32) hash.Hash32 {
+	state := xxHash{seed: seed}
+	state.Reset()
+	return &state
+}
+
+// Sum appends the current hash to b, least significant byte first, leaving the hash state unchanged.
+func (xxh xxHash) Sum(b []byte) []byte {
+	h32 := xxh.Sum32()
+	le := [4]byte{byte(h32), byte(h32 >> 8), byte(h32 >> 16), byte(h32 >> 24)}
+	return append(b, le[:]...)
+}
+
+// Reset resets the Hash to its initial state: the accumulators are derived
+// again from the seed and the byte count and buffered input are dropped.
+func (xxh *xxHash) Reset() {
+	seed := xxh.seed
+	xxh.totalLen, xxh.bufused = 0, 0
+	// the additions and the subtraction wrap around modulo 2^32
+	xxh.v1, xxh.v2 = seed+prime32_1+prime32_2, seed+prime32_2
+	xxh.v3, xxh.v4 = seed, seed-prime32_1
+}
+
+// Size returns the number of bytes Sum appends to its argument, which is always 4.
+func (xxh *xxHash) Size() int {
+	return 4
+}
+
+// BlockSize returns 1: Write accepts input of any length and buffers incomplete stripes itself.
+func (xxh *xxHash) BlockSize() int {
+	return 1
+}
+
+// Write adds input bytes to the Hash: complete 16-byte stripes are folded into
+// v1..v4 and any remainder is kept in buf. It never returns an error.
+func (xxh *xxHash) Write(input []byte) (int, error) {
+	n := len(input)
+	m := xxh.bufused
+
+	xxh.totalLen += uint64(n)
+
+	r := len(xxh.buf) - m // free room left in buf
+	if n < r {
+		copy(xxh.buf[m:], input)
+		xxh.bufused += len(input)
+		return n, nil
+	}
+
+	p := 0
+	if m > 0 {
+		// some data left from previous update
+		copy(xxh.buf[xxh.bufused:], input[:r])
+		// buf now holds a full stripe; bufused is reset below once it is consumed
+
+		// fast rotl(13)
+		p32 := xxh.v1 + (uint32(xxh.buf[p+3])<<24|uint32(xxh.buf[p+2])<<16|uint32(xxh.buf[p+1])<<8|uint32(xxh.buf[p]))*prime32_2
+		xxh.v1 = (p32<<13 | p32>>19) * prime32_1
+		p += 4
+		p32 = xxh.v2 + (uint32(xxh.buf[p+3])<<24|uint32(xxh.buf[p+2])<<16|uint32(xxh.buf[p+1])<<8|uint32(xxh.buf[p]))*prime32_2
+		xxh.v2 = (p32<<13 | p32>>19) * prime32_1
+		p += 4
+		p32 = xxh.v3 + (uint32(xxh.buf[p+3])<<24|uint32(xxh.buf[p+2])<<16|uint32(xxh.buf[p+1])<<8|uint32(xxh.buf[p]))*prime32_2
+		xxh.v3 = (p32<<13 | p32>>19) * prime32_1
+		p += 4
+		p32 = xxh.v4 + (uint32(xxh.buf[p+3])<<24|uint32(xxh.buf[p+2])<<16|uint32(xxh.buf[p+1])<<8|uint32(xxh.buf[p]))*prime32_2
+		xxh.v4 = (p32<<13 | p32>>19) * prime32_1
+
+		p = r // the first r bytes of input went through buf
+		xxh.bufused = 0
+	}
+
+	for n := n - 16; p <= n; { // whole stripes taken directly from input
+		p32 := xxh.v1 + (uint32(input[p+3])<<24|uint32(input[p+2])<<16|uint32(input[p+1])<<8|uint32(input[p]))*prime32_2
+		xxh.v1 = (p32<<13 | p32>>19) * prime32_1
+		p += 4
+		p32 = xxh.v2 + (uint32(input[p+3])<<24|uint32(input[p+2])<<16|uint32(input[p+1])<<8|uint32(input[p]))*prime32_2
+		xxh.v2 = (p32<<13 | p32>>19) * prime32_1
+		p += 4
+		p32 = xxh.v3 + (uint32(input[p+3])<<24|uint32(input[p+2])<<16|uint32(input[p+1])<<8|uint32(input[p]))*prime32_2
+		xxh.v3 = (p32<<13 | p32>>19) * prime32_1
+		p += 4
+		p32 = xxh.v4 + (uint32(input[p+3])<<24|uint32(input[p+2])<<16|uint32(input[p+1])<<8|uint32(input[p]))*prime32_2
+		xxh.v4 = (p32<<13 | p32>>19) * prime32_1
+		p += 4
+	}
+
+	copy(xxh.buf[xxh.bufused:], input[p:]) // fewer than 16 bytes remain: keep them for later
+	xxh.bufused += len(input) - p
+
+	return n, nil
+}
+
+// Sum32 returns the 32 bits Hash value of the bytes written so far.
+func (xxh *xxHash) Sum32() uint32 {
+	h32 := uint32(xxh.totalLen)
+	if xxh.totalLen < 16 {
+		h32 += xxh.seed + prime32_5
+	} else {
+		h32 += (xxh.v1<<1 | xxh.v1>>31) +
+			(xxh.v2<<7 | xxh.v2>>25) +
+			(xxh.v3<<12 | xxh.v3>>20) +
+			(xxh.v4<<18 | xxh.v4>>14)
+	}
+
+	// fold in the buffered tail: whole 4-byte words first, then single bytes
+	tail := xxh.buf[:xxh.bufused]
+	for ; len(tail) >= 4; tail = tail[4:] {
+		h32 += (uint32(tail[3])<<24 | uint32(tail[2])<<16 | uint32(tail[1])<<8 | uint32(tail[0])) * prime32_3
+		h32 = (h32<<17 | h32>>15) * prime32_4
+	}
+	for _, b := range tail {
+		h32 += uint32(b) * prime32_5
+		h32 = (h32<<11 | h32>>21) * prime32_1
+	}
+
+	// final avalanche
+	h32 ^= h32 >> 15
+	h32 *= prime32_2
+	h32 ^= h32 >> 13
+	h32 *= prime32_3
+	h32 ^= h32 >> 16
+	return h32
+}
+
+// Checksum returns the 32bits Hash value of input for the given seed,
+// computed in a single pass over the slice.
+func Checksum(input []byte, seed uint32) uint32 {
+	h32 := uint32(len(input))
+
+	if len(input) >= 16 {
+		v1 := seed + prime32_1 + prime32_2
+		v2 := seed + prime32_2
+		v3 := seed
+		v4 := seed - prime32_1
+
+		// consume the input one 16-byte stripe at a time, 4 bytes per accumulator
+		for ; len(input) >= 16; input = input[16:] {
+			v1 += (uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0])) * prime32_2
+			v1 = (v1<<13 | v1>>19) * prime32_1
+
+			v2 += (uint32(input[7])<<24 | uint32(input[6])<<16 | uint32(input[5])<<8 | uint32(input[4])) * prime32_2
+			v2 = (v2<<13 | v2>>19) * prime32_1
+
+			v3 += (uint32(input[11])<<24 | uint32(input[10])<<16 | uint32(input[9])<<8 | uint32(input[8])) * prime32_2
+			v3 = (v3<<13 | v3>>19) * prime32_1
+
+			v4 += (uint32(input[15])<<24 | uint32(input[14])<<16 | uint32(input[13])<<8 | uint32(input[12])) * prime32_2
+			v4 = (v4<<13 | v4>>19) * prime32_1
+		}
+
+		h32 += (v1<<1 | v1>>31) +
+			(v2<<7 | v2>>25) +
+			(v3<<12 | v3>>20) +
+			(v4<<18 | v4>>14)
+	} else {
+		// shorter than one stripe: the accumulators are not used
+		h32 += seed + prime32_5
+	}
+
+	// remaining whole 4-byte words
+	for ; len(input) >= 4; input = input[4:] {
+		h32 += (uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0])) * prime32_3
+		h32 = (h32<<17 | h32>>15) * prime32_4
+	}
+	// remaining single bytes
+	for _, b := range input {
+		h32 += uint32(b) * prime32_5
+		h32 = (h32<<11 | h32>>21) * prime32_1
+	}
+
+	// final avalanche
+	h32 ^= h32 >> 15
+	h32 *= prime32_2
+	h32 ^= h32 >> 13
+	h32 *= prime32_3
+	h32 ^= h32 >> 16
+
+	return h32
+}