Skip to content

Commit

Permalink
unikmer: add global option -L/--compression-level
Browse files Browse the repository at this point in the history
  • Loading branch information
shenwei356 committed Oct 27, 2018
1 parent 7f4b69c commit 12a656f
Show file tree
Hide file tree
Showing 17 changed files with 40 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
- v0.4.4
- `unikmer`: add global option `-L/--compression-level`.
- `unikmer diff`: reduce memory usage; speed is not affected.
- v0.4.3
- `unikmer diff`: fix bug of hanging when the first file has no Kmers.
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/concat.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Attentions:
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/count.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ var countCmd = &cobra.Command{
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
6 changes: 3 additions & 3 deletions unikmer/cmd/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ Tips:
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down Expand Up @@ -230,7 +230,7 @@ Tips:
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down Expand Up @@ -477,7 +477,7 @@ Tips:
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ var dumpCmd = &cobra.Command{
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/grep.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ var grepCmd = &cobra.Command{
log.Infof("finish reading Kmers from %s", file)
}

outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"))
outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"), opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
4 changes: 2 additions & 2 deletions unikmer/cmd/inter.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ var interCmd = &cobra.Command{
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down Expand Up @@ -260,7 +260,7 @@ var interCmd = &cobra.Command{
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/num.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ var numCmd = &cobra.Command{
outFile := getFlagString(cmd, "out-file")
showFile := getFlagBool(cmd, "file-name")

outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"))
outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"), opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 2 additions & 0 deletions unikmer/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"os"
"runtime"

"github.com/klauspost/compress/flate"
"github.com/spf13/cobra"
)

Expand Down Expand Up @@ -70,6 +71,7 @@ func init() {
RootCmd.PersistentFlags().IntP("threads", "j", defaultThreads, "number of CPUs to use. (default value: 1 for single-CPU PC, 2 for others)")
RootCmd.PersistentFlags().BoolP("verbose", "", false, "print verbose information")
RootCmd.PersistentFlags().BoolP("no-compress", "C", false, "do not compress binary file (not recommended)")
RootCmd.PersistentFlags().IntP("compression-level", "L", flate.DefaultCompression, "compression level")
RootCmd.PersistentFlags().BoolP("compact", "c", false, "write more compact binary file with little loss of speed")
RootCmd.PersistentFlags().StringP("infile-list", "i", "", "file of input files list (one file per line), if given, files from cli arguments are ignored")
}
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ Attentions:
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/sort.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var sortCmd = &cobra.Command{
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Tips:
checkError(fmt.Errorf("values of -/--symbol-true and -F/--symbol--false should no be the same"))
}

outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"))
outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"), opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/subset.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Attention:
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/union.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ Attentions:
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down
8 changes: 6 additions & 2 deletions unikmer/cmd/util-io.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
gzip "github.com/klauspost/pgzip"
)

func outStream(file string, gzipped bool) (*bufio.Writer, io.WriteCloser, *os.File, error) {
func outStream(file string, gzipped bool, level int) (*bufio.Writer, io.WriteCloser, *os.File, error) {
var w *os.File
if file == "-" {
w = os.Stdout
Expand All @@ -52,7 +52,11 @@ func outStream(file string, gzipped bool) (*bufio.Writer, io.WriteCloser, *os.Fi
}

if gzipped {
gw := gzip.NewWriter(w)
// gw := gzip.NewWriter(w)
gw, err := gzip.NewWriterLevel(w, level)
if err != nil {
return nil, nil, nil, fmt.Errorf("fail to write %s: %s", file, err)
}
return bufio.NewWriterSize(gw, os.Getpagesize()), gw, w, nil
}
return bufio.NewWriterSize(w, os.Getpagesize()), nil, w, nil
Expand Down
23 changes: 15 additions & 8 deletions unikmer/cmd/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
package cmd

import (
"compress/flate"
"fmt"
"io"
"sort"
Expand All @@ -41,19 +42,25 @@ const (

// Options contains the global flags
type Options struct {
NumCPUs int
Verbose bool
Compress bool
Compact bool
NumCPUs int
Verbose bool
Compress bool
Compact bool
CompressionLevel int
}

// getOptions reads the global flag values from cmd and returns them as an
// *Options. The "compression-level" flag is range-checked before the struct
// is built; an out-of-range value is handed to checkError as an error.
func getOptions(cmd *cobra.Command) *Options {
level := getFlagInt(cmd, "compression-level")
// Accept only levels between flate.HuffmanOnly and flate.BestCompression,
// inclusive.
if level < flate.HuffmanOnly || level > flate.BestCompression {
checkError(fmt.Errorf("gzip: invalid compression level: %d", level))
}
return &Options{
NumCPUs: getFlagPositiveInt(cmd, "threads"),
// NumCPUs: 1,
Verbose: getFlagBool(cmd, "verbose"),
Compress: !getFlagBool(cmd, "no-compress"),
Compact: getFlagBool(cmd, "compact"),
Verbose: getFlagBool(cmd, "verbose"),
// Compress is the inverse of the "no-compress" flag.
Compress: !getFlagBool(cmd, "no-compress"),
Compact: getFlagBool(cmd, "compact"),
CompressionLevel: level,
}
}

Expand Down Expand Up @@ -168,7 +175,7 @@ func sortUnikFile(opt Options, unique bool, file string, outFile string) (*unikm
if !isStdout(outFile) {
outFile += extDataFile
}
outfh, gw, w, err := outStream(outFile, opt.Compress)
outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
if err != nil {
return nil, 0, err
}
Expand Down
2 changes: 1 addition & 1 deletion unikmer/cmd/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ var viewCmd = &cobra.Command{
outFasta := getFlagBool(cmd, "fasta")
outFastq := getFlagBool(cmd, "fastq")

outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"))
outfh, gw, w, err := outStream(outFile, strings.HasSuffix(strings.ToLower(outFile), ".gz"), opt.CompressionLevel)
checkError(err)
defer func() {
outfh.Flush()
Expand Down

0 comments on commit 12a656f

Please sign in to comment.