Skip to content

Commit

Permalink
[feat] new features for gzip/tar module:
Browse files Browse the repository at this point in the history
  - GzStreamFile:
    - decompress from a stream buffer
    - dump gzip index to a separate file

  - TarMeta:
    TarMeta is a modified version of the TarHeader structure that includes the
inner offset of each object in the tarball

Signed-off-by: Yifan Yuan <tuji.yyf@alibaba-inc.com>
  • Loading branch information
BigVan committed Dec 7, 2023
1 parent a955159 commit 5eb5903
Show file tree
Hide file tree
Showing 20 changed files with 635 additions and 190 deletions.
2 changes: 1 addition & 1 deletion CMake/Findphoton.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set(FETCHCONTENT_QUIET false)
FetchContent_Declare(
photon
GIT_REPOSITORY /~https://github.com/alibaba/PhotonLibOS.git
GIT_TAG v0.6.2
GIT_TAG v0.6.6
)

if(BUILD_TESTING)
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_library(overlaybd_image_lib
switch_file.cpp
bk_download.cpp
prefetch.cpp
tools/sha256file.cpp
)
target_include_directories(overlaybd_image_lib PUBLIC
${CURL_INCLUDE_DIRS}
Expand Down
38 changes: 1 addition & 37 deletions src/bk_download.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,50 +31,14 @@
#include <unistd.h>
#include "switch_file.h"
#include "image_file.h"
#include "tools/sha256file.h"

using namespace photon::fs;

static constexpr size_t ALIGNMENT = 4096;

namespace BKDL {

std::string sha256sum(const char *fn) {
constexpr size_t BUFFERSIZE = 65536;
int fd = open(fn, O_RDONLY | O_DIRECT);
if (fd < 0) {
LOG_ERROR("failed to open `", fn);
return "";
}
DEFER(close(fd););

struct stat stat;
if (::fstat(fd, &stat) < 0) {
LOG_ERROR("failed to stat `", fn);
return "";
}
SHA256_CTX ctx = {0};
SHA256_Init(&ctx);
__attribute__((aligned(ALIGNMENT))) char buffer[65536];
unsigned char sha[32];
ssize_t recv = 0;
for (off_t offset = 0; offset < stat.st_size; offset += BUFFERSIZE) {
recv = pread(fd, &buffer, BUFFERSIZE, offset);
if (recv < 0) {
LOG_ERROR("io error: `", fn);
return "";
}
if (SHA256_Update(&ctx, buffer, recv) < 0) {
LOG_ERROR("sha256 calculate error: `", fn);
return "";
}
}
SHA256_Final(sha, &ctx);
char res[SHA256_DIGEST_LENGTH * 2];
for (int i = 0; i < SHA256_DIGEST_LENGTH; i++)
sprintf(res + (i * 2), "%02x", sha[i]);
return "sha256:" + std::string(res, SHA256_DIGEST_LENGTH * 2);
}

bool check_downloaded(const std::string &dir) {
std::string fn = dir + "/" + COMMIT_FILE_NAME;
auto lfs = photon::fs::new_localfs_adaptor();
Expand Down
6 changes: 5 additions & 1 deletion src/overlaybd/gzindex/gzfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

#pragma once
#include "photon/fs/filesystem.h"
#include "gzfile_index.h"


extern photon::fs::IFile* new_gzfile(photon::fs::IFile* gzip_file, photon::fs::IFile* index, bool ownership = false);

//chunksize:
Expand All @@ -32,6 +35,7 @@ extern photon::fs::IFile* new_gzfile(photon::fs::IFile* gzip_file, photon::fs::I
//0: no compression
//1: best speed
//9: best compression
extern int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, off_t chunk_size=1048576, int dict_compress_algo=1, int dict_compress_level=6);
extern int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path,
off_t chunk_size=GZ_CHUNK_SIZE, int dict_compress_algo=GZ_DICT_COMPERSS_ALGO, int dict_compress_level=GZ_COMPRESS_LEVEL);

bool is_gzfile(photon::fs::IFile* file);
16 changes: 16 additions & 0 deletions src/overlaybd/gzindex/gzfile_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <zlib.h>
#include "photon/common/checksum/crc32c.h"
#include "photon/fs/filesystem.h"

#define GZ_CHUNK_SIZE 1048576
#define GZ_DICT_COMPERSS_ALGO 1
#define GZ_COMPRESS_LEVEL 6

#define WINSIZE 32768U
#define DEFLATE_BLOCK_UNCOMPRESS_MAX_SIZE 65536U
#define GZFILE_INDEX_MAGIC "ddgzidx"
Expand Down Expand Up @@ -76,3 +83,12 @@ struct IndexEntry {


typedef std::vector<struct IndexEntry *> INDEX;

struct IndexFilterRecorder;
IndexFilterRecorder *new_index_filter(IndexFileHeader *h, INDEX *index, photon::fs::IFile *save_as);

int init_index_header(photon::fs::IFile* src, IndexFileHeader &h, off_t span, int dict_compress_algo, int dict_compress_level);

int create_index_entry(z_stream strm, IndexFilterRecorder *filter, off_t en_pos, off_t de_pos, unsigned char *window);

int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFile *index_file);
74 changes: 47 additions & 27 deletions src/overlaybd/gzindex/gzip_index_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,25 @@ static int dict_compress(const IndexFileHeader& h,
return -1;
}

static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &index, photon::fs::IFile* index_file) {
IndexFilterRecorder filter(&h, &index, index_file);
/*
 * Offer the current inflate position to `filter` as a potential index entry.
 *
 * The filter is consulted only when strm.data_type has EACH_DEFLATE_BLOCK_BIT
 * set and LAST_DEFLATE_BLOCK_BIT clear; in every other case the call is a
 * no-op that returns 0.
 * NOTE(review): the low three bits (data_type & 7) are presumably zlib's
 * "unused bits in the last input byte" count -- confirm against the zlib
 * manual.
 *
 * strm     zlib stream state (taken by value; only read here)
 * filter   recorder that decides whether/how to store the entry
 * en_pos   position in the compressed stream
 * de_pos   position in the uncompressed stream
 * window   sliding-window buffer forwarded to the recorder
 *
 * Returns 0 on success or when nothing had to be recorded, -1 if
 * filter->record() returns non-zero.
 */
int create_index_entry(z_stream strm, IndexFilterRecorder *filter, off_t en_pos, off_t de_pos, unsigned char *window) {
    LOG_DEBUG("`",VALUE(strm.data_type));

    const bool block_bit_set = (strm.data_type & EACH_DEFLATE_BLOCK_BIT) != 0;
    const bool last_bit_set = (strm.data_type & LAST_DEFLATE_BLOCK_BIT) != 0;
    if (!block_bit_set || last_bit_set) {
        return 0;
    }

    const auto rc = filter->record(strm.data_type & 7, en_pos, de_pos, strm.avail_out, window);
    return (rc != 0) ? -1 : 0;
}

/*
 * Factory for IndexFilterRecorder, which is only forward-declared in the
 * header.
 *
 * The three arguments are passed straight to the IndexFilterRecorder
 * constructor. The object is allocated with `new` and nothing in this
 * function releases it, so the caller is expected to `delete` the result.
 */
IndexFilterRecorder* new_index_filter(IndexFileHeader *h, INDEX *index, photon::fs::IFile *save_as) {
    auto *recorder = new IndexFilterRecorder(h, index, save_as);
    return recorder;
}

static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &index, photon::fs::IFile* index_file) {
// IndexFilterRecorder filter(&h, &index, index_file);
auto filter = new IndexFilterRecorder(&h, &index, index_file);
DEFER(delete filter);
int32_t inbuf_size = WINSIZE;
unsigned char *inbuf = new unsigned char[inbuf_size];
DEFER(delete []inbuf);
Expand Down Expand Up @@ -216,7 +232,6 @@ static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &inde
strm.avail_out = WINSIZE;
strm.next_out = window;
}

ttin += strm.avail_in;
ttout += strm.avail_out;
ret = inflate(&strm, Z_BLOCK);
Expand All @@ -231,12 +246,10 @@ static int build_index(IndexFileHeader& h,photon::fs::IFile *gzfile, INDEX &inde
LOG_ERRNO_RETURN(0, -1, "Fail to inflate. ret:`", ret);
}
//TODO Here generate crc32 for uncompressed data block

if ((strm.data_type & EACH_DEFLATE_BLOCK_BIT) && !(strm.data_type & LAST_DEFLATE_BLOCK_BIT)) {
if (filter.record(strm.data_type & 7, ttin, ttout, strm.avail_out, window) != 0) {
LOG_ERRNO_RETURN(ret, -1, "Failed to add_index_entry");
}
if (create_index_entry(strm, filter, ttin, ttout, window) != 0){
LOG_ERRNO_RETURN(ret, -1, "Failed to add_index_entry");
}

} while (strm.avail_in != 0);
} while (ret != Z_STREAM_END);
return 0;
Expand All @@ -262,7 +275,7 @@ static int get_compressed_index(const IndexFileHeader& h, const INDEX& index, un
return zlib_compress(h.dict_compress_level, buf, index_len, out, out_len);
}

static int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFile *index_file) {
int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFile *index_file) {
int indx_cmpr_buf_len = index.size() * sizeof(IndexEntry) * 2 + 4096;
unsigned char *buf = new unsigned char[indx_cmpr_buf_len];
DEFER(delete []buf);
Expand Down Expand Up @@ -293,6 +306,28 @@ static int save_index_to_file(IndexFileHeader &h, INDEX& index, photon::fs::IFil
return 0;
}

/*
 * Populate `h` with a fresh index-file header for the gzip file `src`.
 *
 * src                  file whose size (from fstat) is stored in the header
 * h                    header to fill; zeroed first, then every field set
 * span                 stored as h.span
 * dict_compress_algo   stored as h.dict_compress_algo
 * dict_compress_level  stored as h.dict_compress_level
 *
 * Returns 0 on success, -1 if src->fstat() fails (in which case `h` is left
 * untouched).
 */
int init_index_header(photon::fs::IFile* src, IndexFileHeader &h, off_t span, int dict_compress_algo, int dict_compress_level) {
    // Stat the source before touching `h`, so a failure leaves it unmodified.
    struct stat src_stat;
    const int stat_rc = src->fstat(&src_stat);
    if (stat_rc != 0) {
        LOG_ERRNO_RETURN(0, -1, "Faild to gzip_file->fstat()");
    }

    memset(&h, 0, sizeof(h));

    // Identification and format version.
    strncpy(h.magic, "ddgzidx", sizeof(h.magic));
    h.major_version = 1;
    h.minor_version = 0;
    h.flag = 0;

    // Dictionary compression settings requested by the caller.
    h.dict_compress_algo = dict_compress_algo;
    h.dict_compress_level = dict_compress_level;

    // Geometry of the index and of the source file.
    h.index_size = sizeof(struct IndexEntry);
    h.span = span;
    h.window = WINSIZE;
    h.gzip_file_size = src_stat.st_size;
    memset(h.reserve, 0, sizeof(h.reserve));

    // h.index_start is the header's own size.
    h.index_start = sizeof(h);
    return 0;
}

//int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, off_t span, unsigned char dict_compress_algo) {
//int create_gz_index(photon::fs::IFile* gzip_file, off_t span, const char *index_file_path) {
int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, off_t span, int dict_compress_algo, int dict_compress_level) {
Expand All @@ -310,31 +345,16 @@ int create_gz_index(photon::fs::IFile* gzip_file, const char *index_file_path, o
LOG_ERRNO_RETURN(0, -1, "Span is too small, must be greater than 100, span:`", span);
}

struct stat sbuf;
if (gzip_file->fstat(&sbuf) != 0) {
LOG_ERRNO_RETURN(0, -1, "Faild to gzip_file->fstat()");
}

photon::fs::IFile *index_file = photon::fs::open_localfile_adaptor(index_file_path, O_RDWR | O_CREAT | O_TRUNC, 0644);
if (index_file == nullptr) {
LOG_ERROR_RETURN(0, -1, "Failed to open(`)", index_file_path);
}
DEFER(index_file->close());

IndexFileHeader h;
memset(&h, 0, sizeof(h));
strncpy(h.magic, "ddgzidx", sizeof(h.magic));
h.major_version =1;
h.minor_version =0;
h.dict_compress_algo = dict_compress_algo;
h.dict_compress_level = dict_compress_level;
h.flag=0;
h.index_size = sizeof(struct IndexEntry);
h.span = span;
h.window= WINSIZE;
h.gzip_file_size= sbuf.st_size;
memset(h.reserve, 0, sizeof(h.reserve));
h.index_start = sizeof(h);
if (init_index_header(gzip_file, h, span, dict_compress_algo, dict_compress_level) != 0) {
LOG_ERRNO_RETURN(0, -1, "init index header failed.");
}

INDEX index;
int ret = build_index(h, gzip_file, index, index_file);
Expand Down
3 changes: 2 additions & 1 deletion src/overlaybd/gzindex/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ link_directories($ENV{GTEST}/lib)

add_executable(gzindex_test test.cpp)
target_include_directories(gzindex_test PUBLIC ${PHOTON_INCLUDE_DIR})
target_link_libraries(gzindex_test gtest gtest_main gflags pthread photon_static gzindex_lib cache_lib)
target_link_libraries(gzindex_test gtest gtest_main gflags pthread photon_static
gzindex_lib gzip_lib cache_lib checksum_lib)

add_test(
NAME gzindex_test
Expand Down
Loading

0 comments on commit 5eb5903

Please sign in to comment.