Skip to content

Commit

Permalink
Convert MappedRegion to MemoryRegion and make it more generic.
Browse files Browse the repository at this point in the history
Improve ReadAllToString to better handle /proc and /sys files and other file types which do not report correct sizes by using the standard technique of calling ::read until the return value is 0.

PiperOrigin-RevId: 715978234
Change-Id: I09c9681d5cb1b6140398c8b6d3748f051b321512
  • Loading branch information
laramiel authored and copybara-github committed Jan 15, 2025
1 parent 15f5dfd commit 5d64c23
Show file tree
Hide file tree
Showing 12 changed files with 289 additions and 115 deletions.
2 changes: 1 addition & 1 deletion tensorstore/internal/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,9 @@ tensorstore_cc_library(
name = "flat_cord_builder",
hdrs = ["flat_cord_builder.h"],
deps = [
"//tensorstore/internal/os:memory_region",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:cord",
],
)
Expand Down
64 changes: 26 additions & 38 deletions tensorstore/internal/flat_cord_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#ifndef TENSORSTORE_INTERNAL_FLAT_CORD_BUILDER_H_
#define TENSORSTORE_INTERNAL_FLAT_CORD_BUILDER_H_

#include <stddef.h>

#include <cstdlib>
#include <cstring>
#include <string_view>
Expand All @@ -23,7 +25,7 @@
#include "absl/base/optimization.h"
#include "absl/log/absl_check.h"
#include "absl/strings/cord.h"
#include "absl/strings/string_view.h"
#include "tensorstore/internal/os/memory_region.h"

namespace tensorstore {
namespace internal {
Expand All @@ -39,67 +41,53 @@ class FlatCordBuilder {
FlatCordBuilder() = default;
explicit FlatCordBuilder(size_t size) : FlatCordBuilder(size, size) {}

explicit FlatCordBuilder(internal_os::MemoryRegion region)
: region_(std::move(region)), inuse_(region_.size()) {}

FlatCordBuilder(size_t size, size_t inuse)
: data_(static_cast<char*>(::malloc(size))),
size_(size),
inuse_(inuse <= size ? inuse : size) {
ABSL_CHECK(size == 0 || data_);
: FlatCordBuilder(internal_os::AllocateHeapRegion(size), inuse) {}

FlatCordBuilder(internal_os::MemoryRegion region, size_t inuse)
: region_(std::move(region)), inuse_(inuse) {
ABSL_CHECK(inuse <= region_.size());
}

FlatCordBuilder(const FlatCordBuilder&) = delete;
FlatCordBuilder& operator=(const FlatCordBuilder&) = delete;
FlatCordBuilder(FlatCordBuilder&& other)
: data_(std::exchange(other.data_, nullptr)),
size_(std::exchange(other.size_, 0)),
: region_(std::move(other.region_)),
inuse_(std::exchange(other.inuse_, 0)) {}

FlatCordBuilder& operator=(FlatCordBuilder&& other) {
std::swap(data_, other.data_);
std::swap(size_, other.size_);
std::swap(inuse_, other.inuse_);
region_ = std::move(other.region_);
inuse_ = std::exchange(other.inuse_, 0);
return *this;
}
~FlatCordBuilder() {
if (data_) {
::free(data_);
}
}

const char* data() const { return data_; }
char* data() { return data_; }
size_t size() const { return size_; }
size_t available() const { return size_ - inuse_; }
~FlatCordBuilder() = default;

const char* data() const { return region_.data(); }
char* data() { return region_.data(); }
size_t size() const { return region_.size(); }
size_t available() const { return region_.size() - inuse_; }

void set_inuse(size_t size) {
ABSL_CHECK(size <= size_);
ABSL_CHECK(size <= region_.size());
inuse_ = size;
}

/// Append data to the builder.
void Append(absl::string_view sv) {
void Append(std::string_view sv) {
if (ABSL_PREDICT_FALSE(sv.empty())) return;
ABSL_CHECK(sv.size() <= available());
::memcpy(data_ + inuse_, sv.data(), sv.size());
::memcpy(region_.data() + inuse_, sv.data(), sv.size());
inuse_ += sv.size();
}

absl::Cord Build() && {
return absl::MakeCordFromExternal(release(), [](absl::string_view s) {
::free(const_cast<char*>(s.data()));
});
}
absl::Cord Build() && { return std::move(region_).as_cord(); }

private:
/// Releases ownership of the buffer. The caller must call `::free`.
absl::string_view release() {
absl::string_view view(data_, inuse_);
data_ = nullptr;
size_ = 0;
inuse_ = 0;
return view;
}

char* data_ = nullptr;
size_t size_ = 0;
internal_os::MemoryRegion region_;
size_t inuse_ = 0;
};

Expand Down
22 changes: 22 additions & 0 deletions tensorstore/internal/os/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ tensorstore_cc_library(
deps = [
":error_code",
":include_windows",
":memory_region",
":potentially_blocking_region",
":unique_handle",
":wstring", # build_cleaner: keep
Expand Down Expand Up @@ -278,3 +279,24 @@ tensorstore_cc_library(
"@com_google_absl//absl/strings",
],
)

tensorstore_cc_library(
name = "memory_region",
srcs = ["memory_region.cc"],
hdrs = ["memory_region.h"],
deps = [
"//tensorstore/util:result",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/log:absl_log",
"@com_google_absl//absl/strings:cord",
],
)

tensorstore_cc_test(
name = "memory_region_test",
srcs = ["memory_region_test.cc"],
deps = [
":memory_region",
"@com_google_googletest//:gtest_main",
],
)
52 changes: 40 additions & 12 deletions tensorstore/internal/os/file_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@

#include "tensorstore/internal/os/file_util.h"

#include <stddef.h>

#include <cassert>
#include <cstring>
#include <string>

#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "tensorstore/util/quote_string.h"
#include "tensorstore/util/result.h"
#include "tensorstore/util/status.h"

Expand All @@ -31,19 +32,46 @@ Result<std::string> ReadAllToString(const std::string& path) {

internal_os::FileInfo info;
TENSORSTORE_RETURN_IF_ERROR(internal_os::GetFileInfo(fd.get(), &info));
if (internal_os::GetSize(info) == 0) {
return absl::InternalError(
absl::StrCat("File ", QuoteString(path), " is empty"));

// Handle the case where the file is empty.
std::string result(internal_os::GetSize(info), 0);
if (result.empty()) {
result.resize(4096);
}
std::string result;
result.resize(internal_os::GetSize(info));

size_t offset = 0;
TENSORSTORE_ASSIGN_OR_RETURN(
auto read,
internal_os::ReadFromFile(fd.get(), result.data(), result.size(), 0));
if (read != result.size()) {
return absl::InternalError("Failed to read entire file");
internal_os::ReadFromFile(fd.get(), &result[0], result.size(), 0));
offset += read;

while (true) {
assert(offset <= result.size());
if ((result.size() - offset) < 4096) {
// In most cases the buffer will be at least as large as necessary and
// this read will return 0, avoiding the copy+resize.
char buffer[4096];
TENSORSTORE_ASSIGN_OR_RETURN(
read,
internal_os::ReadFromFile(fd.get(), buffer, sizeof(buffer), offset));
if (read > 0) {
// Amortized resize; double the size of the buffer.
if (read > result.size()) {
result.resize(read + result.size() * 2);
}
memcpy(&result[offset], buffer, read);
}
} else {
TENSORSTORE_ASSIGN_OR_RETURN(
read, internal_os::ReadFromFile(fd.get(), &result[offset],
result.size() - offset, offset));
}
if (read == 0) {
result.resize(offset);
return result;
}
offset += read;
}
return result;
}

} // namespace internal_os
Expand Down
40 changes: 2 additions & 38 deletions tensorstore/internal/os/file_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "absl/status/status.h"
#include "absl/strings/cord.h"
#include "absl/time/time.h"
#include "tensorstore/internal/os/memory_region.h"
#include "tensorstore/internal/os/unique_handle.h"
#include "tensorstore/util/result.h"

Expand Down Expand Up @@ -105,46 +106,9 @@ uint32_t GetDefaultPageSize();
/// \param offset Byte offset within file at which to start reading.
/// \param size Number of bytes to read, or 0 to read the entire file.
/// \returns The contents of the file or a failure absl::Status code.
Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
Result<MemoryRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
size_t size);

class MappedRegion {
public:
// ::munmap happens when the MappedRegion destructor runs.
~MappedRegion();

MappedRegion(const MappedRegion&) = delete;
MappedRegion& operator=(const MappedRegion&) = delete;

MappedRegion(MappedRegion&& other) { *this = std::move(other); }
MappedRegion& operator=(MappedRegion&& other) {
data_ = std::exchange(other.data_, nullptr);
size_ = std::exchange(other.size_, 0);
return *this;
}

std::string_view as_string_view() const {
return std::string_view(data_, size_);
}

absl::Cord as_cord() && {
std::string_view string_view = as_string_view();
data_ = nullptr;
size_ = 0;
return absl::MakeCordFromExternal(
string_view, [](auto s) { MappedRegion cleanup(s.data(), s.size()); });
}

private:
MappedRegion(const char* data, size_t size) : data_(data), size_(size) {}

friend Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t,
size_t);

const char* data_;
size_t size_;
};

/// --------------------------------------------------------------------------

// Restricted subset of POSIX open flags.
Expand Down
25 changes: 13 additions & 12 deletions tensorstore/internal/os/file_util_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include "tensorstore/internal/metrics/gauge.h"
#include "tensorstore/internal/metrics/metadata.h"
#include "tensorstore/internal/os/error_code.h"
#include "tensorstore/internal/os/memory_region.h"
#include "tensorstore/internal/os/potentially_blocking_region.h"
#include "tensorstore/internal/tracing/logged_trace_span.h"
#include "tensorstore/util/quote_string.h"
Expand Down Expand Up @@ -376,7 +377,16 @@ uint32_t GetDefaultPageSize() {
return kDefaultPageSize;
}

Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
namespace {
void UnmapFilePosix(char* data, size_t size) {
if (::munmap(data, size) != 0) {
ABSL_LOG(FATAL) << StatusFromOsError(errno, "Failed to unmap file");
}
mmap_active.Decrement();
}
} // namespace

Result<MemoryRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
size_t size) {
#if ABSL_HAVE_MMAP
LoggedTraceSpan tspan(__func__, detail_logging.Level(1),
Expand All @@ -402,7 +412,7 @@ Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
size = file_size - offset;
}
if (size == 0) {
return MappedRegion(nullptr, 0);
return MemoryRegion(nullptr, 0, UnmapFilePosix);
}
}

Expand All @@ -416,21 +426,12 @@ Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
mmap_count.Increment();
mmap_bytes.IncrementBy(size);
mmap_active.Increment();
return MappedRegion(static_cast<const char*>(address), size);
return MemoryRegion(static_cast<char*>(address), size, UnmapFilePosix);
#else
return absl::UnimplementedError("::mmap not supported");
#endif
}

MappedRegion::~MappedRegion() {
if (data_) {
if (::munmap(const_cast<char*>(data_), size_) != 0) {
ABSL_LOG(FATAL) << StatusFromOsError(errno, "Failed to unmap file");
}
mmap_active.Decrement();
}
}

absl::Status FsyncFile(FileDescriptor fd) {
LoggedTraceSpan tspan(__func__, detail_logging.Level(1), {{"fd", fd}});
PotentiallyBlockingRegion region;
Expand Down
25 changes: 13 additions & 12 deletions tensorstore/internal/os/file_util_win.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
#include "tensorstore/internal/metrics/gauge.h"
#include "tensorstore/internal/metrics/metadata.h"
#include "tensorstore/internal/os/error_code.h"
#include "tensorstore/internal/os/memory_region.h"
#include "tensorstore/internal/os/potentially_blocking_region.h"
#include "tensorstore/internal/os/wstring.h"
#include "tensorstore/internal/tracing/logged_trace_span.h"
Expand Down Expand Up @@ -464,7 +465,17 @@ uint32_t GetDefaultPageSize() {
return kDefaultPageSize;
}
Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
namespace {
void UnmapFileWin32(char* data, size_t size) {
if (!::UnmapViewOfFile(data)) {
ABSL_LOG(FATAL) << StatusFromOsError(::GetLastError(),
"Failed in UnmapViewOfFile");
}
mmap_active.Decrement();
}
} // namespace
Result<MemoryRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
size_t size) {
if (offset > 0 && offset % GetDefaultPageSize() != 0) {
return absl::InvalidArgumentError(
Expand Down Expand Up @@ -513,17 +524,7 @@ Result<MappedRegion> MemmapFileReadOnly(FileDescriptor fd, size_t offset,
mmap_count.Increment();
mmap_bytes.IncrementBy(size);
mmap_active.Increment();
return MappedRegion(static_cast<const char*>(address), size);
}
MappedRegion::~MappedRegion() {
if (data_) {
if (!::UnmapViewOfFile(const_cast<char*>(data_))) {
ABSL_LOG(FATAL) << StatusFromOsError(::GetLastError(),
"Failed in UnmapViewOfFile");
}
mmap_active.Decrement();
}
return MemoryRegion(static_cast<char*>(address), size, UnmapFileWin32);
}
absl::Status FsyncFile(FileDescriptor fd) {
Expand Down
Loading

0 comments on commit 5d64c23

Please sign in to comment.