Revert dmlc#5755
hcho3 committed Sep 22, 2020
1 parent 33d80ff commit a585255
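In short, as the hunks below show: in-place prediction adapters are once again passed by value through dmlc::any instead of as std::shared_ptr, and the GPU input paths regain their separate row-major/column-major copy routines (CopyDataRowMajor / CopyDataColumnMajor), selected via the adapters' IsRowMajor() flag.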
Showing 10 changed files with 212 additions and 66 deletions.
6 changes: 2 additions & 4 deletions src/c_api/c_api.cc
@@ -539,8 +539,7 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, float *values,
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
auto *learner = static_cast<xgboost::Learner *>(handle);

-  std::shared_ptr<xgboost::data::DenseAdapter> x{
-      new xgboost::data::DenseAdapter(values, n_rows, n_cols)};
+  auto x = xgboost::data::DenseAdapter(values, n_rows, n_cols);
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt);
@@ -571,8 +570,7 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle,
CHECK_EQ(cache_id, 0) << "Cache ID is not supported yet";
auto *learner = static_cast<xgboost::Learner *>(handle);

-  std::shared_ptr<xgboost::data::CSRAdapter> x{
-      new xgboost::data::CSRAdapter(indptr, indices, data, nindptr - 1, nelem, num_col)};
+  auto x = data::CSRAdapter(indptr, indices, data, nindptr - 1, nelem, num_col);
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt);
4 changes: 2 additions & 2 deletions src/c_api/c_api.cu
@@ -46,7 +46,7 @@ XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(BoosterHandle handle,
auto *learner = static_cast<Learner*>(handle);

std::string json_str{c_json_strs};
-  auto x = std::make_shared<data::CudfAdapter>(json_str);
+  auto x = data::CudfAdapter(json_str);
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt);
@@ -74,7 +74,7 @@ XGB_DLL int XGBoosterPredictFromArrayInterface(BoosterHandle handle,
auto *learner = static_cast<Learner*>(handle);

std::string json_str{c_json_strs};
-  auto x = std::make_shared<data::CupyAdapter>(json_str);
+  auto x = data::CupyAdapter(json_str);
HostDeviceVector<float>* p_predt { nullptr };
std::string type { c_type };
learner->InplacePredict(x, type, missing, &p_predt);
52 changes: 39 additions & 13 deletions src/data/device_adapter.cuh
@@ -34,29 +34,44 @@ struct IsValidFunctor : public thrust::unary_function<Entry, bool> {
};

class CudfAdapterBatch : public detail::NoMetaInfo {
-  friend class CudfAdapter;
-
public:
CudfAdapterBatch() = default;
-  CudfAdapterBatch(common::Span<ArrayInterface> columns, size_t num_rows)
+  CudfAdapterBatch(common::Span<ArrayInterface> columns,
+                   common::Span<size_t> column_ptr, size_t num_elements, size_t num_rows)
: columns_(columns),
+        column_ptr_(column_ptr),
+        num_elements_(num_elements),
num_rows_(num_rows) {}
-  size_t Size() const { return num_rows_ * columns_.size(); }
+  size_t Size() const { return num_elements_; }
__device__ COOTuple GetElement(size_t idx) const {
-    size_t column_idx = idx % columns_.size();
-    size_t row_idx = idx / columns_.size();
-    auto const& column = columns_[column_idx];
+    size_t column_idx =
+        thrust::upper_bound(thrust::seq, column_ptr_.begin(), column_ptr_.end(), idx)
+        - column_ptr_.begin() - 1;
+    auto& column = columns_[column_idx];
+    size_t row_idx = idx - column_ptr_[column_idx];
float value = column.valid.Data() == nullptr || column.valid.Check(row_idx)
? column.GetElement(row_idx)
: std::numeric_limits<float>::quiet_NaN();
return {row_idx, column_idx, value};
}
+  __device__ float GetValue(size_t ridx, bst_feature_t fidx) const {
+    auto const& column = columns_[fidx];
+    float value = column.valid.Data() == nullptr || column.valid.Check(ridx)
+                      ? column.GetElement(ridx)
+                      : std::numeric_limits<float>::quiet_NaN();
+    return value;
+  }

-  XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; }
+  XGBOOST_DEVICE bst_row_t NumRows() const { return num_elements_ / columns_.size(); }
XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); }

+  // Cudf is column major
+  bool IsRowMajor() { return false; }

private:
common::Span<ArrayInterface> columns_;
+  common::Span<size_t> column_ptr_;
+  size_t num_elements_;
size_t num_rows_;
};
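The GetElement change above swaps a row-major divide/modulo decode for a column-major one: column_ptr_ holds an exclusive scan of the column sizes, so the column owning a flat index is found by binary search and the row is the remainder. A minimal host-side sketch of the same decode, with std::upper_bound standing in for thrust::upper_bound and made-up sizes (three columns of four rows each):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Exclusive scan of column sizes, as CudfAdapter builds it below.
  std::vector<size_t> column_ptr{0, 4, 8, 12};
  size_t idx = 9;  // flat index into the column-major element stream
  // upper_bound finds the first offset greater than idx; the owning
  // column is the slot just before it.
  size_t column_idx =
      std::upper_bound(column_ptr.begin(), column_ptr.end(), idx) -
      column_ptr.begin() - 1;
  size_t row_idx = idx - column_ptr[column_idx];
  std::printf("idx %zu -> column %zu, row %zu\n", idx, column_idx, row_idx);
  return 0;  // prints: idx 9 -> column 2, row 1
}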

@@ -121,6 +136,7 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat();
CHECK_NE(typestr.front(), '>') << ArrayInterfaceErrors::BigEndian();
std::vector<ArrayInterface> columns;
+    std::vector<size_t> column_ptr({0});
auto first_column = ArrayInterface(get<Object const>(json_columns[0]));
num_rows_ = first_column.num_rows;
if (num_rows_ == 0) {
@@ -134,27 +150,31 @@ class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
auto column = ArrayInterface(get<Object const>(json_col));
columns.push_back(column);
CHECK_EQ(column.num_cols, 1);
+      column_ptr.emplace_back(column_ptr.back() + column.num_rows);
num_rows_ = std::max(num_rows_, size_t(column.num_rows));
CHECK_EQ(device_idx_, dh::CudaGetPointerDevice(column.data))
<< "All columns should use the same device.";
CHECK_EQ(num_rows_, column.num_rows)
<< "All columns should have same number of rows.";
}
columns_ = columns;
-    batch_ = CudfAdapterBatch(dh::ToSpan(columns_), num_rows_);
+    column_ptr_ = column_ptr;
+    batch_ = CudfAdapterBatch(dh::ToSpan(columns_), dh::ToSpan(column_ptr_),
+                              column_ptr.back(), num_rows_);
  }
-  const CudfAdapterBatch& Value() const override {
-    CHECK_EQ(batch_.columns_.data(), columns_.data().get());
-    return batch_;
-  }
+  const CudfAdapterBatch& Value() const override { return batch_; }

size_t NumRows() const { return num_rows_; }
size_t NumColumns() const { return columns_.size(); }
size_t DeviceIdx() const { return device_idx_; }

+  // Cudf is column major
+  bool IsRowMajor() { return false; }

private:
CudfAdapterBatch batch_;
dh::device_vector<ArrayInterface> columns_;
+  dh::device_vector<size_t> column_ptr_;  // Exclusive scan of column sizes
size_t num_rows_{0};
int device_idx_;
};
@@ -177,6 +197,9 @@ class CupyAdapterBatch : public detail::NoMetaInfo {
XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.num_rows; }
XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.num_cols; }

+  // Cupy is row major
+  bool IsRowMajor() { return true; }

private:
ArrayInterface array_interface_;
};
@@ -200,6 +223,9 @@ class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
size_t NumColumns() const { return array_interface_.num_cols; }
size_t DeviceIdx() const { return device_idx_; }

+  // Cupy is row major
+  bool IsRowMajor() { return true; }

private:
ArrayInterface array_interface_;
CupyAdapterBatch batch_;
53 changes: 51 additions & 2 deletions src/data/ellpack_page.cu
@@ -154,7 +154,7 @@ struct WriteCompressedEllpackFunctor {
// Here the data is already correctly ordered and simply needs to be compacted
// to remove missing data
template <typename AdapterBatchT>
-void CopyDataToEllpack(const AdapterBatchT& batch, EllpackPageImpl* dst,
+void CopyDataRowMajor(const AdapterBatchT& batch, EllpackPageImpl* dst,
int device_idx, float missing) {
// Some witchcraft happens here
// The goal is to copy valid elements out of the input to an ellpack matrix
@@ -209,6 +209,51 @@ void CopyDataToEllpack(const AdapterBatchT& batch, EllpackPageImpl* dst,
});
}
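CopyDataRowMajor relies on the invariant stated in its header comment: elements already arrive in the order the ellpack matrix stores them, so handling missing values reduces to a stream compaction. A standalone Thrust sketch of that idea (illustrative data; the real kernel also bins each value and writes compressed symbols, and its IsValidFunctor rejects NaNs as well):

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <cstdio>
#include <vector>

// Keep an element only if it is not the sentinel for "missing".
struct IsPresent {
  float missing;
  __host__ __device__ bool operator()(float v) const { return v != missing; }
};

int main() {
  std::vector<float> h{0.5f, -1.0f, 2.0f, -1.0f, 3.5f};
  thrust::device_vector<float> in(h.begin(), h.end());
  thrust::device_vector<float> out(in.size());
  auto out_end =
      thrust::copy_if(in.begin(), in.end(), out.begin(), IsPresent{-1.0f});
  // Three of the five values survive, with relative order preserved.
  std::printf("kept %d values\n", static_cast<int>(out_end - out.begin()));
  return 0;
}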

+template <typename AdapterBatchT>
+void CopyDataColumnMajor(const AdapterBatchT& batch, EllpackPageImpl* dst,
+                         int device_idx, float missing) {
+  // Step 1: Get the sizes of the input columns
+  dh::caching_device_vector<size_t> column_sizes(batch.NumCols(), 0);
+  auto d_column_sizes = column_sizes.data().get();
+  // Populate column sizes
+  dh::LaunchN(device_idx, batch.Size(), [=] __device__(size_t idx) {
+    const auto& e = batch.GetElement(idx);
+    atomicAdd(reinterpret_cast<unsigned long long*>(  // NOLINT
+                  &d_column_sizes[e.column_idx]),
+              static_cast<unsigned long long>(1));  // NOLINT
+  });
+
+  thrust::host_vector<size_t> host_column_sizes = column_sizes;
+
+  // Step 2: Iterate over columns, place elements in correct row, increment
+  // temporary row pointers
+  dh::caching_device_vector<size_t> temp_row_ptr(batch.NumRows(), 0);
+  auto d_temp_row_ptr = temp_row_ptr.data().get();
+  auto row_stride = dst->row_stride;
+  size_t begin = 0;
+  auto device_accessor = dst->GetDeviceAccessor(device_idx);
+  common::CompressedBufferWriter writer(device_accessor.NumSymbols());
+  auto d_compressed_buffer = dst->gidx_buffer.DevicePointer();
+  data::IsValidFunctor is_valid(missing);
+  for (auto size : host_column_sizes) {
+    size_t end = begin + size;
+    dh::LaunchN(device_idx, end - begin, [=] __device__(size_t idx) {
+      auto writer_non_const =
+          writer;  // For some reason this variable gets captured as const
+      const auto& e = batch.GetElement(idx + begin);
+      if (!is_valid(e)) return;
+      size_t output_position =
+          e.row_idx * row_stride + d_temp_row_ptr[e.row_idx];
+      auto bin_idx = device_accessor.SearchBin(e.value, e.column_idx);
+      writer_non_const.AtomicWriteSymbol(d_compressed_buffer, bin_idx,
+                                         output_position);
+      d_temp_row_ptr[e.row_idx] += 1;
+    });
+
+    begin = end;
+  }
+}
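CopyDataColumnMajor cannot rely on that ordering, so it runs two passes: Step 1 histograms the number of elements per column with one atomicAdd each, and Step 2 walks the columns launch by launch, advancing a per-row write cursor. A stripped-down standalone sketch of the Step 1 counting pass (illustrative kernel and data, not the xgboost code, which launches through dh::LaunchN and reads elements via the adapter batch):

#include <cuda_runtime.h>
#include <cstdio>

// One atomicAdd per element, keyed by the element's column index.
__global__ void CountColumnSizes(const int* column_idx, size_t n,
                                 unsigned long long* sizes) {
  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    atomicAdd(&sizes[column_idx[i]], static_cast<unsigned long long>(1));
  }
}

int main() {
  const int h_cols[6] = {0, 0, 1, 2, 2, 2};  // column of each input element
  int* d_cols = nullptr;
  unsigned long long* d_sizes = nullptr;
  cudaMalloc(&d_cols, sizeof(h_cols));
  cudaMalloc(&d_sizes, 3 * sizeof(unsigned long long));
  cudaMemcpy(d_cols, h_cols, sizeof(h_cols), cudaMemcpyHostToDevice);
  cudaMemset(d_sizes, 0, 3 * sizeof(unsigned long long));
  CountColumnSizes<<<1, 256>>>(d_cols, 6, d_sizes);
  unsigned long long h_sizes[3];
  cudaMemcpy(h_sizes, d_sizes, sizeof(h_sizes), cudaMemcpyDeviceToHost);
  std::printf("%llu %llu %llu\n", h_sizes[0], h_sizes[1], h_sizes[2]);  // 2 1 3
  cudaFree(d_cols);
  cudaFree(d_sizes);
  return 0;
}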

void WriteNullValues(EllpackPageImpl* dst, int device_idx,
common::Span<size_t> row_counts) {
// Write the null values
@@ -237,7 +282,11 @@ EllpackPageImpl::EllpackPageImpl(AdapterBatch batch, float missing, int device,
dh::safe_cuda(cudaSetDevice(device));

*this = EllpackPageImpl(device, cuts, is_dense, row_stride, n_rows);
-  CopyDataToEllpack(batch, this, device, missing);
+  if (batch.IsRowMajor()) {
+    CopyDataRowMajor(batch, this, device, missing);
+  } else {
+    CopyDataColumnMajor(batch, this, device, missing);
+  }
WriteNullValues(this, device, row_counts_span);
}

54 changes: 49 additions & 5 deletions src/data/simple_dmatrix.cu
@@ -35,12 +35,51 @@ void CountRowOffsets(const AdapterBatchT& batch, common::Span<bst_row_t> offset,
thrust::device_pointer_cast(offset.data()));
}

+template <typename AdapterT>
+void CopyDataColumnMajor(AdapterT* adapter, common::Span<Entry> data,
+                         int device_idx, float missing,
+                         common::Span<size_t> row_ptr) {
+  // Step 1: Get the sizes of the input columns
+  dh::device_vector<size_t> column_sizes(adapter->NumColumns());
+  auto d_column_sizes = column_sizes.data().get();
+  auto& batch = adapter->Value();
+  // Populate column sizes
+  dh::LaunchN(device_idx, batch.Size(), [=] __device__(size_t idx) {
+    const auto& e = batch.GetElement(idx);
+    atomicAdd(reinterpret_cast<unsigned long long*>(  // NOLINT
+                  &d_column_sizes[e.column_idx]),
+              static_cast<unsigned long long>(1));  // NOLINT
+  });
+
+  thrust::host_vector<size_t> host_column_sizes = column_sizes;
+
+  // Step 2: Iterate over columns, place elements in correct row, increment
+  // temporary row pointers
+  dh::device_vector<size_t> temp_row_ptr(
+      thrust::device_pointer_cast(row_ptr.data()),
+      thrust::device_pointer_cast(row_ptr.data() + row_ptr.size()));
+  auto d_temp_row_ptr = temp_row_ptr.data().get();
+  size_t begin = 0;
+  IsValidFunctor is_valid(missing);
+  for (auto size : host_column_sizes) {
+    size_t end = begin + size;
+    dh::LaunchN(device_idx, end - begin, [=] __device__(size_t idx) {
+      const auto& e = batch.GetElement(idx + begin);
+      if (!is_valid(e)) return;
+      data[d_temp_row_ptr[e.row_idx]] = Entry(e.column_idx, e.value);
+      d_temp_row_ptr[e.row_idx] += 1;
+    });
+
+    begin = end;
+  }
+}
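Step 2 here fills the CSR data array: temp_row_ptr starts as a copy of the row offsets and serves as a per-row write cursor. Launching one kernel per column is what makes the plain, non-atomic d_temp_row_ptr[e.row_idx] += 1 safe, since within a single column every row index occurs at most once; it also means each row receives its entries in ascending column order. A host-side mirror of the placement (illustrative types, a dense 2x2 example, validity filtering omitted):

#include <cstdio>
#include <utility>
#include <vector>

struct COOElem { size_t row_idx; size_t column_idx; float value; };

int main() {
  // Column-major element stream for a dense 2x2 matrix.
  std::vector<COOElem> batch{{0, 0, 1.f}, {1, 0, 2.f}, {0, 1, 3.f}, {1, 1, 4.f}};
  std::vector<size_t> row_ptr{0, 2, 4};  // CSR offsets from CountRowOffsets
  std::vector<size_t> cursor(row_ptr.begin(), row_ptr.end() - 1);  // write cursors
  std::vector<std::pair<size_t, float>> data(4);  // stands in for Entry{column, value}
  for (const auto& e : batch) {
    data[cursor[e.row_idx]++] = {e.column_idx, e.value};  // place, then advance
  }
  // Row 0 holds (0,1)(1,3) and row 1 holds (0,2)(1,4): contiguous per row and
  // sorted by column, because columns are visited in ascending order.
  for (size_t r = 0; r + 1 < row_ptr.size(); ++r) {
    for (size_t i = row_ptr[r]; i < row_ptr[r + 1]; ++i) {
      std::printf("row %zu: (%zu, %.0f)\n", r, data[i].first, data[i].second);
    }
  }
  return 0;
}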

// Here the data is already correctly ordered and simply needs to be compacted
// to remove missing data
template <typename AdapterT>
-void CopyDataToDMatrix(AdapterT* adapter, common::Span<Entry> data,
-                       int device_idx, float missing,
-                       common::Span<size_t> row_ptr) {
+void CopyDataRowMajor(AdapterT* adapter, common::Span<Entry> data,
+                      int device_idx, float missing,
+                      common::Span<size_t> row_ptr) {
auto& batch = adapter->Value();
auto transform_f = [=] __device__(size_t idx) {
const auto& e = batch.GetElement(idx);
@@ -77,8 +116,13 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread) {
CountRowOffsets(batch, s_offset, adapter->DeviceIdx(), missing);
info_.num_nonzero_ = sparse_page_.offset.HostVector().back();
sparse_page_.data.Resize(info_.num_nonzero_);
-  CopyDataToDMatrix(adapter, sparse_page_.data.DeviceSpan(),
-                    adapter->DeviceIdx(), missing, s_offset);
+  if (adapter->IsRowMajor()) {
+    CopyDataRowMajor(adapter, sparse_page_.data.DeviceSpan(),
+                     adapter->DeviceIdx(), missing, s_offset);
+  } else {
+    CopyDataColumnMajor(adapter, sparse_page_.data.DeviceSpan(),
+                        adapter->DeviceIdx(), missing, s_offset);
+  }

info_.num_col_ = adapter->NumColumns();
info_.num_row_ = adapter->NumRows();
14 changes: 7 additions & 7 deletions src/predictor/cpu_predictor.cc
@@ -269,30 +269,30 @@ class CPUPredictor : public Predictor {
PredictionCacheEntry *out_preds,
uint32_t tree_begin, uint32_t tree_end) const {
auto threads = omp_get_max_threads();
-    auto m = dmlc::get<std::shared_ptr<Adapter>>(x);
-    CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
+    auto m = dmlc::get<Adapter>(x);
+    CHECK_EQ(m.NumColumns(), model.learner_model_param->num_feature)
<< "Number of columns in data must equal to trained model.";
MetaInfo info;
-    info.num_col_ = m->NumColumns();
-    info.num_row_ = m->NumRows();
+    info.num_col_ = m.NumColumns();
+    info.num_row_ = m.NumRows();
this->InitOutPredictions(info, &(out_preds->predictions), model);
std::vector<Entry> workspace(info.num_col_ * 8 * threads);
auto &predictions = out_preds->predictions.HostVector();
std::vector<RegTree::FVec> thread_temp;
InitThreadTemp(threads, model.learner_model_param->num_feature, &thread_temp);
size_t constexpr kUnroll = 8;
PredictBatchKernel(AdapterView<Adapter, kUnroll>(
-                           m.get(), missing, common::Span<Entry>{workspace}),
+                           &m, missing, common::Span<Entry>{workspace}),
&predictions, model, tree_begin, tree_end, &thread_temp);
}

void InplacePredict(dmlc::any const &x, const gbm::GBTreeModel &model,
float missing, PredictionCacheEntry *out_preds,
uint32_t tree_begin, unsigned tree_end) const override {
-    if (x.type() == typeid(std::shared_ptr<data::DenseAdapter>)) {
+    if (x.type() == typeid(data::DenseAdapter)) {
this->DispatchedInplacePredict<data::DenseAdapter>(
x, model, missing, out_preds, tree_begin, tree_end);
-    } else if (x.type() == typeid(std::shared_ptr<data::CSRAdapter>)) {
+    } else if (x.type() == typeid(data::CSRAdapter)) {
this->DispatchedInplacePredict<data::CSRAdapter>(
x, model, missing, out_preds, tree_begin, tree_end);
} else {
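With the revert, the predictor again finds the adapter itself inside the dmlc::any, so both the typeid test and dmlc::get name the adapter type directly rather than a std::shared_ptr of it. A self-contained sketch of that dispatch pattern (DenseLike and CSRLike are made-up stand-ins; it needs only dmlc-core's dmlc/any.h):

#include <dmlc/any.h>
#include <cstdio>
#include <typeinfo>

struct DenseLike { int cols; };
struct CSRLike { int cols; };

// Value-based dispatch: the any holds the adapter by value and the
// handler recovers it with a typeid check plus dmlc::get.
void Dispatch(dmlc::any const& x) {
  if (x.type() == typeid(DenseLike)) {
    auto m = dmlc::get<DenseLike>(x);  // copies the small adapter out
    std::printf("dense adapter, %d cols\n", m.cols);
  } else if (x.type() == typeid(CSRLike)) {
    auto m = dmlc::get<CSRLike>(x);
    std::printf("csr adapter, %d cols\n", m.cols);
  }
}

int main() {
  dmlc::any dense = DenseLike{4};
  dmlc::any csr = CSRLike{8};
  Dispatch(dense);
  Dispatch(csr);
  return 0;
}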
