-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
24 changed files
with
519 additions
and
213 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
/config.local | ||
/tmp | ||
/cache |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"$schema": "https://vega.github.io/schema/vega-lite/v4.json", | ||
"data": { | ||
"values": "<DVC_METRIC_DATA>" | ||
}, | ||
"title": "<DVC_METRIC_TITLE>", | ||
"mark": "rect", | ||
"encoding": { | ||
"x": { | ||
"field": "<DVC_METRIC_X>", | ||
"type": "nominal", | ||
"sort": "ascending", | ||
"title": "<DVC_METRIC_X_LABEL>" | ||
}, | ||
"y": { | ||
"field": "<DVC_METRIC_Y>", | ||
"type": "nominal", | ||
"sort": "ascending", | ||
"title": "<DVC_METRIC_Y_LABEL>" | ||
}, | ||
"color": { | ||
"aggregate": "count", | ||
"type": "quantitative" | ||
}, | ||
"facet": { | ||
"field": "rev", | ||
"type": "nominal" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
{ | ||
"$schema": "https://vega.github.io/schema/vega-lite/v4.json", | ||
"data": { | ||
"values": "<DVC_METRIC_DATA>" | ||
}, | ||
"title": "<DVC_METRIC_TITLE>", | ||
"mark": { | ||
"type": "line" | ||
}, | ||
"encoding": { | ||
"x": { | ||
"field": "<DVC_METRIC_X>", | ||
"type": "quantitative", | ||
"title": "<DVC_METRIC_X_LABEL>" | ||
}, | ||
"y": { | ||
"field": "<DVC_METRIC_Y>", | ||
"type": "quantitative", | ||
"title": "<DVC_METRIC_Y_LABEL>", | ||
"scale": { | ||
"zero": false | ||
} | ||
}, | ||
"color": { | ||
"field": "rev", | ||
"type": "nominal" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{ | ||
"$schema": "https://vega.github.io/schema/vega-lite/v4.json", | ||
"data": { | ||
"values": "<DVC_METRIC_DATA>" | ||
}, | ||
"title": "<DVC_METRIC_TITLE>", | ||
"mark": "point", | ||
"encoding": { | ||
"x": { | ||
"field": "<DVC_METRIC_X>", | ||
"type": "quantitative", | ||
"title": "<DVC_METRIC_X_LABEL>" | ||
}, | ||
"y": { | ||
"field": "<DVC_METRIC_Y>", | ||
"type": "quantitative", | ||
"title": "<DVC_METRIC_Y_LABEL>", | ||
"scale": { | ||
"zero": false | ||
} | ||
}, | ||
"color": { | ||
"field": "rev", | ||
"type": "nominal" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
{ | ||
"$schema": "https://vega.github.io/schema/vega-lite/v4.json", | ||
"data": { | ||
"values": "<DVC_METRIC_DATA>" | ||
}, | ||
"title": "<DVC_METRIC_TITLE>", | ||
"mark": { | ||
"type": "line" | ||
}, | ||
"encoding": { | ||
"x": { | ||
"field": "<DVC_METRIC_X>", | ||
"type": "quantitative", | ||
"title": "<DVC_METRIC_X_LABEL>" | ||
}, | ||
"y": { | ||
"field": "<DVC_METRIC_Y>", | ||
"type": "quantitative", | ||
"title": "<DVC_METRIC_Y_LABEL>", | ||
"scale": { | ||
"zero": false | ||
} | ||
}, | ||
"color": { | ||
"field": "rev", | ||
"type": "nominal" | ||
} | ||
}, | ||
"transform": [ | ||
{ | ||
"loess": "<DVC_METRIC_Y>", | ||
"on": "<DVC_METRIC_X>", | ||
"groupby": [ | ||
"rev" | ||
], | ||
"bandwidth": 0.3 | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Add patterns of files DVC should ignore; doing so can improve | ||
# performance. Learn more at | ||
# https://dvc.org/doc/user-guide/dvcignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
name: benchmarks | ||
on: [push] | ||
jobs: | ||
run: | ||
runs-on: [ubuntu-latest] | ||
container: docker://dvcorg/cml-py3:latest | ||
steps: | ||
- uses: actions/checkout@v2 | ||
- name: cml_run | ||
env: | ||
repo_token: ${{ secrets.GITHUB_TOKEN }} | ||
run: | | ||
# Your ML workflow goes here | ||
pip install -e .[tests] | ||
dvc repro | ||
git fetch --prune | ||
dvc metrics diff --show-json master > report.json | ||
python ./benchmarks/format.py report.json report.md | ||
cml-send-comment report.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import json | ||
import os | ||
import tempfile | ||
|
||
import nlp | ||
from nlp.arrow_writer import ArrowWriter | ||
from nlp.features import Array2D | ||
from utils import generate_examples, get_duration | ||
|
||
|
||
# Array shapes used to build DEFAULT_FEATURES below.
SHAPE_TEST_1 = (30, 487)
SHAPE_TEST_2 = (36, 1024)

# Array shape and number of generated examples used by the timing benchmark.
SPEED_TEST_SHAPE = (100, 100)
SPEED_TEST_N_EXAMPLES = 100

DEFAULT_FEATURES = nlp.Features(
    {
        "text": Array2D(SHAPE_TEST_1, dtype="float32"),
        "image": Array2D(SHAPE_TEST_2, dtype="float32"),
    }
)

# Timings are saved in a "results" directory next to this script, in a file
# named after the script with ".py" replaced by ".json".
RESULTS_BASEPATH, RESULTS_FILENAME = os.path.split(__file__)
RESULTS_FILE_PATH = os.path.join(
    RESULTS_BASEPATH,
    "results",
    RESULTS_FILENAME.replace(".py", ".json"),
)
|
||
|
||
@get_duration
def write(my_features, dummy_data, tmp_dir):
    """Encode each record of ``dummy_data`` and write it to ``beta.arrow``.

    Args:
        my_features: features object used to configure the writer and to
            encode every record through ``encode_example``.
        dummy_data: iterable of ``(key, record)`` pairs; the key is ignored.
        tmp_dir: directory in which ``beta.arrow`` is created.

    The function is wrapped by ``get_duration`` (imported from ``utils``), so
    callers presumably receive the elapsed time -- confirm in ``utils``.
    """
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    arrow_writer = ArrowWriter(features=my_features, path=arrow_path)
    for _key, raw_record in dummy_data:
        arrow_writer.write(my_features.encode_example(raw_record))
    # finalize() is unpacked as (example count, byte count); neither is used.
    _n_examples, _n_bytes = arrow_writer.finalize()
|
||
|
||
@get_duration
def read_unformated(feats, tmp_dir):
    """Iterate once over every example of ``beta.arrow`` with no format set.

    Args:
        feats: features passed to ``nlp.DatasetInfo`` when loading the file.
        tmp_dir: directory containing ``beta.arrow``.
    """
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    ds = nlp.Dataset.from_file(filename=arrow_path, info=nlp.DatasetInfo(features=feats))
    # The loop body is intentionally empty: only the iteration itself is exercised.
    for _example in ds:
        pass
|
||
|
||
@get_duration
def read_formatted_as_numpy(feats, tmp_dir):
    """Iterate once over every example of ``beta.arrow`` in "numpy" format.

    Args:
        feats: features passed to ``nlp.DatasetInfo`` when loading the file.
        tmp_dir: directory containing ``beta.arrow``.
    """
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    ds = nlp.Dataset.from_file(filename=arrow_path, info=nlp.DatasetInfo(features=feats))
    ds.set_format("numpy")
    # The loop body is intentionally empty: only the iteration itself is exercised.
    for _example in ds:
        pass
|
||
|
||
@get_duration
def read_batch_unformated(feats, tmp_dir):
    """Read ``beta.arrow`` in consecutive slices of 10 rows with no format set.

    Args:
        feats: features passed to ``nlp.DatasetInfo`` when loading the file.
        tmp_dir: directory containing ``beta.arrow``.
    """
    rows_per_batch = 10
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    ds = nlp.Dataset.from_file(filename=arrow_path, info=nlp.DatasetInfo(features=feats))
    for start in range(0, len(ds), rows_per_batch):
        stop = start + rows_per_batch
        _ = ds[start:stop]  # the slice is fetched and discarded
|
||
|
||
@get_duration
def read_batch_formatted_as_numpy(feats, tmp_dir):
    """Read ``beta.arrow`` in consecutive slices of 10 rows in "numpy" format.

    Args:
        feats: features passed to ``nlp.DatasetInfo`` when loading the file.
        tmp_dir: directory containing ``beta.arrow``.
    """
    rows_per_batch = 10
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    ds = nlp.Dataset.from_file(filename=arrow_path, info=nlp.DatasetInfo(features=feats))
    ds.set_format("numpy")
    for start in range(0, len(ds), rows_per_batch):
        stop = start + rows_per_batch
        _ = ds[start:stop]  # the slice is fetched and discarded
|
||
|
||
@get_duration
def read_col_unformated(feats, tmp_dir):
    """Fetch each column named in ``feats`` from ``beta.arrow`` with no format set.

    Args:
        feats: features passed to ``nlp.DatasetInfo``; iterating it provides
            the column names that are looked up.
        tmp_dir: directory containing ``beta.arrow``.
    """
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    ds = nlp.Dataset.from_file(filename=arrow_path, info=nlp.DatasetInfo(features=feats))
    for column_name in feats:
        _ = ds[column_name]  # the column is fetched and discarded
|
||
|
||
@get_duration
def read_col_formatted_as_numpy(feats, tmp_dir):
    """Fetch each column named in ``feats`` from ``beta.arrow`` in "numpy" format.

    Args:
        feats: features passed to ``nlp.DatasetInfo``; iterating it provides
            the column names that are looked up.
        tmp_dir: directory containing ``beta.arrow``.
    """
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    ds = nlp.Dataset.from_file(filename=arrow_path, info=nlp.DatasetInfo(features=feats))
    ds.set_format("numpy")
    for column_name in feats:
        _ = ds[column_name]  # the column is fetched and discarded
|
||
|
||
def benchmark_array_xd():
    """Run the write/read benchmarks for three layouts of the same 2D data.

    The layouts are an ``Array2D`` feature, a nested ``Sequence`` of
    ``Sequence`` and a single flattened ``Sequence``. For each layout the
    examples are generated, written with ``write`` and then read back with
    every read helper, each layout using its own temporary directory.

    Whatever the ``get_duration``-decorated helpers return (presumably the
    elapsed time -- confirm in ``utils``) is stored under the keys
    ``"write_<layout>"`` and ``"<read function name> after write_<layout>"``.
    The mapping is dumped as JSON to ``RESULTS_FILE_PATH``.
    """
    read_functions = (
        read_unformated,
        read_formatted_as_numpy,
        read_batch_unformated,
        read_batch_formatted_as_numpy,
        read_col_unformated,
        read_col_formatted_as_numpy,
    )
    flattened_length = SPEED_TEST_SHAPE[0] * SPEED_TEST_SHAPE[1]

    # Each scenario: (layout label, feature spec, extra generate_examples kwargs).
    # The two Sequence layouts deliberately do not declare a fixed length, to
    # keep the comparison with Array2D fair; the shape is only supplied to the
    # example generator through ``seq_shapes``.
    scenarios = (
        (
            "array2d",
            {"image": Array2D(SPEED_TEST_SHAPE, dtype="float32")},
            {},
        ),
        (
            "nested_sequence",
            {"image": nlp.Sequence(nlp.Sequence(nlp.Value("float32")))},
            {"seq_shapes": {"image": SPEED_TEST_SHAPE}},
        ),
        (
            "flattened_sequence",
            {"image": nlp.Sequence(nlp.Value("float32"))},
            {"seq_shapes": {"image": [flattened_length]}},
        ),
    )

    times = {}
    for label, feature_spec, generate_kwargs in scenarios:
        write_key = "write_" + label
        with tempfile.TemporaryDirectory() as tmp_dir:
            feats = nlp.Features(feature_spec)
            data = generate_examples(features=feats, num_examples=SPEED_TEST_N_EXAMPLES, **generate_kwargs)
            times[write_key] = write(feats, data, tmp_dir)
            for read_func in read_functions:
                times[read_func.__name__ + " after " + write_key] = read_func(feats, tmp_dir)

    # Create the results directory if needed so that a finished benchmark run
    # is not lost to a FileNotFoundError when the report is saved.
    os.makedirs(os.path.dirname(RESULTS_FILE_PATH), exist_ok=True)
    with open(RESULTS_FILE_PATH, "wb") as f:
        f.write(json.dumps(times).encode("utf-8"))
|
||
|
||
if __name__ == "__main__":
    # Direct-execution entry point; handy for running the benchmark under a profiler.
    benchmark_array_xd()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import json | ||
import os | ||
import tempfile | ||
|
||
import nlp | ||
from utils import generate_example_dataset, get_duration | ||
|
||
|
||
# Number of examples in the dataset generated for the benchmark.
SPEED_TEST_N_EXAMPLES = 500_000

# Timings are saved in a "results" directory next to this script, in a file
# named after the script with ".py" replaced by ".json".
RESULTS_BASEPATH, RESULTS_FILENAME = os.path.split(__file__)
RESULTS_FILE_PATH = os.path.join(
    RESULTS_BASEPATH,
    "results",
    RESULTS_FILENAME.replace(".py", ".json"),
)
|
||
|
||
@get_duration
def select(dataset: nlp.Dataset):
    """Call ``dataset.select`` on every second index (0, 2, 4, ...) and discard the result."""
    even_indices = range(0, len(dataset), 2)
    _ = dataset.select(even_indices)
|
||
|
||
@get_duration
def sort(dataset: nlp.Dataset):
    """Call ``dataset.sort`` on the "numbers" column and discard the result."""
    sort_column = "numbers"
    _ = dataset.sort(sort_column)
|
||
|
||
@get_duration
def shuffle(dataset: nlp.Dataset):
    """Call ``dataset.shuffle`` with its default arguments and discard the result."""
    # Only the call itself matters; the shuffled dataset is not kept.
    _shuffled = dataset.shuffle()
|
||
|
||
@get_duration
def train_test_split(dataset: nlp.Dataset):
    """Call ``dataset.train_test_split`` with ``0.1`` as its first argument and discard the result."""
    split_arg = 0.1
    _ = dataset.train_test_split(split_arg)
|
||
|
||
@get_duration
def shard(dataset: nlp.Dataset, num_shards=10):
    """Request every shard of ``dataset`` in turn and discard each result.

    Args:
        dataset: dataset whose ``shard`` method is called.
        num_shards: total number of shards; ``dataset.shard`` is called once
            for each index in ``range(num_shards)``.
    """
    for index in range(num_shards):
        _ = dataset.shard(num_shards, index)
|
||
|
||
def benchmark_indices_mapping():
    """Run the index-based operations on a generated dataset and save the results.

    A dataset with a string "text" column and a float32 "numbers" column is
    generated in a temporary directory, then ``select``, ``sort``, ``shuffle``,
    ``train_test_split`` and ``shard`` are each called on it once.

    Whatever the ``get_duration``-decorated helpers return (presumably the
    elapsed time -- confirm in ``utils``) is stored under the helper's
    ``__name__`` and the mapping is dumped as JSON to ``RESULTS_FILE_PATH``.
    """
    operations = (select, sort, shuffle, train_test_split, shard)
    features = nlp.Features({"text": nlp.Value("string"), "numbers": nlp.Value("float32")})

    times = {}
    with tempfile.TemporaryDirectory() as tmp_dir:
        arrow_path = os.path.join(tmp_dir, "dataset.arrow")
        dataset = generate_example_dataset(arrow_path, features, num_examples=SPEED_TEST_N_EXAMPLES)
        for func in operations:
            times[func.__name__] = func(dataset)

    # Create the results directory if needed so that a finished benchmark run
    # is not lost to a FileNotFoundError when the report is saved.
    os.makedirs(os.path.dirname(RESULTS_FILE_PATH), exist_ok=True)
    with open(RESULTS_FILE_PATH, "wb") as f:
        f.write(json.dumps(times).encode("utf-8"))
|
||
|
||
if __name__ == "__main__":
    # Direct-execution entry point; handy for running the benchmark under a profiler.
    benchmark_indices_mapping()
Oops, something went wrong.
3a830c3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Benchmark: benchmark_array_xd.json
Benchmark: benchmark_indices_mapping.json