Skip to content

Commit

Permalink
Adding testdata package.
Browse files Browse the repository at this point in the history
- Includes tooling to re-generate the testdata.
  • Loading branch information
sffc committed Oct 8, 2020
1 parent c08d490 commit 48beede
Show file tree
Hide file tree
Showing 40 changed files with 1,986 additions and 4 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ members = [
"components/locale",
"components/pluralrules",
"components/datetime",
"resources/testdata",
"utils/fixed-decimal",
]
9 changes: 6 additions & 3 deletions components/cldr-json-data-provider/src/cldr_paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@ use std::path::PathBuf;

/// Trait returning filesystem paths to CLDR JSON resource directories.
/// The fields should be Ok if present. They default to Err when not present.
pub trait CldrPaths {
pub trait CldrPaths: std::fmt::Debug {
/// Path to checkout of cldr-core:
/// <https://github.com/unicode-cldr/cldr-core>
fn cldr_core(&self) -> Result<PathBuf, Error>;

/// Path to checkout of cldr-dates:
/// <https://github.com/unicode-cldr/cldr-dates-full>
fn cldr_dates(&self) -> Result<PathBuf, Error>;
}

Expand All @@ -27,8 +32,6 @@ pub trait CldrPaths {
#[non_exhaustive]
#[derive(Debug, PartialEq)]
pub struct CldrPathsLocal {
/// Path to checkout of cldr-core:
/// <https://github.com/unicode-cldr/cldr-core>
pub cldr_core: Result<PathBuf, MissingSourceError>,
/// Path to checkout of cldr-dates:
/// <https://github.com/unicode-cldr/cldr-dates-full>
pub cldr_dates: Result<PathBuf, MissingSourceError>,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use std::path::PathBuf;
/// // Calling demo(&data_provider) will cause the data to actually get downloaded.
/// //demo(&data_provider);
/// ```
#[derive(Debug)]
pub struct CldrPathsDownload {
/// Directory where downloaded files are stored.
pub cache_dir: PathBuf,
Expand Down Expand Up @@ -90,6 +91,7 @@ impl CldrPathsDownload {
}
}

#[derive(Debug)]
pub struct CldrZipFileInfo {
/// The URL to the remote zip file
pub url: String,
Expand Down
2 changes: 2 additions & 0 deletions components/cldr-json-data-provider/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use std::path::{Path, PathBuf};

/// Helper function to open a file and return failures as a crate error.
pub fn open_reader(path: &Path) -> Result<BufReader<File>, Error> {
#[cfg(feature = "log")]
log::trace!("Reading: {:?}", path);
File::open(&path)
.map(BufReader::new)
.map_err(|e| Error::IoError(e, path.to_path_buf()))
Expand Down
1 change: 1 addition & 0 deletions components/cldr-json-data-provider/src/support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub(crate) trait DataKeySupport {
fn supports_key(data_key: &DataKey) -> Result<(), DataError>;
}

/// Crate-internal wrapper that stores an optional `T` behind an `RwLock`.
///
/// NOTE(review): the name suggests the inner value is built lazily on first
/// use; the code that fills it in is not visible here, so confirm against the
/// `impl` before relying on that.
#[derive(Debug)]
pub(crate) struct LazyCldrProvider<T> {
// The wrapped value, if one has been stored; access is guarded by the lock.
src: RwLock<Option<T>>,
}
Expand Down
1 change: 1 addition & 0 deletions components/cldr-json-data-provider/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::CldrPaths;
use icu_data_provider::iter::DataEntryCollection;
use icu_data_provider::prelude::*;

#[derive(Debug)]
pub struct CldrJsonDataProvider<'a, 'd> {
pub cldr_paths: &'a dyn CldrPaths,
plurals: LazyCldrProvider<PluralsProvider<'d>>,
Expand Down
2 changes: 1 addition & 1 deletion components/fs-data-provider/src/export/fs_exporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ impl DataExporter for FilesystemExporter {
let mut path_buf = self.root.clone();
path_buf.extend(req.data_key.get_components().iter());
path_buf.extend(req.data_entry.get_components().iter());
log::trace!("Initializing: {}", path_buf.to_string_lossy());
log::trace!("Writing: {}", req);
self.write_to_path(path_buf, obj)
}

Expand Down
2 changes: 2 additions & 0 deletions resources/testdata/.cargo/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[alias]
gen-testdata = "run --features=icu4x-gen-testdata --"
93 changes: 93 additions & 0 deletions resources/testdata/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
[package]
name = "icu-testdata"
description = "Test data for ICU4X, generated from CLDR."
version = "0.0.1"
authors = ["The ICU4X Project Developers"]
edition = "2018"
readme = "README.md"
repository = "https://github.com/unicode-org/icu4x"
license-file = "../../LICENSE"
categories = ["internationalization"]
include = [
"src/**/*",
"Cargo.toml",
"README.md"
]

# icu4x_testdata metadata: schema defined in ./src/metadata.rs
[package.metadata.icu4x_testdata]

# Locales to include in testdata. Keep this list somewhat short, but cover all features.
locales = [
# Belarusian:
# - Cyrillic script
# - Interesting plural rules
"be",

# Bangla:
# - Uses non-Latin numerals
"bn",

# Chakma:
# - High-coverage language that uses non-BMP code points
"ccp",

# English:
# - Widely understood language in software engineering
# - Includes regional variants to test similar-data fallbacks
"en",
"en-US-posix",
"en-ZA",

# Farsi:
# - Good example for RTL
"fa",

# French:
# - Often the first non-English locale to receive new data in CLDR
"fr",

# Root data
"und",

# Chinese:
# - The default "zh" uses Simplified Chinese
# - Also include Traditional Chinese to test script variants
"zh",
"zh-Hant",
]

# Git tag or sha1 for the CLDR data used to generate the testdata
gitref = "36.0.0"

[dependencies]
icu-data-provider = { path = "../../components/data-provider" }
icu-fs-data-provider = { path = "../../components/fs-data-provider" }
icu-locale = { path = "../../components/locale" }

# Dependencies for the "metadata" feature
serde = { version = "1.0", features = ["derive"], optional = true }
serde_json = { version = "1.0", optional = true }

# Dependencies for the binary
cargo_metadata = { version = "0.11.3", optional = true }
clap = { version = "2.33", optional = true }
icu-cldr-json-data-provider = { path = "../../components/cldr-json-data-provider", optional = true, features = ["download"] }
log = { version = "0.4", optional = true }
simple_logger = { version = "1.10", optional = true }

[features]
metadata = ["serde", "serde_json", "icu-locale/serde"]
icu4x-gen-testdata = [
"cargo_metadata",
"clap",
"icu-cldr-json-data-provider",
"log",
"simple_logger",
"icu-fs-data-provider/export",
"metadata",
]

[[bin]]
name = "icu4x-gen-testdata"
required-features = ["icu4x-gen-testdata"]
22 changes: 22 additions & 0 deletions resources/testdata/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
ICU4X Test Data
===============

This project contains data used for ICU4X unit tests. The data is based on a CLDR tag and a short list of locales that, together, cover a range of scenarios that are useful in unit testing.

The list of locales and the current CLDR tag can be found in [Cargo.toml](./Cargo.toml).

The output data can be found in the [data](./data/) subdirectory.

## Re-generating the data

From this directory, run:

```bash
$ cargo gen-testdata
```

To monitor the progress, run with `-v` or `-vv`:

```bash
$ cargo gen-testdata -vv
```
162 changes: 162 additions & 0 deletions resources/testdata/data/json/dates/gregory@1/be.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
{
"symbols": {
"months": {
"format": {
"abbreviated": [
"сту",
"лют",
"сак",
"кра",
"мая",
"чэр",
"ліп",
"жні",
"вер",
"кас",
"ліс",
"сне"
],
"narrow": [
"с",
"л",
"с",
"к",
"м",
"ч",
"л",
"ж",
"в",
"к",
"л",
"с"
],
"wide": [
"студзеня",
"лютага",
"сакавіка",
"красавіка",
"мая",
"чэрвеня",
"ліпеня",
"жніўня",
"верасня",
"кастрычніка",
"лістапада",
"снежня"
]
},
"stand_alone": {
"abbreviated": [
"сту",
"лют",
"сак",
"кра",
"май",
"чэр",
"ліп",
"жні",
"вер",
"кас",
"ліс",
"сне"
],
"wide": [
"студзень",
"люты",
"сакавік",
"красавік",
"май",
"чэрвень",
"ліпень",
"жнівень",
"верасень",
"кастрычнік",
"лістапад",
"снежань"
]
}
},
"weekdays": {
"format": {
"abbreviated": [
"нд",
"пн",
"аў",
"ср",
"чц",
"пт",
"сб"
],
"narrow": [
"н",
"п",
"а",
"с",
"ч",
"п",
"с"
],
"short": [
"нд",
"пн",
"аў",
"ср",
"чц",
"пт",
"сб"
],
"wide": [
"нядзеля",
"панядзелак",
"аўторак",
"серада",
"чацвер",
"пятніца",
"субота"
]
}
},
"day_periods": {
"format": {
"abbreviated": {
"am": "AM",
"pm": "PM"
},
"narrow": {
"am": "am",
"pm": "pm"
},
"wide": {
"am": "AM",
"pm": "PM"
}
},
"stand_alone": {
"narrow": {
"am": "AM",
"pm": "PM"
}
}
}
},
"patterns": {
"date": {
"full": "EEEE, d MMMM y 'г'.",
"long": "d MMMM y 'г'.",
"medium": "d.MM.y",
"short": "d.MM.yy"
},
"time": {
"full": "HH:mm:ss, zzzz",
"long": "HH:mm:ss z",
"medium": "HH:mm:ss",
"short": "HH:mm"
},
"date_time": {
"full": "{1} 'у' {0}",
"long": "{1} 'у' {0}",
"medium": "{1}, {0}",
"short": "{1}, {0}"
}
}
}
Loading

0 comments on commit 48beede

Please sign in to comment.