diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..363262c --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,20 @@ +# Release notes + +## 0.2.1 (2014-12-20) + +- changed type checking keyword argument names: `opt` -> `or_none` + and `nodups` -> `unique` +- improved error messages for constructing Structs +- significant updates to readme and examples +- using `opt=True` on `TypedField` no longer implies that `None` is + the default value +- made mixin version of `checktype()` and `checktype_seq()` +- added `check()` and `normalize()` hooks to `TypedField` +- accessing fields descriptors from classes is now permissible +- added support for default values in general, and optional values + for type-checked fields +- fixed `__repr__()` on recursive Structs + +## 0.2.0 (2014-12-15) + +- initial release diff --git a/README.md b/README.md index 14a284a..c2c1b2b 100644 --- a/README.md +++ b/README.md @@ -2,16 +2,17 @@ *(Supports Python 3.3 and up)* -This is a small utility for making it easier to create "struct" classes -in Python without writing boilerplate code. Structs are similar to the -standard library's `collections.namedtuple` but are more flexible, -relying on an inheritance-based approach instead of `eval()`ing a code -template. +This small library makes it easier to create "struct" classes in Python +without writing boilerplate code. Structs are similar to the standard +library's [`collections.namedtuple`][1] but are more flexible, relying on an +inheritance-based approach instead of `eval()`ing a code template. If +you like using `namedtuple` classes but wish they were more composable +and extensible, this project is for you. ## Example Writing struct classes by hand is tedious and error prone. Consider a -simple Point2D class. The bare minimum we can write is +simple point class. 
The bare minimum we can write in Python is ```python class Point2D: @@ -20,8 +21,8 @@ class Point2D: self.y = y ``` -but for it to be of any use, we'll need structural equality semantics -and perhaps some pretty printing for debugging. +We'll likely want to compare points for equality and pretty-print them +for debugging. ```python class Point2D: @@ -29,76 +30,158 @@ class Point2D: self.x = x self.y = y def __repr__(self): - print('Point2D({}, {})'.format(self.x, self.y)) - __str__ = __repr__ + # Separate __str__() would be nice too + return 'Point2D({!r}, {!r})'.format(self.x, self.y) def __eq__(self, other): - # Nevermind type-checking and subtyping. + # Should check other's type too return self.x == other.x and self.y == other.y def __hash__(self): + # Required because we're overriding __eq__(). return hash(self.x) ^ hash(self.y) ``` -If you're the sort of heathen who likes to use dynamic type checks -in Python code, you'll want to add extra argument checking to the -constructor. And we'll probably want to disallow inadvertently -reassigning to x and y after construction, or else the hash value -could become inconsistent -- a big problem if the point is stored -in a hash-based collection. +Already the code is becoming pretty verbose for such a simple concept. +Worse, it violates the [DRY principle](http://en.wikipedia.org/wiki/Don%27t_repeat_yourself) +in that the `x` and `y` fields appear many times. This isn't very +robust. If we decide to turn this into a `Point3D` class, we'll have +to upgrade each method to accommodate a new z coordinate. We could be +in for an infuriating bug if we forget to update `__eq__()` or +`__hash__()`. Adding more utilities like a copy/replace method will +exacerbate the situation. -Even if we do all that, the code isn't robust to change. If we decide -to make this a Point3D class, we'll have to update each method to -accommodate the new z coordinate. One oversight and we're in for a -potentially hard-to-find bug. 
+Then there's the added code for consistency checking. Maybe you're the +sort of heathen who prefers dynamic type checking over blindly trusting +Mama Ducktype. Or maybe you want to disallow overwriting `x` and `y` so +as to avoid changing its hash value. Either way you'd need to use +descriptors or properties to intercept writes. -`namedtuple` takes care of many of these problems, but it's not -extensible. You can't easily derive a new class from a namedtuple -class without implementing much of this boilerplate. It also forces -immutability, which may be inappropriate for your use case. +SimpleStruct provides a simple alternative. Here is a `Point2D` class +that provides everything described above. -SimpleStruct provides a simple alternative. For the above case, -we just write +```python +from numbers import Number # standard library abstract base class +from simplestruct import Struct, Field, TypedField + +class Point2D(Struct): + # Note that field declaration order matters. + x = TypedField(Number) + y = TypedField(Number) +``` + +Of course, customizations are possible. Type checking is by no means +required, objects may be mutable so long as they are not hashed, +and you can add your own non-Field attributes and properties. 
- from simplestruct import Struct, Field +```python +class Point2D(Struct): + _immutable = False + x = Field + y = Field - class Point2D(Struct): - x = Field - y = Field + # magnitude won't be considered when hashing or testing equality + @property + def magnitude(self): + return (self.x**2 + self.y**2) ** .5 +``` + +For more usage examples, see the sample files: + +File | Purpose +---|--- +[point.py](examples/point.py) | introduction, basic use +[typed.py](examples/typed.py) | type-checked fields +[vector.py](examples/vector.py) | advanced features +[abstract.py](examples/abstract.py) | mixing structs and metaclasses + +## Comparison and feature matrix + +The most important problems mentioned above are solved by using +`namedtuple`, but this approach begins to break down when you +start to customize classes. To add a property to a `namedtuple`, +you must define a subclass: + +```python +BasePerson = namedtuple('BasePerson', 'fname lname age') +class Person(BasePerson): + @property + def full_name(self): + return self.fname + ' ' + self.lname +``` -## Feature matrix +If on the other hand you want to extend an existing `namedtuple` with +new fields, it's a bit harder. You need to regenerate (not inherit) +the boilerplate methods so they recognize the new fields. This can be +done using multiple inheritance: + +```python +BaseEmployee = namedtuple('BaseEmployee', Person._fields + ('salary',)) +class Employee(BaseEmployee, Person): + pass +``` + +Implementation-wise, `namedtuple` works by dynamically evaluating +a templated class definition based on the built-in `tuple` type. +This gives it a speed advantage, but is also the main reason why +it is less extensible (and unable to handle mutable values). + +In contrast, SimpleStruct is based on metaclasses, descriptors, and +dynamic dispatch. The below matrix summarizes the feature comparison. Feature | Avoids boilerplate for | Supported by `namedtuple`?
---|:---:|:---: -construction | `__init__()` | ✓ -extra attributes on self | | ✗ +easy construction | `__init__()` | ✓ +extra attributes on self | | subclasses only pretty printing | `__str()__`, `__repr()__` | ✓ structural equality | `__eq__()` | ✓ -inheritance | | ✗ +easy inheritance | | ✗ optional mutability | | ✗ hashing (if immutable) | `__hash__()` | ✓ pickling / deep-copying | | ✓ tuple decomposition | `__len__`, `__iter__` | ✓ -optional type checking | | ✗ +optional type checking | `__init__()`, `@property` | ✗ +`_asdict()` / `_replace()` | | ✓ + +[MacroPy][2]'s case classes provide similar functionality, but are +implemented in a very different way. Instead of metaclass hacking +or source code templating, MacroPy relies on syntactic transformation +of the module's AST. This allows for a syntax that's very different +from what we've seen above. So different, in fact, that we might view +MacroPy as an extension to the Python language rather than as just +a library. MacroPy case classes are subject to limitations on +inheritance and class members. + +## Installation ## -The `_asdict()` and `_replace()` methods from `namedtuple` are also -provided. +As with most Python packages, SimpleStruct is available on PyPI: -One advantage that `namedtuple` does have is speed. It is based on -the built-in Python tuple type, whereas SimpleStruct has the added -overhead of descriptor function calls. +``` +python -m pip install simplestruct +``` + +Or grab a development version if you're so inclined: + +``` +python -m pip install https://github.com/brandjon/simplestruct/tree/tarball/develop +``` +Python 3.3 and 3.4 are supported. There are no additional dependencies. -## To use ### +## Developers ## -See the `examples/` directory. +Tests can be run with `python setup.py test`, or alternatively by +installing [Tox](http://testrun.org/tox/latest/) and running +`python -m tox` in the project root. Tox has the advantage of automatically +testing under both Python 3.3 and 3.4.
Building a source distribution +(`python setup.py sdist`) requires the setuptools extension package +[setuptools-git](https://github.com/wichert/setuptools-git). +## References ## -## TODO ### +[1]: https://docs.python.org/3/library/collections.html#collections.namedtuple +[[1]] The standard library's `namedtuple` feature -Features TODO: -- add support for `__slots__` -- make exceptions appear to be raised from the stack frame of user code - where the type error occurred, rather than inside this library (with - a flag to disable, for debugging) +[2]: https://github.com/lihaoyi/macropy#case-classes +[[2]] Li Haoyi's case classes (part of MacroPy) -Packaging TODO: -- fix up setup.py, make installable +[3]: http://harts.net/reece/2013/06/02/using-namedtuples-with-method-and-instance-variable-inheritance/ +[[3]] Reece Hart's blog post on inheriting from `namedtuple` diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..5ab9543 --- /dev/null +++ b/TODO.md @@ -0,0 +1,5 @@ +# Wishlist # +- add support for `__slots__` +- make exceptions appear to be raised from the stack frame of user code + where the type error occurred, rather than inside this library (with + a flag to disable, for debugging) diff --git a/examples/abstract.py b/examples/abstract.py index 09fdd34..0a4fd6d 100644 --- a/examples/abstract.py +++ b/examples/abstract.py @@ -3,25 +3,26 @@ from abc import ABCMeta, abstractmethod from simplestruct import Struct, Field, MetaStruct + +# A simple ABC. Subclasses must provide an override for foo(). class Abstract(metaclass=ABCMeta): @abstractmethod def foo(self): pass -# If we ran this code -# -# class Concrete(Abstract, Struct): -# f = Field -# def foo(self): -# return self.f ** 2 -# -# we would get the following error: -# -# TypeError: metaclass conflict: the metaclass of a derived class -# must be a (non-strict) subclass of the metaclasses of all its bases -# -# So let's make a trivial subclass of ABCMeta and MetaStruct.
+# ABCs rely on a metaclass that conflicts with Struct's metaclass. +try: + class Concrete(Abstract, Struct): + f = Field + def foo(self): + return self.f ** 2 + +except TypeError as e: + print(e) + # metaclass conflict: the metaclass of a derived class must + # be a (non-strict) subclass of the metaclasses of all its bases +# So let's make a trivial subclass of ABCMeta and MetaStruct. class ABCMetaStruct(MetaStruct, ABCMeta): pass @@ -33,13 +34,13 @@ def foo(self): c = Concrete(5) print(c.foo()) # 25 -# For convenience we can also do +# For convenience we can make a version of Struct that +# incorporates the common metaclass. class ABCStruct(Struct, metaclass=ABCMetaStruct): pass -# and then - +# Now we only have to do: class Concrete(Abstract, ABCStruct): f = Field def foo(self): diff --git a/examples/point.py b/examples/point.py index 7bc22db..d54aeeb 100644 --- a/examples/point.py +++ b/examples/point.py @@ -1,53 +1,107 @@ -"""Illustrates the use of Struct classes and their differences -from normal classes. +"""Illustrates the use of Struct classes, and their differences +from normal classes and namedtuples. """ -from simplestruct import Struct, Field, TypedField +from collections import namedtuple +from simplestruct import Struct, Field + + +############################################################################### +# Definition # +############################################################################### # Standard Python class. -class PointA: +class PyPoint: def __init__(self, x, y): self.x = x self.y = y # Struct class. -class PointB(Struct): +class SPoint(Struct): # Field declaration order matters. - x = Field + x = Field # shorthand for "x = Field()" y = Field # The constructor is implicitly defined. -# Initialization is the same for both. -# Keywords, *args, and *kargs are allowed. 
-pa1 = PointA(1, 2) -pa2 = PointA(1, y=2) -pb1 = PointB(*[1, 2]) -pb2 = PointB(**{'x': 1, 'y': 2}) +# namedtuple class +NTPoint = namedtuple('NTPoint', 'x y') + + +############################################################################### +# Construction and pretty-printing # +############################################################################### -# Structs have pretty-printing. +# Initialization is the same for all three classes. +py_point = PyPoint(1, 2) +struct_point = SPoint(1, 2) +tuple_point = NTPoint(1, 2) + +# Structs and namedtuples both have pretty-printing. print('==== Printing ====') -print(pa1) # <__main__.PointA object at ...> -print(pb1) # PointB(x=1, y=2) +print(py_point) # <__main__.PyPoint object at ...> +print(struct_point) # SPoint(x=1, y=2) +print(tuple_point) # NTPoint(x=1, y=2) + +# Structs print their contents using whichever formatting method +# was called originally. namedtuples always use repr. +struct_point2 = SPoint('a', 'b') +tuple_point2 = NTPoint('a', 'b') +print(str(struct_point2)) # SPoint(x=a, y=b) +print(repr(struct_point2)) # SPoint(x='a', y='b') +print(str(tuple_point2)) # NTPoint(x='a', y='b') +print(repr(tuple_point2)) # NTPoint(x='a', y='b') + +# All three classes can also be constructed using +# keywords, *args, and **kargs. +py_point2 = PyPoint(1, y=2) +struct_point2 = SPoint(*[1, 2]) +tuple_point2 = NTPoint(**{'x': 1, 'y': 2}) -# Structs have structural equality (for like-typed objects)... + +############################################################################### +# Equality and hashing # +############################################################################### + +# Structs and namedtuples both have structural equality. print('\n==== Equality ====') -print(pa1 == pa2) # False -print(pb1 == pb2) # True -print(pa1 == pb1) # False +print(py_point == py_point2) # False +print(struct_point == struct_point2) # True +print(tuple_point == tuple_point2) # True -# ... with a corresponding hash function.
+# Structs, unlike namedtuple, are only equal to other +# instances of the same class. +class OtherSPoint(Struct): + x, y = Field, Field +OtherNTPoint = namedtuple('OtherNTPoint', 'x y') +struct_point2 = OtherSPoint(1, 2) +tuple_point2 = OtherNTPoint(1, 2) +print(struct_point == struct_point2) # False +print(tuple_point == tuple_point2) # True + +# Structs and namedtuples have hash functions based on +# structural value. print('\n==== Hashing ====') -print((hash(pa1) == hash(pa2))) # False (almost certainly) -print((hash(pb1) == hash(pb2))) # True - -# Struct with typed fields. -class TypedPoint(Struct): - x = TypedField(int) - y = TypedField(int) - -print('\n==== Type checking ====') -tp1 = TypedPoint(1, 2) -try: - tp2 = TypedPoint(1, 'b') -except TypeError: - print('Exception') +print(hash(py_point) == hash(py_point2)) # False (almost certainly) +print(hash(struct_point) == hash(struct_point)) # True +print(hash(tuple_point) == hash(tuple_point2)) # True + + +############################################################################### +# Other features # +############################################################################### + +# Structs implement some of the same convenience methods as namedtuples. +print('\n==== Convenience methods ====') +print(struct_point._asdict()) # OrderedDict([('x', 1), ('y', 2)]) +print(tuple_point._asdict()) # OrderedDict([('x', 1), ('y', 2)]) +print(struct_point._replace(x=3)) # SPoint(x=3, y=2) +print(tuple_point._replace(x=3)) # NTPoint(x=3, y=2) +# Note that _replace() creates a copy without modifying the original object. + +# Both can be iterated over and decomposed into their components. 
+print(len(struct_point)) # 2 +x, y = struct_point +print((x, y)) # (1, 2) +print(len(tuple_point)) # 2 +x, y = tuple_point +print((x, y)) # (1, 2) diff --git a/examples/typed.py b/examples/typed.py new file mode 100644 index 0000000..23b1bf4 --- /dev/null +++ b/examples/typed.py @@ -0,0 +1,110 @@ +"""Shows use of type-checked fields.""" + +from numbers import Number +from simplestruct import Struct, Field, TypedField + + +# The standard abstract base class Number is handy because it +# lets us restrict TypedPoint to any of int, float, complex, etc. + +class TypedPoint(Struct): + x = TypedField(Number) + y = TypedField(Number) + +p = TypedPoint(1, 2.0) +try: + p = TypedPoint('a', 'b') +except TypeError as e: + print(e) + + +# We can enumerate specific allowed classes (like isinstance()). + +class IntFloatPoint(Struct): + x = TypedField((int, float)) + y = TypedField((int, float)) + +p = IntFloatPoint(1, 2.0) +try: + p = IntFloatPoint(1j, 2 + 3j) +except TypeError as e: + print(e) + + +# We can take a sequence of values, all of which satisfy the +# type specification. + +class Vector(Struct): + vals = TypedField(Number, seq=True) + +v = Vector([1, 2, 3, 4]) +# The sequence is converted to a tuple to help ensure immutability. +print(v) +try: + v = Vector(5) +except TypeError as e: + print(e) +try: + v = Vector([1, 'b', 3, 4]) +except TypeError as e: + print(e) +# Construction from non-sequence iterables like generators is disallowed. +try: + v = Vector((x for x in range(1, 5))) +except TypeError as e: + print(e) + + +# Sequences may be checked for uniqueness. +# (This is implemented naively in O(n^2) time.) + +class Ids(Struct): + vals = TypedField(int, seq=True, unique=True) + +try: + ids = Ids([1, 2, 3, 2]) +except TypeError as e: + print(e) + + +# If None is passed as the first argument of TypedField, +# any type is admitted. + +class Array(Struct): + vals = TypedField(None, seq=True) + +a = Array([1, 'b', False]) +# It still must be a sequence. 
+try: + a = Array(True) +except TypeError as e: + print(e) + + +# Typed fields can be set to allow None. + +class Person(Struct): + name = Field + salary = TypedField(int, or_none=True) + +a = Person('Alice', 100000) +b = Person('Bob', None) + +# This is different from adding NoneType to the sequence of +# allowed types, as that would mean the elements could be +# any type. Also note that or_none=True does not make passing +# in the field value to the constructor optional. + + +# The same Field instance can be used as a descriptor multiple +# times. (Each occurrence automatically gets a copy.) This can +# help shorten definitions. + +myfield = TypedField((int, float), or_none=True) + +class NullablePoint(Struct): + x = myfield + y = myfield + z = myfield + +p = NullablePoint(1, 2.0, None) diff --git a/examples/vector.py b/examples/vector.py index 82db621..e3d315c 100644 --- a/examples/vector.py +++ b/examples/vector.py @@ -1,59 +1,145 @@ -"""Illustrates inheritance, non-field data, and mutability.""" +"""Illustrates more advanced features like inheritance, mutability, +and user-supplied constructors. +""" from simplestruct import Struct, Field + +# Default values on fields work exactly like default values for +# constructor arguments. This includes the restriction that +# a non-default argument cannot follow a default argument. + +class AxisPoint(Struct): + x = Field(default=0) + y = Field(default=0) + +print('==== Default values ====') +p1 = AxisPoint(x=2) +print(p1) # AxisPoint(x=2, y=0) +p2 = AxisPoint(y=3) +print(p2) # AxisPoint(x=0, y=3) + + +# Subclasses by default do not inherit fields, but this can +# be enabled with a class-level flag. + class Point2D(Struct): x = Field y = Field -# Derived class that adds a computed magnitude data. -class Vector2D(Point2D): - # Special flag to inherit x and y fields without - # needing to redeclare. +class Point3D(Point2D): _inherit_fields = True - - # Constructor takes in the field values. 
- def __init__(self, x, y): - # mag is not a field for the purposes of pretty printing, - # equality comparison, etc. It could alternatively be - # implemented as a @property. - self.mag = (x**2 + y**2) ** .5 - - # self.x and self.y are already automatically initialized, - # but can be modified in __init__(), even though this - # Struct is immutable. Be careful not to hash self until - # after __init__() is done. - - # No need to call super().__init__(). + z = Field -p1 = Point2D(3, 4) -v1 = Vector2D(3, 4) +print('\n==== Inheritance ====') +p = Point3D(1, 2, 3) +print(p) # Point3D(x=1, y=2, z=3) -print(p1) # Point2D(x=3, y=4) -print(v1) # Vector2D(x=3, y=4) -print(v1.mag) # 5.0 +# The flag must be redefined on each subclass that wants to +# inherit fields. -# Equality does not hold between different types. -print(p1 == v1) # False +# The list of fields can be programmatically accessed via the +# _struct attribute. -# Structs are immutable by default. -try: - p1.x = 7 -except AttributeError: - print('Exception') +print(p._struct) # a tuple of three Field objects +print([f.name for f in p._struct]) # ['x', 'y', 'z'] -# Let's make a mutable 3D point. -class Point3D(Point2D): +# Equality does not hold on different types, even if they are +# in the same class hierarchy and share the same fields. + +class Point3D_2(Point3D): + _inherit_fields = True + +p2 = Point3D_2(1, 2, 3) +print(p == p2) # False + + +# Structs are immutable by default, but this can be disabled +# with a class-level flag. + +class MutablePoint(Struct): _immutable = False - z = Field + x = Field + y = Field -p2 = Point3D(3, 4, 5) -print(p2) # Point3D(x=3, y=4, z=5) -p2.x = 7 -print(p2) # Point3D(x=7, y=4, z=5) +print('\n==== Mutability ====') +p = Point2D(1, 2) +try: + p.x = 3 +except AttributeError as e: + print(e) +p = MutablePoint(1, 2) +p.x = 3 +print(p) # MutablePoint(x=3, y=2) +# Mutable structs can't be hashed (analogous to Python lists, dicts, sets).
try: - hash(p2) -except TypeError: - print('Exception') + hash(p) +except TypeError as e: + print(e) + + +# Like other classes, a Struct is free to define its own constructor. +# The arguments are the declared fields, in order of their declaration. +# +# Fields are initialized in __new__(). A subclass that overrides +# __new__() must call super().__new__() (not type.__new__()). +# __init__() does not need to call super().__init__() or do any work +# on behalf of the Struct system. +# +# If the fields have default values, these are substituted in before +# calling the constructor. Thus providing default parameter values +# in the constructor argument list is meaningless. + +class DoublingVector2D(Struct): + + x = Field + y = Field + + def __new__(cls, x, y): + print('Vector2D.__new__() has been called') + return super().__new__(cls, x, y) + + def __init__(self, x, y): + # There is no need to call super().__init__(). + + # The field values self.x and self.y have already been + # initialized by __new__(). + + # Before the call to __init__(), the instance attribute + # _initialized is set to False. It is changed to True + # once __init__() has finished executing. If there are + # multiple __init__() calls chained via super(), it is + # changed once the outermost call returns. + + assert not self._initialized + + # Despite the fact that this Struct is immutable, we + # are free to reassign fields until the flag is set. + # Likewise, we may not hash this instance until the + # flag is set. + + self.x *= 2 + self.y *= 2 + try: + hash(self) + except TypeError as e: + print(e) + + # We can create additional non-field attributes. + self.magnitude = (self.x**2 + self.y**2) ** .5 + # Since magnitude is not declared as a field, it is not + # considered during equality comparison, hashing, pretty + # printing, etc. Non-field attributes are generally + # incidental to the value of the Struct, or else can be + # deterministically derived from the fields.
They can + # be overwritten at any time, whether or not the Struct + # is immutable. + + # Alternatively, we could define magnitude as a @property, + # but then it would be recomputed each time it is used. + +print('\n==== Custom constructor ====') +v = DoublingVector2D(1.5, 2) +print(v) # DoublingVector2D(x=3.0, y=4) +print(v.magnitude) # 5.0 diff --git a/setup.py b/setup.py index f949d18..5ce1fa3 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name = 'SimpleStruct', - version = '0.2.0', + version = '0.2.1', url = '/~https://github.com/brandjon/simplestruct', author = 'Jon Brandvein', diff --git a/simplestruct/__init__.py b/simplestruct/__init__.py index 7c30fbb..d0d7ccf 100644 --- a/simplestruct/__init__.py +++ b/simplestruct/__init__.py @@ -3,7 +3,7 @@ checking, mutability, and inheritance. """ -__version__ = '0.2.0' +__version__ = '0.2.1' from .struct import * from .fields import * diff --git a/simplestruct/fields.py b/simplestruct/fields.py index 3cbd7d2..44d0a8c 100644 --- a/simplestruct/fields.py +++ b/simplestruct/fields.py @@ -7,10 +7,10 @@ from .struct import Field -from .type import normalize_kind, checktype, checktype_seq +from .type import TypeChecker -class TypedField(Field): +class TypedField(Field, TypeChecker): """A field with dynamically-checked type constraints. @@ -18,24 +18,52 @@ class TypedField(Field): If seq is False, the field value must satisfy kind. Otherwise, the field value must be a sequence of elements that satisfy kind. The sequence gets converted to a tuple if it isn't already. - If nodup is also True, the elements must be distinct (as + If unique is also True, the elements must be distinct (as determined by kind.__eq__()). + + If or_none is True, None is a valid value.
""" - def __init__(self, kind, *, seq=False, nodups=False): - super().__init__() - self.kind = normalize_kind(kind) + def __init__(self, kind, *, + seq=False, unique=False, or_none=False, **kargs): + super().__init__(**kargs) + self.kind = kind self.seq = seq - self.nodups = nodups + self.unique = unique + self.or_none = or_none def copy(self): - return type(self)(self.kind, seq=self.seq, nodups=self.nodups) + return type(self)(self.kind, seq=self.seq, unique=self.unique, + or_none=self.or_none, default=self.default) - def __set__(self, inst, value): - if self.seq: - checktype_seq(value, self.kind, self.nodups) + @property + def kind(self): + return self._kind + @kind.setter + def kind(self, k): + self._kind = self.normalize_kind(k) + + def check(self, inst, value): + """Raise TypeError if value doesn't satisfy the constraints + for use on instance inst. + """ + if not (self.or_none and value is None): + if self.seq: + self.checktype_seq(value, self.kind, + unique=self.unique, inst=inst) + else: + self.checktype(value, self.kind, inst=inst) + + def normalize(self, inst, value): + """Return value or a normalized form of it for use on + instance inst. + """ + if (not (self.or_none and value is None) and + self.seq): value = tuple(value) - else: - checktype(value, self.kind) - + return value + + def __set__(self, inst, value): + self.check(inst, value) + value = self.normalize(inst, value) super().__set__(inst, value) diff --git a/simplestruct/struct.py b/simplestruct/struct.py index 9b76bce..139ad82 100644 --- a/simplestruct/struct.py +++ b/simplestruct/struct.py @@ -11,6 +11,7 @@ from collections import OrderedDict, Counter from functools import reduce from inspect import Signature, Parameter +from reprlib import recursive_repr def hash_seq(seq): @@ -30,20 +31,40 @@ class Field: equality semantics. 
""" - def __init__(self): + # TODO: The current copy() is not ideal because a subclass that + # overrides it needs to know about the fields of the base class, + # so that it can pass those to the newly constructed object. + # For instance, TypedField needs to know to pass in + # default=self.default. + # + # One possible solution is to get meta: make Field itself be a + # Struct, and let attributes like default be Struct fields. + # Then the use of copy() becomes _replace(name=new_name), + # and subclasses simply set _inherit_fields = True. + # This solution would require a new non-Struct BaseField class + # for bootstrapping. + + NO_DEFAULT = object() + + def __init__(self, default=NO_DEFAULT): # name is the attribute name through which this field is # accessed from the Struct. This will be set automatically # by MetaStruct. self.name = None + self.default = default def copy(self): # This is used by MetaStruct to get a fresh instance # of the field for each of its occurrences. - return type(self)() + return type(self)(default=self.default) + + @property + def has_default(self): + return self.default is not self.NO_DEFAULT def __get__(self, inst, value): if inst is None: - raise AttributeError('Cannot retrieve field from class') + return self return inst.__dict__[self.name] def __set__(self, inst, value): @@ -114,9 +135,12 @@ def __new__(mcls, clsname, bases, namespace, **kargs): cls._struct = tuple(fields) - cls._signature = Signature( - parameters=[Parameter(f.name, Parameter.POSITIONAL_OR_KEYWORD) - for f in cls._struct]) + params = [] + for f in cls._struct: + default = f.default if f.has_default else Parameter.empty + params.append(Parameter(f.name, Parameter.POSITIONAL_OR_KEYWORD, + default=default)) + cls._signature = Signature(params) return cls @@ -167,23 +191,42 @@ def __new__(cls, *args, **kargs): # _initialized is read during field initialization. 
inst._initialized = False + f = None try: boundargs = cls._signature.bind(*args, **kargs) + # Include default arguments. + for param in cls._signature.parameters.values(): + if (param.name not in boundargs.arguments and + param.default is not param.empty): + boundargs.arguments[param.name] = param.default for f in cls._struct: setattr(inst, f.name, boundargs.arguments[f.name]) + f = None except TypeError as exc: - raise TypeError('Error constructing ' + cls.__name__) from exc + if f is not None: + where = "{} (field '{}')".format(cls.__name__, f.name) + else: + where = cls.__name__ + raise TypeError('Error constructing {}: {}'.format( + where, exc)) from exc return inst + # str() and repr() both recurse over their fields with + # whichever function was used initially. Both are protected + # from recursive cycles with the help of reprlib. + def _fmt_helper(self, fmt): return '{}({})'.format( self.__class__.__name__, ', '.join('{}={}'.format(f.name, fmt(getattr(self, f.name))) for f in self._struct)) + @recursive_repr() def __str__(self): return self._fmt_helper(str) + + @recursive_repr() def __repr__(self): return self._fmt_helper(repr) @@ -235,3 +278,10 @@ def _replace(self, **kargs): for f in self._struct} fields.update(kargs) return type(self)(**fields) + + # XXX: We could provide a copy() method as well, analogous to + # list, dict, and other collections. Unlike the above methods, + # it would not have an underscore prefix, and potentially clash + # with a user-defined field named "copy". But in this case, + # the user field should simply take precedence and shadow + # this feature. diff --git a/simplestruct/type.py b/simplestruct/type.py index a6edb7d..aa4a556 100644 --- a/simplestruct/type.py +++ b/simplestruct/type.py @@ -3,92 +3,118 @@ Spare me the "It's not the Python way" lectures. I've lost too much time to type errors in places where I never had any intention of allowing duck-typed alternative values. - -For our purposes, a "kind" is a tuple of types. 
A value satisfies -a kind if it is an instance of any of the types. For convenience, -kinds may also be specified as a single type, a sequence other than -a tuple, and as None (equivalent to (object,)). """ __all__ = [ - 'normalize_kind', + 'TypeChecker', 'checktype', 'checktype_seq', ] -def str_valtype(val): - """Get a string describing the type of val.""" - return val.__class__.__name__ - -def normalize_kind(kind): - """Make a proper kind out of one of the alternative forms.""" - if kind is None: - return (object,) - elif isinstance(kind, type): - return (kind,) - else: - return tuple(kind) - -def str_kind(kind): - """Get a string describing a kind.""" - if len(kind) == 0: - return '()' - elif len(kind) == 1: - return kind[0].__name__ - elif len(kind) == 2: - return kind[0].__name__ + ' or ' + kind[1].__name__ - else: - return 'one of {' + ', '.join(t.__name__ for t in kind) + '}' - - -def checktype(val, kind): - """Raise TypeError if val does not satisfy kind.""" - kind = normalize_kind(kind) - if not isinstance(val, kind): - raise TypeError('Expected {}; got {}'.format( - str_kind(kind), str_valtype(val))) - -def checktype_seq(seq, kind, nodups=False): - """Raise TypeError if seq is not a sequence of elements satisfying - kind. Optionally require elements to be unique. +class TypeChecker: + + """A simple type checker supporting sequences and unions. + Suitable for use as a mixin. - As a special case, a string is considered to be an atomic value - rather than a sequence of single-character strings. (Thus, - checktype_seq('foo', str) will fail.) + A "kind" is a tuple of types. A value satisfies a kind if it is + an instance of any of the types. """ - kind = normalize_kind(kind) - exp = str_kind(kind) - # Make sure we have a sequence. - try: - iterator = iter(seq) - # Generators aren't sequences. This avoids a confusing bug - # where we consume a generator by type-checking it, and leave - # only an exhausted iterator for the user code. 
- len(seq) - except TypeError: - got = str_valtype(seq) - raise TypeError('Expected sequence of {}; ' - 'got {} instead of sequence'.format(exp, got)) + def str_valtype(self, val): + """Get a string describing the type of val.""" + if val is None: + return 'None' + return val.__class__.__name__ + + def normalize_kind(self, kindlike): + """Make a kind out of a possible shorthand. If the given + argument is a sequence of types or a singular type, it becomes + a kind that accepts exactly those types. If the given argument + is None, it becomes a type that accepts anything. + """ + if kindlike is None: + return (object,) + elif isinstance(kindlike, type): + return (kindlike,) + else: + return tuple(kindlike) + + def str_kind(self, kind): + """Get a string describing a kind.""" + if len(kind) == 0: + return 'Nothing' + elif len(kind) == 1: + return kind[0].__name__ + elif len(kind) == 2: + return kind[0].__name__ + ' or ' + kind[1].__name__ + else: + return 'one of {' + ', '.join(t.__name__ for t in kind) + '}' + + def checktype(self, val, kind, **kargs): + """Raise TypeError if val does not satisfy kind.""" + if not isinstance(val, kind): + raise TypeError('Expected {}; got {}'.format( + self.str_kind(kind), self.str_valtype(val))) - if isinstance(seq, str): - raise TypeError('Expected sequence of {}; got single str ' - '(strings do not count as character ' - 'sequences)'.format(exp)) + def checktype_seq(self, seq, kind, *, unique=False, **kargs): + """Raise TypeError if seq is not a sequence of elements satisfying + kind. Optionally require elements to be unique. - for i, item in enumerate(iterator): - if not isinstance(item, kind): - got = str_valtype(item) + As a special case, a string is considered to be an atomic value + rather than a sequence of single-character strings. (Thus, + checktype_seq('foo', str) will fail.) + """ + exp = self.str_kind(kind) + + # Make sure we have a sequence. + try: + iterator = iter(seq) + # Generators aren't sequences. 
This avoids a confusing bug + # where we consume a generator by type-checking it, and leave + # only an exhausted iterator for the user code. + len(seq) + except TypeError: + got = self.str_valtype(seq) raise TypeError('Expected sequence of {}; ' - 'got sequence with {} at position {}'.format( - exp, got, i)) - - if nodups: - seen = [] - for i, item in enumerate(seq): - if item in seen: - raise TypeError('Duplicate element {} at position {}'.format( - repr(item), i)) - seen.append(item) + 'got {} instead of sequence'.format(exp, got)) + + if isinstance(seq, str): + raise TypeError('Expected sequence of {}; got single str ' + '(strings do not count as character ' + 'sequences)'.format(exp)) + + for i, item in enumerate(iterator): + # Depend on checktype() to check individual elements, + # but generate an error message that includes the position + # of the failure. + try: + self.checktype(item, kind, **kargs) + except TypeError: + got = self.str_valtype(item) + raise TypeError('Expected sequence of {}; ' + 'got sequence with {} at position {}'.format( + exp, got, i)) from None + + if unique: + seen = [] + for i, item in enumerate(seq): + if item in seen: + raise TypeError('Duplicate element {} at ' + 'position {}'.format(repr(item), i)) + seen.append(item) + + +# We export some convenience methods so the caller doesn't have to +# instantiate TypeChecker. These methods automatically normalize kind. + +checker = TypeChecker() + +def checktype(val, kind): + kind = checker.normalize_kind(kind) + checker.checktype(val, kind) + +def checktype_seq(val, kind, *, unique=False): + kind = checker.normalize_kind(kind) + checker.checktype_seq(val, kind, unique=unique) diff --git a/tests/test_fields.py b/tests/test_fields.py index 963507d..dcf26be 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -16,6 +16,8 @@ class Foo(Struct): f = Foo(1) with self.assertRaises(TypeError): Foo('a') + with self.assertRaises(TypeError): + Foo(None) # Sequence case. 
class Foo(Struct): @@ -25,12 +27,18 @@ class Foo(Struct): with self.assertRaises(TypeError): Foo([1, 'a']) - # Nodups sequence. + # Sequence without duplicates. class Foo(Struct): - bar = TypedField(int, seq=True, nodups=True) + bar = TypedField(int, seq=True, unique=True) Foo([1, 2]) with self.assertRaises(TypeError): Foo([1, 2, 1]) + + # Optional case. + class Foo(Struct): + _immutable = False + bar = TypedField(int, or_none=True) + f1 = Foo(None) if __name__ == '__main__': unittest.main() diff --git a/tests/test_struct.py b/tests/test_struct.py index b25dc3b..19e5990 100644 --- a/tests/test_struct.py +++ b/tests/test_struct.py @@ -51,9 +51,10 @@ def test_Struct(self): # Basic instantiation and pretty printing. class Foo(Struct): bar = Field() - f = Foo(5) - self.assertEqual(f.bar, 5) - self.assertEqual(str(f), 'Foo(bar=5)') + f = Foo('a') + self.assertEqual(f.bar, 'a') + self.assertEqual(str(f), 'Foo(bar=a)') + self.assertEqual(repr(f), "Foo(bar='a')") # Equality and hashing. class Foo(Struct): @@ -74,6 +75,16 @@ class Foo(Struct): with self.assertRaises(TypeError): hash(f) + # Tuple decomposition. + class Foo(Struct): + a = Field() + b = Field() + f = Foo(1, 2) + a, b = f + self.assertEqual(len(f), 2) + self.assertEqual((a, b), (1, 2)) + + def test_construct(self): # Construction by keyword. class Foo(Struct): a = Field() @@ -86,6 +97,15 @@ class Foo(Struct): names = [f.name for f in Foo._struct] self.assertEqual(names, ['a', 'b', 'c']) + # Construction with defaults. + class Foo(Struct): + a = Field() + b = Field(default='b') + f = Foo(1, 2) + self.assertEqual((f.a, f.b), (1, 2)) + f = Foo(1) + self.assertEqual((f.a, f.b), (1, 'b')) + # Parentheses-less shorthand. class Foo(Struct): bar = Field @@ -103,15 +123,6 @@ class Foo2(Struct): # overlap, there'd be a name collision anyway. ids = {id(f) for f in Foo1._struct + Foo2._struct} self.assertTrue(len(ids) == 3) - - # Tuple decomposition. 
- class Foo(Struct): - a = Field() - b = Field() - f = Foo(1, 2) - a, b = f - self.assertEqual(len(f), 2) - self.assertEqual((a, b), (1, 2)) def test_mutability(self): # Mutable, unhashable. @@ -219,6 +230,17 @@ class Bar(Foo): foo = Foo(1) bar = Bar(1) self.assertNotEqual(foo, bar) + + def test_recur(self): + # __repr__ for recursive objects. + class Foo(Struct): + _immutable = False + a = Field() + f = Foo(None) + f.a = f + s = repr(f) + exp_s = 'Foo(a=...)' + self.assertEqual(s, exp_s) if __name__ == '__main__': diff --git a/tests/test_type.py b/tests/test_type.py index 5ec2571..6f2a412 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -8,16 +8,33 @@ class ChecktypeCase(unittest.TestCase): + def test_strs(self): + c = TypeChecker() + self.assertEqual(c.str_valtype(None), 'None') + self.assertEqual(c.str_valtype(5), 'int') + self.assertEqual(c.str_kind(()), 'Nothing') + self.assertEqual(c.str_kind((int,)), 'int') + self.assertEqual(c.str_kind((int, str)), 'int or str') + self.assertEqual(c.str_kind((int, str, bool)), + 'one of {int, str, bool}') + + def test_normalize(self): + c = TypeChecker() + self.assertEqual(c.normalize_kind((int,)), (int,)) + self.assertEqual(c.normalize_kind([int,]), (int,)) + self.assertEqual(c.normalize_kind(int), (int,)) + self.assertEqual(c.normalize_kind(None), (object,)) + def test_checktype(self): checktype('a', str) checktype(True, int) # This is correct, bool subtypes int checktype(5, (str, int)) with self.assertRaisesRegex( - TypeError, 'Expected int; got NoneType'): + TypeError, 'Expected int; got None'): checktype(None, int) with self.assertRaisesRegex( - TypeError, 'Expected str or int; got NoneType'): + TypeError, 'Expected str or int; got None'): checktype(None, (str, int)) def test_checktype_seq(self): @@ -43,7 +60,7 @@ def test_checktype_seq(self): checktype_seq([5, 3, 5, 8], int) with self.assertRaisesRegex( TypeError, 'Duplicate element 5 at position 2'): - checktype_seq([5, 3, 5, 8], int, nodups=True) + 
checktype_seq([5, 3, 5, 8], int, unique=True) if __name__ == '__main__':