diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6e4266f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+*.py[cod]
+*$py.class
diff --git a/Powerthesaurus-1.0.alfredworkflow b/Powerthesaurus-1.0.alfredworkflow
new file mode 100644
index 0000000..2d90cd0
Binary files /dev/null and b/Powerthesaurus-1.0.alfredworkflow differ
diff --git a/demo.gif b/demo.gif
new file mode 100644
index 0000000..15aca8d
Binary files /dev/null and b/demo.gif differ
diff --git a/icon.png b/icon.png
new file mode 100644
index 0000000..fa9b601
Binary files /dev/null and b/icon.png differ
diff --git a/src/bs4/1631353.py b/src/bs4/1631353.py
new file mode 100644
index 0000000..af75dd2
--- /dev/null
+++ b/src/bs4/1631353.py
@@ -0,0 +1,5 @@
+doc = """<script>
+h=window.location.protocol+"//",r='<body onload="';
+</script>"""
+from bs4.diagnose import diagnose
+diagnose(doc)
diff --git a/src/bs4/__init__.py b/src/bs4/__init__.py
new file mode 100644
index 0000000..46caac0
--- /dev/null
+++ b/src/bs4/__init__.py
@@ -0,0 +1,529 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup uses a pluggable XML or HTML parser to parse a
+(possibly invalid) document into a tree representation. Beautiful Soup
+provides methods and Pythonic idioms that make it easy to navigate,
+search, and modify the parse tree.
+
+Beautiful Soup works with Python 2.7 and up. It works better if lxml
+and/or html5lib is installed.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation:
+http://www.crummy.com/software/BeautifulSoup/bs4/doc/
+
+"""
+
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "4.5.3"
+__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
+__license__ = "MIT"
+
+__all__ = ['BeautifulSoup']
+
+import os
+import re
+import traceback
+import warnings
+
+from .builder import builder_registry, ParserRejectedMarkup
+from .dammit import UnicodeDammit
+from .element import (
+    CData,
+    Comment,
+    DEFAULT_OUTPUT_ENCODING,
+    Declaration,
+    Doctype,
+    NavigableString,
+    PageElement,
+    ProcessingInstruction,
+    ResultSet,
+    SoupStrainer,
+    Tag,
+    )
+
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 3 without converting it.
+'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+
+class BeautifulSoup(Tag):
+    """
+    This class defines the basic interface called by the tree builders.
+
+    These methods will be called by the parser:
+      reset()
+      feed(markup)
+
+    The tree builder may call these methods from its feed() implementation:
+      handle_starttag(name, attrs) # See note about return value
+      handle_endtag(name)
+      handle_data(data) # Appends to the current data node
+      endData(containerClass=NavigableString) # Ends the current data node
+
+    No matter how complicated the underlying parser is, you should be
+    able to build a tree using 'start tag' events, 'end tag' events,
+    'data' events, and "done with data" events.
+
+    If you encounter an empty-element tag (aka a self-closing tag,
+    like HTML's <br> tag), call handle_starttag and then
+    handle_endtag.
+    """
+    ROOT_TAG_NAME = u'[document]'
+
+    # If the end-user gives no indication which tree builder they
+    # want, look for one with these features.
+    DEFAULT_BUILDER_FEATURES = ['html', 'fast']
+
+    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
+
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
+
+    def __init__(self, markup="", features=None, builder=None,
+                 parse_only=None, from_encoding=None, exclude_encodings=None,
+                 **kwargs):
+        """Set up the soup as the root tag and feed `markup` (a string or a
+        file-like object) to the tree builder. BS3-era keyword arguments are
+        warned about; any other unexpected keyword raises TypeError."""
+
+        if 'convertEntities' in kwargs:
+            del kwargs['convertEntities']
+            warnings.warn(
+                "BS4 does not respect the convertEntities argument to the BeautifulSoup "
+                "constructor. Entities are always converted to Unicode characters.")
+
+        if 'markupMassage' in kwargs:
+            del kwargs['markupMassage']
+            warnings.warn(
+                "BS4 does not respect the markupMassage argument to the "
+                "BeautifulSoup constructor. The tree builder is responsible "
+                "for any necessary markup massage.")
+
+        if 'smartQuotesTo' in kwargs:
+            del kwargs['smartQuotesTo']
+            warnings.warn(
+                "BS4 does not respect the smartQuotesTo argument to the "
+                "BeautifulSoup constructor. Smart quotes are always converted "
+                "to Unicode characters.")
+
+        if 'selfClosingTags' in kwargs:
+            del kwargs['selfClosingTags']
+            warnings.warn(
+                "BS4 does not respect the selfClosingTags argument to the "
+                "BeautifulSoup constructor. The tree builder is responsible "
+                "for understanding self-closing tags.")
+
+        if 'isHTML' in kwargs:
+            del kwargs['isHTML']
+            warnings.warn(
+                "BS4 does not respect the isHTML argument to the "
+                "BeautifulSoup constructor. Suggest you use "
+                "features='lxml' for HTML and features='lxml-xml' for "
+                "XML.")
+
+        def deprecated_argument(old_name, new_name):
+            if old_name in kwargs:
+                warnings.warn(
+                    'The "%s" argument to the BeautifulSoup constructor '
+                    'has been renamed to "%s."' % (old_name, new_name))
+                value = kwargs[old_name]
+                del kwargs[old_name]
+                return value
+            return None
+
+        parse_only = parse_only or deprecated_argument(
+            "parseOnlyThese", "parse_only")
+
+        from_encoding = from_encoding or deprecated_argument(
+            "fromEncoding", "from_encoding")
+
+        if from_encoding and isinstance(markup, unicode):
+            warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
+            from_encoding = None
+
+        if len(kwargs) > 0:
+            arg = kwargs.keys().pop()
+            raise TypeError(
+                "__init__() got an unexpected keyword argument '%s'" % arg)
+
+        if builder is None:
+            original_features = features
+            if isinstance(features, basestring):
+                features = [features]
+            if features is None or len(features) == 0:
+                features = self.DEFAULT_BUILDER_FEATURES
+            builder_class = builder_registry.lookup(*features)
+            if builder_class is None:
+                raise FeatureNotFound(
+                    "Couldn't find a tree builder with the features you "
+                    "requested: %s. Do you need to install a parser library?"
+                    % ",".join(features))
+            builder = builder_class()
+            if not (original_features == builder.NAME or
+                    original_features in builder.ALTERNATE_NAMES):
+                if builder.is_xml:
+                    markup_type = "XML"
+                else:
+                    markup_type = "HTML"
+
+                caller = traceback.extract_stack()[0]
+                filename = caller[0]
+                line_number = caller[1]
+                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
+                    filename=filename,
+                    line_number=line_number,
+                    parser=builder.NAME,
+                    markup_type=markup_type))
+
+        self.builder = builder
+        self.is_xml = builder.is_xml
+        self.known_xml = self.is_xml
+        self.builder.soup = self
+
+        self.parse_only = parse_only
+
+        if hasattr(markup, 'read'):        # It's a file-type object.
+            markup = markup.read()
+        elif len(markup) <= 256 and (
+                (isinstance(markup, bytes) and not b'<' in markup)
+                or (isinstance(markup, unicode) and not u'<' in markup)
+        ):
+            # Print out warnings for a couple beginner problems
+            # involving passing non-markup to Beautiful Soup.
+            # Beautiful Soup will still parse the input as markup,
+            # just in case that's what the user really wants.
+            if (isinstance(markup, unicode)
+                and not os.path.supports_unicode_filenames):
+                possible_filename = markup.encode("utf8")
+            else:
+                possible_filename = markup
+            is_file = False
+            try:
+                is_file = os.path.exists(possible_filename)
+            except Exception:
+                # This is almost certainly a problem involving
+                # characters not valid in filenames on this
+                # system. Just let it go.
+                pass
+            if is_file:
+                if isinstance(markup, unicode):
+                    markup = markup.encode("utf8")
+                warnings.warn(
+                    '"%s" looks like a filename, not markup. You should '
+                    'probably open this file and pass the filehandle into '
+                    'Beautiful Soup.' % markup)
+            self._check_markup_is_url(markup)
+
+        for (self.markup, self.original_encoding, self.declared_html_encoding,
+         self.contains_replacement_characters) in (
+             self.builder.prepare_markup(
+                 markup, from_encoding, exclude_encodings=exclude_encodings)):
+            self.reset()
+            try:
+                self._feed()
+                break
+            except ParserRejectedMarkup:
+                pass
+
+        # Clear out the markup and remove the builder's circular
+        # reference to this object.
+        self.markup = None
+        self.builder.soup = None
+
+    def __copy__(self):
+        # Serialize this tree to UTF-8 and parse the result again with the
+        # same builder to obtain a separate soup object.
+        clone = type(self)(
+            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
+        )
+
+        # The round trip went through UTF-8, which may not be the encoding
+        # of the original markup, so carry .original_encoding over.
+        clone.original_encoding = self.original_encoding
+        return clone
+
+    def __getstate__(self):
+        # Pickle a copy of the instance dict, minus an unpicklable builder.
+        state = dict(self.__dict__)
+        if 'builder' in state and not self.builder.picklable:
+            state['builder'] = None
+        return state
+
+    @staticmethod
+    def _check_markup_is_url(markup):
+        """Warn if the markup looks like a URL instead of a document.
+
+        `markup` can be unicode or str (py2) / bytes (py3); any other
+        type is silently ignored.
+        """
+        is_bytes = isinstance(markup, bytes)
+        if is_bytes:
+            blank, url_schemes = b' ', (b"http:", b"https:")
+        elif isinstance(markup, unicode):
+            blank, url_schemes = u' ', (u"http:", u"https:")
+        else:
+            return
+
+        # Only input that starts with a URL scheme and contains no space
+        # at all triggers the warning.
+        if not markup.startswith(url_schemes) or blank in markup:
+            return
+
+        shown = markup.decode('utf-8', 'replace') if is_bytes else markup
+        warnings.warn(
+            '"%s" looks like a URL. Beautiful Soup is not an'
+            ' HTTP client. You should probably use an HTTP client like'
+            ' requests to get the document behind the URL, and feed'
+            ' that document to Beautiful Soup.' % shown
+        )
+
+    def _feed(self):
+        # Reset the builder, then run the prepared markup through it.
+        self.builder.reset()
+
+        self.builder.feed(self.markup)
+        # Close out any unfinished strings and close all the open tags.
+        self.endData()
+        while self.currentTag.name != self.ROOT_TAG_NAME:
+            self.popTag()
+
+    def reset(self):
+        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
+        self.hidden = 1
+        self.builder.reset()
+        self.current_data = []
+        self.currentTag = None
+        self.tagStack = []
+        self.preserve_whitespace_tag_stack = []
+        self.pushTag(self)
+
+    def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
+        """Create a new tag associated with this soup."""
+        return Tag(None, self.builder, name, namespace, nsprefix, attrs)
+
+    def new_string(self, s, subclass=NavigableString):
+        """Create a new NavigableString associated with this soup."""
+        return subclass(s)
+
+    def insert_before(self, successor):
+        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
+
+    def insert_after(self, successor):
+        raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
+
+    def popTag(self):
+        tag = self.tagStack.pop()
+        if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
+            self.preserve_whitespace_tag_stack.pop()
+        #print "Pop", tag.name
+        if self.tagStack:
+            self.currentTag = self.tagStack[-1]
+        return self.currentTag
+
+    def pushTag(self, tag):
+        #print "Push", tag.name
+        if self.currentTag:
+            self.currentTag.contents.append(tag)
+        self.tagStack.append(tag)
+        self.currentTag = self.tagStack[-1]
+        if tag.name in self.builder.preserve_whitespace_tags:
+            self.preserve_whitespace_tag_stack.append(tag)
+
+    def endData(self, containerClass=NavigableString):
+        """Flush any buffered text into the tree as one `containerClass`
+        node."""
+        if not self.current_data:
+            return
+
+        text = u''.join(self.current_data)
+        # Outside of whitespace-preserving tags, a string made of nothing
+        # but ASCII spaces collapses to a single newline (if it contained
+        # one) or a single space.
+        if not self.preserve_whitespace_tag_stack:
+            if all(char in self.ASCII_SPACES for char in text):
+                if '\n' in text:
+                    text = '\n'
+                else:
+                    text = ' '
+
+        # Reset the data collector.
+        self.current_data = []
+
+        # With parse_only set and at most one tag on the stack, the string
+        # is kept only if parse_only.text is set and search() accepts it.
+        if self.parse_only and len(self.tagStack) <= 1:
+            wanted = self.parse_only.text and self.parse_only.search(text)
+            if not wanted:
+                return
+
+        node = containerClass(text)
+        self.object_was_parsed(node)
+
+    def object_was_parsed(self, o, parent=None, most_recent_element=None):
+        """Add an object to the parse tree."""
+        parent = parent or self.currentTag
+        previous_element = most_recent_element or self._most_recent_element
+
+        next_element = previous_sibling = next_sibling = None
+        if isinstance(o, Tag):
+            next_element = o.next_element
+            next_sibling = o.next_sibling
+            previous_sibling = o.previous_sibling
+            if not previous_element:
+                previous_element = o.previous_element
+
+        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
+
+        self._most_recent_element = o
+        parent.contents.append(o)
+
+        if parent.next_sibling:
+            # This node is being inserted into an element that has
+            # already been parsed. Deal with any dangling references.
+            index = len(parent.contents)-1
+            while index >= 0:
+                if parent.contents[index] is o:
+                    break
+                index -= 1
+            else:
+                raise ValueError(
+                    "Error building tree: supposedly %r was inserted "
+                    "into %r after the fact, but I don't see it!" % (
+                        o, parent
+                    )
+                )
+            if index == 0:
+                previous_element = parent
+                previous_sibling = None
+            else:
+                previous_element = previous_sibling = parent.contents[index-1]
+            if index == len(parent.contents)-1:
+                next_element = parent.next_sibling
+                next_sibling = None
+            else:
+                next_element = next_sibling = parent.contents[index+1]
+
+            o.previous_element = previous_element
+            if previous_element:
+                previous_element.next_element = o
+            o.next_element = next_element
+            if next_element:
+                next_element.previous_element = o
+            o.next_sibling = next_sibling
+            if next_sibling:
+                next_sibling.previous_sibling = o
+            o.previous_sibling = previous_sibling
+            if previous_sibling:
+                previous_sibling.next_sibling = o
+
+    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
+        """Pops the tag stack up to and including the most recent
+        instance of the given tag. If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        # Returns the result of the last popTag() call (None if none was made).
+        if name == self.ROOT_TAG_NAME:
+            # The BeautifulSoup object itself can never be popped.
+            return
+
+        most_recently_popped = None
+
+        stack_size = len(self.tagStack)
+        for i in range(stack_size - 1, 0, -1):
+            t = self.tagStack[i]
+            if (name == t.name and nsprefix == t.prefix):
+                if inclusivePop:
+                    most_recently_popped = self.popTag()
+                break
+            most_recently_popped = self.popTag()
+
+        return most_recently_popped
+
+    def handle_starttag(self, name, namespace, nsprefix, attrs):
+        """Push a start tag on to the stack.
+
+        Any pending text is flushed first via endData().
+
+        If this method returns None, the tag was rejected by the
+        SoupStrainer. You should proceed as if the tag had not occurred
+        in the document. For instance, if this was a self-closing tag,
+        don't call handle_endtag.
+        """
+
+        self.endData()
+
+        if (self.parse_only and len(self.tagStack) <= 1
+            and (self.parse_only.text
+                 or not self.parse_only.search_tag(name, attrs))):
+            return None
+
+        # Tag(...) always yields an instance, so it is linked in unconditionally.
+        tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
+                  self.currentTag, self._most_recent_element)
+        if self._most_recent_element:
+            self._most_recent_element.next_element = tag
+        self._most_recent_element = tag
+        self.pushTag(tag)
+        return tag
+
+    def handle_endtag(self, name, nsprefix=None):
+        #print "End tag: " + name
+        self.endData()
+        self._popToTag(name, nsprefix)
+
+    def handle_data(self, data):
+        self.current_data.append(data)
+
+    def decode(self, pretty_print=False,
+               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               formatter="minimal"):
+        """Returns Tag.decode()'s rendering of this document; for XML documents it
+        is preceded by an XML declaration (no encoding if eventual_encoding is None)."""
+
+        if self.is_xml:
+            # Print the XML declaration
+            encoding_part = ''
+            if eventual_encoding is not None:
+                encoding_part = ' encoding="%s"' % eventual_encoding
+            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
+        else:
+            prefix = u''
+        if not pretty_print:
+            indent_level = None
+        else:
+            indent_level = 0
+        return prefix + super(BeautifulSoup, self).decode(
+            indent_level, eventual_encoding, formatter)
+
+# Alias to make it easier to type import: 'from bs4 import _soup'
+_s = BeautifulSoup
+_soup = BeautifulSoup
+
+class BeautifulStoneSoup(BeautifulSoup):
+    """Deprecated interface to an XML parser."""
+
+    def __init__(self, *args, **kwargs):
+        kwargs['features'] = 'xml'
+        warnings.warn(
+            'The BeautifulStoneSoup class is deprecated. Instead of using '
+            'it, pass features="xml" into the BeautifulSoup constructor.')
+        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
+
+
+class StopParsing(Exception):
+    pass
+
+class FeatureNotFound(ValueError):
+    pass
+
+
+#By default, act as an HTML pretty-printer.
+if __name__ == '__main__':
+    import sys
+    soup = BeautifulSoup(sys.stdin)
+    print soup.prettify()
diff --git a/src/bs4/builder/__init__.py b/src/bs4/builder/__init__.py
new file mode 100644
index 0000000..601979b
--- /dev/null
+++ b/src/bs4/builder/__init__.py
@@ -0,0 +1,328 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from collections import defaultdict
+import itertools
+import sys
+from bs4.element import (
+    CharsetMetaAttributeValue,
+    ContentMetaAttributeValue,
+    HTMLAwareEntitySubstitution,
+    whitespace_re
+    )
+
+__all__ = [
+    'HTMLTreeBuilder',
+    'SAXTreeBuilder',
+    'TreeBuilder',
+    'TreeBuilderRegistry',
+    ]
+
+# Some useful features for a TreeBuilder to have.
+FAST = 'fast'
+PERMISSIVE = 'permissive'
+STRICT = 'strict'
+XML = 'xml'
+HTML = 'html'
+HTML_5 = 'html5'
+
+
+class TreeBuilderRegistry(object):
+    """Keeps track of tree builder classes, indexed by feature."""
+
+    def __init__(self):
+        self.builders_for_feature = defaultdict(list)
+        self.builders = []
+
+    def register(self, treebuilder_class):
+        """Register a treebuilder based on its advertised features."""
+        # Newest registrations go to the front of every list.
+        for feature in treebuilder_class.features:
+            self.builders_for_feature[feature].insert(0, treebuilder_class)
+        self.builders.insert(0, treebuilder_class)
+
+    def lookup(self, *features):
+        """Return the first-ranked builder class offering every requested
+        feature that the registry knows about, or None if there is none.
+        Features that no registered builder advertises are skipped."""
+        if not self.builders:
+            # There are no builders at all.
+            return None
+
+        if not features:
+            # They didn't ask for any features. Give them the most
+            # recently registered builder.
+            return self.builders[0]
+
+        # `ranked` is the builder list of the first recognized feature,
+        # in registry order; `surviving` is narrowed by each later one.
+        ranked = None
+        surviving = None
+        for feature in features:
+            matches = self.builders_for_feature.get(feature, [])
+            if not matches:
+                continue
+            if ranked is None:
+                ranked = matches
+                surviving = set(matches)
+            else:
+                surviving = surviving.intersection(matches)
+
+        if surviving is None:
+            # None of the requested features is known to the registry.
+            return None
+        # Return the first ranked builder that survived every filter.
+        for builder_class in ranked:
+            if builder_class in surviving:
+                return builder_class
+        return None
+
+# The BeautifulSoup class will take feature lists from developers and use them
+# to look up builders in this registry.
+builder_registry = TreeBuilderRegistry()
+
+class TreeBuilder(object):
+    """Turn a document into a Beautiful Soup object tree."""
+
+    NAME = "[Unknown tree builder]"
+    ALTERNATE_NAMES = []
+    features = []
+
+    is_xml = False
+    picklable = False
+    preserve_whitespace_tags = set()
+    empty_element_tags = None # A tag will be considered an empty-element
+                              # tag when and only when it has no contents.
+
+    # A value for these tag/attribute combinations is a space- or
+    # comma-separated list of CDATA, rather than a single CDATA.
+    cdata_list_attributes = {}
+
+
+    def __init__(self):
+        self.soup = None
+
+    def reset(self):
+        pass
+
+    def can_be_empty_element(self, tag_name):
+        """Might a tag with this name be an empty-element tag?
+
+        The final markup may or may not actually present this tag as
+        self-closing.
+
+        For instance: an HTMLBuilder does not consider a <p> tag to be
+        an empty-element tag (it's not in
+        HTMLBuilder.empty_element_tags). This means an empty <p> tag
+        will be presented as "<p></p>", not "<p />".
+
+        The default implementation has no opinion about which tags are
+        empty-element tags, so a tag will be presented as an
+        empty-element tag if and only if it has no contents.
+        "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
+        be left alone.
+        """
+        if self.empty_element_tags is None:
+            return True
+        return tag_name in self.empty_element_tags
+
+    def feed(self, markup):
+        raise NotImplementedError()
+
+    def prepare_markup(self, markup, user_specified_encoding=None,
+                       document_declared_encoding=None, exclude_encodings=None):
+        yield (markup, None, None, False)  # one candidate, no encoding info
+
+    def test_fragment_to_document(self, fragment):
+        """Wrap an HTML fragment to make it look like a document.
+
+        Different parsers do this differently. For instance, lxml
+        introduces an empty <head> tag, and html5lib
+        doesn't. Abstracting this away lets us write simple tests
+        which run HTML fragments through the parser and compare the
+        results against other HTML fragments.
+
+        This method should not be used outside of tests.
+        """
+        return fragment
+
+    def set_up_substitutions(self, tag):
+        return False
+
+    def _replace_cdata_list_attribute_values(self, tag_name, attrs):
+        """Replaces class="foo bar" with class=["foo", "bar"]
+
+        Modifies its input in place.
+        """
+        if not attrs:
+            return attrs
+        if self.cdata_list_attributes:
+            universal = self.cdata_list_attributes.get('*', [])
+            tag_specific = self.cdata_list_attributes.get(
+                tag_name.lower(), None)
+            for attr in attrs.keys():
+                if attr in universal or (tag_specific and attr in tag_specific):
+                    # We have a "class"-type attribute whose string
+                    # value is a whitespace-separated list of
+                    # values. Split it into a list.
+                    value = attrs[attr]
+                    if isinstance(value, basestring):
+                        values = whitespace_re.split(value)
+                    else:
+                        # html5lib sometimes calls setAttributes twice
+                        # for the same tag when rearranging the parse
+                        # tree. On the second call the attribute value
+                        # here is already a list.  If this happens,
+                        # leave the value alone rather than trying to
+                        # split it again.
+                        values = value
+                    attrs[attr] = values
+        return attrs
+
+class SAXTreeBuilder(TreeBuilder):
+    """A Beautiful Soup treebuilder that listens for SAX events."""
+
+    def feed(self, markup):
+        raise NotImplementedError()
+
+    def close(self):
+        pass
+
+    def startElement(self, name, attrs):
+        # Assumes attribute keys are (namespace, name) pairs - TODO confirm.
+        attrs = dict((key[1], value) for key, value in list(attrs.items()))
+        self.soup.handle_starttag(name, None, None, attrs)
+
+    def endElement(self, name):
+        #print "End %s" % name
+        self.soup.handle_endtag(name)
+
+    def startElementNS(self, nsTuple, nodeName, attrs):
+        # Throw away (ns, nodeName) for now.
+        self.startElement(nodeName, attrs)
+
+    def endElementNS(self, nsTuple, nodeName):
+        # Throw away (ns, nodeName) for now.
+        self.endElement(nodeName)
+        #handler.endElementNS((ns, node.nodeName), node.nodeName)
+
+    def startPrefixMapping(self, prefix, nodeValue):
+        # Ignore the prefix for now.
+        pass
+
+    def endPrefixMapping(self, prefix):
+        # Ignore the prefix for now.
+        # handler.endPrefixMapping(prefix)
+        pass
+
+    def characters(self, content):
+        self.soup.handle_data(content)
+
+    def startDocument(self):
+        pass
+
+    def endDocument(self):
+        pass
+
+
+class HTMLTreeBuilder(TreeBuilder):
+    """This TreeBuilder knows facts about HTML.
+
+    Such as which tags are empty-element tags.
+    """
+
+    preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
+    empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
+                              'spacer', 'link', 'frame', 'base'])
+
+    # The HTML standard defines these attributes as containing a
+    # space-separated list of values, not a single value. That is,
+    # class="foo bar" means that the 'class' attribute has two values,
+    # 'foo' and 'bar', not the single value 'foo bar'.  When we
+    # encounter one of these attributes, we will parse its value into
+    # a list of values if possible. Upon output, the list will be
+    # converted back into a string.
+    cdata_list_attributes = {
+        "*" : ['class', 'accesskey', 'dropzone'],
+        "a" : ['rel', 'rev'],
+        "link" :  ['rel', 'rev'],
+        "td" : ["headers"],
+        "th" : ["headers"],
+        "td" : ["headers"],
+        "form" : ["accept-charset"],
+        "object" : ["archive"],
+
+        # These are HTML5 specific, as are *.accesskey and *.dropzone above.
+        "area" : ["rel"],
+        "icon" : ["sizes"],
+        "iframe" : ["sandbox"],
+        "output" : ["for"],
+        }
+
+    def set_up_substitutions(self, tag):
+        # We are only interested in <meta> tags
+        if tag.name != 'meta':
+            return False
+
+        http_equiv = tag.get('http-equiv')
+        content = tag.get('content')
+        charset = tag.get('charset')
+
+        # We are interested in <meta> tags that say what encoding the
+        # document was originally in. This means HTML 5-style <meta>
+        # tags that provide the "charset" attribute. It also means
+        # HTML 4-style <meta> tags that provide the "content"
+        # attribute and have "http-equiv" set to "content-type".
+        #
+        # In both cases we will replace the value of the appropriate
+        # attribute with a standin object that can take on any
+        # encoding.
+        meta_encoding = None
+        if charset is not None:
+            # HTML 5 style:
+            # <meta charset="utf8">
+            meta_encoding = charset
+            tag['charset'] = CharsetMetaAttributeValue(charset)
+
+        elif (content is not None and http_equiv is not None
+              and http_equiv.lower() == 'content-type'):
+            # HTML 4 style:
+            # <meta http-equiv="content-type" content="text/html; charset=utf8">
+            tag['content'] = ContentMetaAttributeValue(content)
+
+        return (meta_encoding is not None)
+
+def register_treebuilders_from(module):
+    """Copy TreeBuilders from the given module into this module."""
+    # I'm fairly sure this is not the best way to do this.
+    this_module = sys.modules['bs4.builder']
+    for name in module.__all__:
+        obj = getattr(module, name)
+
+        if issubclass(obj, TreeBuilder):
+            setattr(this_module, name, obj)
+            this_module.__all__.append(name)
+            # Register the builder while we're at it.
+            this_module.builder_registry.register(obj)
+
+class ParserRejectedMarkup(Exception):
+    pass
+
+# Builders are registered in reverse order of priority, so that custom
+# builder registrations will take precedence. In general, we want lxml
+# to take precedence over html5lib, because it's faster. And we only
+# want to use HTMLParser as a last resort.
+from . import _htmlparser
+register_treebuilders_from(_htmlparser)
+try:
+    from . import _html5lib
+    register_treebuilders_from(_html5lib)
+except ImportError:
+    # They don't have html5lib installed.
+    pass
+try:
+    from . import _lxml
+    register_treebuilders_from(_lxml)
+except ImportError:
+    # They don't have lxml installed.
+    pass
diff --git a/src/bs4/builder/_html5lib.py b/src/bs4/builder/_html5lib.py
new file mode 100644
index 0000000..5f54893
--- /dev/null
+++ b/src/bs4/builder/_html5lib.py
@@ -0,0 +1,426 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+__all__ = [
+    'HTML5TreeBuilder',
+    ]
+
+import warnings
+import re
+from bs4.builder import (
+    PERMISSIVE,
+    HTML,
+    HTML_5,
+    HTMLTreeBuilder,
+    )
+from bs4.element import (
+    NamespacedAttribute,
+    whitespace_re,
+)
+import html5lib
+from html5lib.constants import (
+    namespaces,
+    prefixes,
+    )
+from bs4.element import (
+    Comment,
+    Doctype,
+    NavigableString,
+    Tag,
+    )
+
+try:
+    # Pre-0.99999999
+    from html5lib.treebuilders import _base as treebuilder_base
+    new_html5lib = False
+except ImportError, e:
+    # 0.99999999 and up
+    from html5lib.treebuilders import base as treebuilder_base
+    new_html5lib = True
+
+class HTML5TreeBuilder(HTMLTreeBuilder):
+    """Use html5lib to build a tree."""
+
+    NAME = "html5lib"
+
+    features = [NAME, PERMISSIVE, HTML_5, HTML]
+
+    def prepare_markup(self, markup, user_specified_encoding,
+                       document_declared_encoding=None, exclude_encodings=None):
+        # Store the user-specified encoding for use later on.
+        self.user_specified_encoding = user_specified_encoding
+
+        # document_declared_encoding and exclude_encodings aren't used
+        # ATM because the html5lib TreeBuilder doesn't use
+        # UnicodeDammit.
+        if exclude_encodings:
+            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+        yield (markup, None, None, False)
+
+    # These methods are defined by Beautiful Soup.
+    def feed(self, markup):
+        if self.soup.parse_only is not None:
+            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
+        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
+
+        extra_kwargs = dict()
+        if not isinstance(markup, unicode):
+            if new_html5lib:
+                extra_kwargs['override_encoding'] = self.user_specified_encoding
+            else:
+                extra_kwargs['encoding'] = self.user_specified_encoding
+        doc = parser.parse(markup, **extra_kwargs)
+
+        # Set the character encoding detected by the tokenizer.
+        if isinstance(markup, unicode):
+            # We need to special-case this because html5lib sets
+            # charEncoding to UTF-8 if it gets Unicode input.
+            doc.original_encoding = None
+        else:
+            original_encoding = parser.tokenizer.stream.charEncoding[0]
+            if not isinstance(original_encoding, basestring):
+                # In 0.99999999 and up, the encoding is an html5lib
+                # Encoding object. We want to use a string for compatibility
+                # with other tree builders.
+                original_encoding = original_encoding.name
+            doc.original_encoding = original_encoding
+
+    def create_treebuilder(self, namespaceHTMLElements):
+        self.underlying_builder = TreeBuilderForHtml5lib(
+            namespaceHTMLElements, self.soup)
+        return self.underlying_builder
+
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<html><head></head><body>%s</body></html>' % fragment
+
+
+class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
+    """html5lib tree builder that constructs a Beautiful Soup tree."""
+
+    def __init__(self, namespaceHTMLElements, soup=None):
+        """Store `soup`, or create an empty BeautifulSoup if none is given."""
+        if soup:
+            self.soup = soup
+        else:
+            # Imported here rather than at module level.
+            from bs4 import BeautifulSoup
+            self.soup = BeautifulSoup("", "html.parser")
+        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
+
+    def documentClass(self):
+        """Reset the soup and return an Element wrapping it."""
+        self.soup.reset()
+        return Element(self.soup, self.soup, None)
+
+    def insertDoctype(self, token):
+        """Build a Doctype from the token's name and ids and add it to the soup."""
+        name = token["name"]
+        publicId = token["publicId"]
+        systemId = token["systemId"]
+
+        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
+        self.soup.object_was_parsed(doctype)
+
+    def elementClass(self, name, namespace):
+        """Create a new tag in the soup and return an Element wrapping it."""
+        tag = self.soup.new_tag(name, namespace)
+        return Element(tag, self.soup, namespace)
+
+    def commentClass(self, data):
+        """Return a TextNode wrapping a Comment built from `data`."""
+        return TextNode(Comment(data), self.soup)
+
+    def fragmentClass(self):
+        """Replace self.soup with a fresh soup named "[document_fragment]"
+        and return an Element wrapping it."""
+        from bs4 import BeautifulSoup
+        self.soup = BeautifulSoup("", "html.parser")
+        self.soup.name = "[document_fragment]"
+        return Element(self.soup, self.soup, None)
+
+    def appendChild(self, node):
+        """Append the Beautiful Soup element wrapped by `node` to the soup."""
+        # XXX This code is not covered by the BS4 tests.
+        self.soup.append(node.element)
+
+    def getDocument(self):
+        """Return the soup this builder writes into."""
+        return self.soup
+
+    def getFragment(self):
+        """Return the `.element` of whatever the base class getFragment() returns."""
+        return treebuilder_base.TreeBuilder.getFragment(self).element
+
+    def testSerializer(self, element):
+        """Return a multi-line text dump of the tree rooted at `element`.
+
+        Each output line starts with "|" followed by indentation; child
+        nodes are indented two more spaces than their parent.
+        """
+        from bs4 import BeautifulSoup
+        rv = []
+        # Splits a doctype string into name, public id and system id.
+        doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
+
+        def serializeElement(element, indent=0):
+            """Append the lines for `element` and its descendants to rv."""
+            if isinstance(element, BeautifulSoup):
+                # Nothing is emitted here; control falls through to the
+                # checks below, which are a separate if/elif chain.
+                pass
+            if isinstance(element, Doctype):
+                m = doctype_re.match(element)
+                if m:
+                    name = m.group(1)
+                    # lastindex > 1 means a PUBLIC or SYSTEM id group matched.
+                    if m.lastindex > 1:
+                        publicId = m.group(2) or ""
+                        systemId = m.group(3) or m.group(4) or ""
+                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
+                                  (' ' * indent, name, publicId, systemId))
+                    else:
+                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, name))
+                else:
+                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
+            elif isinstance(element, Comment):
+                rv.append("|%s<!-- %s -->" % (' ' * indent, element))
+            elif isinstance(element, NavigableString):
+                rv.append("|%s\"%s\"" % (' ' * indent, element))
+            else:
+                # Anything else is treated as a tag.
+                if element.namespace:
+                    name = "%s %s" % (prefixes[element.namespace],
+                                      element.name)
+                else:
+                    name = element.name
+                rv.append("|%s<%s>" % (' ' * indent, name))
+                if element.attrs:
+                    attributes = []
+                    for name, value in element.attrs.items():
+                        if isinstance(name, NamespacedAttribute):
+                            name = "%s %s" % (prefixes[name.namespace], name.name)
+                        # List values are joined with single spaces.
+                        if isinstance(value, list):
+                            value = " ".join(value)
+                        attributes.append((name, value))
+
+                    # Attributes are emitted in sorted order.
+                    for name, value in sorted(attributes):
+                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
+                indent += 2
+                for child in element.children:
+                    serializeElement(child, indent)
+        serializeElement(element, 0)
+
+        return "\n".join(rv)
+
+class AttrList(object):
+    def __init__(self, element):
+        self.element = element
+        self.attrs = dict(self.element.attrs)
+    def __iter__(self):
+        return list(self.attrs.items()).__iter__()
+    def __setitem__(self, name, value):
+        # If this attribute is a multi-valued attribute for this element,
+        # turn its value into a list.
+        list_attr = HTML5TreeBuilder.cdata_list_attributes
+        if (name in list_attr['*']
+            or (self.element.name in list_attr
+                and name in list_attr[self.element.name])):
+            # A node that is being cloned may have already undergone
+            # this procedure.
+            if not isinstance(value, list):
+                value = whitespace_re.split(value)
+        self.element[name] = value
+    def items(self):
+        return list(self.attrs.items())
+    def keys(self):
+        return list(self.attrs.keys())
+    def __len__(self):
+        return len(self.attrs)
+    def __getitem__(self, name):
+        return self.attrs[name]
+    def __contains__(self, name):
+        return name in list(self.attrs.keys())
+
+
+class Element(treebuilder_base.Node):
+    def __init__(self, element, soup, namespace):
+        treebuilder_base.Node.__init__(self, element.name)
+        self.element = element
+        self.soup = soup
+        self.namespace = namespace
+
+    def appendChild(self, node):
+        string_child = child = None
+        if isinstance(node, basestring):
+            # Some other piece of code decided to pass in a string
+            # instead of creating a TextElement object to contain the
+            # string.
+            string_child = child = node
+        elif isinstance(node, Tag):
+            # Some other piece of code decided to pass in a Tag
+            # instead of creating an Element object to contain the
+            # Tag.
+            child = node
+        elif node.element.__class__ == NavigableString:
+            string_child = child = node.element
+            node.parent = self
+        else:
+            child = node.element
+            node.parent = self
+
+        if not isinstance(child, basestring) and child.parent is not None:
+            node.element.extract()
+
+        if (string_child and self.element.contents
+            and self.element.contents[-1].__class__ == NavigableString):
+            # We are appending a string onto another string.
+            # TODO This has O(n^2) performance, for input like
+            # "a</a>a</a>a</a>..."
+            old_element = self.element.contents[-1]
+            new_element = self.soup.new_string(old_element + string_child)
+            old_element.replace_with(new_element)
+            self.soup._most_recent_element = new_element
+        else:
+            if isinstance(node, basestring):
+                # Create a brand new NavigableString from this string.
+                child = self.soup.new_string(node)
+
+            # Tell Beautiful Soup to act as if it parsed this element
+            # immediately after the parent's last descendant. (Or
+            # immediately after the parent, if it has no children.)
+            if self.element.contents:
+                most_recent_element = self.element._last_descendant(False)
+            elif self.element.next_element is not None:
+                # Something from further ahead in the parse tree is
+                # being inserted into this earlier element. This is
+                # very annoying because it means an expensive search
+                # for the last element in the tree.
+                most_recent_element = self.soup._last_descendant()
+            else:
+                most_recent_element = self.element
+
+            self.soup.object_was_parsed(
+                child, parent=self.element,
+                most_recent_element=most_recent_element)
+
+    def getAttributes(self):
+        if isinstance(self.element, Comment):
+            return {}
+        return AttrList(self.element)
+
+    def setAttributes(self, attributes):
+
+        if attributes is not None and len(attributes) > 0:
+
+            converted_attributes = []
+            for name, value in list(attributes.items()):
+                if isinstance(name, tuple):
+                    new_name = NamespacedAttribute(*name)
+                    del attributes[name]
+                    attributes[new_name] = value
+
+            self.soup.builder._replace_cdata_list_attribute_values(
+                self.name, attributes)
+            for name, value in attributes.items():
+                self.element[name] = value
+
+            # The attributes may contain variables that need substitution.
+            # Call set_up_substitutions manually.
+            #
+            # The Tag constructor called this method when the Tag was created,
+            # but we just set/changed the attributes, so call it again.
+            self.soup.builder.set_up_substitutions(self.element)
+    attributes = property(getAttributes, setAttributes)
+
+    def insertText(self, data, insertBefore=None):
+        text = TextNode(self.soup.new_string(data), self.soup)
+        if insertBefore:
+            self.insertBefore(text, insertBefore)
+        else:
+            self.appendChild(text)
+
+    def insertBefore(self, node, refNode):
+        index = self.element.index(refNode.element)
+        if (node.element.__class__ == NavigableString and self.element.contents
+            and self.element.contents[index-1].__class__ == NavigableString):
+            # (See comments in appendChild)
+            old_node = self.element.contents[index-1]
+            new_str = self.soup.new_string(old_node + node.element)
+            old_node.replace_with(new_str)
+        else:
+            self.element.insert(index, node.element)
+            node.parent = self
+
+    def removeChild(self, node):
+        node.element.extract()
+
+    def reparentChildren(self, new_parent):
+        """Move all of this tag's children into another tag."""
+        # print "MOVE", self.element.contents
+        # print "FROM", self.element
+        # print "TO", new_parent.element
+
+        element = self.element
+        new_parent_element = new_parent.element
+        # Determine what this tag's next_element will be once all the children
+        # are removed.
+        final_next_element = element.next_sibling
+
+        new_parents_last_descendant = new_parent_element._last_descendant(False, False)
+        if len(new_parent_element.contents) > 0:
+            # The new parent already contains children. We will be
+            # appending this tag's children to the end.
+            new_parents_last_child = new_parent_element.contents[-1]
+            new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
+        else:
+            # The new parent contains no children.
+            new_parents_last_child = None
+            new_parents_last_descendant_next_element = new_parent_element.next_element
+
+        to_append = element.contents
+        if len(to_append) > 0:
+            # Set the first child's previous_element and previous_sibling
+            # to elements within the new parent
+            first_child = to_append[0]
+            if new_parents_last_descendant:
+                first_child.previous_element = new_parents_last_descendant
+            else:
+                first_child.previous_element = new_parent_element
+            first_child.previous_sibling = new_parents_last_child
+            if new_parents_last_descendant:
+                new_parents_last_descendant.next_element = first_child
+            else:
+                new_parent_element.next_element = first_child
+            if new_parents_last_child:
+                new_parents_last_child.next_sibling = first_child
+
+            # Find the very last element being moved. It is now the
+            # parent's last descendant. It has no .next_sibling and
+            # its .next_element is whatever the previous last
+            # descendant had.
+            last_childs_last_descendant = to_append[-1]._last_descendant(False, True)
+
+            last_childs_last_descendant.next_element = new_parents_last_descendant_next_element
+            if new_parents_last_descendant_next_element:
+                # TODO: This code has no test coverage and I'm not sure
+                # how to get html5lib to go through this path, but it's
+                # just the other side of the previous line.
+                new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant
+            last_childs_last_descendant.next_sibling = None
+
+        for child in to_append:
+            child.parent = new_parent_element
+            new_parent_element.contents.append(child)
+
+        # Now that this element has no children, change its .next_element.
+        element.contents = []
+        element.next_element = final_next_element
+
+        # print "DONE WITH MOVE"
+        # print "FROM", self.element
+        # print "TO", new_parent_element
+
+    def cloneNode(self):
+        tag = self.soup.new_tag(self.element.name, self.namespace)
+        node = Element(tag, self.soup, self.namespace)
+        for key,value in self.attributes:
+            node.attributes[key] = value
+        return node
+
+    def hasContent(self):
+        return self.element.contents
+
+    def getNameTuple(self):
+        if self.namespace == None:
+            return namespaces["html"], self.name
+        else:
+            return self.namespace, self.name
+
+    nameTuple = property(getNameTuple)
+
+class TextNode(Element):
+    """Element variant constructed without a namespace; it cannot be cloned."""
+    def __init__(self, element, soup):
+        # The base Node is initialised with None as its name.
+        treebuilder_base.Node.__init__(self, None)
+        self.element = element
+        self.soup = soup
+
+    def cloneNode(self):
+        """Cloning is not supported for this node type."""
+        raise NotImplementedError
diff --git a/src/bs4/builder/_htmlparser.py b/src/bs4/builder/_htmlparser.py
new file mode 100644
index 0000000..823ca15
--- /dev/null
+++ b/src/bs4/builder/_htmlparser.py
@@ -0,0 +1,265 @@
+"""Use the HTMLParser library to parse HTML files that aren't too bad."""
+
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+__all__ = [
+    'HTMLParserTreeBuilder',
+    ]
+
+from HTMLParser import HTMLParser
+
+try:
+    from HTMLParser import HTMLParseError
+except ImportError, e:
+    # HTMLParseError is removed in Python 3.5. Since it can never be
+    # thrown in 3.5, we can just define our own class as a placeholder.
+    class HTMLParseError(Exception):
+        pass
+
+import sys
+import warnings
+
+# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
+# argument, which we'd like to set to False. Unfortunately,
+# http://bugs.python.org/issue13273 makes strict=True a better bet
+# before Python 3.2.3.
+#
+# At the end of this file, we monkeypatch HTMLParser so that
+# strict=True works well on Python 3.2.2.
+major, minor, release = sys.version_info[:3]
+# True only on Python 3.2.x with a micro version of 3 or higher.
+CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
+# True only on Python 3.3.x.
+CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
+# True on Python 3.4 and later 3.x releases.
+CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
+
+
+from bs4.element import (
+    CData,
+    Comment,
+    Declaration,
+    Doctype,
+    ProcessingInstruction,
+    )
+from bs4.dammit import EntitySubstitution, UnicodeDammit
+
+from bs4.builder import (
+    HTML,
+    HTMLTreeBuilder,
+    STRICT,
+    )
+
+
+HTMLPARSER = 'html.parser'
+
+class BeautifulSoupHTMLParser(HTMLParser):
+    def handle_starttag(self, name, attrs):
+        # XXX namespace
+        attr_dict = {}
+        for key, value in attrs:
+            # Change None attribute values to the empty string
+            # for consistency with the other tree builders.
+            if value is None:
+                value = ''
+            attr_dict[key] = value
+            attrvalue = '""'
+        self.soup.handle_starttag(name, None, None, attr_dict)
+
+    def handle_endtag(self, name):
+        self.soup.handle_endtag(name)
+
+    def handle_data(self, data):
+        self.soup.handle_data(data)
+
+    def handle_charref(self, name):
+        # XXX workaround for a bug in HTMLParser. Remove this once
+        # it's fixed in all supported versions.
+        # http://bugs.python.org/issue13633
+        if name.startswith('x'):
+            real_name = int(name.lstrip('x'), 16)
+        elif name.startswith('X'):
+            real_name = int(name.lstrip('X'), 16)
+        else:
+            real_name = int(name)
+
+        try:
+            data = unichr(real_name)
+        except (ValueError, OverflowError), e:
+            data = u"\N{REPLACEMENT CHARACTER}"
+
+        self.handle_data(data)
+
+    def handle_entityref(self, name):
+        character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
+        if character is not None:
+            data = character
+        else:
+            data = "&%s;" % name
+        self.handle_data(data)
+
+    def handle_comment(self, data):
+        self.soup.endData()
+        self.soup.handle_data(data)
+        self.soup.endData(Comment)
+
+    def handle_decl(self, data):
+        self.soup.endData()
+        if data.startswith("DOCTYPE "):
+            data = data[len("DOCTYPE "):]
+        elif data == 'DOCTYPE':
+            # i.e. "<!DOCTYPE>"
+            data = ''
+        self.soup.handle_data(data)
+        self.soup.endData(Doctype)
+
+    def unknown_decl(self, data):
+        if data.upper().startswith('CDATA['):
+            cls = CData
+            data = data[len('CDATA['):]
+        else:
+            cls = Declaration
+        self.soup.endData()
+        self.soup.handle_data(data)
+        self.soup.endData(cls)
+
+    def handle_pi(self, data):
+        self.soup.endData()
+        self.soup.handle_data(data)
+        self.soup.endData(ProcessingInstruction)
+
+
+class HTMLParserTreeBuilder(HTMLTreeBuilder):
+
+    is_xml = False
+    picklable = True
+    NAME = HTMLPARSER
+    features = [NAME, HTML, STRICT]
+
+    def __init__(self, *args, **kwargs):
+        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
+            kwargs['strict'] = False
+        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
+            kwargs['convert_charrefs'] = False
+        self.parser_args = (args, kwargs)
+
+    def prepare_markup(self, markup, user_specified_encoding=None,
+                       document_declared_encoding=None, exclude_encodings=None):
+        """
+        :return: A 4-tuple (markup, original encoding, encoding
+        declared within markup, whether any characters had to be
+        replaced with REPLACEMENT CHARACTER).
+        """
+        if isinstance(markup, unicode):
+            yield (markup, None, None, False)
+            return
+
+        try_encodings = [user_specified_encoding, document_declared_encoding]
+        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
+                               exclude_encodings=exclude_encodings)
+        yield (dammit.markup, dammit.original_encoding,
+               dammit.declared_html_encoding,
+               dammit.contains_replacement_characters)
+
+    def feed(self, markup):
+        args, kwargs = self.parser_args
+        parser = BeautifulSoupHTMLParser(*args, **kwargs)
+        parser.soup = self.soup
+        try:
+            parser.feed(markup)
+        except HTMLParseError, e:
+            warnings.warn(RuntimeWarning(
+                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
+            raise e
+
+# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
+# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
+# string.
+#
+# XXX This code can be removed once most Python 3 users are on 3.2.3.
+if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
+    import re
+    # Tolerant attribute matcher used by parse_starttag() below when the
+    # parser is not in strict mode.
+    attrfind_tolerant = re.compile(
+        r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
+        r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
+    HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
+
+    locatestarttagend = re.compile(r"""
+  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
+  (?:\s+                             # whitespace before attribute name
+    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
+      (?:\s*=\s*                     # value indicator
+        (?:'[^']*'                   # LITA-enclosed value
+          |\"[^\"]*\"                # LIT-enclosed value
+          |[^'\">\s]+                # bare value
+         )
+       )?
+     )
+   )*
+  \s*                                # trailing whitespace
+""", re.VERBOSE)
+    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
+
+    from html.parser import tagfind, attrfind
+
+    def parse_starttag(self, i):
+        """Parse the start tag at offset `i` of self.rawdata.
+
+        Returns the offset just past the tag, or the negative value
+        from check_for_whole_start_tag() when the tag is incomplete.
+        """
+        # NOTE(review): this function is defined outside a class body,
+        # so `self.__starttag_text` is not name-mangled here -- confirm
+        # that is what the rest of HTMLParser expects.
+        self.__starttag_text = None
+        endpos = self.check_for_whole_start_tag(i)
+        if endpos < 0:
+            return endpos
+        rawdata = self.rawdata
+        self.__starttag_text = rawdata[i:endpos]
+
+        # Now parse the data between i+1 and j into a tag and attrs
+        attrs = []
+        match = tagfind.match(rawdata, i+1)
+        assert match, 'unexpected call to parse_starttag()'
+        k = match.end()
+        self.lasttag = tag = rawdata[i+1:k].lower()
+        while k < endpos:
+            if self.strict:
+                m = attrfind.match(rawdata, k)
+            else:
+                m = attrfind_tolerant.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
+            if not rest:
+                # No "=value" part: the attribute has no value.
+                attrvalue = None
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                # Strip the matching pair of surrounding quotes.
+                attrvalue = attrvalue[1:-1]
+            if attrvalue:
+                attrvalue = self.unescape(attrvalue)
+            attrs.append((attrname.lower(), attrvalue))
+            k = m.end()
+
+        end = rawdata[k:endpos].strip()
+        if end not in (">", "/>"):
+            # Something other than the closing bracket follows the
+            # attributes. lineno and offset are computed here but not
+            # used afterwards in this function.
+            lineno, offset = self.getpos()
+            if "\n" in self.__starttag_text:
+                lineno = lineno + self.__starttag_text.count("\n")
+                offset = len(self.__starttag_text) \
+                         - self.__starttag_text.rfind("\n")
+            else:
+                offset = offset + len(self.__starttag_text)
+            if self.strict:
+                self.error("junk characters in start tag: %r"
+                           % (rawdata[k:endpos][:20],))
+            # Pass the whole would-be tag through as character data.
+            self.handle_data(rawdata[i:endpos])
+            return endpos
+        if end.endswith('/>'):
+            # XHTML-style empty tag: <span attr="value" />
+            self.handle_startendtag(tag, attrs)
+        else:
+            self.handle_starttag(tag, attrs)
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                self.set_cdata_mode(tag)
+        return endpos
+
+    def set_cdata_mode(self, elem):
+        """Record `elem` as the current CDATA element and make
+        self.interesting match only that element's closing tag."""
+        self.cdata_elem = elem.lower()
+        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+
+    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
+    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
+
+    # With the patched methods installed, the flag is switched on.
+    CONSTRUCTOR_TAKES_STRICT = True
diff --git a/src/bs4/builder/_lxml.py b/src/bs4/builder/_lxml.py
new file mode 100644
index 0000000..d2ca287
--- /dev/null
+++ b/src/bs4/builder/_lxml.py
@@ -0,0 +1,258 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+__all__ = [
+    'LXMLTreeBuilderForXML',
+    'LXMLTreeBuilder',
+    ]
+
+from io import BytesIO
+from StringIO import StringIO
+import collections
+from lxml import etree
+from bs4.element import (
+    Comment,
+    Doctype,
+    NamespacedAttribute,
+    ProcessingInstruction,
+    XMLProcessingInstruction,
+)
+from bs4.builder import (
+    FAST,
+    HTML,
+    HTMLTreeBuilder,
+    PERMISSIVE,
+    ParserRejectedMarkup,
+    TreeBuilder,
+    XML)
+from bs4.dammit import EncodingDetector
+
+LXML = 'lxml'
+
+class LXMLTreeBuilderForXML(TreeBuilder):
+    DEFAULT_PARSER_CLASS = etree.XMLParser
+
+    is_xml = True
+    processing_instruction_class = XMLProcessingInstruction
+
+    NAME = "lxml-xml"
+    ALTERNATE_NAMES = ["xml"]
+
+    # Well, it's permissive by XML parser standards.
+    features = [NAME, LXML, XML, FAST, PERMISSIVE]
+
+    CHUNK_SIZE = 512
+
+    # This namespace mapping is specified in the XML Namespace
+    # standard.
+    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
+
+    def default_parser(self, encoding):
+        # This can either return a parser object or a class, which
+        # will be instantiated with default arguments.
+        if self._default_parser is not None:
+            return self._default_parser
+        return etree.XMLParser(
+            target=self, strip_cdata=False, recover=True, encoding=encoding)
+
+    def parser_for(self, encoding):
+        # Use the default parser.
+        parser = self.default_parser(encoding)
+
+        if isinstance(parser, collections.Callable):
+            # Instantiate the parser with default arguments
+            parser = parser(target=self, strip_cdata=False, encoding=encoding)
+        return parser
+
+    def __init__(self, parser=None, empty_element_tags=None):
+        # TODO: Issue a warning if parser is present but not a
+        # callable, since that means there's no way to create new
+        # parsers for different encodings.
+        self._default_parser = parser
+        if empty_element_tags is not None:
+            self.empty_element_tags = set(empty_element_tags)
+        self.soup = None
+        self.nsmaps = [self.DEFAULT_NSMAPS]
+
+    def _getNsTag(self, tag):
+        # Split the namespace URL out of a fully-qualified lxml tag
+        # name. Copied from lxml's src/lxml/sax.py.
+        if tag[0] == '{':
+            return tuple(tag[1:].split('}', 1))
+        else:
+            return (None, tag)
+
+    def prepare_markup(self, markup, user_specified_encoding=None,
+                       exclude_encodings=None,
+                       document_declared_encoding=None):
+        """
+        :yield: A series of 4-tuples.
+         (markup, encoding, declared encoding,
+          has undergone character replacement)
+
+        Each 4-tuple represents a strategy for parsing the document.
+        """
+        # Instead of using UnicodeDammit to convert the bytestring to
+        # Unicode using different encodings, use EncodingDetector to
+        # iterate over the encodings, and tell lxml to try to parse
+        # the document as each one in turn.
+        is_html = not self.is_xml
+        if is_html:
+            self.processing_instruction_class = ProcessingInstruction
+        else:
+            self.processing_instruction_class = XMLProcessingInstruction
+
+        if isinstance(markup, unicode):
+            # We were given Unicode. Maybe lxml can parse Unicode on
+            # this system?
+            yield markup, None, document_declared_encoding, False
+
+        if isinstance(markup, unicode):
+            # No, apparently not. Convert the Unicode to UTF-8 and
+            # tell lxml to parse it as UTF-8.
+            yield (markup.encode("utf8"), "utf8",
+                   document_declared_encoding, False)
+
+        try_encodings = [user_specified_encoding, document_declared_encoding]
+        detector = EncodingDetector(
+            markup, try_encodings, is_html, exclude_encodings)
+        for encoding in detector.encodings:
+            yield (detector.markup, encoding, document_declared_encoding, False)
+
+    def feed(self, markup):
+        if isinstance(markup, bytes):
+            markup = BytesIO(markup)
+        elif isinstance(markup, unicode):
+            markup = StringIO(markup)
+
+        # Call feed() at least once, even if the markup is empty,
+        # or the parser won't be initialized.
+        data = markup.read(self.CHUNK_SIZE)
+        try:
+            self.parser = self.parser_for(self.soup.original_encoding)
+            self.parser.feed(data)
+            while len(data) != 0:
+                # Now call feed() on the rest of the data, chunk by chunk.
+                data = markup.read(self.CHUNK_SIZE)
+                if len(data) != 0:
+                    self.parser.feed(data)
+            self.parser.close()
+        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+            raise ParserRejectedMarkup(str(e))
+
+    def close(self):
+        self.nsmaps = [self.DEFAULT_NSMAPS]
+
+    def start(self, name, attrs, nsmap={}):
+        """Handle a start tag: record its namespace maps, then open it on the soup."""
+        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
+        attrs = dict(attrs)
+        # Invert each namespace map as it comes in.
+        if len(nsmap) == 0 and len(self.nsmaps) > 1:
+            # There are no new namespaces for this tag, but
+            # non-default namespaces are in play, so we need a
+            # separate tag stack to know when they end.
+            self.nsmaps.append(None)
+        elif len(nsmap) > 0:
+            # A new namespace mapping has come into play.
+            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
+            self.nsmaps.append(inverted_nsmap)
+            # Also treat the namespace mapping as a set of attributes on the
+            # tag, so we can recreate it later.
+            attrs = attrs.copy()
+            for prefix, namespace in nsmap.items():
+                attribute = NamespacedAttribute(
+                    "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
+                attrs[attribute] = namespace
+
+        # Namespaces are in play. Find any attributes that came in
+        # from lxml with namespaces attached to their names, and
+        # turn them into NamespacedAttribute objects.
+        new_attrs = {}
+        for attr, value in attrs.items():
+            namespace, attr = self._getNsTag(attr)
+            if namespace is None:
+                new_attrs[attr] = value
+            else:
+                nsprefix = self._prefix_for_namespace(namespace)
+                attr = NamespacedAttribute(nsprefix, attr, namespace)
+                new_attrs[attr] = value
+        attrs = new_attrs
+
+        namespace, name = self._getNsTag(name)
+        nsprefix = self._prefix_for_namespace(namespace)
+        self.soup.handle_starttag(name, namespace, nsprefix, attrs)
+
+    def _prefix_for_namespace(self, namespace):
+        """Find the currently active prefix for the given namespace."""
+        if namespace is None:
+            return None
+        for inverted_nsmap in reversed(self.nsmaps):
+            if inverted_nsmap is not None and namespace in inverted_nsmap:
+                return inverted_nsmap[namespace]
+        return None
+
+    def end(self, name):
+        """Handle an end tag reported by the parser.
+
+        Calls the soup's endData(), closes the tag using the prefix
+        currently mapped to its namespace, then pops one entry off
+        the namespace-map stack if anything beyond the default is on it.
+        """
+        self.soup.endData()
+        namespace, name = self._getNsTag(name)
+        nsprefix = self._prefix_for_namespace(namespace)
+        self.soup.handle_endtag(name, nsprefix)
+        if len(self.nsmaps) > 1:
+            # This tag, or one of its parents, introduced a namespace
+            # mapping, so pop it off the stack.
+            self.nsmaps.pop()
+
+    def pi(self, target, data):
+        self.soup.endData()
+        self.soup.handle_data(target + ' ' + data)
+        self.soup.endData(self.processing_instruction_class)
+
+    def data(self, content):
+        self.soup.handle_data(content)
+
+    def doctype(self, name, pubid, system):
+        self.soup.endData()
+        doctype = Doctype.for_name_and_ids(name, pubid, system)
+        self.soup.object_was_parsed(doctype)
+
+    def comment(self, content):
+        "Handle comments as Comment objects."
+        self.soup.endData()
+        self.soup.handle_data(content)
+        self.soup.endData(Comment)
+
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+
+
+class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
+
+    NAME = LXML
+    ALTERNATE_NAMES = ["lxml-html"]
+
+    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
+    is_xml = False
+    processing_instruction_class = ProcessingInstruction
+
+    def default_parser(self, encoding):
+        return etree.HTMLParser
+
+    def feed(self, markup):
+        encoding = self.soup.original_encoding
+        try:
+            self.parser = self.parser_for(encoding)
+            self.parser.feed(markup)
+            self.parser.close()
+        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+            raise ParserRejectedMarkup(str(e))
+
+
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<html><body>%s</body></html>' % fragment
diff --git a/src/bs4/dammit.py b/src/bs4/dammit.py
new file mode 100644
index 0000000..7965565
--- /dev/null
+++ b/src/bs4/dammit.py
@@ -0,0 +1,842 @@
+# -*- coding: utf-8 -*-
+"""Beautiful Soup bonus library: Unicode, Dammit
+
+This library converts a bytestream to Unicode through any means
+necessary. It is heavily based on code from Mark Pilgrim's Universal
+Feed Parser. It works best on XML and HTML, but it does not rewrite the
+XML or HTML to reflect a new encoding; that's the tree builder's job.
+"""
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+__license__ = "MIT"
+
+import codecs
+from htmlentitydefs import codepoint2name
+import re
+import logging
+import string
+
+# Import a library to autodetect character encodings.
+chardet_type = None
+try:
+    # First try the fast C implementation.
+    #  PyPI package: cchardet
+    import cchardet
+    def chardet_dammit(s):
+        return cchardet.detect(s)['encoding']
+except ImportError:
+    try:
+        # Fall back to the pure Python implementation
+        #  Debian package: python-chardet
+        #  PyPI package: chardet
+        import chardet
+        def chardet_dammit(s):
+            return chardet.detect(s)['encoding']
+        #import chardet.constants
+        #chardet.constants._debug = 1
+    except ImportError:
+        # No chardet available.
+        def chardet_dammit(s):
+            return None
+
+# Available from http://cjkpython.i18n.org/.
+try:
+    import iconv_codec
+except ImportError:
+    pass
+
+xml_encoding_re = re.compile(
+    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+html_meta_re = re.compile(
+    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+
+class EntitySubstitution(object):
+
+    """Substitute XML or HTML entities for the corresponding characters."""
+
+    def _populate_class_variables():
+        lookup = {}
+        reverse_lookup = {}
+        characters_for_re = []
+        for codepoint, name in list(codepoint2name.items()):
+            character = unichr(codepoint)
+            if codepoint != 34:
+                # There's no point in turning the quotation mark into
+                # &quot;, unless it happens within an attribute value, which
+                # is handled elsewhere.
+                characters_for_re.append(character)
+                lookup[character] = name
+            # But we do want to turn &quot; into the quotation mark.
+            reverse_lookup[name] = character
+        re_definition = "[%s]" % "".join(characters_for_re)
+        return lookup, reverse_lookup, re.compile(re_definition)
+    (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
+     CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
+
+    CHARACTER_TO_XML_ENTITY = {
+        "'": "apos",
+        '"': "quot",
+        "&": "amp",
+        "<": "lt",
+        ">": "gt",
+        }
+
+    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
+                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+                                           ")")
+
+    AMPERSAND_OR_BRACKET = re.compile("([<>&])")
+
+    @classmethod
+    def _substitute_html_entity(cls, matchobj):
+        entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
+        return "&%s;" % entity
+
+    @classmethod
+    def _substitute_xml_entity(cls, matchobj):
+        """Used with a regular expression to substitute the
+        appropriate XML entity for an XML special character."""
+        entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
+        return "&%s;" % entity
+
+    @classmethod
+    def quoted_attribute_value(self, value):
+        """Make a value into a quoted XML attribute, possibly escaping it.
+
+         Most strings will be quoted using double quotes.
+
+          Bob's Bar -> "Bob's Bar"
+
+         If a string contains double quotes, it will be quoted using
+         single quotes.
+
+          Welcome to "my bar" -> 'Welcome to "my bar"'
+
+         If a string contains both single and double quotes, the
+         double quotes will be escaped, and the string will be quoted
+         using double quotes.
+
+          Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's Bar&quot;"
+        """
+        quote_with = '"'
+        if '"' in value:
+            if "'" in value:
+                # The string contains both single and double
+                # quotes.  Turn the double quotes into
+                # entities. We quote the double quotes rather than
+                # the single quotes because the entity name is
+                # "&quot;" whether this is HTML or XML.  If we
+                # quoted the single quotes, we'd have to decide
+                # between &apos; and &squot;.
+                replace_with = "&quot;"
+                value = value.replace('"', replace_with)
+            else:
+                # There are double quotes but no single quotes.
+                # We can use single quotes to quote the attribute.
+                quote_with = "'"
+        return quote_with + value + quote_with
+
+    @classmethod
+    def substitute_xml(cls, value, make_quoted_attribute=False):
+        """Substitute XML entities for special XML characters.
+
+        :param value: A string to be substituted. The less-than sign
+          will become &lt;, the greater-than sign will become &gt;,
+          and any ampersands will become &amp;. If you want ampersands
+          that appear to be part of an entity definition to be left
+          alone, use substitute_xml_containing_entities() instead.
+
+        :param make_quoted_attribute: If True, then the string will be
+         quoted, as befits an attribute value.
+        """
+        # Escape angle brackets and ampersands.
+        value = cls.AMPERSAND_OR_BRACKET.sub(
+            cls._substitute_xml_entity, value)
+
+        if make_quoted_attribute:
+            value = cls.quoted_attribute_value(value)
+        return value
+
+    @classmethod
+    def substitute_xml_containing_entities(
+        cls, value, make_quoted_attribute=False):
+        """Substitute XML entities for special XML characters.
+
+        :param value: A string to be substituted. The less-than sign will
+          become &lt;, the greater-than sign will become &gt;, and any
+          ampersands that are not part of an entity definition will
+          become &amp;.
+
+        :param make_quoted_attribute: If True, then the string will be
+         quoted, as befits an attribute value.
+        """
+        # Escape angle brackets, and ampersands that aren't part of
+        # entities.
+        value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
+            cls._substitute_xml_entity, value)
+
+        if make_quoted_attribute:
+            value = cls.quoted_attribute_value(value)
+        return value
+
+    @classmethod
+    def substitute_html(cls, s):
+        """Replace certain Unicode characters with named HTML entities.
+
+        This differs from data.encode(encoding, 'xmlcharrefreplace')
+        in that the goal is to make the result more readable (to those
+        with ASCII displays) rather than to recover from
+        errors. There's absolutely nothing wrong with a UTF-8 string
+        containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
+        character with "&eacute;" will make it more readable to some
+        people.
+        """
+        return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
+            cls._substitute_html_entity, s)
+
+
+class EncodingDetector:
+    """Suggests a number of possible encodings for a bytestring.
+
+    Order of precedence:
+
+    1. Encodings you specifically tell EncodingDetector to try first
+    (the override_encodings argument to the constructor).
+
+    2. An encoding declared within the bytestring itself, either in an
+    XML declaration (if the bytestring is to be interpreted as an XML
+    document), or in a <meta> tag (if the bytestring is to be
+    interpreted as an HTML document.)
+
+    3. An encoding detected through textual analysis by chardet,
+    cchardet, or a similar external library.
+
+    4. UTF-8.
+
+    5. Windows-1252.
+    """
+    def __init__(self, markup, override_encodings=None, is_html=False,
+                 exclude_encodings=None):
+        self.override_encodings = override_encodings or []
+        exclude_encodings = exclude_encodings or []
+        self.exclude_encodings = set([x.lower() for x in exclude_encodings])
+        self.chardet_encoding = None
+        self.is_html = is_html
+        self.declared_encoding = None
+
+        # First order of business: strip a byte-order mark.
+        self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)
+
+    def _usable(self, encoding, tried):
+        if encoding is not None:
+            encoding = encoding.lower()
+            if encoding in self.exclude_encodings:
+                return False
+            if encoding not in tried:
+                tried.add(encoding)
+                return True
+        return False
+
+    @property
+    def encodings(self):
+        """Yield a number of encodings that might work for this markup."""
+        tried = set()
+        for e in self.override_encodings:
+            if self._usable(e, tried):
+                yield e
+
+        # Did the document originally start with a byte-order mark
+        # that indicated its encoding?
+        if self._usable(self.sniffed_encoding, tried):
+            yield self.sniffed_encoding
+
+        # Look within the document for an XML or HTML encoding
+        # declaration.
+        if self.declared_encoding is None:
+            self.declared_encoding = self.find_declared_encoding(
+                self.markup, self.is_html)
+        if self._usable(self.declared_encoding, tried):
+            yield self.declared_encoding
+
+        # Use third-party character set detection to guess at the
+        # encoding.
+        if self.chardet_encoding is None:
+            self.chardet_encoding = chardet_dammit(self.markup)
+        if self._usable(self.chardet_encoding, tried):
+            yield self.chardet_encoding
+
+        # As a last-ditch effort, try utf-8 and windows-1252.
+        for e in ('utf-8', 'windows-1252'):
+            if self._usable(e, tried):
+                yield e
+
+    @classmethod
+    def strip_byte_order_mark(cls, data):
+        """If a byte-order mark is present, strip it and return the encoding it implies."""
+        encoding = None
+        if isinstance(data, unicode):
+            # Unicode data cannot have a byte-order mark.
+            return data, encoding
+        if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
+               and (data[2:4] != b'\x00\x00'):
+            encoding = 'utf-16be'
+            data = data[2:]
+        elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
+                 and (data[2:4] != b'\x00\x00'):  # ff fe 00 00 is UTF-32LE
+            encoding = 'utf-16le'
+            data = data[2:]
+        elif data[:3] == b'\xef\xbb\xbf':
+            encoding = 'utf-8'
+            data = data[3:]
+        elif data[:4] == b'\x00\x00\xfe\xff':
+            encoding = 'utf-32be'
+            data = data[4:]
+        elif data[:4] == b'\xff\xfe\x00\x00':
+            encoding = 'utf-32le'
+            data = data[4:]
+        return data, encoding
+
+    @classmethod
+    def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
+        """Given a document, tries to find its declared encoding.
+
+        An XML encoding is declared at the beginning of the document.
+
+        An HTML encoding is declared in a <meta> tag, hopefully near the
+        beginning of the document.
+        """
+        if search_entire_document:
+            xml_endpos = html_endpos = len(markup)
+        else:
+            xml_endpos = 1024
+            html_endpos = max(2048, int(len(markup) * 0.05))
+
+        declared_encoding = None
+        declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
+        if not declared_encoding_match and is_html:
+            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
+        if declared_encoding_match is not None:
+            declared_encoding = declared_encoding_match.groups()[0].decode(
+                'ascii', 'replace')
+        if declared_encoding:
+            return declared_encoding.lower()
+        return None
+
+class UnicodeDammit:
+    """A class for detecting the encoding of a *ML document and
+    converting it to a Unicode string. If the source encoding is
+    windows-1252, can replace MS smart quotes with their HTML or XML
+    equivalents."""
+
+    # This dictionary maps commonly seen values for "charset" in HTML
+    # meta tags to the corresponding Python codec names. It only covers
+    # values that aren't in Python's aliases and can't be determined
+    # by the heuristics in find_codec.
+    CHARSET_ALIASES = {"macintosh": "mac-roman",
+                       "x-sjis": "shift-jis"}
+
+    ENCODINGS_WITH_SMART_QUOTES = [
+        "windows-1252",
+        "iso-8859-1",
+        "iso-8859-2",
+        ]
+
+    def __init__(self, markup, override_encodings=[],
+                 smart_quotes_to=None, is_html=False, exclude_encodings=[]):
+        """Detect markup's encoding; store the decoded text in unicode_markup."""
+        self.smart_quotes_to = smart_quotes_to
+        self.tried_encodings = []
+        self.contains_replacement_characters = False
+        self.is_html = is_html
+        self.log = logging.getLogger(__name__)
+        self.detector = EncodingDetector(
+            markup, override_encodings, is_html, exclude_encodings)
+
+        # Short-circuit if the data is in Unicode to begin with.
+        if isinstance(markup, unicode) or markup == '':
+            self.markup = markup
+            self.unicode_markup = unicode(markup)
+            self.original_encoding = None
+            return
+
+        # The encoding detector may have stripped a byte-order mark.
+        # Use the stripped markup from this point on.
+        self.markup = self.detector.markup
+
+        u = None
+        for encoding in self.detector.encodings:
+            u = self._convert_from(encoding)
+            if u is not None:
+                break
+
+        if u is None:
+            # None of the encodings worked (an empty string is a valid
+            # result, so test for None). Retry with character replacement.
+
+            for encoding in self.detector.encodings:
+                if encoding != "ascii":
+                    u = self._convert_from(encoding, "replace")
+                if u is not None:
+                    self.log.warning(
+                            "Some characters could not be decoded, and were "
+                            "replaced with REPLACEMENT CHARACTER."
+                    )
+                    self.contains_replacement_characters = True
+                    break
+
+        # If none of that worked, we could at this point force it to
+        # ASCII, but that would destroy so much data that I think
+        # giving up is better.
+        self.unicode_markup = u
+        if u is None:
+            self.original_encoding = None
+
+    def _sub_ms_char(self, match):
+        """Changes a MS smart quote character to an XML or HTML
+        entity, or an ASCII character."""
+        orig = match.group(1)
+        if self.smart_quotes_to == 'ascii':
+            sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
+        else:
+            sub = self.MS_CHARS.get(orig)
+            if type(sub) == tuple:
+                if self.smart_quotes_to == 'xml':
+                    sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
+                else:
+                    sub = '&'.encode() + sub[0].encode() + ';'.encode()
+            else:
+                sub = sub.encode()
+        return sub
+
+    def _convert_from(self, proposed, errors="strict"):
+        proposed = self.find_codec(proposed)
+        if not proposed or (proposed, errors) in self.tried_encodings:
+            return None
+        self.tried_encodings.append((proposed, errors))
+        markup = self.markup
+        # Convert smart quotes to HTML if coming from an encoding
+        # that might have them.
+        if (self.smart_quotes_to is not None
+            and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
+            smart_quotes_re = b"([\x80-\x9f])"
+            smart_quotes_compiled = re.compile(smart_quotes_re)
+            markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
+
+        try:
+            #print "Trying to convert document to %s (errors=%s)" % (
+            #    proposed, errors)
+            u = self._to_unicode(markup, proposed, errors)
+            self.markup = u
+            self.original_encoding = proposed
+        except Exception as e:
+            #print "That didn't work!"
+            #print e
+            return None
+        #print "Correct encoding: %s" % proposed
+        return self.markup
+
+    def _to_unicode(self, data, encoding, errors="strict"):
+        '''Given a string and its encoding, decodes the string into Unicode.
+        %encoding is a string recognized by encodings.aliases'''
+        return unicode(data, encoding, errors)
+
+    @property
+    def declared_html_encoding(self):
+        if not self.is_html:
+            return None
+        return self.detector.declared_encoding
+
+    def find_codec(self, charset):
+        value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
+               or (charset and self._codec(charset.replace("-", "")))
+               or (charset and self._codec(charset.replace("-", "_")))
+               or (charset and charset.lower())
+               or charset
+                )
+        if value:
+            return value.lower()
+        return None
+
+    def _codec(self, charset):
+        if not charset:
+            return charset
+        codec = None
+        try:
+            codecs.lookup(charset)
+            codec = charset
+        except (LookupError, ValueError):
+            pass
+        return codec
+
+
+    # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
+    MS_CHARS = {b'\x80': ('euro', '20AC'),
+                b'\x81': ' ',
+                b'\x82': ('sbquo', '201A'),
+                b'\x83': ('fnof', '192'),
+                b'\x84': ('bdquo', '201E'),
+                b'\x85': ('hellip', '2026'),
+                b'\x86': ('dagger', '2020'),
+                b'\x87': ('Dagger', '2021'),
+                b'\x88': ('circ', '2C6'),
+                b'\x89': ('permil', '2030'),
+                b'\x8A': ('Scaron', '160'),
+                b'\x8B': ('lsaquo', '2039'),
+                b'\x8C': ('OElig', '152'),
+                b'\x8D': '?',
+                b'\x8E': ('#x17D', '17D'),
+                b'\x8F': '?',
+                b'\x90': '?',
+                b'\x91': ('lsquo', '2018'),
+                b'\x92': ('rsquo', '2019'),
+                b'\x93': ('ldquo', '201C'),
+                b'\x94': ('rdquo', '201D'),
+                b'\x95': ('bull', '2022'),
+                b'\x96': ('ndash', '2013'),
+                b'\x97': ('mdash', '2014'),
+                b'\x98': ('tilde', '2DC'),
+                b'\x99': ('trade', '2122'),
+                b'\x9a': ('scaron', '161'),
+                b'\x9b': ('rsaquo', '203A'),
+                b'\x9c': ('oelig', '153'),
+                b'\x9d': '?',
+                b'\x9e': ('#x17E', '17E'),
+                b'\x9f': ('Yuml', '178'),}  # U+0178; was '' which emitted "&#x;"
+
+    # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
+    # horrors like stripping diacritical marks to turn á into a, but also
+    # contains non-horrors like turning “ into ".
+    MS_CHARS_TO_ASCII = {
+        b'\x80' : 'EUR',
+        b'\x81' : ' ',
+        b'\x82' : ',',
+        b'\x83' : 'f',
+        b'\x84' : ',,',
+        b'\x85' : '...',
+        b'\x86' : '+',
+        b'\x87' : '++',
+        b'\x88' : '^',
+        b'\x89' : '%',
+        b'\x8a' : 'S',
+        b'\x8b' : '<',
+        b'\x8c' : 'OE',
+        b'\x8d' : '?',
+        b'\x8e' : 'Z',
+        b'\x8f' : '?',
+        b'\x90' : '?',
+        b'\x91' : "'",
+        b'\x92' : "'",
+        b'\x93' : '"',
+        b'\x94' : '"',
+        b'\x95' : '*',
+        b'\x96' : '-',
+        b'\x97' : '--',
+        b'\x98' : '~',
+        b'\x99' : '(TM)',
+        b'\x9a' : 's',
+        b'\x9b' : '>',
+        b'\x9c' : 'oe',
+        b'\x9d' : '?',
+        b'\x9e' : 'z',
+        b'\x9f' : 'Y',
+        b'\xa0' : ' ',
+        b'\xa1' : '!',
+        b'\xa2' : 'c',
+        b'\xa3' : 'GBP',
+        b'\xa4' : '$', #This approximation is especially parochial--this is the
+                       #generic currency symbol.
+        b'\xa5' : 'YEN',
+        b'\xa6' : '|',
+        b'\xa7' : 'S',
+        b'\xa8' : '..',
+        b'\xa9' : '',
+        b'\xaa' : '(th)',
+        b'\xab' : '<<',
+        b'\xac' : '!',
+        b'\xad' : ' ',
+        b'\xae' : '(R)',
+        b'\xaf' : '-',
+        b'\xb0' : 'o',
+        b'\xb1' : '+-',
+        b'\xb2' : '2',
+        b'\xb3' : '3',
+        b'\xb4' : ("'", 'acute'),
+        b'\xb5' : 'u',
+        b'\xb6' : 'P',
+        b'\xb7' : '*',
+        b'\xb8' : ',',
+        b'\xb9' : '1',
+        b'\xba' : '(th)',
+        b'\xbb' : '>>',
+        b'\xbc' : '1/4',
+        b'\xbd' : '1/2',
+        b'\xbe' : '3/4',
+        b'\xbf' : '?',
+        b'\xc0' : 'A',
+        b'\xc1' : 'A',
+        b'\xc2' : 'A',
+        b'\xc3' : 'A',
+        b'\xc4' : 'A',
+        b'\xc5' : 'A',
+        b'\xc6' : 'AE',
+        b'\xc7' : 'C',
+        b'\xc8' : 'E',
+        b'\xc9' : 'E',
+        b'\xca' : 'E',
+        b'\xcb' : 'E',
+        b'\xcc' : 'I',
+        b'\xcd' : 'I',
+        b'\xce' : 'I',
+        b'\xcf' : 'I',
+        b'\xd0' : 'D',
+        b'\xd1' : 'N',
+        b'\xd2' : 'O',
+        b'\xd3' : 'O',
+        b'\xd4' : 'O',
+        b'\xd5' : 'O',
+        b'\xd6' : 'O',
+        b'\xd7' : '*',
+        b'\xd8' : 'O',
+        b'\xd9' : 'U',
+        b'\xda' : 'U',
+        b'\xdb' : 'U',
+        b'\xdc' : 'U',
+        b'\xdd' : 'Y',
+        b'\xde' : 'b',
+        b'\xdf' : 'B',
+        b'\xe0' : 'a',
+        b'\xe1' : 'a',
+        b'\xe2' : 'a',
+        b'\xe3' : 'a',
+        b'\xe4' : 'a',
+        b'\xe5' : 'a',
+        b'\xe6' : 'ae',
+        b'\xe7' : 'c',
+        b'\xe8' : 'e',
+        b'\xe9' : 'e',
+        b'\xea' : 'e',
+        b'\xeb' : 'e',
+        b'\xec' : 'i',
+        b'\xed' : 'i',
+        b'\xee' : 'i',
+        b'\xef' : 'i',
+        b'\xf0' : 'o',
+        b'\xf1' : 'n',
+        b'\xf2' : 'o',
+        b'\xf3' : 'o',
+        b'\xf4' : 'o',
+        b'\xf5' : 'o',
+        b'\xf6' : 'o',
+        b'\xf7' : '/',
+        b'\xf8' : 'o',
+        b'\xf9' : 'u',
+        b'\xfa' : 'u',
+        b'\xfb' : 'u',
+        b'\xfc' : 'u',
+        b'\xfd' : 'y',
+        b'\xfe' : 'b',
+        b'\xff' : 'y',
+        }
+
+    # A map used when removing rogue Windows-1252/ISO-8859-1
+    # characters in otherwise UTF-8 documents.
+    #
+    # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
+    # Windows-1252.
+    WINDOWS_1252_TO_UTF8 = {
+        0x80 : b'\xe2\x82\xac', # €
+        0x82 : b'\xe2\x80\x9a', # ‚
+        0x83 : b'\xc6\x92',     # ƒ
+        0x84 : b'\xe2\x80\x9e', # „
+        0x85 : b'\xe2\x80\xa6', # …
+        0x86 : b'\xe2\x80\xa0', # †
+        0x87 : b'\xe2\x80\xa1', # ‡
+        0x88 : b'\xcb\x86',     # ˆ
+        0x89 : b'\xe2\x80\xb0', # ‰
+        0x8a : b'\xc5\xa0',     # Š
+        0x8b : b'\xe2\x80\xb9', # ‹
+        0x8c : b'\xc5\x92',     # Œ
+        0x8e : b'\xc5\xbd',     # Ž
+        0x91 : b'\xe2\x80\x98', # ‘
+        0x92 : b'\xe2\x80\x99', # ’
+        0x93 : b'\xe2\x80\x9c', # “
+        0x94 : b'\xe2\x80\x9d', # ”
+        0x95 : b'\xe2\x80\xa2', # •
+        0x96 : b'\xe2\x80\x93', # –
+        0x97 : b'\xe2\x80\x94', # —
+        0x98 : b'\xcb\x9c',     # ˜
+        0x99 : b'\xe2\x84\xa2', # ™
+        0x9a : b'\xc5\xa1',     # š
+        0x9b : b'\xe2\x80\xba', # ›
+        0x9c : b'\xc5\x93',     # œ
+        0x9e : b'\xc5\xbe',     # ž
+        0x9f : b'\xc5\xb8',     # Ÿ
+        0xa0 : b'\xc2\xa0',     #  
+        0xa1 : b'\xc2\xa1',     # ¡
+        0xa2 : b'\xc2\xa2',     # ¢
+        0xa3 : b'\xc2\xa3',     # £
+        0xa4 : b'\xc2\xa4',     # ¤
+        0xa5 : b'\xc2\xa5',     # ¥
+        0xa6 : b'\xc2\xa6',     # ¦
+        0xa7 : b'\xc2\xa7',     # §
+        0xa8 : b'\xc2\xa8',     # ¨
+        0xa9 : b'\xc2\xa9',     # ©
+        0xaa : b'\xc2\xaa',     # ª
+        0xab : b'\xc2\xab',     # «
+        0xac : b'\xc2\xac',     # ¬
+        0xad : b'\xc2\xad',     # ­
+        0xae : b'\xc2\xae',     # ®
+        0xaf : b'\xc2\xaf',     # ¯
+        0xb0 : b'\xc2\xb0',     # °
+        0xb1 : b'\xc2\xb1',     # ±
+        0xb2 : b'\xc2\xb2',     # ²
+        0xb3 : b'\xc2\xb3',     # ³
+        0xb4 : b'\xc2\xb4',     # ´
+        0xb5 : b'\xc2\xb5',     # µ
+        0xb6 : b'\xc2\xb6',     # ¶
+        0xb7 : b'\xc2\xb7',     # ·
+        0xb8 : b'\xc2\xb8',     # ¸
+        0xb9 : b'\xc2\xb9',     # ¹
+        0xba : b'\xc2\xba',     # º
+        0xbb : b'\xc2\xbb',     # »
+        0xbc : b'\xc2\xbc',     # ¼
+        0xbd : b'\xc2\xbd',     # ½
+        0xbe : b'\xc2\xbe',     # ¾
+        0xbf : b'\xc2\xbf',     # ¿
+        0xc0 : b'\xc3\x80',     # À
+        0xc1 : b'\xc3\x81',     # Á
+        0xc2 : b'\xc3\x82',     # Â
+        0xc3 : b'\xc3\x83',     # Ã
+        0xc4 : b'\xc3\x84',     # Ä
+        0xc5 : b'\xc3\x85',     # Å
+        0xc6 : b'\xc3\x86',     # Æ
+        0xc7 : b'\xc3\x87',     # Ç
+        0xc8 : b'\xc3\x88',     # È
+        0xc9 : b'\xc3\x89',     # É
+        0xca : b'\xc3\x8a',     # Ê
+        0xcb : b'\xc3\x8b',     # Ë
+        0xcc : b'\xc3\x8c',     # Ì
+        0xcd : b'\xc3\x8d',     # Í
+        0xce : b'\xc3\x8e',     # Î
+        0xcf : b'\xc3\x8f',     # Ï
+        0xd0 : b'\xc3\x90',     # Ð
+        0xd1 : b'\xc3\x91',     # Ñ
+        0xd2 : b'\xc3\x92',     # Ò
+        0xd3 : b'\xc3\x93',     # Ó
+        0xd4 : b'\xc3\x94',     # Ô
+        0xd5 : b'\xc3\x95',     # Õ
+        0xd6 : b'\xc3\x96',     # Ö
+        0xd7 : b'\xc3\x97',     # ×
+        0xd8 : b'\xc3\x98',     # Ø
+        0xd9 : b'\xc3\x99',     # Ù
+        0xda : b'\xc3\x9a',     # Ú
+        0xdb : b'\xc3\x9b',     # Û
+        0xdc : b'\xc3\x9c',     # Ü
+        0xdd : b'\xc3\x9d',     # Ý
+        0xde : b'\xc3\x9e',     # Þ
+        0xdf : b'\xc3\x9f',     # ß
+        0xe0 : b'\xc3\xa0',     # à
+        0xe1 : b'\xc3\xa1',     # á
+        0xe2 : b'\xc3\xa2',     # â
+        0xe3 : b'\xc3\xa3',     # ã
+        0xe4 : b'\xc3\xa4',     # ä
+        0xe5 : b'\xc3\xa5',     # å
+        0xe6 : b'\xc3\xa6',     # æ
+        0xe7 : b'\xc3\xa7',     # ç
+        0xe8 : b'\xc3\xa8',     # è
+        0xe9 : b'\xc3\xa9',     # é
+        0xea : b'\xc3\xaa',     # ê
+        0xeb : b'\xc3\xab',     # ë
+        0xec : b'\xc3\xac',     # ì
+        0xed : b'\xc3\xad',     # í
+        0xee : b'\xc3\xae',     # î
+        0xef : b'\xc3\xaf',     # ï
+        0xf0 : b'\xc3\xb0',     # ð
+        0xf1 : b'\xc3\xb1',     # ñ
+        0xf2 : b'\xc3\xb2',     # ò
+        0xf3 : b'\xc3\xb3',     # ó
+        0xf4 : b'\xc3\xb4',     # ô
+        0xf5 : b'\xc3\xb5',     # õ
+        0xf6 : b'\xc3\xb6',     # ö
+        0xf7 : b'\xc3\xb7',     # ÷
+        0xf8 : b'\xc3\xb8',     # ø
+        0xf9 : b'\xc3\xb9',     # ù
+        0xfa : b'\xc3\xba',     # ú
+        0xfb : b'\xc3\xbb',     # û
+        0xfc : b'\xc3\xbc',     # ü
+        0xfd : b'\xc3\xbd',     # ý
+        0xfe : b'\xc3\xbe',     # þ
+        }
+
+    MULTIBYTE_MARKERS_AND_SIZES = [
+        (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
+        (0xe0, 0xef, 3), # 3-byte characters start with E0-EF
+        (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
+        ]
+
+    FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
+    LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
+
+    @classmethod
+    def detwingle(cls, in_bytes, main_encoding="utf8",
+                  embedded_encoding="windows-1252"):
+        """Fix characters from one encoding embedded in some other encoding.
+
+        Currently the only situation supported is Windows-1252 (or its
+        subset ISO-8859-1), embedded in UTF-8.
+
+        The input must be a bytestring. If you've already converted
+        the document to Unicode, you're too late.
+
+        The output is a bytestring in which `embedded_encoding`
+        characters have been converted to their `main_encoding`
+        equivalents.
+        """
+        if embedded_encoding.replace('_', '-').lower() not in (
+            'windows-1252', 'windows_1252'):
+            raise NotImplementedError(
+                "Windows-1252 and ISO-8859-1 are the only currently supported "
+                "embedded encodings.")
+
+        if main_encoding.lower() not in ('utf8', 'utf-8'):
+            raise NotImplementedError(
+                "UTF-8 is the only currently supported main encoding.")
+
+        byte_chunks = []
+
+        chunk_start = 0
+        pos = 0
+        while pos < len(in_bytes):
+            byte = in_bytes[pos]
+            if not isinstance(byte, int):
+                # Python 2.x
+                byte = ord(byte)
+            if (byte >= cls.FIRST_MULTIBYTE_MARKER
+                and byte <= cls.LAST_MULTIBYTE_MARKER):
+                # This is the start of a UTF-8 multibyte character. Skip
+                # to the end.
+                for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
+                    if byte >= start and byte <= end:
+                        pos += size
+                        break
+            elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
+                # We found a Windows-1252 character!
+                # Save the string up to this point as a chunk.
+                byte_chunks.append(in_bytes[chunk_start:pos])
+
+                # Now translate the Windows-1252 character into UTF-8
+                # and add it as another, one-byte chunk.
+                byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
+                pos += 1
+                chunk_start = pos
+            else:
+                # Go on to the next character.
+                pos += 1
+        if chunk_start == 0:
+            # The string is unchanged.
+            return in_bytes
+        else:
+            # Store the final chunk.
+            byte_chunks.append(in_bytes[chunk_start:])
+        return b''.join(byte_chunks)
+
diff --git a/src/bs4/diagnose.py b/src/bs4/diagnose.py
new file mode 100644
index 0000000..8768332
--- /dev/null
+++ b/src/bs4/diagnose.py
@@ -0,0 +1,219 @@
+"""Diagnostic functions, mainly for use when doing tech support."""
+
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+__license__ = "MIT"
+
+import cProfile
+from StringIO import StringIO
+from HTMLParser import HTMLParser
+import bs4
+from bs4 import BeautifulSoup, __version__
+from bs4.builder import builder_registry
+
+import os
+import pstats
+import random
+import tempfile
+import time
+import traceback
+import sys
+import cProfile
+
+def diagnose(data):
+    """Diagnostic suite for isolating common problems.
+
+    Prints the Beautiful Soup and Python versions, reports which of the
+    standard parsers have no registered tree builder, and then shows
+    what each remaining parser makes of the markup.
+
+    :param data: The markup to diagnose. This may be a string of
+        markup, an object with a read() method, or the name of an
+        existing file. A string that starts with "http:" or "https:"
+        is rejected with a message, because Beautiful Soup is not an
+        HTTP client.
+    """
+    print "Diagnostic running on Beautiful Soup %s" % __version__
+    print "Python version %s" % sys.version
+
+    basic_parsers = ["html.parser", "html5lib", "lxml"]
+    # Iterate over a copy. Removing an item from the list being
+    # iterated over shifts the remaining items down, so the parser
+    # that follows a missing one would never be checked.
+    for name in list(basic_parsers):
+        for builder in builder_registry.builders:
+            if name in builder.features:
+                break
+        else:
+            basic_parsers.remove(name)
+            print (
+                "I noticed that %s is not installed. Installing it may help." %
+                name)
+
+    if 'lxml' in basic_parsers:
+        # lxml can also act as an XML parser; try that configuration too.
+        basic_parsers.append(["lxml", "xml"])
+        try:
+            from lxml import etree
+            print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
+        except ImportError:
+            print (
+                "lxml is not installed or couldn't be imported.")
+
+
+    if 'html5lib' in basic_parsers:
+        try:
+            import html5lib
+            print "Found html5lib version %s" % html5lib.__version__
+        except ImportError:
+            print (
+                "html5lib is not installed or couldn't be imported.")
+
+    if hasattr(data, 'read'):
+        data = data.read()
+    elif os.path.exists(data):
+        print '"%s" looks like a filename. Reading data from the file.' % data
+        with open(data) as fp:
+            data = fp.read()
+    elif data.startswith("http:") or data.startswith("https:"):
+        print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
+        print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+        return
+    print
+
+    for parser in basic_parsers:
+        print "Trying to parse your markup with %s" % parser
+        success = False
+        try:
+            soup = BeautifulSoup(data, parser)
+            success = True
+        except Exception:
+            # A failing parser is reported, not fatal: the point of the
+            # diagnostic is to compare every parser.
+            print "%s could not parse the markup." % parser
+            traceback.print_exc()
+        if success:
+            print "Here's what %s did with the markup:" % parser
+            print soup.prettify()
+
+        print "-" * 80
+
+def lxml_trace(data, html=True, **kwargs):
+    """Show the raw lxml parse events for a document.
+
+    Runs lxml's iterparse() over the markup, with no Beautiful Soup
+    code involved, and prints one line per event.
+
+    :param data: The markup, as a string.
+    :param html: Passed to iterparse() as its `html` argument.
+    :param kwargs: Extra keyword arguments passed on to iterparse().
+    """
+    from lxml import etree
+    events = etree.iterparse(StringIO(data), html=html, **kwargs)
+    for kind, node in events:
+        print("%s, %4s, %s" % (kind, node.tag, node.text))
+
+class AnnouncingParser(HTMLParser):
+    """Announces HTMLParser parse events, without doing anything else.
+
+    Every handler prints its argument followed by a label naming the
+    kind of event, so the output is a trace of the callbacks that
+    HTMLParser made while being fed a document.
+    """
+
+    def _p(self, s):
+        """Emit one line of the trace."""
+        print(s)
+
+    def handle_starttag(self, name, attrs):
+        """Announce an opening tag. The attributes are not printed."""
+        self._p("%s START" % name)
+
+    def handle_endtag(self, name):
+        """Announce a closing tag."""
+        self._p("%s END" % name)
+
+    def handle_data(self, data):
+        """Announce a run of text."""
+        self._p("%s DATA" % data)
+
+    def handle_charref(self, name):
+        """Announce a character reference."""
+        self._p("%s CHARREF" % name)
+
+    def handle_entityref(self, name):
+        """Announce an entity reference."""
+        self._p("%s ENTITYREF" % name)
+
+    def handle_comment(self, data):
+        """Announce a comment."""
+        self._p("%s COMMENT" % data)
+
+    def handle_decl(self, data):
+        """Announce a declaration."""
+        self._p("%s DECL" % data)
+
+    def unknown_decl(self, data):
+        """Announce a declaration that HTMLParser did not recognize."""
+        self._p("%s UNKNOWN-DECL" % data)
+
+    def handle_pi(self, data):
+        """Announce a processing instruction."""
+        self._p("%s PI" % data)
+
+def htmlparser_trace(data):
+    """Show the raw HTMLParser events for a document.
+
+    Feeds the markup to an AnnouncingParser, which prints each parse
+    event, so you can see what HTMLParser does when no Beautiful Soup
+    code is running.
+
+    :param data: The markup to feed to the parser.
+    """
+    AnnouncingParser().feed(data)
+
+# Alphabets that rword() draws from.
+_vowels = "aeiou"
+_consonants = "bcdfghjklmnpqrstvwxyz"
+
+def rword(length=5):
+    """Generate a random word-like string.
+
+    Letters at even positions are consonants and letters at odd
+    positions are vowels.
+
+    :param length: Number of letters in the word.
+    """
+    letters = []
+    for position in range(length):
+        alphabet = _vowels if position % 2 else _consonants
+        letters.append(random.choice(alphabet))
+    return ''.join(letters)
+
+def rsentence(length=4):
+    """Generate a random sentence-like string.
+
+    :param length: Number of words. Each word is between four and nine
+        letters long, and words are separated by single spaces.
+    """
+    words = [rword(random.randint(4, 9)) for _ in range(length)]
+    return " ".join(words)
+        
+def rdoc(num_elements=1000):
+    """Randomly generate an invalid HTML document.
+
+    :param num_elements: Number of random draws to make. A draw adds
+        an opening tag, some random sentence text, a closing tag or,
+        when the draw is 3, nothing at all.
+    :return: The generated pieces joined by newlines and wrapped in
+        <html> tags.
+    """
+    tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
+    pieces = []
+    for _ in range(num_elements):
+        kind = random.randint(0, 3)
+        if kind == 1:
+            # Text.
+            pieces.append(rsentence(random.randint(1, 4)))
+        elif kind in (0, 2):
+            # kind 0 opens a tag, kind 2 closes one.
+            template = "<%s>" if kind == 0 else "</%s>"
+            pieces.append(template % random.choice(tag_names))
+        # kind 3 adds nothing.
+    return "<html>" + "\n".join(pieces) + "</html>"
+
+def benchmark_parsers(num_elements=100000):
+    """Very basic head-to-head performance benchmark.
+
+    Generates one random invalid document, times Beautiful Soup
+    parsing it with each parser configuration, then times lxml and
+    html5lib parsing the same document on their own.
+
+    :param num_elements: Passed to rdoc() to size the test document.
+    """
+    print "Comparative parser benchmark on Beautiful Soup %s" % __version__
+    data = rdoc(num_elements)
+    print "Generated a large invalid HTML document (%d bytes)." % len(data)
+    
+    configurations = ["lxml", ["lxml", "html"], "html5lib", "html.parser"]
+    for parser in configurations:
+        try:
+            started = time.time()
+            soup = BeautifulSoup(data, parser)
+            finished = time.time()
+        except Exception:
+            print "%s could not parse the markup." % parser
+            traceback.print_exc()
+        else:
+            elapsed = finished - started
+            print "BS4+%s parsed the markup in %.2fs." % (parser, elapsed)
+
+    # The raw parsers, with no Beautiful Soup code involved.
+    from lxml import etree
+    started = time.time()
+    etree.HTML(data)
+    finished = time.time()
+    print "Raw lxml parsed the markup in %.2fs." % (finished - started)
+
+    import html5lib
+    raw_parser = html5lib.HTMLParser()
+    started = time.time()
+    raw_parser.parse(data)
+    finished = time.time()
+    print "Raw html5lib parsed the markup in %.2fs." % (finished - started)
+
+def profile(num_elements=100000, parser="lxml"):
+    """Profile Beautiful Soup parsing a large random document.
+
+    Runs the BeautifulSoup constructor under cProfile, writes the
+    statistics to a temporary file, and prints up to 50 entries
+    matching '_html5lib|bs4', sorted by cumulative time.
+
+    :param num_elements: Passed to rdoc() to size the test document.
+    :param parser: The parser to hand to the BeautifulSoup constructor.
+    """
+    data = rdoc(num_elements)
+    namespace = dict(bs4=bs4, data=data, parser=parser)
+
+    # Hold the temporary file in a `with` block so that it is closed
+    # when we are done with it, rather than whenever the handle
+    # happens to be garbage collected.
+    with tempfile.NamedTemporaryFile() as filehandle:
+        filename = filehandle.name
+        cProfile.runctx(
+            'bs4.BeautifulSoup(data, parser)', namespace, namespace, filename)
+
+        stats = pstats.Stats(filename)
+        # stats.strip_dirs()
+        stats.sort_stats("cumulative")
+        stats.print_stats('_html5lib|bs4', 50)
+
+if __name__ == '__main__':
+    # Run as a script: diagnose the markup supplied on standard input.
+    diagnose(sys.stdin.read())
diff --git a/src/bs4/element.py b/src/bs4/element.py
new file mode 100644
index 0000000..b100d18
--- /dev/null
+++ b/src/bs4/element.py
@@ -0,0 +1,1755 @@
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+__license__ = "MIT"
+
+import collections
+import re
+import shlex
+import sys
+import warnings
+from bs4.dammit import EntitySubstitution
+
+DEFAULT_OUTPUT_ENCODING = "utf-8"
+PY3K = (sys.version_info[0] > 2)
+
+# A raw string, so that "\s" reaches the regular expression engine as
+# written instead of relying on Python leaving an unrecognized string
+# escape alone.
+whitespace_re = re.compile(r"\s+")
+
+def _alias(attr):
+    """Alias one attribute name to another for backward compatibility.
+
+    :param attr: Name of the real attribute.
+    :return: A property that reads and writes the attribute named by
+        `attr` on the instance it is accessed through.
+    """
+    @property
+    def alias(self):
+        return getattr(self, attr)
+
+    @alias.setter
+    def alias(self, value):
+        # A property setter is called with the value being assigned;
+        # pass it through to the real attribute.
+        setattr(self, attr, value)
+    return alias
+
+
+class NamespacedAttribute(unicode):
+    """A Unicode string that remembers the parts of a namespaced name.
+
+    The string value is "prefix:name" when both parts are given, and
+    whichever part is present otherwise. The parts and the namespace
+    are kept as the .prefix, .name and .namespace attributes.
+    """
+
+    def __new__(cls, prefix, name, namespace=None):
+        if name is None:
+            text = prefix
+        elif prefix is None:
+            # Not really namespaced.
+            text = name
+        else:
+            text = prefix + ":" + name
+        attribute = unicode.__new__(cls, text)
+        attribute.prefix = prefix
+        attribute.name = name
+        attribute.namespace = namespace
+        return attribute
+
+class AttributeValueWithCharsetSubstitution(unicode):
+    """A stand-in object for a character encoding specified in HTML.
+
+    This base class adds no behavior of its own. Its subclasses in
+    this module override encode() so that the encoding name passed in
+    takes the place of the one recorded in the original value.
+    """
+
+class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
+    """A generic stand-in for the value of a meta tag's 'charset' attribute.
+
+    When Beautiful Soup parses the markup '<meta charset="utf8">', the
+    value of the 'charset' attribute will be one of these objects.
+    """
+
+    def __new__(cls, original_value):
+        stand_in = unicode.__new__(cls, original_value)
+        # Keep the value as it appeared in the markup.
+        stand_in.original_value = original_value
+        return stand_in
+
+    def encode(self, encoding):
+        """Return the name of the target encoding, not encoded text."""
+        return encoding
+
+
+class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
+    """A generic stand-in for the value of a meta tag's 'content' attribute.
+
+    When Beautiful Soup parses the markup:
+     <meta http-equiv="content-type" content="text/html; charset=utf8">
+
+    The value of the 'content' attribute will be one of these objects.
+    """
+
+    # Group 1 is everything up to and including "charset="; group 3 is
+    # the charset name. Written as a raw string so that "\s" reaches
+    # the regular expression engine as written.
+    CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M)
+
+    def __new__(cls, original_value):
+        match = cls.CHARSET_RE.search(original_value)
+        if match is None:
+            # No substitution necessary: hand back a plain Unicode
+            # string rather than an instance of this class.
+            return unicode.__new__(unicode, original_value)
+
+        obj = unicode.__new__(cls, original_value)
+        obj.original_value = original_value
+        return obj
+
+    def encode(self, encoding):
+        """Return the original value with its charset name replaced.
+
+        :param encoding: The charset name to substitute.
+        """
+        def rewrite(match):
+            # Keep the "charset=" prefix and swap in the new name.
+            return match.group(1) + encoding
+        return self.CHARSET_RE.sub(rewrite, self.original_value)
+
+class HTMLAwareEntitySubstitution(EntitySubstitution):
+
+    """Entity substitution rules that are aware of some HTML quirks.
+
+    Specifically, the contents of <script> and <style> tags should not
+    undergo entity substitution.
+
+    Incoming NavigableString objects are checked to see if they're the
+    direct children of a <script> or <style> tag.
+    """
+
+    cdata_containing_tags = set(["script", "style"])
+
+    preformatted_tags = set(["pre"])
+
+    preserve_whitespace_tags = set(['pre', 'textarea'])
+
+    @classmethod
+    def _substitute_if_appropriate(cls, ns, f):
+        """Apply `f` to `ns` unless `ns` is a string directly inside
+        one of the cdata_containing_tags, in which case return it
+        unchanged."""
+        inside_cdata_tag = (
+            isinstance(ns, NavigableString)
+            and ns.parent is not None
+            and ns.parent.name in cls.cdata_containing_tags)
+        if inside_cdata_tag:
+            return ns
+        return f(ns)
+
+    @classmethod
+    def substitute_html(cls, ns):
+        """HTML entity substitution that leaves script/style text alone."""
+        substitute = EntitySubstitution.substitute_html
+        return cls._substitute_if_appropriate(ns, substitute)
+
+    @classmethod
+    def substitute_xml(cls, ns):
+        """XML entity substitution that leaves script/style text alone."""
+        substitute = EntitySubstitution.substitute_xml
+        return cls._substitute_if_appropriate(ns, substitute)
+
+class PageElement(object):
+    """Contains the navigational information for some part of the page
+    (either a tag or a piece of text)"""
+
+    # There are four possible kinds of value for the "formatter" argument
+    # passed in to methods like encode() and prettify():
+    #
+    # "html" - All Unicode characters with corresponding HTML entities
+    #   are converted to those entities on output.
+    # "minimal" - Bare ampersands and angle brackets are converted to
+    #   XML entities: &amp; &lt; &gt;
+    # None - The null formatter. Unicode characters are never
+    #   converted to entities.  This is not recommended, but it's
+    #   faster than "minimal".
+    # A function - This function will be called on every string that
+    #  needs to undergo entity substitution.
+    #
+
+    # In an HTML document, the default "html" and "minimal" functions
+    # will leave the contents of <script> and <style> tags alone. For
+    # an XML document, all tags will be given the same treatment.
+
+    # Formatter name -> substitution function for elements of an HTML
+    # tree. These functions skip strings that are direct children of
+    # <script> and <style> tags.
+    HTML_FORMATTERS = {
+        "html" : HTMLAwareEntitySubstitution.substitute_html,
+        "minimal" : HTMLAwareEntitySubstitution.substitute_xml,
+        None : None
+        }
+
+    # Formatter name -> substitution function for elements of an XML
+    # tree, where every string gets the same treatment.
+    XML_FORMATTERS = {
+        "html" : EntitySubstitution.substitute_html,
+        "minimal" : EntitySubstitution.substitute_xml,
+        None : None
+        }
+
+    def format_string(self, s, formatter='minimal'):
+        """Format the given string using the given formatter.
+
+        :param s: The string to format.
+        :param formatter: Either a callable, which is used directly,
+            or a formatter name that is looked up with
+            _formatter_for_name(). A lookup that yields None means the
+            string is returned unchanged.
+        """
+        if callable(formatter):
+            substitute = formatter
+        else:
+            substitute = self._formatter_for_name(formatter)
+        if substitute is None:
+            return s
+        return substitute(s)
+
+    @property
+    def _is_xml(self):
+        """Is this element part of an XML tree or an HTML tree?
+
+        This is used when mapping a formatter name ("minimal") to an
+        appropriate function (one that performs entity-substitution on
+        the contents of <script> and <style> tags, or not). It can be
+        inefficient, but it should be called very rarely.
+        """
+        known = self.known_xml
+        if known is not None:
+            # Most of the time this was determined when the document
+            # was parsed.
+            return known
+
+        # Otherwise the element was probably created by calling the
+        # constructor directly, so ask the parent.
+        parent = self.parent
+        if parent is not None:
+            return parent._is_xml
+
+        # This is the top-level object. It should have .known_xml set
+        # from tree creation. If not, take a guess--BS is usually
+        # used on HTML markup.
+        return getattr(self, 'is_xml', False)
+
+    def _formatter_for_name(self, name):
+        """Look up a formatter function based on its name and the tree.
+
+        An unrecognized name falls back to the "minimal" (XML entity)
+        substitution for the kind of tree this element belongs to.
+        """
+        if self._is_xml:
+            table = self.XML_FORMATTERS
+            fallback = EntitySubstitution.substitute_xml
+        else:
+            table = self.HTML_FORMATTERS
+            fallback = HTMLAwareEntitySubstitution.substitute_xml
+        return table.get(name, fallback)
+
+    def setup(self, parent=None, previous_element=None, next_element=None,
+              previous_sibling=None, next_sibling=None):
+        """Sets up the initial relations between this element and
+        other elements.
+
+        Each neighbor that is given is also pointed back at this
+        element. If no previous sibling is given and the parent
+        already has contents, the parent's last child is used as the
+        previous sibling.
+
+        Neighbors are compared against None rather than tested for
+        truth, because an element may be falsy (an empty string, for
+        instance) and must still be linked.
+        """
+        self.parent = parent
+
+        self.previous_element = previous_element
+        if previous_element is not None:
+            self.previous_element.next_element = self
+
+        self.next_element = next_element
+        if self.next_element is not None:
+            self.next_element.previous_element = self
+
+        self.next_sibling = next_sibling
+        if self.next_sibling is not None:
+            self.next_sibling.previous_sibling = self
+
+        if (previous_sibling is None
+            and self.parent is not None and self.parent.contents):
+            previous_sibling = self.parent.contents[-1]
+
+        self.previous_sibling = previous_sibling
+        if previous_sibling is not None:
+            self.previous_sibling.next_sibling = self
+
+    # Beautiful Soup 3 spellings of the sibling attributes, kept as
+    # properties that forward to the current names.
+    nextSibling = _alias("next_sibling")  # BS3
+    previousSibling = _alias("previous_sibling")  # BS3
+
+    def replace_with(self, replace_with):
+        """Replace this element with another one, in place.
+
+        :param replace_with: The element to put where this one is.
+        :return: This element, now extracted from the tree, or None
+            if asked to replace the element with itself.
+        :raise ValueError: If this element has no parent, or if
+            `replace_with` is this element's parent.
+        """
+        if self.parent is None:
+            raise ValueError(
+                "Cannot replace one element with another when the "
+                "element to be replaced is not part of a tree.")
+        if replace_with is self:
+            return
+        if replace_with is self.parent:
+            raise ValueError("Cannot replace a Tag with its parent.")
+        old_parent = self.parent
+        # Record the position before extract() removes this element.
+        my_index = self.parent.index(self)
+        self.extract()
+        old_parent.insert(my_index, replace_with)
+        return self
+    replaceWith = replace_with  # BS3
+
+    def unwrap(self):
+        """Replace this element with its own contents.
+
+        :return: This element, now extracted from the tree.
+        :raise ValueError: If this element has no parent.
+        """
+        my_parent = self.parent
+        if my_parent is None:
+            raise ValueError(
+                "Cannot replace an element with its contents when that "
+                "element is not part of a tree.")
+        # Record the position before extract() removes this element.
+        my_index = my_parent.index(self)
+        self.extract()
+        # Insert the children last-to-first at the same index so that
+        # they end up in their original order. Iterate over a copy,
+        # since each insert() removes the child from self.contents.
+        for child in reversed(self.contents[:]):
+            my_parent.insert(my_index, child)
+        return self
+    replace_with_children = unwrap
+    replaceWithChildren = unwrap  # BS3
+
+    def wrap(self, wrap_inside):
+        """Put `wrap_inside` where this element is, then make this
+        element its last child.
+
+        :return: `wrap_inside`.
+        """
+        detached = self.replace_with(wrap_inside)
+        wrap_inside.append(detached)
+        return wrap_inside
+
+    def extract(self):
+        """Destructively rips this element out of the tree.
+
+        The element is removed from its parent's contents, and its
+        neighbors are linked to each other so that the remaining tree
+        stays connected. The element keeps its own descendants.
+
+        :return: This element, with no parent and no siblings.
+        """
+        if self.parent is not None:
+            del self.parent.contents[self.parent.index(self)]
+
+        #Find the two elements that would be next to each other if
+        #this element (and any children) hadn't been parsed. Connect
+        #the two.
+        last_child = self._last_descendant()
+        next_element = last_child.next_element
+
+        if (self.previous_element is not None and
+            self.previous_element is not next_element):
+            self.previous_element.next_element = next_element
+        if next_element is not None and next_element is not self.previous_element:
+            next_element.previous_element = self.previous_element
+        # Cut the extracted subtree's own links to the old document.
+        self.previous_element = None
+        last_child.next_element = None
+
+        self.parent = None
+        # Link the former siblings to each other.
+        if (self.previous_sibling is not None
+            and self.previous_sibling is not self.next_sibling):
+            self.previous_sibling.next_sibling = self.next_sibling
+        if (self.next_sibling is not None
+            and self.next_sibling is not self.previous_sibling):
+            self.next_sibling.previous_sibling = self.previous_sibling
+        self.previous_sibling = self.next_sibling = None
+        return self
+
+    def _last_descendant(self, is_initialized=True, accept_self=True):
+        """Finds the last element beneath this object to be parsed.
+
+        :param is_initialized: When true and this element has a next
+            sibling, the answer is read from that sibling's
+            previous_element link. Otherwise it is found by walking
+            down the last child of each Tag.
+        :param accept_self: When false, return None instead of this
+            element if it turns out to be its own last descendant.
+        """
+        if is_initialized and self.next_sibling:
+            deepest = self.next_sibling.previous_element
+        else:
+            deepest = self
+            while isinstance(deepest, Tag) and deepest.contents:
+                deepest = deepest.contents[-1]
+        if deepest is self and not accept_self:
+            return None
+        return deepest
+    # BS3: Not part of the API!
+    _lastRecursiveChild = _last_descendant
+
+    def insert(self, position, new_child):
+        """Insert an element into this element's contents.
+
+        :param position: Index at which to insert. Values past the end
+            are clamped to the current number of children.
+        :param new_child: The element to insert. A plain string is
+            wrapped in a NavigableString first. An element that
+            already has a parent is extracted from it before being
+            inserted here.
+        :raise ValueError: If `new_child` is None or is this element.
+        """
+        if new_child is None:
+            raise ValueError("Cannot insert None into a tag.")
+        if new_child is self:
+            raise ValueError("Cannot insert a tag into itself.")
+        if (isinstance(new_child, basestring)
+            and not isinstance(new_child, NavigableString)):
+            new_child = NavigableString(new_child)
+
+        position = min(position, len(self.contents))
+        if hasattr(new_child, 'parent') and new_child.parent is not None:
+            # We're 'inserting' an element that's already one
+            # of this object's children.
+            if new_child.parent is self:
+                current_index = self.index(new_child)
+                if current_index < position:
+                    # We're moving this element further down the list
+                    # of this object's children. That means that when
+                    # we extract this element, our target index will
+                    # jump down one.
+                    position -= 1
+            new_child.extract()
+
+        new_child.parent = self
+        previous_child = None
+        # Link the new child to whatever comes before it.
+        if position == 0:
+            new_child.previous_sibling = None
+            new_child.previous_element = self
+        else:
+            previous_child = self.contents[position - 1]
+            new_child.previous_sibling = previous_child
+            new_child.previous_sibling.next_sibling = new_child
+            new_child.previous_element = previous_child._last_descendant(False)
+        if new_child.previous_element is not None:
+            new_child.previous_element.next_element = new_child
+
+        # Link the end of the new child's subtree to whatever comes
+        # after it.
+        new_childs_last_element = new_child._last_descendant(False)
+
+        if position >= len(self.contents):
+            new_child.next_sibling = None
+
+            # The new child is the last child, so the next element is
+            # the next sibling of the nearest ancestor that has one.
+            parent = self
+            parents_next_sibling = None
+            while parents_next_sibling is None and parent is not None:
+                parents_next_sibling = parent.next_sibling
+                parent = parent.parent
+                if parents_next_sibling is not None:
+                    # We found the element that comes next in the document.
+                    break
+            if parents_next_sibling is not None:
+                new_childs_last_element.next_element = parents_next_sibling
+            else:
+                # The last element of this tag is the last element in
+                # the document.
+                new_childs_last_element.next_element = None
+        else:
+            next_child = self.contents[position]
+            new_child.next_sibling = next_child
+            if new_child.next_sibling is not None:
+                new_child.next_sibling.previous_sibling = new_child
+            new_childs_last_element.next_element = next_child
+
+        if new_childs_last_element.next_element is not None:
+            new_childs_last_element.next_element.previous_element = new_childs_last_element
+        self.contents.insert(position, new_child)
+
+    def append(self, tag):
+        """Appends the given tag to the contents of this tag."""
+        end = len(self.contents)
+        self.insert(end, tag)
+
+    def insert_before(self, predecessor):
+        """Makes the given element the immediate predecessor of this one.
+
+        The two elements will have the same parent, and the given element
+        will be immediately before this one.
+
+        :raise ValueError: If `predecessor` is this element, or if
+            this element has no parent.
+        """
+        if predecessor is self:
+            raise ValueError("Can't insert an element before itself.")
+        container = self.parent
+        if container is None:
+            raise ValueError(
+                "Element has no parent, so 'before' has no meaning.")
+        # Detach the newcomer before looking up our own index, so the
+        # index is still right if the two are already siblings.
+        if isinstance(predecessor, PageElement):
+            predecessor.extract()
+        container.insert(container.index(self), predecessor)
+
+    def insert_after(self, successor):
+        """Makes the given element the immediate successor of this one.
+
+        The two elements will have the same parent, and the given element
+        will be immediately after this one.
+
+        :raise ValueError: If `successor` is this element, or if this
+            element has no parent.
+        """
+        if successor is self:
+            raise ValueError("Can't insert an element after itself.")
+        container = self.parent
+        if container is None:
+            raise ValueError(
+                "Element has no parent, so 'after' has no meaning.")
+        # Detach the newcomer before looking up our own index, so the
+        # index is still right if the two are already siblings.
+        if isinstance(successor, PageElement):
+            successor.extract()
+        my_position = container.index(self)
+        container.insert(my_position + 1, successor)
+
+    def find_next(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the first item that matches the given criteria and
+        appears after this Tag in the document, or None if nothing
+        matches."""
+        search = self.find_all_next
+        return self._find_one(search, name, attrs, text, **kwargs)
+    findNext = find_next  # BS3
+
+    def find_all_next(self, name=None, attrs={}, text=None, limit=None,
+                    **kwargs):
+        """Returns all items that match the given criteria and appear
+        after this Tag in the document."""
+        candidates = self.next_elements
+        return self._find_all(
+            name, attrs, text, limit, candidates, **kwargs)
+    findAllNext = find_all_next  # BS3
+
+    def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the closest sibling to this Tag that matches the
+        given criteria and appears after this Tag in the document, or
+        None if nothing matches."""
+        search = self.find_next_siblings
+        return self._find_one(search, name, attrs, text, **kwargs)
+    findNextSibling = find_next_sibling  # BS3
+
+    def find_next_siblings(self, name=None, attrs={}, text=None, limit=None,
+                           **kwargs):
+        """Returns the siblings of this Tag that match the given
+        criteria and appear after this Tag in the document."""
+        candidates = self.next_siblings
+        return self._find_all(
+            name, attrs, text, limit, candidates, **kwargs)
+    findNextSiblings = find_next_siblings   # BS3
+    fetchNextSiblings = find_next_siblings  # BS2
+
+    def find_previous(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the first item that matches the given criteria and
+        appears before this Tag in the document, or None if nothing
+        matches."""
+        search = self.find_all_previous
+        return self._find_one(search, name, attrs, text, **kwargs)
+    findPrevious = find_previous  # BS3
+
+    def find_all_previous(self, name=None, attrs={}, text=None, limit=None,
+                        **kwargs):
+        """Returns all items that match the given criteria and appear
+        before this Tag in the document."""
+        candidates = self.previous_elements
+        return self._find_all(
+            name, attrs, text, limit, candidates, **kwargs)
+    findAllPrevious = find_all_previous  # BS3
+    fetchPrevious = find_all_previous    # BS2
+
+    def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the closest sibling to this Tag that matches the
+        given criteria and appears before this Tag in the document,
+        or None if nothing matches."""
+        search = self.find_previous_siblings
+        return self._find_one(search, name, attrs, text, **kwargs)
+    findPreviousSibling = find_previous_sibling  # BS3
+
+    def find_previous_siblings(self, name=None, attrs={}, text=None,
+                               limit=None, **kwargs):
+        """Returns the siblings of this Tag that match the given
+        criteria and appear before this Tag in the document."""
+        candidates = self.previous_siblings
+        return self._find_all(
+            name, attrs, text, limit, candidates, **kwargs)
+    findPreviousSiblings = find_previous_siblings   # BS3
+    fetchPreviousSiblings = find_previous_siblings  # BS2
+
+    def find_parent(self, name=None, attrs={}, **kwargs):
+        """Returns the closest parent of this Tag that matches the given
+        criteria, or None if nothing matches."""
+        # NOTE: We can't use _find_one because findParents takes a different
+        # set of arguments.
+        matches = self.find_parents(name, attrs, 1, **kwargs)
+        if matches:
+            return matches[0]
+        return None
+    findParent = find_parent  # BS3
+
+    def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
+        """Returns the parents of this Tag that match the given
+        criteria.
+
+        Parents are never matched against text, so no text criterion
+        is passed along.
+        """
+        ancestors = self.parents
+        return self._find_all(name, attrs, None, limit, ancestors, **kwargs)
+    findParents = find_parents   # BS3
+    fetchParents = find_parents  # BS2
+
+    @property
+    def next(self):
+        """Read-only shorthand for .next_element."""
+        return self.next_element
+
+    @property
+    def previous(self):
+        """Read-only shorthand for .previous_element."""
+        return self.previous_element
+
+    #These methods do the real heavy lifting.
+
+    def _find_one(self, method, name, attrs, text, **kwargs):
+        """Call a find-all style method with a limit of one and return
+        its first result, or None if it found nothing."""
+        matches = method(name, attrs, text, 1, **kwargs)
+        if matches:
+            return matches[0]
+        return None
+
+    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
+        """Iterates over a generator looking for things that match.
+
+        :param name: A tag name criterion, or a ready-made
+            SoupStrainer that is used in place of the other criteria.
+        :param attrs: Attribute criteria for the SoupStrainer.
+        :param text: Text criterion. The keyword argument `string` is
+            accepted as another spelling when `text` is None.
+        :param limit: Stop after this many matches. A false value
+            means no limit.
+        :param generator: Yields the candidate elements.
+        :return: A ResultSet of the matches.
+        """
+        if text is None and 'string' in kwargs:
+            text = kwargs.pop('string')
+
+        if isinstance(name, SoupStrainer):
+            strainer = name
+        else:
+            strainer = SoupStrainer(name, attrs, text, **kwargs)
+
+        only_a_name = (
+            text is None and not limit and not attrs and not kwargs)
+        if only_a_name:
+            if name is True or name is None:
+                # Optimization to find all tags.
+                every_tag = (
+                    candidate for candidate in generator
+                    if isinstance(candidate, Tag))
+                return ResultSet(strainer, every_tag)
+            if isinstance(name, basestring):
+                # Optimization to find all tags with a given name.
+                named_tags = (
+                    candidate for candidate in generator
+                    if isinstance(candidate, Tag)
+                    and candidate.name == name)
+                return ResultSet(strainer, named_tags)
+
+        matches = ResultSet(strainer)
+        while True:
+            try:
+                candidate = next(generator)
+            except StopIteration:
+                break
+            if not candidate:
+                # Falsy elements are never offered to the strainer.
+                continue
+            found = strainer.search(candidate)
+            if found:
+                matches.append(found)
+                if limit and len(matches) >= limit:
+                    break
+        return matches
+
+    #These generators can be used to navigate starting from both
+    #NavigableStrings and Tags.
+    @property
+    def next_elements(self):
+        """Yield each element reached by following .next_element
+        links from this one, stopping at the first None."""
+        element = self.next_element
+        while element is not None:
+            yield element
+            element = element.next_element
+
+    @property
+    def next_siblings(self):
+        """Yield each element reached by following .next_sibling
+        links from this one, stopping at the first None."""
+        sibling = self.next_sibling
+        while sibling is not None:
+            yield sibling
+            sibling = sibling.next_sibling
+
+    @property
+    def previous_elements(self):
+        """Yield each element reached by following .previous_element
+        links from this one, stopping at the first None."""
+        element = self.previous_element
+        while element is not None:
+            yield element
+            element = element.previous_element
+
+    @property
+    def previous_siblings(self):
+        """Yield each element reached by following .previous_sibling
+        links from this one, stopping at the first None."""
+        sibling = self.previous_sibling
+        while sibling is not None:
+            yield sibling
+            sibling = sibling.previous_sibling
+
+    @property
+    def parents(self):
+        """Yield each element reached by following .parent links from
+        this one, nearest first, stopping at the first None."""
+        ancestor = self.parent
+        while ancestor is not None:
+            yield ancestor
+            ancestor = ancestor.parent
+
+    # Methods for supporting CSS selectors.
+
+    # Matches a whole string that is a bare tag name.
+    tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$')
+
+    # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
+    #   \---------------------------/  \---/\-------------/    \-------/
+    #     |                              |         |               |
+    #     |                              |         |           The value
+    #     |                              |    ~,|,^,$,* or =
+    #     |                           Attribute
+    #    Tag
+    #
+    # NOTE: the diagram above shows the general shape only. In the
+    # pattern actually compiled below, the groups are named (tag,
+    # attribute, operator, value), the tag is optional, and attribute
+    # names may contain dashes.
+    attribselect_re = re.compile(
+        r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' +
+        r'=?"?(?P<value>[^\]"]*)"?\]$'
+        )
+
+    def _attr_value_as_string(self, value, default=None):
+        """Force an attribute value into a string representation.
+
+        A multi-valued attribute (a list or tuple) will be converted
+        into a space-separated string.
+
+        :param value: Name of the attribute, looked up with self.get().
+        :param default: What self.get() should return when the
+            attribute is missing.
+        """
+        found = self.get(value, default)
+        if isinstance(found, (list, tuple)):
+            return " ".join(found)
+        return found
+
+    def _tag_name_matches_and(self, function, tag_name):
+        """Combine a tag-name check with another predicate.
+
+        :param function: A predicate that takes a tag.
+        :param tag_name: The required tag name. When it is empty,
+            `function` is returned unchanged.
+        :return: A predicate that is true for tags with the given name
+            that also satisfy `function`.
+        """
+        if not tag_name:
+            return function
+
+        def _match(tag):
+            return tag.name == tag_name and function(tag)
+        return _match
+
+    def _attribute_checker(self, operator, attribute, value=''):
+        """Create a function that performs a CSS selector operation.
+
+        Takes an operator, attribute and optional value. Returns a
+        function that will return True for elements that match that
+        combination. An unrecognized operator gives a function that
+        only checks that the attribute is present.
+        """
+        def as_string(element):
+            # The attribute as one string, or '' when it is missing.
+            return element._attr_value_as_string(attribute, '')
+
+        if operator == '=':
+            # string representation of `attribute` is equal to `value`
+            def _equals(element):
+                return element._attr_value_as_string(attribute) == value
+            return _equals
+
+        if operator == '~':
+            # space-separated list representation of `attribute`
+            # contains `value`
+            def _includes_value(element):
+                words = element.get(attribute, [])
+                if not isinstance(words, list):
+                    words = words.split()
+                return value in words
+            return _includes_value
+
+        if operator == '^':
+            # string representation of `attribute` starts with `value`
+            def _starts_with(element):
+                return as_string(element).startswith(value)
+            return _starts_with
+
+        if operator == '$':
+            # string representation of `attribute` ends with `value`
+            def _ends_with(element):
+                return as_string(element).endswith(value)
+            return _ends_with
+
+        if operator == '*':
+            # string representation of `attribute` contains `value`
+            def _contains(element):
+                return value in as_string(element)
+            return _contains
+
+        if operator == '|':
+            # string representation of `attribute` is either exactly
+            # `value` or starts with `value` and then a dash.
+            def _is_or_starts_with_dash(element):
+                text = as_string(element)
+                return text == value or text.startswith(value + '-')
+            return _is_or_starts_with_dash
+
+        def _has_attribute(element):
+            return element.has_attr(attribute)
+        return _has_attribute
+
+    # Old non-property versions of the generators, for backwards
+    # compatibility with BS3.
+    def nextGenerator(self):  # BS3 name for the next_elements property
+        return self.next_elements
+
+    def nextSiblingGenerator(self):  # BS3 name for the next_siblings property
+        return self.next_siblings
+
+    def previousGenerator(self):  # BS3 name for the previous_elements property
+        return self.previous_elements
+
+    def previousSiblingGenerator(self):  # BS3 name for previous_siblings
+        return self.previous_siblings
+
+    def parentGenerator(self):  # BS3 name for the parents property
+        return self.parents
+
+
+class NavigableString(unicode, PageElement):
+
+    PREFIX = ''
+    SUFFIX = ''
+
+    # Looking at a string alone cannot reveal whether it came from an
+    # XML document or an HTML document, so this starts out unknown.
+
+    known_xml = None
+
+    def __new__(cls, value):
+        """Create a new NavigableString.
+
+        A value that is not already unicode (as happens when
+        unpickling) is decoded with DEFAULT_OUTPUT_ENCODING; without an
+        explicit encoding the unicode constructor would not know how to
+        handle non-ASCII characters.
+        """
+        if not isinstance(value, unicode):
+            instance = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+        else:
+            instance = unicode.__new__(cls, value)
+        instance.setup()
+        return instance
+
+    def __copy__(self):
+        """Return a string of the same class and contents as this one.
+        The copy is built fresh, so it is not connected to the parse tree.
+        """
+        return type(self)(self)
+
+    def __getnewargs__(self):
+        return (unicode(self),)
+
+    def __getattr__(self, attr):
+        """Support text.string, which returns the text itself. This is
+        for backwards compatibility for Navigable*String, and for
+        CData* it gives the string without the CData wrapper."""
+        # Only 'string' is synthesized; anything else is a genuine miss.
+        if attr != 'string':
+            raise AttributeError(
+                "'%s' object has no attribute '%s'" % (
+                    self.__class__.__name__, attr))
+        return self
+
+    def output_ready(self, formatter="minimal"):
+        formatted = self.format_string(self, formatter)
+        return self.PREFIX + formatted + self.SUFFIX
+
+    @property
+    def name(self):
+        return None
+
+    @name.setter
+    def name(self, name):
+        raise AttributeError("A NavigableString cannot be given a name.")
+
+class PreformattedString(NavigableString):
+    """A NavigableString that bypasses the normal formatting rules.
+
+    The formatter still receives the string (so that any side effects
+    happen), but whatever it returns is discarded.
+    """
+
+    def output_ready(self, formatter="minimal"):
+        """Run the formatter on the string, ignore its return value,
+        and emit the raw text between PREFIX and SUFFIX."""
+        self.format_string(self, formatter)
+        return self.PREFIX + self + self.SUFFIX
+
+class CData(PreformattedString):
+    """A CDATA section, output between <![CDATA[ and ]]>."""
+    PREFIX = u'<![CDATA['
+    SUFFIX = u']]>'
+
+class ProcessingInstruction(PreformattedString):
+    """An SGML processing instruction, output between <? and >."""
+
+    PREFIX = u'<?'
+    SUFFIX = u'>'
+
+class XMLProcessingInstruction(ProcessingInstruction):
+    """An XML processing instruction, output between <? and ?>."""
+    PREFIX = u'<?'
+    SUFFIX = u'?>'
+
+class Comment(PreformattedString):
+    """A comment, output between <!-- and -->."""
+    PREFIX = u'<!--'
+    SUFFIX = u'-->'
+
+
+class Declaration(PreformattedString):  # output between <? and ?>
+    PREFIX = u'<?'
+    SUFFIX = u'?>'
+
+
+class Doctype(PreformattedString):
+    """A document type declaration, output as <!DOCTYPE ...> plus newline."""
+    @classmethod
+    def for_name_and_ids(cls, name, pub_id, system_id):
+        value = name or ''
+        if pub_id is not None:
+            value += ' PUBLIC "%s"' % pub_id
+            if system_id is not None:
+                value += ' "%s"' % system_id
+        elif system_id is not None:
+            value += ' SYSTEM "%s"' % system_id
+        # Build through cls so that a subclass gets an instance of itself.
+        return cls(value)
+
+    PREFIX = u'<!DOCTYPE '
+    SUFFIX = u'>\n'
+
+
+class Tag(PageElement):
+
+    """Represents a found HTML tag with its attributes and contents."""
+
+    def __init__(self, parser=None, builder=None, name=None, namespace=None,
+                 prefix=None, attrs=None, parent=None, previous=None,
+                 is_xml=None):
+        "Basic constructor; `name` is required (ValueError when it is None)."
+
+        if parser is None:
+            self.parser_class = None
+        else:
+            # Only the parser's class is kept, not the parser object
+            # itself: that lets extracted chunks be garbage-collected.
+            self.parser_class = parser.__class__
+        if name is None:
+            raise ValueError("No value provided for new tag's name.")
+        self.name = name
+        self.namespace = namespace
+        self.prefix = prefix
+        if builder is not None:
+            preserve_whitespace_tags = builder.preserve_whitespace_tags
+        else:
+            if is_xml:
+                preserve_whitespace_tags = []
+            else:
+                preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
+        self.preserve_whitespace_tags = preserve_whitespace_tags
+        if attrs is None:
+            attrs = {}
+        elif attrs:
+            if builder is not None and builder.cdata_list_attributes:
+                attrs = builder._replace_cdata_list_attribute_values(
+                    self.name, attrs)
+            else:
+                attrs = dict(attrs)
+        else:
+            attrs = dict(attrs)
+
+        # If possible, determine ahead of time whether this tag is an
+        # XML tag: a builder's is_xml wins over the is_xml argument.
+        if builder:
+            self.known_xml = builder.is_xml
+        else:
+            self.known_xml = is_xml
+        self.attrs = attrs
+        self.contents = []
+        self.setup(parent, previous)
+        self.hidden = False
+
+        # Set up any substitutions, such as the charset in a META tag.
+        if builder is not None:
+            builder.set_up_substitutions(self)
+            self.can_be_empty_element = builder.can_be_empty_element(name)
+        else:
+            self.can_be_empty_element = False
+
+    parserClass = _alias("parser_class")  # BS3
+
+    def __copy__(self):
+        """Return a new Tag, detached from the tree, with copied contents.
+        A Tag stores no builder, so builder-derived state is copied over."""
+        clone = type(self)(None, None, self.name, self.namespace,
+                           self.prefix, self.attrs, is_xml=self._is_xml)
+        for attr in ('can_be_empty_element', 'hidden',
+                     'preserve_whitespace_tags'):
+            setattr(clone, attr, getattr(self, attr))
+        for child in self.contents:
+            clone.append(child.__copy__())
+        return clone
+
+    @property
+    def is_empty_element(self):
+        """Is this tag an empty-element tag? (aka a self-closing tag)
+
+        A tag that has contents is never an empty-element tag.
+
+        A tag that has no contents is an empty-element tag only when
+        its can_be_empty_element flag is set. The constructor takes
+        that flag from the builder's can_be_empty_element(name), and
+        sets it to False for a tag created without a builder.
+
+        If the builder has a designated list of empty-element tags, then
+        only a tag whose name is in that list is expected to qualify;
+        otherwise any tag with no contents is expected to qualify.
+        """
+        return len(self.contents) == 0 and self.can_be_empty_element
+    isSelfClosing = is_empty_element  # BS3
+
+    @property
+    def string(self):
+        """Convenience property to get the single string within this tag.
+
+        :Return: The lone child if it is a NavigableString. If the
+         lone child is a tag, that child's own 'string' value,
+         recursively. None if this tag has no children or more
+         than one child.
+        """
+        children = self.contents
+        if len(children) != 1:
+            return None
+        only_child = children[0]
+        if not isinstance(only_child, NavigableString):
+            return only_child.string
+        return only_child
+
+    @string.setter
+    def string(self, string):
+        self.clear()
+        self.append(string.__class__(string))
+
+    def _all_strings(self, strip=False, types=(NavigableString, CData)):
+        """Yield descendant strings of certain classes, possibly stripped.
+
+        By default only NavigableString and CData objects are yielded, so
+        no comments, processing instructions, etc."""
+        for node in self.descendants:
+            if types is None:
+                wanted = isinstance(node, NavigableString)
+            else:
+                wanted = type(node) in types
+            if not wanted:
+                continue
+            if strip:
+                node = node.strip()
+                if len(node) == 0:
+                    continue
+            yield node
+
+    strings = property(_all_strings)
+
+    @property
+    def stripped_strings(self):  # _all_strings() with stripping turned on
+        for text in self._all_strings(True):
+            yield text
+
+    def get_text(self, separator=u"", strip=False,
+                 types=(NavigableString, CData)):
+        """
+        Join every matching descendant string with the given separator.
+        """
+        pieces = self._all_strings(strip, types=types)
+        return separator.join(pieces)
+    getText = get_text
+    text = property(get_text)
+
+    def decompose(self):
+        """Extract this tag, then wipe each element on its next_element chain."""
+        self.extract()
+        node = self
+        while node is not None:
+            following = node.next_element
+            node.__dict__.clear()
+            node.contents = []
+            node = following
+
+    def clear(self, decompose=False):
+        """
+        Remove every child of this tag.
+
+        Children are extracted; when decompose is True, children that
+        are Tags are decomposed instead.
+        """
+        # Iterate over a snapshot of the child list.
+        for child in self.contents[:]:
+            if decompose and isinstance(child, Tag):
+                child.decompose()
+            else:
+                child.extract()
+
+    def index(self, element):
+        """
+        Return the position of a child, comparing by identity rather than
+        equality, so an equal-but-distinct element is never mistaken for it.
+        """
+        for position, candidate in enumerate(self.contents):
+            if candidate is element:
+                return position
+        raise ValueError("Tag.index: element not in tag")
+
+    def get(self, key, default=None):
+        """Returns the value of the 'key' attribute for the tag, or
+        the value given for 'default' if the tag doesn't have that
+        attribute. The lookup is done on the tag's attrs mapping."""
+        return self.attrs.get(key, default)
+
+    def has_attr(self, key):  # True when 'key' is among this tag's attrs
+        return key in self.attrs
+
+    def __hash__(self):  # the hash of the tag's str() rendering
+        return str(self).__hash__()
+
+    def __getitem__(self, key):
+        """tag[key] returns the value of the 'key' attribute for the tag;
+        a missing attribute makes the attrs lookup raise an exception."""
+        return self.attrs[key]
+
+    def __iter__(self):
+        "Iterating over a tag iterates over its list of direct contents."
+        return iter(self.contents)
+
+    def __len__(self):
+        "The length of a tag is the number of items in its contents list."
+        return len(self.contents)
+
+    def __contains__(self, x):  # membership test against direct contents
+        return x in self.contents
+
+    def __nonzero__(self):
+        "A tag is always truthy, even when it has no contents (len 0)."
+        return True
+
+    def __setitem__(self, key, value):
+        """Setting tag[key] stores `value` as the 'key' attribute of the
+        tag, replacing any existing value."""
+        self.attrs[key] = value
+
+    def __delitem__(self, key):
+        "Deleting tag[key] removes the 'key' attribute; a missing key is ignored."
+        self.attrs.pop(key, None)
+
+    def __call__(self, *args, **kwargs):
+        """Calling a tag like a function is the same as calling its
+        find_all() method with the same arguments. Eg. tag('a') gives
+        the same result as tag.find_all('a')."""
+        return self.find_all(*args, **kwargs)
+
+    def __getattr__(self, tag):
+        """Treat an unknown attribute as the name of a tag to find()."""
+        if len(tag) > 3 and tag.endswith('Tag'):
+            # BS3: soup.aTag -> "soup.find("a")
+            tag_name = tag[:-3]
+            warnings.warn(
+                '.%sTag is deprecated, use .find("%s") instead.' % (
+                    tag_name, tag_name))
+            return self.find(tag_name)
+        # We special case contents to avoid recursion.
+        elif not tag.startswith("__") and not tag == "contents":
+            return self.find(tag)
+        raise AttributeError("'%s' object has no attribute '%s'" % (
+            self.__class__.__name__, tag))
+
+    def __eq__(self, other):
+        """Two tags are equal when their names, attributes and contents
+        (compared recursively) are all the same."""
+        if self is other:
+            return True
+        for required in ('name', 'attrs', 'contents'):
+            if not hasattr(other, required):
+                return False
+        if (self.name != other.name
+            or self.attrs != other.attrs
+            or len(self) != len(other)):
+            return False
+        for position, mine in enumerate(self.contents):
+            if mine != other.contents[position]:
+                return False
+        return True
+
+    def __ne__(self, other):
+        """Returns the negation of __eq__: true iff this tag is not equal
+        to the other object as defined there."""
+        return not self == other
+
+    def __repr__(self, encoding="unicode-escape"):
+        """Renders this tag as a string."""
+        if not PY3K:
+            # Python 2: "The return value must be a string object",
+            # i.e. a bytestring, and by convention an ASCII one, so
+            # the rendering is encoded with `encoding`.
+            return self.encode(encoding)
+        # Python 3: "The return value must be a string object", i.e.
+        # Unicode.
+        return self.decode()
+
+    def __unicode__(self):  # the Unicode rendering produced by decode()
+        return self.decode()
+
+    def __str__(self):
+        # Without PY3K the encoded form is returned, otherwise the text.
+        if not PY3K:
+            return self.encode()
+        return self.decode()
+
+    if PY3K:
+        __str__ = __repr__ = __unicode__
+
+    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
+               indent_level=None, formatter="minimal",
+               errors="xmlcharrefreplace"):
+        """Render the tag as Unicode first, then encode that text into
+        `encoding`, passing `errors` to the encoder."""
+        text = self.decode(indent_level, encoding, formatter)
+        return text.encode(encoding, errors)
+
+    def _should_pretty_print(self, indent_level):
+        """Should this tag be pretty-printed? Only when an indent level
+        was given and the tag's name is not in preserve_whitespace_tags."""
+        if indent_level is None:
+            return False
+        preserved = self.preserve_whitespace_tags
+        return self.name not in preserved
+
+    def decode(self, indent_level=None,
+               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               formatter="minimal"):
+        """Returns a Unicode representation of this tag and its contents.
+
+        :param indent_level: None disables pretty-printing.
+        :param eventual_encoding: The encoding the tag is destined for.
+           This method does _not_ perform that encoding; the value is
+           only substituted into attribute values that carry the
+           document's encoding (such as in a <META> tag).
+        :param formatter: A formatter name, or a callable.
+        """
+
+        # First off, turn a string formatter into a function. This
+        # will stop the lookup from happening over and over again.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+        # Render attributes as key="value", or the bare key if value is None.
+        attrs = []
+        if self.attrs:
+            for key, val in sorted(self.attrs.items()):
+                if val is None:
+                    decoded = key
+                else:
+                    if isinstance(val, list) or isinstance(val, tuple):
+                        val = ' '.join(val)
+                    elif not isinstance(val, basestring):
+                        val = unicode(val)
+                    elif (
+                        isinstance(val, AttributeValueWithCharsetSubstitution)
+                        and eventual_encoding is not None):
+                        val = val.encode(eventual_encoding)
+
+                    text = self.format_string(val, formatter)
+                    decoded = (
+                        unicode(key) + '='
+                        + EntitySubstitution.quoted_attribute_value(text))
+                attrs.append(decoded)
+        close = ''
+        closeTag = ''
+
+        prefix = ''
+        if self.prefix:
+            prefix = self.prefix + ":"
+
+        if self.is_empty_element:
+            close = '/'
+        else:
+            closeTag = '</%s%s>' % (prefix, self.name)
+
+        pretty_print = self._should_pretty_print(indent_level)
+        space = ''
+        indent_space = ''
+        if indent_level is not None:
+            indent_space = (' ' * (indent_level - 1))
+        if pretty_print:
+            space = indent_space
+            indent_contents = indent_level + 1
+        else:
+            indent_contents = None
+        contents = self.decode_contents(
+            indent_contents, eventual_encoding, formatter)
+
+        if self.hidden:
+            # This is the 'document root' object.
+            s = contents
+        else:
+            s = []
+            attribute_string = ''
+            if attrs:
+                attribute_string = ' ' + ' '.join(attrs)
+            if indent_level is not None:
+                # Even if this particular tag is not pretty-printed,
+                # we should indent up to the start of the tag.
+                s.append(indent_space)
+            s.append('<%s%s%s%s>' % (
+                    prefix, self.name, attribute_string, close))
+            if pretty_print:
+                s.append("\n")
+            s.append(contents)
+            if pretty_print and contents and contents[-1] != "\n":
+                s.append("\n")
+            if pretty_print and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if indent_level is not None and closeTag and self.next_sibling:
+                # Even if this particular tag is not pretty-printed,
+                # we're now done with the tag, and we should add a
+                # newline if appropriate.
+                s.append("\n")
+            s = ''.join(s)
+        return s
+
+    def prettify(self, encoding=None, formatter="minimal"):
+        # True is passed as the indent level to switch on pretty-printing.
+        if encoding is not None:
+            return self.encode(encoding, True, formatter=formatter)
+        return self.decode(True, formatter=formatter)
+
+    def decode_contents(self, indent_level=None,
+                       eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+                       formatter="minimal"):
+        """Renders the contents of this tag as a Unicode string.
+
+        :param indent_level: None disables pretty-printing; otherwise
+           each string is indented by (indent_level - 1) spaces.
+
+        :param eventual_encoding: The tag is destined to be
+           encoded into this encoding. This method is _not_
+           responsible for performing that encoding. This information
+           is passed in so that it can be substituted in if the
+           document contains a <META> tag that mentions the document's
+           encoding.
+
+        :param formatter: The output formatter responsible for converting
+           entities to Unicode characters.
+        """
+        # First off, turn a string formatter into a function. This
+        # will stop the lookup from happening over and over again.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+        # Strings directly inside a 'pre' tag are never stripped or indented.
+        pretty_print = (indent_level is not None)
+        s = []
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.output_ready(formatter)
+            elif isinstance(c, Tag):
+                s.append(c.decode(indent_level, eventual_encoding,
+                                  formatter))
+            if text and indent_level and not self.name == 'pre':
+                text = text.strip()
+            if text:
+                if pretty_print and not self.name == 'pre':
+                    s.append(" " * (indent_level - 1))
+                s.append(text)
+                if pretty_print and not self.name == 'pre':
+                    s.append("\n")
+        return ''.join(s)
+
+    def encode_contents(
+        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
+        formatter="minimal"):
+        """Renders the contents of this tag as a bytestring.
+
+        :param indent_level: Passed through to decode_contents(), which
+           uses it to indent the rendering.
+
+        :param encoding: The bytestring will be in this encoding.
+
+        :param formatter: The output formatter responsible for converting
+           entities to Unicode characters.
+        """
+
+        contents = self.decode_contents(indent_level, encoding, formatter)
+        return contents.encode(encoding)
+
+    # Old method for BS3 compatibility
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        # indentLevel only applies when pretty-printing was requested.
+        level = indentLevel if prettyPrint else None
+        return self.encode_contents(
+            indent_level=level, encoding=encoding)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Return only the first child of this Tag matching the given
+        criteria, or None when nothing matches."""
+        # Ask find_all() for at most one result.
+        matches = self.find_all(name, attrs, recursive, text, 1, **kwargs)
+        if not matches:
+            return None
+        return matches[0]
+    findChild = find
+
+    def find_all(self, name=None, attrs={}, recursive=True, text=None,
+                 limit=None, **kwargs):
+        """Extracts a list of Tag objects that match the given
+        criteria.  You can specify the name of the Tag and any
+        attributes you want the Tag to have. With recursive=False
+        only direct children are searched, not all descendants.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches. The same is true of the tag name."""
+        if recursive:
+            generator = self.descendants
+        else:
+            generator = self.children
+        return self._find_all(name, attrs, text, limit, generator, **kwargs)
+    findAll = find_all       # BS3
+    findChildren = find_all  # BS2
+
+    #Generator methods
+    @property
+    def children(self):
+        # Hand back an iterator (not the list itself) over direct children.
+        return iter(self.contents)  # XXX This seems to be untested.
+
+    @property
+    def descendants(self):  # walks next_element, starting at the first child
+        if len(self.contents) == 0:
+            return
+        stop_node = self._last_descendant().next_element
+        node = self.contents[0]
+        while node is not stop_node:
+            yield node
+            node = node.next_element
+
+    # CSS selector code
+    # (combinator tokens, a debug-output switch, a quoted-colon pattern)
+    _selector_combinators = ['>', '+', '~']
+    _select_debug = False
+    quoted_colon = re.compile('"[^"]*:[^"]*"')
+    def select_one(self, selector):
+        """Return the first element matching a CSS selector, or None."""
+        matches = self.select(selector, limit=1)
+        if not matches:
+            return None
+        return matches[0]
+
+    def select(self, selector, _candidate_generator=None, limit=None):
+        """Perform a CSS selection operation on the current element."""
+
+        # Handle grouping selectors if ',' exists, ie: p,a
+        if ',' in selector:
+            context = []
+            for partial_selector in selector.split(','):
+                partial_selector = partial_selector.strip()
+                if partial_selector == '':
+                    raise ValueError('Invalid group selection syntax: %s' % selector)
+                candidates = self.select(partial_selector, limit=limit)
+                for candidate in candidates:
+                    if candidate not in context:
+                        context.append(candidate)
+
+                if limit and len(context) >= limit:
+                    break
+            return context
+        tokens = shlex.split(selector)
+        current_context = [self]
+
+        if tokens[-1] in self._selector_combinators:
+            raise ValueError(
+                'Final combinator "%s" is missing an argument.' % tokens[-1])
+
+        if self._select_debug:
+            print 'Running CSS selector "%s"' % selector
+
+        for index, token in enumerate(tokens):
+            new_context = []
+            new_context_ids = set([])
+
+            if tokens[index-1] in self._selector_combinators:
+                # This token was consumed by the previous combinator. Skip it.
+                if self._select_debug:
+                    print '  Token was consumed by the previous combinator.'
+                continue
+
+            if self._select_debug:
+                print ' Considering token "%s"' % token
+            recursive_candidate_generator = None
+            tag_name = None
+
+            # Each operation corresponds to a checker function, a rule
+            # for determining whether a candidate matches the
+            # selector. Candidates are generated by the active
+            # iterator.
+            checker = None
+
+            m = self.attribselect_re.match(token)
+            if m is not None:
+                # Attribute selector
+                tag_name, attribute, operator, value = m.groups()
+                checker = self._attribute_checker(operator, attribute, value)
+
+            elif '#' in token:
+                # ID selector
+                tag_name, tag_id = token.split('#', 1)
+                def id_matches(tag):
+                    return tag.get('id', None) == tag_id
+                checker = id_matches
+
+            elif '.' in token:
+                # Class selector
+                tag_name, klass = token.split('.', 1)
+                classes = set(klass.split('.'))
+                def classes_match(candidate):
+                    return classes.issubset(candidate.get('class', []))
+                checker = classes_match
+
+            elif ':' in token and not self.quoted_colon.search(token):
+                # Pseudo-class
+                tag_name, pseudo = token.split(':', 1)
+                if tag_name == '':
+                    raise ValueError(
+                        "A pseudo-class must be prefixed with a tag name.")
+                pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
+                found = []
+                if pseudo_attributes is None:
+                    pseudo_type = pseudo
+                    pseudo_value = None
+                else:
+                    pseudo_type, pseudo_value = pseudo_attributes.groups()
+                if pseudo_type == 'nth-of-type':
+                    try:
+                        pseudo_value = int(pseudo_value)
+                    except:
+                        raise NotImplementedError(
+                            'Only numeric values are currently supported for the nth-of-type pseudo-class.')
+                    if pseudo_value < 1:
+                        raise ValueError(
+                            'nth-of-type pseudo-class value must be at least 1.')
+                    class Counter(object):
+                        def __init__(self, destination):
+                            self.count = 0
+                            self.destination = destination
+
+                        def nth_child_of_type(self, tag):
+                            self.count += 1
+                            if self.count == self.destination:
+                                return True
+                            else:
+                                return False
+                    checker = Counter(pseudo_value).nth_child_of_type
+                else:
+                    raise NotImplementedError(
+                        'Only the following pseudo-classes are implemented: nth-of-type.')
+
+            elif token == '*':
+                # Star selector -- matches everything
+                pass
+            elif token == '>':
+                # Run the next token as a CSS selector against the
+                # direct children of each tag in the current context.
+                recursive_candidate_generator = lambda tag: tag.children
+            elif token == '~':
+                # Run the next token as a CSS selector against the
+                # siblings of each tag in the current context.
+                recursive_candidate_generator = lambda tag: tag.next_siblings
+            elif token == '+':
+                # For each tag in the current context, run the next
+                # token as a CSS selector against the tag's next
+                # sibling that's a tag.
+                def next_tag_sibling(tag):
+                    yield tag.find_next_sibling(True)
+                recursive_candidate_generator = next_tag_sibling
+
+            elif self.tag_name_re.match(token):
+                # Just a tag name.
+                tag_name = token
+            else:
+                raise ValueError(
+                    'Unsupported or invalid CSS selector: "%s"' % token)
+            if recursive_candidate_generator:
+                # This happens when the selector looks like  "> foo".
+                #
+                # The generator calls select() recursively on every
+                # member of the current context, passing in a different
+                # candidate generator and a different selector.
+                #
+                # In the case of "> foo", the candidate generator is
+                # one that yields a tag's direct children (">"), and
+                # the selector is "foo".
+                next_token = tokens[index+1]
+                def recursive_select(tag):
+                    if self._select_debug:
+                        print '    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
+                        print '-' * 40
+                    for i in tag.select(next_token, recursive_candidate_generator):
+                        if self._select_debug:
+                            print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
+                        yield i
+                    if self._select_debug:
+                        print '-' * 40
+                _use_candidate_generator = recursive_select
+            elif _candidate_generator is None:
+                # By default, a tag's candidates are all of its
+                # children. If tag_name is defined, only yield tags
+                # with that name.
+                if self._select_debug:
+                    if tag_name:
+                        check = "[any]"
+                    else:
+                        check = tag_name
+                    print '   Default candidate generator, tag name="%s"' % check
+                if self._select_debug:
+                    # This is redundant with later code, but it stops
+                    # a bunch of bogus tags from cluttering up the
+                    # debug log.
+                    def default_candidate_generator(tag):
+                        for child in tag.descendants:
+                            if not isinstance(child, Tag):
+                                continue
+                            if tag_name and not child.name == tag_name:
+                                continue
+                            yield child
+                    _use_candidate_generator = default_candidate_generator
+                else:
+                    _use_candidate_generator = lambda tag: tag.descendants
+            else:
+                _use_candidate_generator = _candidate_generator
+
+            count = 0
+            for tag in current_context:
+                if self._select_debug:
+                    print "    Running candidate generator on %s %s" % (
+                        tag.name, repr(tag.attrs))
+                for candidate in _use_candidate_generator(tag):
+                    if not isinstance(candidate, Tag):
+                        continue
+                    if tag_name and candidate.name != tag_name:
+                        continue
+                    if checker is not None:
+                        try:
+                            result = checker(candidate)
+                        except StopIteration:
+                            # The checker has decided we should no longer
+                            # run the generator.
+                            break
+                    if checker is None or result:
+                        if self._select_debug:
+                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
+                        if id(candidate) not in new_context_ids:
+                            # If a tag matches a selector more than once,
+                            # don't include it in the context more than once.
+                            new_context.append(candidate)
+                            new_context_ids.add(id(candidate))
+                    elif self._select_debug:
+                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
+
+            current_context = new_context
+        if limit and len(current_context) >= limit:
+            current_context = current_context[:limit]
+
+        if self._select_debug:
+            print "Final verdict:"
+            for i in current_context:
+                print " %s %s" % (i.name, i.attrs)
+        return current_context
+
+    # Old names for backwards compatibility
+    def childGenerator(self):
+        return self.children
+
+    def recursiveChildGenerator(self):
+        return self.descendants
+
+    def has_key(self, key):
+        """This was kind of misleading because has_key() (attributes)
+        was different from __contains__ (contents). has_key() is gone in
+        Python 3, anyway."""
+        warnings.warn('has_key is deprecated. Use has_attr("%s") instead.' % (
+                key))
+        return self.has_attr(key)
+
+# Next, a couple classes to represent queries and their results.
+class SoupStrainer(object):
+    """Encapsulates a number of ways of matching a markup element (tag or
+    text)."""
+
+    def __init__(self, name=None, attrs={}, text=None, **kwargs):
+        self.name = self._normalize_search_value(name)
+        if not isinstance(attrs, dict):
+            # Treat a non-dict value for attrs as a search for the 'class'
+            # attribute.
+            kwargs['class'] = attrs
+            attrs = None
+
+        if 'class_' in kwargs:
+            # Treat class_="foo" as a search for the 'class'
+            # attribute, overriding any non-dict value for attrs.
+            kwargs['class'] = kwargs['class_']
+            del kwargs['class_']
+
+        if kwargs:
+            if attrs:
+                attrs = attrs.copy()
+                attrs.update(kwargs)
+            else:
+                attrs = kwargs
+        normalized_attrs = {}
+        for key, value in attrs.items():
+            normalized_attrs[key] = self._normalize_search_value(value)
+
+        self.attrs = normalized_attrs
+        self.text = self._normalize_search_value(text)
+
+    def _normalize_search_value(self, value):
+        # Leave it alone if it's a Unicode string, a callable, a
+        # regular expression, a boolean, or None.
+        if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
+            or isinstance(value, bool) or value is None):
+            return value
+
+        # If it's a bytestring, convert it to Unicode, treating it as UTF-8.
+        if isinstance(value, bytes):
+            return value.decode("utf8")
+
+        # If it's listlike, convert it into a list of strings.
+        if hasattr(value, '__iter__'):
+            new_value = []
+            for v in value:
+                if (hasattr(v, '__iter__') and not isinstance(v, bytes)
+                    and not isinstance(v, unicode)):
+                    # This is almost certainly the user's mistake. In the
+                    # interests of avoiding infinite loops, we'll let
+                    # it through as-is rather than doing a recursive call.
+                    new_value.append(v)
+                else:
+                    new_value.append(self._normalize_search_value(v))
+            return new_value
+
+        # Otherwise, convert it into a Unicode string.
+        # The unicode(str()) thing is so this will do the same thing on Python 2
+        # and Python 3.
+        return unicode(str(value))
+
+    def __str__(self):
+        if self.text:
+            return self.text
+        else:
+            return "%s|%s" % (self.name, self.attrs)
+
+    def search_tag(self, markup_name=None, markup_attrs={}):
+        found = None
+        markup = None
+        if isinstance(markup_name, Tag):
+            markup = markup_name
+            markup_attrs = markup
+        call_function_with_tag_data = (
+            isinstance(self.name, collections.Callable)
+            and not isinstance(markup_name, Tag))
+
+        if ((not self.name)
+            or call_function_with_tag_data
+            or (markup and self._matches(markup, self.name))
+            or (not markup and self._matches(markup_name, self.name))):
+            if call_function_with_tag_data:
+                match = self.name(markup_name, markup_attrs)
+            else:
+                match = True
+                markup_attr_map = None
+                for attr, match_against in list(self.attrs.items()):
+                    if not markup_attr_map:
+                        if hasattr(markup_attrs, 'get'):
+                            markup_attr_map = markup_attrs
+                        else:
+                            markup_attr_map = {}
+                            for k, v in markup_attrs:
+                                markup_attr_map[k] = v
+                    attr_value = markup_attr_map.get(attr)
+                    if not self._matches(attr_value, match_against):
+                        match = False
+                        break
+            if match:
+                if markup:
+                    found = markup
+                else:
+                    found = markup_name
+        if found and self.text and not self._matches(found.string, self.text):
+            found = None
+        return found
+    searchTag = search_tag
+
+    def search(self, markup):
+        # print 'looking for %s in %s' % (self, markup)
+        found = None
+        # If given a list of items, scan it for a text element that
+        # matches.
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+            for element in markup:
+                if isinstance(element, NavigableString) \
+                       and self.search(element):
+                    found = element
+                    break
+        # If it's a Tag, make sure its name or attributes match.
+        # Don't bother with Tags if we're searching for text.
+        elif isinstance(markup, Tag):
+            if not self.text or self.name or self.attrs:
+                found = self.search_tag(markup)
+        # If it's text, make sure the text matches.
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if not self.name and not self.attrs and self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception(
+                "I don't know how to match against a %s" % markup.__class__)
+        return found
+
+    def _matches(self, markup, match_against):
+        # print u"Matching %s against %s" % (markup, match_against)
+        result = False
+        if isinstance(markup, list) or isinstance(markup, tuple):
+            # This should only happen when searching a multi-valued attribute
+            # like 'class'.
+            for item in markup:
+                if self._matches(item, match_against):
+                    return True
+            # We didn't match any particular value of the multivalue
+            # attribute, but maybe we match the attribute value when
+            # considered as a string.
+            if self._matches(' '.join(markup), match_against):
+                return True
+            return False
+
+        if match_against is True:
+            # True matches any non-None value.
+            return markup is not None
+
+        if isinstance(match_against, collections.Callable):
+            return match_against(markup)
+
+        # Custom callables take the tag as an argument, but all
+        # other ways of matching match the tag name as a string.
+        if isinstance(markup, Tag):
+            markup = markup.name
+
+        # Ensure that `markup` is either a Unicode string, or None.
+        markup = self._normalize_search_value(markup)
+
+        if markup is None:
+            # None matches None, False, an empty string, an empty list, and so on.
+            return not match_against
+
+        if isinstance(match_against, unicode):
+            # Exact string match
+            return markup == match_against
+
+        if hasattr(match_against, 'match'):
+            # Regexp match
+            return match_against.search(markup)
+
+        if hasattr(match_against, '__iter__'):
+            # The markup must be an exact match against something
+            # in the iterable.
+            return markup in match_against
+
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source, result=()):
+        super(ResultSet, self).__init__(result)
+        self.source = source
diff --git a/src/bs4/testing.py b/src/bs4/testing.py
new file mode 100644
index 0000000..3a6ed42
--- /dev/null
+++ b/src/bs4/testing.py
@@ -0,0 +1,717 @@
+"""Helper classes for tests."""
+
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+__license__ = "MIT"
+
+import pickle
+import copy
+import functools
+import unittest
+from unittest import TestCase
+from bs4 import BeautifulSoup
+from bs4.element import (
+    CharsetMetaAttributeValue,
+    Comment,
+    ContentMetaAttributeValue,
+    Doctype,
+    SoupStrainer,
+)
+
+from bs4.builder import HTMLParserTreeBuilder
+default_builder = HTMLParserTreeBuilder
+
+
+class SoupTest(unittest.TestCase):
+
+    @property
+    def default_builder(self):
+        return default_builder()
+
+    def soup(self, markup, **kwargs):
+        """Build a Beautiful Soup object from markup."""
+        builder = kwargs.pop('builder', self.default_builder)
+        return BeautifulSoup(markup, builder=builder, **kwargs)
+
+    def document_for(self, markup):
+        """Turn an HTML fragment into a document.
+
+        The details depend on the builder.
+        """
+        return self.default_builder.test_fragment_to_document(markup)
+
+    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
+        builder = self.default_builder
+        obj = BeautifulSoup(to_parse, builder=builder)
+        if compare_parsed_to is None:
+            compare_parsed_to = to_parse
+
+        self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
+
+    def assertConnectedness(self, element):
+        """Ensure that next_element and previous_element are properly
+        set for all descendants of the given element.
+        """
+        earlier = None
+        for e in element.descendants:
+            if earlier:
+                self.assertEqual(e, earlier.next_element)
+                self.assertEqual(earlier, e.previous_element)
+            earlier = e
+
+class HTMLTreeBuilderSmokeTest(object):
+
+    """A basic test of a treebuilder's competence.
+
+    Any HTML treebuilder, present or future, should be able to pass
+    these tests. With invalid markup, there's room for interpretation,
+    and different parsers can handle it differently. But with the
+    markup in these tests, there's not much room for interpretation.
+    """
+
+    def test_pickle_and_unpickle_identity(self):
+        # Pickling a tree, then unpickling it, yields a tree identical
+        # to the original.
+        tree = self.soup("<a><b>foo</a>")
+        dumped = pickle.dumps(tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.__class__, BeautifulSoup)
+        self.assertEqual(loaded.decode(), tree.decode())
+
+    def assertDoctypeHandled(self, doctype_fragment):
+        """Assert that a given doctype string is handled correctly."""
+        doctype_str, soup = self._document_with_doctype(doctype_fragment)
+
+        # Make sure a Doctype object was created.
+        doctype = soup.contents[0]
+        self.assertEqual(doctype.__class__, Doctype)
+        self.assertEqual(doctype, doctype_fragment)
+        self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
+
+        # Make sure that the doctype was correctly associated with the
+        # parse tree and that the rest of the document parsed.
+        self.assertEqual(soup.p.contents[0], 'foo')
+
+    def _document_with_doctype(self, doctype_fragment):
+        """Generate and parse a document with the given doctype."""
+        doctype = '<!DOCTYPE %s>' % doctype_fragment
+        markup = doctype + '\n<p>foo</p>'
+        soup = self.soup(markup)
+        return doctype, soup
+
+    def test_normal_doctypes(self):
+        """Make sure normal, everyday HTML doctypes are handled correctly."""
+        self.assertDoctypeHandled("html")
+        self.assertDoctypeHandled(
+            'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
+
+    def test_empty_doctype(self):
+        soup = self.soup("<!DOCTYPE>")
+        doctype = soup.contents[0]
+        self.assertEqual("", doctype.strip())
+
+    def test_public_doctype_with_url(self):
+        doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
+        self.assertDoctypeHandled(doctype)
+
+    def test_system_doctype(self):
+        self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
+
+    def test_namespaced_system_doctype(self):
+        # We can handle a namespaced doctype with a system ID.
+        self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
+
+    def test_namespaced_public_doctype(self):
+        # Test a namespaced doctype with a public id.
+        self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
+
+    def test_real_xhtml_document(self):
+        """A real XHTML document should come out more or less the same as it went in."""
+        markup = b"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+        soup = self.soup(markup)
+        self.assertEqual(
+            soup.encode("utf-8").replace(b"\n", b""),
+            markup.replace(b"\n", b""))
+
+    def test_processing_instruction(self):
+        # We test both Unicode and bytestring to verify that
+        # process_markup correctly sets processing_instruction_class
+        # even when the markup is already Unicode and there is no
+        # need to process anything.
+        markup = u"""<?PITarget PIContent?>"""
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.decode())
+
+        markup = b"""<?PITarget PIContent?>"""
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.encode("utf8"))
+
+    def test_deepcopy(self):
+        """Make sure you can copy the tree builder.
+
+        This is important because the builder is part of a
+        BeautifulSoup object, and we want to be able to copy that.
+        """
+        copy.deepcopy(self.default_builder)
+
+    def test_p_tag_is_never_empty_element(self):
+        """A <p> tag is never designated as an empty-element tag.
+
+        Even if the markup shows it as an empty-element tag, it
+        shouldn't be presented that way.
+        """
+        soup = self.soup("<p/>")
+        self.assertFalse(soup.p.is_empty_element)
+        self.assertEqual(str(soup.p), "<p></p>")
+
+    def test_unclosed_tags_get_closed(self):
+        """A tag that's not closed by the end of the document should be closed.
+
+        This applies to all tags except empty-element tags.
+        """
+        self.assertSoupEquals("<p>", "<p></p>")
+        self.assertSoupEquals("<b>", "<b></b>")
+
+        self.assertSoupEquals("<br>", "<br/>")
+
+    def test_br_is_always_empty_element_tag(self):
+        """A <br> tag is designated as an empty-element tag.
+
+        Some parsers treat <br></br> as one <br/> tag, some parsers as
+        two tags, but it should always be an empty-element tag.
+        """
+        soup = self.soup("<br></br>")
+        self.assertTrue(soup.br.is_empty_element)
+        self.assertEqual(str(soup.br), "<br/>")
+
+    def test_nested_formatting_elements(self):
+        self.assertSoupEquals("<em><em></em></em>")
+
+    def test_double_head(self):
+        html = '''<!DOCTYPE html>
+<html>
+<head>
+<title>Ordinary HEAD element test</title>
+</head>
+<script type="text/javascript">
+alert("Help!");
+</script>
+<body>
+Hello, world!
+</body>
+</html>
+'''
+        soup = self.soup(html)
+        self.assertEqual("text/javascript", soup.find('script')['type'])
+
+    def test_comment(self):
+        # Comments are represented as Comment objects.
+        markup = "<p>foo<!--foobar-->baz</p>"
+        self.assertSoupEquals(markup)
+
+        soup = self.soup(markup)
+        comment = soup.find(text="foobar")
+        self.assertEqual(comment.__class__, Comment)
+
+        # The comment is properly integrated into the tree.
+        foo = soup.find(text="foo")
+        self.assertEqual(comment, foo.next_element)
+        baz = soup.find(text="baz")
+        self.assertEqual(comment, baz.previous_element)
+
+    def test_preserved_whitespace_in_pre_and_textarea(self):
+        """Whitespace must be preserved in <pre> and <textarea> tags,
+        even if that would mean not prettifying the markup.
+        """
+        pre_markup = "<pre>   </pre>"
+        textarea_markup = "<textarea> woo\nwoo  </textarea>"
+        self.assertSoupEquals(pre_markup)
+        self.assertSoupEquals(textarea_markup)
+
+        soup = self.soup(pre_markup)
+        self.assertEqual(soup.pre.prettify(), pre_markup)
+
+        soup = self.soup(textarea_markup)
+        self.assertEqual(soup.textarea.prettify(), textarea_markup)
+
+        soup = self.soup("<textarea></textarea>")
+        self.assertEqual(soup.textarea.prettify(), "<textarea></textarea>")
+
+    def test_nested_inline_elements(self):
+        """Inline elements can be nested indefinitely."""
+        b_tag = "<b>Inside a B tag</b>"
+        self.assertSoupEquals(b_tag)
+
+        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
+        self.assertSoupEquals(nested_b_tag)
+
+        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
+        self.assertSoupEquals(double_nested_b_tag)
+
+    def test_nested_block_level_elements(self):
+        """Block elements can be nested."""
+        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
+        blockquote = soup.blockquote
+        self.assertEqual(blockquote.p.b.string, 'Foo')
+        self.assertEqual(blockquote.b.string, 'Foo')
+
+    def test_correctly_nested_tables(self):
+        """One table can go inside another one."""
+        markup = ('<table id="1">'
+                  '<tr>'
+                  "<td>Here's another table:"
+                  '<table id="2">'
+                  '<tr><td>foo</td></tr>'
+                  '</table></td>')
+
+        self.assertSoupEquals(
+            markup,
+            '<table id="1"><tr><td>Here\'s another table:'
+            '<table id="2"><tr><td>foo</td></tr></table>'
+            '</td></tr></table>')
+
+        self.assertSoupEquals(
+            "<table><thead><tr><td>Foo</td></tr></thead>"
+            "<tbody><tr><td>Bar</td></tr></tbody>"
+            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
+
+    def test_deeply_nested_multivalued_attribute(self):
+        # html5lib can set the attributes of the same tag many times
+        # as it rearranges the tree. This has caused problems with
+        # multivalued attributes.
+        markup = '<table><div><div class="css"></div></div></table>'
+        soup = self.soup(markup)
+        self.assertEqual(["css"], soup.div.div['class'])
+
+    def test_multivalued_attribute_on_html(self):
+        # html5lib uses a different API to set the attributes of the
+        # <html> tag. This has caused problems with multivalued
+        # attributes.
+        markup = '<html class="a b"></html>'
+        soup = self.soup(markup)
+        self.assertEqual(["a", "b"], soup.html['class'])
+
+    def test_angle_brackets_in_attribute_values_are_escaped(self):
+        self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
+
+    def test_entities_in_attributes_converted_to_unicode(self):
+        expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
+        self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
+        self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
+        self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
+        self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
+
+    def test_entities_in_text_converted_to_unicode(self):
+        expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
+        self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
+        self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
+        self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
+        self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
+
+    def test_quot_entity_converted_to_quotation_mark(self):
+        self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
+                              '<p>I said "good day!"</p>')
+
+    def test_out_of_range_entity(self):
+        expect = u"\N{REPLACEMENT CHARACTER}"
+        self.assertSoupEquals("&#10000000000000;", expect)
+        self.assertSoupEquals("&#x10000000000000;", expect)
+        self.assertSoupEquals("&#1000000000;", expect)
+
+    def test_multipart_strings(self):
+        "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
+        soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
+        self.assertEqual("p", soup.h2.string.next_element.name)
+        self.assertEqual("p", soup.p.name)
+        self.assertConnectedness(soup)
+
+    def test_head_tag_between_head_and_body(self):
+        "Prevent recurrence of a bug in the html5lib treebuilder."
+        content = """<html><head></head>
+  <link></link>
+  <body>foo</body>
+</html>
+"""
+        soup = self.soup(content)
+        self.assertNotEqual(None, soup.html.body)
+        self.assertConnectedness(soup)
+
+    def test_multiple_copies_of_a_tag(self):
+        "Prevent recurrence of a bug in the html5lib treebuilder."
+        content = """<!DOCTYPE html>
+<html>
+ <body>
+   <article id="a" >
+   <div><a href="1"></div>
+   <footer>
+     <a href="2"></a>
+   </footer>
+  </article>
+  </body>
+</html>
+"""
+        soup = self.soup(content)
+        self.assertConnectedness(soup.article)
+
+    def test_basic_namespaces(self):
+        """Parsers don't need to *understand* namespaces, but at the
+        very least they should not choke on namespaces or lose
+        data."""
+
+        markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.encode())
+        html = soup.html
+        self.assertEqual('http://www.w3.org/1999/xhtml', html['xmlns'])
+        self.assertEqual(
+            'http://www.w3.org/1998/Math/MathML', html['xmlns:mathml'])
+        self.assertEqual(
+            'http://www.w3.org/2000/svg', html['xmlns:svg'])
+
+    def test_multivalued_attribute_value_becomes_list(self):
+        markup = b'<a class="foo bar">'
+        soup = self.soup(markup)
+        self.assertEqual(['foo', 'bar'], soup.a['class'])
+
+    #
+    # Generally speaking, tests below this point are more tests of
+    # Beautiful Soup than tests of the tree builders. But parsers are
+    # weird, so we run these tests separately for every tree builder
+    # to detect any differences between them.
+    #
+
+    def test_can_parse_unicode_document(self):
+        # A seemingly innocuous document... but it's in Unicode! And
+        # it contains characters that can't be represented in the
+        # encoding found in the <meta> declaration! The horror!
+        markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
+        soup = self.soup(markup)
+        self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)
+
+    def test_soupstrainer(self):
+        """Parsers should be able to work with SoupStrainers."""
+        strainer = SoupStrainer("b")
+        soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
+                         parse_only=strainer)
+        self.assertEqual(soup.decode(), "<b>bold</b>")
+
+    def test_single_quote_attribute_values_become_double_quotes(self):
+        self.assertSoupEquals("<foo attr='bar'></foo>",
+                              '<foo attr="bar"></foo>')
+
+    def test_attribute_values_with_nested_quotes_are_left_alone(self):
+        text = """<foo attr='bar "brawls" happen'>a</foo>"""
+        self.assertSoupEquals(text)
+
+    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
+        text = """<foo attr='bar "brawls" happen'>a</foo>"""
+        soup = self.soup(text)
+        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
+        self.assertSoupEquals(
+            soup.foo.decode(),
+            """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
+
+    def test_ampersand_in_attribute_value_gets_escaped(self):
+        self.assertSoupEquals('<this is="really messed up & stuff"></this>',
+                              '<this is="really messed up &amp; stuff"></this>')
+
+        self.assertSoupEquals(
+            '<a href="http://example.org?a=1&b=2;3">foo</a>',
+            '<a href="http://example.org?a=1&amp;b=2;3">foo</a>')
+
+    def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
+        self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')
+
+    def test_entities_in_strings_converted_during_parsing(self):
+        # Both XML and HTML entities are converted to Unicode characters
+        # during parsing.
+        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
+        self.assertSoupEquals(text, expected)
+
+    def test_smart_quotes_converted_on_the_way_in(self):
+        # Microsoft smart quotes are converted to Unicode characters during
+        # parsing.
+        quote = b"<p>\x91Foo\x92</p>"
+        soup = self.soup(quote)
+        self.assertEqual(
+            soup.p.string,
+            u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
+
+    def test_non_breaking_spaces_converted_on_the_way_in(self):
+        soup = self.soup("<a>&nbsp;&nbsp;</a>")
+        self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
+
+    def test_entities_converted_on_the_way_out(self):
+        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
+        soup = self.soup(text)
+        self.assertEqual(soup.p.encode("utf-8"), expected)
+
+    def test_real_iso_latin_document(self):
+        # Smoke test of interrelated functionality, using an
+        # easy-to-understand document.
+
+        # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
+        unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
+
+        # That's because we're going to encode it into ISO-Latin-1, and use
+        # that to test.
+        iso_latin_html = unicode_html.encode("iso-8859-1")
+
+        # Parse the ISO-Latin-1 HTML.
+        soup = self.soup(iso_latin_html)
+        # Encode it to UTF-8.
+        result = soup.encode("utf-8")
+
+        # What do we expect the result to look like? Well, it would
+        # look like unicode_html, except that the META tag would say
+        # UTF-8 instead of ISO-Latin-1.
+        expected = unicode_html.replace("ISO-Latin-1", "utf-8")
+
+        # And, of course, it would be in UTF-8, not Unicode.
+        expected = expected.encode("utf-8")
+
+        # Ta-da!
+        self.assertEqual(result, expected)
+
+    def test_real_shift_jis_document(self):
+        # Smoke test to make sure the parser can handle a document in
+        # Shift-JIS encoding, without choking.
+        shift_jis_html = (
+            b'<html><head></head><body><pre>'
+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
+            b'</pre></body></html>')
+        unicode_html = shift_jis_html.decode("shift-jis")
+        soup = self.soup(unicode_html)
+
+        # Make sure the parse tree is correctly encoded to various
+        # encodings.
+        self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
+        self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
+
+    def test_real_hebrew_document(self):
+        # A real-world test to make sure we can convert ISO-8859-8 (a
+        # Hebrew encoding) to UTF-8.
+        hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
+        soup = self.soup(
+            hebrew_document, from_encoding="iso8859-8")
+        # Some tree builders call it iso8859-8, others call it iso-8859-8.
+        # That's not a difference we really care about.
+        self.assertIn(soup.original_encoding, ('iso8859-8', 'iso-8859-8'))
+        self.assertEqual(
+            soup.encode('utf-8'),
+            hebrew_document.decode("iso8859-8").encode("utf-8"))
+
+    def test_meta_tag_reflects_current_encoding(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type"/>')
+
+        # Here's a document incorporating that meta tag.
+        shift_jis_html = (
+            '<html><head>\n%s\n'
+            '<meta http-equiv="Content-language" content="ja"/>'
+            '</head><body>Shift-JIS markup goes here.') % meta_tag
+        soup = self.soup(shift_jis_html)
+
+        # Parse the document, and the charset is seemingly unaffected.
+        parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
+        content = parsed_meta['content']
+        self.assertEqual('text/html; charset=x-sjis', content)
+
+        # But that value is actually a ContentMetaAttributeValue object.
+        self.assertTrue(isinstance(content, ContentMetaAttributeValue))
+
+        # And it will take on a value that reflects its current
+        # encoding.
+        self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
+
+        # For the rest of the story, see TestSubstitutions in
+        # test_tree.py.
+
+    def test_html5_style_meta_tag_reflects_current_encoding(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta id="encoding" charset="x-sjis" />')
+
+        # Here's a document incorporating that meta tag.
+        shift_jis_html = (
+            '<html><head>\n%s\n'
+            '<meta http-equiv="Content-language" content="ja"/>'
+            '</head><body>Shift-JIS markup goes here.') % meta_tag
+        soup = self.soup(shift_jis_html)
+
+        # Parse the document, and the charset is seemingly unaffected.
+        parsed_meta = soup.find('meta', id="encoding")
+        charset = parsed_meta['charset']
+        self.assertEqual('x-sjis', charset)
+
+        # But that value is actually a CharsetMetaAttributeValue object.
+        self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
+
+        # And it will take on a value that reflects its current
+        # encoding.
+        self.assertEqual('utf8', charset.encode("utf8"))
+
+    def test_tag_with_no_attributes_can_have_attributes_added(self):
+        soup = self.soup("<a>text</a>")
+        soup.a['foo'] = 'bar'
+        self.assertEqual('<a foo="bar">text</a>', soup.a.decode())
+
+class XMLTreeBuilderSmokeTest(object):
+
+    def test_pickle_and_unpickle_identity(self):
+        # Pickling a tree, then unpickling it, yields a tree identical
+        # to the original.
+        tree = self.soup("<a><b>foo</a>")
+        dumped = pickle.dumps(tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.__class__, BeautifulSoup)
+        self.assertEqual(loaded.decode(), tree.decode())
+
+    def test_docstring_generated(self):
+        soup = self.soup("<root/>")
+        self.assertEqual(
+            soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
+
+    def test_xml_declaration(self):
+        markup = b"""<?xml version="1.0" encoding="utf8"?>\n<foo/>"""
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.encode("utf8"))
+
+    def test_processing_instruction(self):
+        markup = b"""<?xml version="1.0" encoding="utf8"?>\n<?PITarget PIContent?>"""
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.encode("utf8"))
+
+    def test_real_xhtml_document(self):
+        """A real XHTML document should come out *exactly* the same as it went in."""
+        markup = b"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+        soup = self.soup(markup)
+        self.assertEqual(
+            soup.encode("utf-8"), markup)
+
+    def test_formatter_processes_script_tag_for_xml_documents(self):
+        doc = """
+  <script type="text/javascript">
+  </script>
+"""
+        soup = BeautifulSoup(doc, "lxml-xml")
+        # lxml would have stripped this while parsing, but we can add
+        # it later.
+        soup.script.string = 'console.log("< < hey > > ");'
+        encoded = soup.encode()
+        self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
+
+    def test_can_parse_unicode_document(self):
+        markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
+        soup = self.soup(markup)
+        self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)
+
+    def test_popping_namespaced_tag(self):
+        markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
+        soup = self.soup(markup)
+        self.assertEqual(
+            unicode(soup.rss), markup)
+
+    def test_docstring_includes_correct_encoding(self):
+        soup = self.soup("<root/>")
+        self.assertEqual(
+            soup.encode("latin1"),
+            b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
+
+    def test_large_xml_document(self):
+        """A large XML document should come out the same as it went in."""
+        markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
+                  + b'0' * (2**12)
+                  + b'</root>')
+        soup = self.soup(markup)
+        self.assertEqual(soup.encode("utf-8"), markup)
+
+
+    def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
+        self.assertSoupEquals("<p>", "<p/>")
+        self.assertSoupEquals("<p>foo</p>")
+
+    def test_namespaces_are_preserved(self):
+        markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
+        soup = self.soup(markup)
+        root = soup.root
+        self.assertEqual("http://example.com/", root['xmlns:a'])
+        self.assertEqual("http://example.net/", root['xmlns:b'])
+
+    def test_closing_namespaced_tag(self):
+        markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
+        soup = self.soup(markup)
+        self.assertEqual(unicode(soup.p), markup)
+
+    def test_namespaced_attributes(self):
+        markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
+        soup = self.soup(markup)
+        self.assertEqual(unicode(soup.foo), markup)
+
+    def test_namespaced_attributes_xml_namespace(self):
+        markup = '<foo xml:lang="fr">bar</foo>'
+        soup = self.soup(markup)
+        self.assertEqual(unicode(soup.foo), markup)
+
+class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
+    """Smoke test for a tree builder that supports HTML5."""
+
+    def test_real_xhtml_document(self):
+        # Since XHTML is not HTML5, HTML5 parsers are not tested to handle
+        # XHTML documents in any particular way.
+        pass
+
+    def test_html_tags_have_namespace(self):
+        markup = "<a>"
+        soup = self.soup(markup)
+        self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
+
+    def test_svg_tags_have_namespace(self):
+        markup = '<svg><circle/></svg>'
+        soup = self.soup(markup)
+        namespace = "http://www.w3.org/2000/svg"
+        self.assertEqual(namespace, soup.svg.namespace)
+        self.assertEqual(namespace, soup.circle.namespace)
+
+
+    def test_mathml_tags_have_namespace(self):
+        markup = '<math><msqrt>5</msqrt></math>'
+        soup = self.soup(markup)
+        namespace = 'http://www.w3.org/1998/Math/MathML'
+        self.assertEqual(namespace, soup.math.namespace)
+        self.assertEqual(namespace, soup.msqrt.namespace)
+
+    def test_xml_declaration_becomes_comment(self):
+        markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
+        soup = self.soup(markup)
+        self.assertTrue(isinstance(soup.contents[0], Comment))
+        self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
+        self.assertEqual("html", soup.contents[0].next_element.name)
+
+def skipIf(condition, reason):
+    """Return a decorator that swaps the decorated item for a no-op if `condition`."""
+    def nothing(test, *args, **kwargs):
+        # Replacement callable: accepts the call and does nothing.
+        return None
+
+    def decorator(test_item):
+        # `reason` is accepted by the signature but never used here.
+        return nothing if condition else test_item
+
+    return decorator
diff --git a/src/bs4/tests/__init__.py b/src/bs4/tests/__init__.py
new file mode 100644
index 0000000..142c8cc
--- /dev/null
+++ b/src/bs4/tests/__init__.py
@@ -0,0 +1 @@
+"The beautifulsoup tests."
diff --git a/src/bs4/tests/test_builder_registry.py b/src/bs4/tests/test_builder_registry.py
new file mode 100644
index 0000000..90cad82
--- /dev/null
+++ b/src/bs4/tests/test_builder_registry.py
@@ -0,0 +1,147 @@
+"""Tests of the builder registry."""
+
+import unittest
+import warnings
+
+from bs4 import BeautifulSoup
+from bs4.builder import (
+    builder_registry as registry,
+    HTMLParserTreeBuilder,
+    TreeBuilderRegistry,
+)
+
+try:
+    from bs4.builder import HTML5TreeBuilder
+    HTML5LIB_PRESENT = True
+except ImportError:
+    HTML5LIB_PRESENT = False
+
+try:
+    from bs4.builder import (
+        LXMLTreeBuilderForXML,
+        LXMLTreeBuilder,
+        )
+    LXML_PRESENT = True
+except ImportError:
+    LXML_PRESENT = False
+
+
+class BuiltInRegistryTest(unittest.TestCase):
+    """Test the built-in registry with the default builders registered."""
+
+    def test_combination(self):
+        # Each feature combination should resolve to the expected builder.
+        if LXML_PRESENT:
+            self.assertEqual(registry.lookup('fast', 'html'),
+                             LXMLTreeBuilder)
+            self.assertEqual(registry.lookup('permissive', 'xml'),
+                             LXMLTreeBuilderForXML)
+        self.assertEqual(registry.lookup('strict', 'html'),
+                         HTMLParserTreeBuilder)
+
+        if HTML5LIB_PRESENT:
+            self.assertEqual(registry.lookup('html5lib', 'html'),
+                             HTML5TreeBuilder)
+
+    def test_lookup_by_markup_type(self):
+        if LXML_PRESENT:
+            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
+            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
+        else:
+            self.assertEqual(registry.lookup('xml'), None)
+            if HTML5LIB_PRESENT:
+                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
+            else:
+                self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
+
+    def test_named_library(self):
+        if LXML_PRESENT:
+            self.assertEqual(registry.lookup('lxml', 'xml'),
+                             LXMLTreeBuilderForXML)
+            self.assertEqual(registry.lookup('lxml', 'html'),
+                             LXMLTreeBuilder)
+        if HTML5LIB_PRESENT:
+            self.assertEqual(registry.lookup('html5lib'),
+                              HTML5TreeBuilder)
+
+        self.assertEqual(registry.lookup('html.parser'),
+                          HTMLParserTreeBuilder)
+
+    def test_beautifulsoup_constructor_does_lookup(self):
+
+        with warnings.catch_warnings(record=True) as w:
+            # This will create a warning about not explicitly
+            # specifying a parser, but we'll ignore it.
+
+            # You can pass in a string.
+            BeautifulSoup("", features="html")
+            # Or a list of strings.
+            BeautifulSoup("", features=["html", "fast"])
+
+        # You'll get an exception if BS can't find an appropriate
+        # builder.
+        self.assertRaises(ValueError, BeautifulSoup,
+                          "", features="no-such-feature")
+
+class RegistryTest(unittest.TestCase):
+    """Test the TreeBuilderRegistry class in general."""
+
+    def setUp(self):
+        self.registry = TreeBuilderRegistry()
+
+    def builder_for_features(self, *feature_list):
+        cls = type('Builder_' + '_'.join(feature_list),
+                   (object,), {'features' : feature_list})
+
+        self.registry.register(cls)
+        return cls
+
+    def test_register_with_no_features(self):
+        builder = self.builder_for_features()
+
+        # Since the builder advertises no features, you can't find it
+        # by looking up features.
+        self.assertEqual(self.registry.lookup('foo'), None)
+
+        # But you can find it by doing a lookup with no features, if
+        # this happens to be the only registered builder.
+        self.assertEqual(self.registry.lookup(), builder)
+
+    def test_register_with_features_makes_lookup_succeed(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEqual(self.registry.lookup('foo'), builder)
+        self.assertEqual(self.registry.lookup('bar'), builder)
+
+    def test_lookup_fails_when_no_builder_implements_feature(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEqual(self.registry.lookup('baz'), None)
+
+    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
+        builder1 = self.builder_for_features('foo')
+        builder2 = self.builder_for_features('bar')
+        self.assertEqual(self.registry.lookup(), builder2)
+
+    def test_lookup_fails_when_no_tree_builders_registered(self):
+        self.assertEqual(self.registry.lookup(), None)
+
+    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
+        has_one = self.builder_for_features('foo')
+        has_the_other = self.builder_for_features('bar')
+        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
+        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
+        lacks_one = self.builder_for_features('bar')
+        has_the_other = self.builder_for_features('foo')
+
+        # There are two builders featuring 'foo' and 'bar', but
+        # the one that also features 'quux' was registered later.
+        self.assertEqual(self.registry.lookup('foo', 'bar'),
+                          has_both_late)
+
+        # There is only one builder featuring 'foo', 'bar', and 'baz'.
+        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
+                          has_both_early)
+
+    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
+        builder1 = self.builder_for_features('foo', 'bar')
+        builder2 = self.builder_for_features('foo', 'baz')
+        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
diff --git a/src/bs4/tests/test_docs.py b/src/bs4/tests/test_docs.py
new file mode 100644
index 0000000..5b9f677
--- /dev/null
+++ b/src/bs4/tests/test_docs.py
@@ -0,0 +1,36 @@
+"Test harness for doctests."
+
+# pylint: disable-msg=E0611,W0142
+
+__metaclass__ = type
+__all__ = [
+    'additional_tests',
+    ]
+
+import atexit
+import doctest
+import os
+#from pkg_resources import (
+#    resource_filename, resource_exists, resource_listdir, cleanup_resources)
+import unittest
+
+DOCTEST_FLAGS = (
+    doctest.ELLIPSIS |
+    doctest.NORMALIZE_WHITESPACE |
+    doctest.REPORT_NDIFF)
+
+
+# def additional_tests():
+#     "Run the doc tests (README.txt and docs/*, if any exist)"
+#     doctest_files = [
+#         os.path.abspath(resource_filename('bs4', 'README.txt'))]
+#     if resource_exists('bs4', 'docs'):
+#         for name in resource_listdir('bs4', 'docs'):
+#             if name.endswith('.txt'):
+#                 doctest_files.append(
+#                     os.path.abspath(
+#                         resource_filename('bs4', 'docs/%s' % name)))
+#     kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
+#     atexit.register(cleanup_resources)
+#     return unittest.TestSuite((
+#         doctest.DocFileSuite(*doctest_files, **kwargs)))
diff --git a/src/bs4/tests/test_html5lib.py b/src/bs4/tests/test_html5lib.py
new file mode 100644
index 0000000..0f89d62
--- /dev/null
+++ b/src/bs4/tests/test_html5lib.py
@@ -0,0 +1,130 @@
+"""Tests to ensure that the html5lib tree builder generates good trees."""
+
+import warnings
+
+try:
+    from bs4.builder import HTML5TreeBuilder
+    HTML5LIB_PRESENT = True
+except ImportError:  # builder unavailable; the @skipIf below keys off this flag
+    HTML5LIB_PRESENT = False
+from bs4.element import SoupStrainer
+from bs4.testing import (
+    HTML5TreeBuilderSmokeTest,
+    SoupTest,
+    skipIf,
+)
+
+@skipIf(
+    not HTML5LIB_PRESENT,
+    "html5lib seems not to be present, not testing its tree builder.")
+class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
+    """See ``HTML5TreeBuilderSmokeTest``."""
+
+    @property
+    def default_builder(self):
+        return HTML5TreeBuilder()
+
+    def test_soupstrainer(self):
+        # The html5lib tree builder does not support SoupStrainers.
+        strainer = SoupStrainer("b")
+        markup = "<p>A <b>bold</b> statement.</p>"
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(markup, parse_only=strainer)
+        self.assertEqual(
+            soup.decode(), self.document_for(markup))
+
+        self.assertTrue(
+            "the html5lib tree builder doesn't support parse_only" in
+            str(w[0].message))
+
+    def test_correctly_nested_tables(self):
+        """html5lib inserts <tbody> tags where other parsers don't."""
+        markup = ('<table id="1">'
+                  '<tr>'
+                  "<td>Here's another table:"
+                  '<table id="2">'
+                  '<tr><td>foo</td></tr>'
+                  '</table></td>')
+
+        self.assertSoupEquals(
+            markup,
+            '<table id="1"><tbody><tr><td>Here\'s another table:'
+            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
+            '</td></tr></tbody></table>')
+
+        self.assertSoupEquals(
+            "<table><thead><tr><td>Foo</td></tr></thead>"
+            "<tbody><tr><td>Bar</td></tr></tbody>"
+            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
+
+    def test_xml_declaration_followed_by_doctype(self):
+        markup = '''<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html>
+<html>
+  <head>
+  </head>
+  <body>
+   <p>foo</p>
+  </body>
+</html>'''
+        soup = self.soup(markup)
+        # Verify that we can reach the <p> tag; this means the tree is connected.
+        self.assertEqual(b"<p>foo</p>", soup.p.encode())
+
+    def test_reparented_markup(self):
+        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
+        soup = self.soup(markup)
+        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
+        self.assertEqual(2, len(soup.find_all('p')))
+
+
+    def test_reparented_markup_ends_with_whitespace(self):
+        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
+        soup = self.soup(markup)
+        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
+        self.assertEqual(2, len(soup.find_all('p')))
+
+    def test_reparented_markup_containing_identical_whitespace_nodes(self):
+        """Verify that we keep the two whitespace nodes in this
+        document distinct when reparenting the adjacent <tbody> tags.
+        """
+        markup = '<table> <tbody><tbody><ims></tbody> </table>'
+        soup = self.soup(markup)
+        space1, space2 = soup.find_all(string=' ')
+        tbody1, tbody2 = soup.find_all('tbody')
+        assert space1.next_element is tbody1
+        assert tbody2.next_element is space2
+
+    def test_reparented_markup_containing_children(self):
+        markup = '<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>'
+        soup = self.soup(markup)
+        noscript = soup.noscript
+        self.assertEqual("target", noscript.next_element)
+        target = soup.find(string='target')
+
+        # The 'aftermath' string was duplicated; we want the second one.
+        final_aftermath = soup.find_all(string='aftermath')[-1]
+
+        # The <noscript> tag was moved beneath a copy of the <a> tag,
+        # but the 'target' string within is still connected to the
+        # (second) 'aftermath' string.
+        self.assertEqual(final_aftermath, target.next_element)
+        self.assertEqual(target, final_aftermath.previous_element)
+        
+    def test_processing_instruction(self):
+        """Processing instructions become comments."""
+        markup = b"""<?PITarget PIContent?>"""
+        soup = self.soup(markup)
+        assert str(soup).startswith("<!--?PITarget PIContent?-->")
+
+    def test_cloned_multivalue_node(self):
+        markup = b"""<a class="my_class"><p></a>"""
+        soup = self.soup(markup)
+        a1, a2 = soup.find_all('a')
+        self.assertEqual(a1, a2)
+        assert a1 is not a2
+
+    def test_foster_parenting(self):
+        markup = b"""<table><td></tbody>A"""
+        soup = self.soup(markup)
+        self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode())
diff --git a/src/bs4/tests/test_htmlparser.py b/src/bs4/tests/test_htmlparser.py
new file mode 100644
index 0000000..b45e35f
--- /dev/null
+++ b/src/bs4/tests/test_htmlparser.py
@@ -0,0 +1,32 @@
+"""Tests to ensure that the html.parser tree builder generates good
+trees."""
+
+from pdb import set_trace
+import pickle
+from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
+from bs4.builder import HTMLParserTreeBuilder
+
+class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
+
+    @property
+    def default_builder(self):
+        return HTMLParserTreeBuilder()
+
+    def test_namespaced_system_doctype(self):
+        # html.parser can't handle namespaced doctypes, so skip this one.
+        pass
+
+    def test_namespaced_public_doctype(self):
+        # html.parser can't handle namespaced doctypes, so skip this one.
+        pass
+
+    def test_builder_is_pickled(self):
+        """Unlike most tree builders, HTMLParserTreeBuilder can be pickled
+        along with the tree, and will be restored after unpickling.
+        """
+        tree = self.soup("<a><b>foo</a>")
+        dumped = pickle.dumps(tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
+
+
diff --git a/src/bs4/tests/test_lxml.py b/src/bs4/tests/test_lxml.py
new file mode 100644
index 0000000..a05870b
--- /dev/null
+++ b/src/bs4/tests/test_lxml.py
@@ -0,0 +1,76 @@
+"""Tests to ensure that the lxml tree builder generates good trees."""
+
+import re
+import warnings
+
+try:
+    import lxml.etree
+    LXML_PRESENT = True
+    LXML_VERSION = lxml.etree.LXML_VERSION
+except ImportError:  # lxml missing; fall back to a sentinel version tuple
+    LXML_PRESENT = False
+    LXML_VERSION = (0,)
+
+if LXML_PRESENT:
+    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
+
+from bs4 import (
+    BeautifulSoup,
+    BeautifulStoneSoup,
+    )
+from bs4.element import Comment, Doctype, SoupStrainer
+from bs4.testing import skipIf
+from bs4.tests import test_htmlparser
+from bs4.testing import (
+    HTMLTreeBuilderSmokeTest,
+    XMLTreeBuilderSmokeTest,
+    SoupTest,
+    skipIf,
+)
+
+@skipIf(
+    not LXML_PRESENT,
+    "lxml seems not to be present, not testing its tree builder.")
+class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
+    """See ``HTMLTreeBuilderSmokeTest``."""
+
+    @property
+    def default_builder(self):
+        return LXMLTreeBuilder()
+
+    def test_out_of_range_entity(self):
+        self.assertSoupEquals(
+            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
+        self.assertSoupEquals(
+            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
+        self.assertSoupEquals(
+            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
+
+    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
+    # test if an old version of lxml is installed.
+
+    @skipIf(
+        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
+        "Skipping doctype test for old version of lxml to avoid segfault.")
+    def test_empty_doctype(self):
+        soup = self.soup("<!DOCTYPE>")
+        doctype = soup.contents[0]
+        self.assertEqual("", doctype.strip())
+
+    def test_beautifulstonesoup_is_xml_parser(self):
+        # Make sure that the deprecated BSS class uses an xml builder
+        # if one is installed.
+        with warnings.catch_warnings(record=True) as w:
+            soup = BeautifulStoneSoup("<b />")
+        self.assertEqual(u"<b/>", unicode(soup.b))
+        self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
+
+@skipIf(
+    not LXML_PRESENT,
+    "lxml seems not to be present, not testing its XML tree builder.")
+class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
+    """See ``XMLTreeBuilderSmokeTest``."""
+
+    @property
+    def default_builder(self):
+        return LXMLTreeBuilderForXML()
diff --git a/src/bs4/tests/test_soup.py b/src/bs4/tests/test_soup.py
new file mode 100644
index 0000000..f3e69ed
--- /dev/null
+++ b/src/bs4/tests/test_soup.py
@@ -0,0 +1,501 @@
+# -*- coding: utf-8 -*-
+"""Tests of Beautiful Soup as a whole."""
+
+from pdb import set_trace
+import logging
+import unittest
+import sys
+import tempfile
+
+from bs4 import (
+    BeautifulSoup,
+    BeautifulStoneSoup,
+)
+from bs4.element import (
+    CharsetMetaAttributeValue,
+    ContentMetaAttributeValue,
+    SoupStrainer,
+    NamespacedAttribute,
+    )
+import bs4.dammit
+from bs4.dammit import (
+    EntitySubstitution,
+    UnicodeDammit,
+    EncodingDetector,
+)
+from bs4.testing import (
+    SoupTest,
+    skipIf,
+)
+import warnings
+
+try:
+    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
+    LXML_PRESENT = True
+except ImportError:  # the lxml builders could not be imported
+    LXML_PRESENT = False
+
+PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
+
+class TestConstructor(SoupTest):
+
+    def test_short_unicode_input(self):
+        data = u"<h1>éé</h1>"
+        soup = self.soup(data)
+        self.assertEqual(u"éé", soup.h1.string)
+
+    def test_embedded_null(self):
+        data = u"<h1>foo\0bar</h1>"
+        soup = self.soup(data)
+        self.assertEqual(u"foo\0bar", soup.h1.string)
+
+    def test_exclude_encodings(self):
+        utf8_data = u"Räksmörgås".encode("utf-8")
+        soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
+        self.assertEqual("windows-1252", soup.original_encoding)
+
+
+class TestWarnings(SoupTest):
+
+    def _assert_no_parser_specified(self, s, is_there=True):  # name the tests in this class call
+        v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
+        self.assertTrue(v)
+
+    def test_warning_if_no_parser_specified(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>")
+        msg = str(w[0].message)
+        self._assert_no_parser_specified(msg)
+
+    def test_warning_if_parser_specified_too_vague(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>", "html")
+        msg = str(w[0].message)
+        self._assert_no_parser_specified(msg)
+
+    def test_no_warning_if_explicit_parser_specified(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>", "html.parser")
+        self.assertEqual([], w)
+
+    def test_parseOnlyThese_renamed_to_parse_only(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
+        msg = str(w[0].message)
+        self.assertTrue("parseOnlyThese" in msg)
+        self.assertTrue("parse_only" in msg)
+        self.assertEqual(b"<b></b>", soup.encode())
+
+    def test_fromEncoding_renamed_to_from_encoding(self):
+        with warnings.catch_warnings(record=True) as w:
+            utf8 = b"\xc3\xa9"
+            soup = self.soup(utf8, fromEncoding="utf8")
+        msg = str(w[0].message)
+        self.assertTrue("fromEncoding" in msg)
+        self.assertTrue("from_encoding" in msg)
+        self.assertEqual("utf8", soup.original_encoding)
+
+    def test_unrecognized_keyword_argument(self):
+        self.assertRaises(
+            TypeError, self.soup, "<a>", no_such_argument=True)
+
+class TestWarnings(SoupTest):
+    # NOTE(review): a class named TestWarnings is already defined above; this one rebinds the name and shadows it.
+    def test_disk_file_warning(self):
+        filehandle = tempfile.NamedTemporaryFile()
+        filename = filehandle.name
+        try:
+            with warnings.catch_warnings(record=True) as w:
+                soup = self.soup(filename)
+            msg = str(w[0].message)
+            self.assertTrue("looks like a filename" in msg)
+        finally:
+            filehandle.close()
+
+        # The file no longer exists, so Beautiful Soup will no longer issue the warning.
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(filename)
+        self.assertEqual(0, len(w))
+
+    def test_url_warning_with_bytes_url(self):
+        with warnings.catch_warnings(record=True) as warning_list:
+            soup = self.soup(b"http://www.crummybytes.com/")
+        # Be aware this isn't the only warning that can be raised during
+        # execution..
+        self.assertTrue(any("looks like a URL" in str(w.message) 
+            for w in warning_list))
+
+    def test_url_warning_with_unicode_url(self):
+        with warnings.catch_warnings(record=True) as warning_list:
+            # note - this url must differ from the bytes one otherwise
+            # python's warnings system swallows the second warning
+            soup = self.soup(u"http://www.crummyunicode.com/")
+        self.assertTrue(any("looks like a URL" in str(w.message) 
+            for w in warning_list))
+
+    def test_url_warning_with_bytes_and_space(self):
+        with warnings.catch_warnings(record=True) as warning_list:
+            soup = self.soup(b"http://www.crummybytes.com/ is great")
+        self.assertFalse(any("looks like a URL" in str(w.message) 
+            for w in warning_list))
+
+    def test_url_warning_with_unicode_and_space(self):
+        with warnings.catch_warnings(record=True) as warning_list:
+            soup = self.soup(u"http://www.crummyuncode.com/ is great")
+        self.assertFalse(any("looks like a URL" in str(w.message) 
+            for w in warning_list))
+
+
+class TestSelectiveParsing(SoupTest):
+
+    def test_parse_with_soupstrainer(self):
+        markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
+        strainer = SoupStrainer("b")
+        soup = self.soup(markup, parse_only=strainer)
+        self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
+
+
+class TestEntitySubstitution(unittest.TestCase):
+    """Standalone tests of the EntitySubstitution class."""
+    def setUp(self):
+        self.sub = EntitySubstitution
+
+    def test_simple_html_substitution(self):
+        # Unicode characters corresponding to named HTML entities
+        # are substituted, and no others.
+        s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
+        self.assertEqual(self.sub.substitute_html(s),
+                          u"foo&forall;\N{SNOWMAN}&otilde;bar")
+
+    def test_smart_quote_substitution(self):
+        # MS smart quotes are a common source of frustration, so we
+        # give them a special test.
+        quotes = b"\x91\x92foo\x93\x94"
+        dammit = UnicodeDammit(quotes)
+        self.assertEqual(self.sub.substitute_html(dammit.markup),
+                          "&lsquo;&rsquo;foo&ldquo;&rdquo;")
+
+    def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
+        s = 'Welcome to "my bar"'
+        self.assertEqual(self.sub.substitute_xml(s, False), s)
+
+    def test_xml_attribute_quoting_normally_uses_double_quotes(self):
+        self.assertEqual(self.sub.substitute_xml("Welcome", True),
+                          '"Welcome"')
+        self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
+                          '"Bob\'s Bar"')
+
+    def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
+        s = 'Welcome to "my bar"'
+        self.assertEqual(self.sub.substitute_xml(s, True),
+                          "'Welcome to \"my bar\"'")
+
+    def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
+        s = 'Welcome to "Bob\'s Bar"'
+        self.assertEqual(
+            self.sub.substitute_xml(s, True),
+            '"Welcome to &quot;Bob\'s Bar&quot;"')
+
+    def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
+        quoted = 'Welcome to "Bob\'s Bar"'
+        self.assertEqual(self.sub.substitute_xml(quoted), quoted)
+
+    def test_xml_quoting_handles_angle_brackets(self):
+        self.assertEqual(
+            self.sub.substitute_xml("foo<bar>"),
+            "foo&lt;bar&gt;")
+
+    def test_xml_quoting_handles_ampersands(self):
+        self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
+
+    def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
+        self.assertEqual(
+            self.sub.substitute_xml("&Aacute;T&T"),
+            "&amp;Aacute;T&amp;T")
+
+    def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
+        self.assertEqual(
+            self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
+            "&Aacute;T&amp;T")
+
+    def test_quotes_not_html_substituted(self):
+        """There's no need to do this except inside attribute values."""
+        text = 'Bob\'s "bar"'
+        self.assertEqual(self.sub.substitute_html(text), text)
+
+
+class TestEncodingConversion(SoupTest):
+    # Test Beautiful Soup's ability to decode and encode from various
+    # encodings.
+
+    def setUp(self):
+        super(TestEncodingConversion, self).setUp()
+        self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
+        self.utf8_data = self.unicode_data.encode("utf-8")
+        # Just so you know what it looks like.
+        self.assertEqual(
+            self.utf8_data,
+            b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
+
+    def test_ascii_in_unicode_out(self):
+        # ASCII input is converted to Unicode. The original_encoding
+        # attribute is set to 'utf-8', a superset of ASCII.
+        chardet = bs4.dammit.chardet_dammit
+        logging.disable(logging.WARNING)
+        try:
+            def noop(markup):
+                return None
+            # Disable chardet, which will realize that the ASCII is ASCII.
+            bs4.dammit.chardet_dammit = noop
+            ascii_markup = b"<foo>a</foo>"
+            soup_from_ascii = self.soup(ascii_markup)
+            unicode_output = soup_from_ascii.decode()
+            self.assertTrue(isinstance(unicode_output, unicode))
+            self.assertEqual(unicode_output, self.document_for(ascii_markup.decode()))
+            self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
+        finally:
+            logging.disable(logging.NOTSET)
+            bs4.dammit.chardet_dammit = chardet
+
+    def test_unicode_in_unicode_out(self):
+        # Unicode input is left alone. The original_encoding attribute
+        # is not set.
+        soup_from_unicode = self.soup(self.unicode_data)
+        self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
+        self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
+        self.assertEqual(soup_from_unicode.original_encoding, None)
+
+    def test_utf8_in_unicode_out(self):
+        # UTF-8 input is converted to Unicode. The original_encoding
+        # attribute is set.
+        soup_from_utf8 = self.soup(self.utf8_data)
+        self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
+        self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
+
+    def test_utf8_out(self):
+        # The internal data structures can be encoded as UTF-8.
+        soup_from_unicode = self.soup(self.unicode_data)
+        self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
+
+    @skipIf(
+        PYTHON_3_PRE_3_2,
+        "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
+    def test_attribute_name_containing_unicode_characters(self):
+        markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
+        self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
+
+class TestUnicodeDammit(unittest.TestCase):
+    """Standalone tests of UnicodeDammit."""
+
+    def test_unicode_input(self):
+        markup = u"I'm already Unicode! \N{SNOWMAN}"
+        dammit = UnicodeDammit(markup)
+        self.assertEqual(dammit.unicode_markup, markup)
+
+    def test_smart_quotes_to_unicode(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup)
+        self.assertEqual(
+            dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
+
+    def test_smart_quotes_to_xml_entities(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
+        self.assertEqual(
+            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
+
+    def test_smart_quotes_to_html_entities(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup, smart_quotes_to="html")
+        self.assertEqual(
+            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
+
+    def test_smart_quotes_to_ascii(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
+        self.assertEqual(
+            dammit.unicode_markup, """<foo>''""</foo>""")
+
+    def test_detect_utf8(self):
+        utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
+        dammit = UnicodeDammit(utf8)
+        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
+        self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}')
+
+
+    def test_convert_hebrew(self):
+        hebrew = b"\xed\xe5\xec\xf9"
+        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
+        self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
+        self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
+
+    def test_dont_see_smart_quotes_where_there_are_none(self):
+        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
+        dammit = UnicodeDammit(utf_8)
+        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
+        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
+
+    def test_ignore_inappropriate_codecs(self):
+        utf8_data = u"Räksmörgås".encode("utf-8")
+        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
+        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
+
+    def test_ignore_invalid_codecs(self):
+        utf8_data = u"Räksmörgås".encode("utf-8")
+        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
+            dammit = UnicodeDammit(utf8_data, [bad_encoding])
+            self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
+
+    def test_exclude_encodings(self):
+        # This is UTF-8.
+        utf8_data = u"Räksmörgås".encode("utf-8")
+
+        # But if we exclude UTF-8 from consideration, the guess is
+        # Windows-1252.
+        dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
+        self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')
+
+        # And if we exclude that, there is no valid guess at all.
+        dammit = UnicodeDammit(
+            utf8_data, exclude_encodings=["utf-8", "windows-1252"])
+        self.assertEqual(dammit.original_encoding, None)
+
+    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
+        detected = EncodingDetector(
+            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
+        encodings = list(detected.encodings)
+        assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings
+
+    def test_detect_html5_style_meta_tag(self):
+
+        for data in (
+            b'<html><meta charset="euc-jp" /></html>',
+            b"<html><meta charset='euc-jp' /></html>",
+            b"<html><meta charset=euc-jp /></html>",
+            b"<html><meta charset=euc-jp/></html>"):
+            dammit = UnicodeDammit(data, is_html=True)
+            self.assertEqual(
+                "euc-jp", dammit.original_encoding)
+
+    def test_last_ditch_entity_replacement(self):
+        # This is a UTF-8 document that contains bytestrings
+        # completely incompatible with UTF-8 (ie. encoded with some other
+        # encoding).
+        #
+        # Since there is no consistent encoding for the document,
+        # Unicode, Dammit will eventually encode the document as UTF-8
+        # and encode the incompatible characters as REPLACEMENT
+        # CHARACTER.
+        #
+        # If chardet is installed, it will detect that the document
+        # can be converted into ISO-8859-1 without errors. This happens
+        # to be the wrong encoding, but it is a consistent encoding, so the
+        # code we're testing here won't run.
+        #
+        # So we temporarily disable chardet if it's present.
+        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
+<html><b>\330\250\330\252\330\261</b>
+<i>\310\322\321\220\312\321\355\344</i></html>"""
+        chardet = bs4.dammit.chardet_dammit
+        logging.disable(logging.WARNING)
+        try:
+            def noop(markup):
+                return None
+            bs4.dammit.chardet_dammit = noop
+            dammit = UnicodeDammit(doc)
+            self.assertEqual(True, dammit.contains_replacement_characters)
+            self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+
+            soup = BeautifulSoup(doc, "html.parser")
+            self.assertTrue(soup.contains_replacement_characters)
+        finally:
+            logging.disable(logging.NOTSET)
+            bs4.dammit.chardet_dammit = chardet
+
+    def test_byte_order_mark_removed(self):
+        # A document written in UTF-16LE will have its byte order marker stripped.
+        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
+        dammit = UnicodeDammit(data)
+        self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
+        self.assertEqual("utf-16le", dammit.original_encoding)
+
+    def test_detwingle(self):
+        # Here's a UTF8 document.
+        utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
+
+        # Here's a Windows-1252 document.
+        windows_1252 = (
+            u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
+            u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
+
+        # Through some unholy alchemy, they've been stuck together.
+        doc = utf8 + windows_1252 + utf8
+
+        # The document can't be decoded as UTF-8:
+        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
+
+        # Unicode, Dammit thinks the whole document is Windows-1252,
+        # and decodes it into "â˜ƒâ˜ƒâ˜ƒ“Hi, I like Windows!”â˜ƒâ˜ƒâ˜ƒ"
+
+        # But if we run it through UnicodeDammit.detwingle, it's fixed:
+
+        fixed = UnicodeDammit.detwingle(doc)
+        self.assertEqual(
+            u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
+
+    def test_detwingle_ignores_multibyte_characters(self):
+        # Each of these characters has a UTF-8 representation ending
+        # in \x93. \x93 is a smart quote if interpreted as
+        # Windows-1252. But our code knows to skip over multibyte
+        # UTF-8 characters, so they'll survive the process unscathed.
+        for tricky_unicode_char in (
+            u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
+            u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
+            u"\U00010413", # 4-byte char '\xf0\x90\x90\x93' (code point U+10413)
+            ):
+            encoded = tricky_unicode_char.encode("utf8")
+            self.assertTrue(encoded.endswith(b'\x93'))
+            output = UnicodeDammit.detwingle(encoded)
+            self.assertEqual(output, encoded)
+
+class TestNamedspacedAttribute(SoupTest):
+
+    def test_name_may_be_none(self):
+        a = NamespacedAttribute("xmlns", None)
+        self.assertEqual(a, "xmlns")
+
+    def test_attribute_is_equivalent_to_colon_separated_string(self):
+        a = NamespacedAttribute("a", "b")
+        self.assertEqual("a:b", a)
+
+    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
+        a = NamespacedAttribute("a", "b", "c")
+        b = NamespacedAttribute("a", "b", "c")
+        self.assertEqual(a, b)
+
+        # The actual namespace is not considered.
+        c = NamespacedAttribute("a", "b", None)
+        self.assertEqual(a, c)
+
+        # But name and prefix are important.
+        d = NamespacedAttribute("a", "z", "c")
+        self.assertNotEqual(a, d)
+
+        e = NamespacedAttribute("z", "b", "c")
+        self.assertNotEqual(a, e)
+
+
+class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
+    """Check what the meta attribute value classes return from encode()."""
+
+    def test_charset_meta_attribute_value(self):
+        value = CharsetMetaAttributeValue("euc-jp")
+        self.assertEqual("euc-jp", value)
+        self.assertEqual("euc-jp", value.original_value)
+        self.assertEqual("utf8", value.encode("utf8"))
+
+    def test_content_meta_attribute_value(self):
+        value = ContentMetaAttributeValue("text/html; charset=euc-jp")
+        self.assertEqual("text/html; charset=euc-jp", value)
+        self.assertEqual("text/html; charset=euc-jp", value.original_value)
+        self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
diff --git a/src/bs4/tests/test_tree.py b/src/bs4/tests/test_tree.py
new file mode 100644
index 0000000..a4fe0b1
--- /dev/null
+++ b/src/bs4/tests/test_tree.py
@@ -0,0 +1,2044 @@
+# -*- coding: utf-8 -*-
+"""Tests for Beautiful Soup's tree traversal methods.
+
+The tree traversal methods are the main advantage of using Beautiful
+Soup over just using a parser.
+
+Different parsers will build different Beautiful Soup trees given the
+same markup, but all Beautiful Soup trees can be traversed with the
+methods tested here.
+"""
+
+from pdb import set_trace
+import copy
+import pickle
+import re
+import warnings
+from bs4 import BeautifulSoup
+from bs4.builder import (
+    builder_registry,
+    HTMLParserTreeBuilder,
+)
+from bs4.element import (
+    PY3K,
+    CData,
+    Comment,
+    Declaration,
+    Doctype,
+    NavigableString,
+    SoupStrainer,
+    Tag,
+)
+from bs4.testing import (
+    SoupTest,
+    skipIf,
+)
+
+XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
+LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
+
+class TreeTest(SoupTest):
+
+    def assertSelects(self, tags, should_match):
+        """Make sure that the given tags have the correct text.
+
+        This is used in tests that define a bunch of tags, each
+        containing a single string, and then select certain strings by
+        some mechanism.
+        """
+        self.assertEqual([tag.string for tag in tags], should_match)
+
+    def assertSelectsIDs(self, tags, should_match):
+        """Make sure that the given tags have the correct IDs.
+
+        This is used in tests that define a bunch of tags, each
+        containing a single string, and then select certain strings by
+        some mechanism.
+        """
+        self.assertEqual([tag['id'] for tag in tags], should_match)
+
+
+class TestFind(TreeTest):
+    """Basic tests of the find() method.
+
+    find() just calls find_all() with limit=1, so it's not tested all
+    that thoroughly here.
+    """
+
+    def test_find_tag(self):
+        soup = self.soup("<a>1</a><b>2</b><a>3</a><b>4</b>")
+        self.assertEqual(soup.find("b").string, "2")
+
+    def test_unicode_text_find(self):
+        soup = self.soup(u'<h1>Räksmörgås</h1>')
+        self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås')
+
+    def test_unicode_attribute_find(self):
+        soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>')
+        str(soup)
+        self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text)
+
+
+    def test_find_everything(self):
+        """Test an optimization that finds all tags."""
+        soup = self.soup("<a>foo</a><b>bar</b>")
+        self.assertEqual(2, len(soup.find_all()))
+
+    def test_find_everything_with_name(self):
+        """Test an optimization that finds all tags with a given name."""
+        soup = self.soup("<a>foo</a><b>bar</b><a>baz</a>")
+        self.assertEqual(2, len(soup.find_all('a')))
+
+class TestFindAll(TreeTest):
+    """Basic tests of the find_all() method."""
+
+    def test_find_all_text_nodes(self):
+        """You can search the tree for text nodes."""
+        soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
+        # Exact match.
+        self.assertEqual(soup.find_all(string="bar"), [u"bar"])
+        self.assertEqual(soup.find_all(text="bar"), [u"bar"])
+        # Match any of a number of strings.
+        self.assertEqual(
+            soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
+        # Match a regular expression.
+        self.assertEqual(soup.find_all(text=re.compile('.*')),
+                         [u"Foo", u"bar", u'\xbb'])
+        # Match anything.
+        self.assertEqual(soup.find_all(text=True),
+                         [u"Foo", u"bar", u'\xbb'])
+
+    def test_find_all_limit(self):
+        """You can limit the number of items returned by find_all."""
+        soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>")
+        self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"])
+        self.assertSelects(soup.find_all('a', limit=1), ["1"])
+        self.assertSelects(
+            soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"])
+
+        # A limit of 0 means no limit.
+        self.assertSelects(
+            soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"])
+
+    def test_calling_a_tag_is_calling_findall(self):
+        soup = self.soup("<a>1</a><b>2<a id='foo'>3</a></b>")
+        self.assertSelects(soup('a', limit=1), ["1"])
+        self.assertSelects(soup.b(id="foo"), ["3"])
+
+    def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self):
+        soup = self.soup("<a></a>")
+        # Create a self-referential list.
+        l = []
+        l.append(l)
+
+        # Without special code in _normalize_search_value, this would cause infinite
+        # recursion.
+        self.assertEqual([], soup.find_all(l))
+
+    def test_find_all_resultset(self):
+        """All find_all calls return a ResultSet"""
+        soup = self.soup("<a></a>")
+        result = soup.find_all("a")
+        self.assertTrue(hasattr(result, "source"))
+
+        result = soup.find_all(True)
+        self.assertTrue(hasattr(result, "source"))
+
+        result = soup.find_all(text="foo")
+        self.assertTrue(hasattr(result, "source"))
+
+
+class TestFindAllBasicNamespaces(TreeTest):
+
+    def test_find_by_namespaced_name(self):
+        soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">')
+        self.assertEqual("4", soup.find("mathml:msqrt").string)
+        self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name)
+
+
+class TestFindAllByName(TreeTest):
+    """Test ways of finding tags by tag name."""
+
+    def setUp(self):
+        super(TestFindAllByName, self).setUp()  # name this class so no setUp in the MRO is skipped
+        self.tree = self.soup("""<a>First tag.</a>
+                                  <b>Second tag.</b>
+                                  <c>Third <a>Nested tag.</a> tag.</c>""")
+
+    def test_find_all_by_tag_name(self):
+        # Find all the <a> tags.
+        self.assertSelects(
+            self.tree.find_all('a'), ['First tag.', 'Nested tag.'])
+
+    def test_find_all_by_name_and_text(self):
+        self.assertSelects(
+            self.tree.find_all('a', text='First tag.'), ['First tag.'])
+
+        self.assertSelects(
+            self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.'])
+
+        self.assertSelects(
+            self.tree.find_all('a', text=re.compile("tag")),
+            ['First tag.', 'Nested tag.'])
+
+
+    def test_find_all_on_non_root_element(self):
+        # You can call find_all on any node, not just the root.
+        self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.'])
+
+    def test_calling_element_invokes_find_all(self):
+        self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_strainer(self):
+        self.assertSelects(
+            self.tree.find_all(SoupStrainer('a')),
+            ['First tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_names(self):
+        self.assertSelects(
+            self.tree.find_all(['a', 'b']),
+            ['First tag.', 'Second tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_dict(self):
+        self.assertSelects(
+            self.tree.find_all({'a' : True, 'b' : True}),
+            ['First tag.', 'Second tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_re(self):
+        self.assertSelects(
+            self.tree.find_all(re.compile('^[ab]$')),
+            ['First tag.', 'Second tag.', 'Nested tag.'])
+
+    def test_find_all_with_tags_matching_method(self):
+        # You can define an oracle method that determines whether
+        # a tag matches the search.
+        def id_matches_name(tag):
+            return tag.name == tag.get('id')
+
+        tree = self.soup("""<a id="a">Match 1.</a>
+                            <a id="1">Does not match.</a>
+                            <b id="b">Match 2.</a>""")
+
+        self.assertSelects(
+            tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
+
+    def test_find_with_multi_valued_attribute(self):
+        """A class value containing a space can be matched as one whole string."""
+        soup = self.soup(
+            "<div class='a b'>1</div><div class='a c'>2</div><div class='a d'>3</div>")
+        r1 = soup.find('div', 'a d')
+        r2 = soup.find('div', re.compile(r'a d'))
+        r3, r4 = soup.find_all('div', ['a b', 'a d'])
+        self.assertEqual('3', r1.string)
+        self.assertEqual('3', r2.string)
+        self.assertEqual('1', r3.string)
+        self.assertEqual('3', r4.string)
+
+class TestFindAllByAttribute(TreeTest):
+
+    def test_find_all_by_attribute_name(self):
+        # You can pass in keyword arguments to find_all to search by
+        # attribute.
+        tree = self.soup("""
+                         <a id="first">Matching a.</a>
+                         <a id="second">
+                          Non-matching <b id="first">Matching b.</b>a.
+                         </a>""")
+        self.assertSelects(tree.find_all(id='first'),
+                           ["Matching a.", "Matching b."])
+
+    def test_find_all_by_utf8_attribute_value(self):
+        peace = u"םולש".encode("utf8")
+        data = u'<a title="םולש"></a>'.encode("utf8")
+        soup = self.soup(data)
+        self.assertEqual([soup.a], soup.find_all(title=peace))
+        self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
+        self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"]))
+
+    def test_find_all_by_attribute_dict(self):
+        # You can pass in a dictionary as the argument 'attrs'. This
+        # lets you search for attributes like 'name' (a fixed argument
+        # to find_all) and 'class' (a reserved word in Python.)
+        tree = self.soup("""
+                         <a name="name1" class="class1">Name match.</a>
+                         <a name="name2" class="class2">Class match.</a>
+                         <a name="name3" class="class3">Non-match.</a>
+                         <name1>A tag called 'name1'.</name1>
+                         """)
+
+        # This doesn't do what you want.
+        self.assertSelects(tree.find_all(name='name1'),
+                           ["A tag called 'name1'."])
+        # This does what you want.
+        self.assertSelects(tree.find_all(attrs={'name' : 'name1'}),
+                           ["Name match."])
+
+        self.assertSelects(tree.find_all(attrs={'class' : 'class2'}),
+                           ["Class match."])
+
+    def test_find_all_by_class(self):
+        tree = self.soup("""
+                         <a class="1">Class 1.</a>
+                         <a class="2">Class 2.</a>
+                         <b class="1">Class 1.</b>
+                         <c class="3 4">Class 3 and 4.</c>
+                         """)
+
+        # Passing in the class_ keyword argument will search against
+        # the 'class' attribute.
+        self.assertSelects(tree.find_all('a', class_='1'), ['Class 1.'])
+        self.assertSelects(tree.find_all('c', class_='3'), ['Class 3 and 4.'])
+        self.assertSelects(tree.find_all('c', class_='4'), ['Class 3 and 4.'])
+
+        # Passing in a string to 'attrs' will also search the CSS class.
+        self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
+        self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
+        self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.'])
+        self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.'])
+
+    def test_find_by_class_when_multiple_classes_present(self):
+        tree = self.soup("<gar class='foo bar'>Found it</gar>")
+
+        f = tree.find_all("gar", class_=re.compile("o"))
+        self.assertSelects(f, ["Found it"])
+
+        f = tree.find_all("gar", class_=re.compile("a"))
+        self.assertSelects(f, ["Found it"])
+
+        # If the search fails to match the individual strings "foo" and "bar",
+        # it will be tried against the combined string "foo bar".
+        f = tree.find_all("gar", class_=re.compile("o b"))
+        self.assertSelects(f, ["Found it"])
+
+    def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
+        soup = self.soup("<a class='bar'>Found it</a>")
+
+        self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"])
+
+        def big_attribute_value(value):
+            return len(value) > 3
+
+        self.assertSelects(soup.find_all("a", big_attribute_value), [])
+
+        def small_attribute_value(value):
+            return len(value) <= 3
+
+        self.assertSelects(
+            soup.find_all("a", small_attribute_value), ["Found it"])
+
+    def test_find_all_with_string_for_attrs_finds_multiple_classes(self):
+        soup = self.soup('<a class="foo bar"></a><a class="foo"></a>')
+        a, a2 = soup.find_all("a")
+        self.assertEqual([a, a2], soup.find_all("a", "foo"))
+        self.assertEqual([a], soup.find_all("a", "bar"))
+
+        # If you specify the class as a string that contains a
+        # space, only that specific value will be found.
+        self.assertEqual([a], soup.find_all("a", class_="foo bar"))
+        self.assertEqual([a], soup.find_all("a", "foo bar"))
+        self.assertEqual([], soup.find_all("a", "bar foo"))
+
+    def test_find_all_by_attribute_soupstrainer(self):
+        tree = self.soup("""
+                         <a id="first">Match.</a>
+                         <a id="second">Non-match.</a>""")
+
+        strainer = SoupStrainer(attrs={'id' : 'first'})
+        self.assertSelects(tree.find_all(strainer), ['Match.'])
+
+    def test_find_all_with_missing_attribute(self):
+        # You can pass in None as the value of an attribute to find_all.
+        # This will match tags that do not have that attribute set.
+        tree = self.soup("""<a id="1">ID present.</a>
+                            <a>No ID present.</a>
+                            <a id="">ID is empty.</a>""")
+        self.assertSelects(tree.find_all('a', id=None), ["No ID present."])
+
+    def test_find_all_with_defined_attribute(self):
+        # You can pass in True as the value of an attribute to find_all.
+        # This will match tags that have that attribute set to any value.
+        tree = self.soup("""<a id="1">ID present.</a>
+                            <a>No ID present.</a>
+                            <a id="">ID is empty.</a>""")
+        self.assertSelects(
+            tree.find_all(id=True), ["ID present.", "ID is empty."])
+
+    def test_find_all_with_numeric_attribute(self):
+        # If you search for a number, it's treated as a string.
+        tree = self.soup("""<a id=1>Unquoted attribute.</a>
+                            <a id="1">Quoted attribute.</a>""")
+
+        expected = ["Unquoted attribute.", "Quoted attribute."]
+        self.assertSelects(tree.find_all(id=1), expected)
+        self.assertSelects(tree.find_all(id="1"), expected)
+
+    def test_find_all_with_list_attribute_values(self):
+        # You can pass a list of attribute values instead of just one,
+        # and you'll get tags that match any of the values.
+        tree = self.soup("""<a id="1">1</a>
+                            <a id="2">2</a>
+                            <a id="3">3</a>
+                            <a>No ID.</a>""")
+        self.assertSelects(tree.find_all(id=["1", "3", "4"]),
+                           ["1", "3"])
+
+    def test_find_all_with_regular_expression_attribute_value(self):
+        # You can pass a regular expression as an attribute value, and
+        # you'll get tags whose values for that attribute match the
+        # regular expression.
+        tree = self.soup("""<a id="a">One a.</a>
+                            <a id="aa">Two as.</a>
+                            <a id="ab">Mixed as and bs.</a>
+                            <a id="b">One b.</a>
+                            <a>No ID.</a>""")
+
+        self.assertSelects(tree.find_all(id=re.compile("^a+$")),
+                           ["One a.", "Two as."])
+
+    def test_find_by_name_and_containing_string(self):
+        # A name plus text= matches tags of that name with that string.
+        soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
+        a = soup.a
+        self.assertEqual([a], soup.find_all("a", text="foo"))
+        self.assertEqual([], soup.find_all("a", text="bar"))
+        self.assertSelects(soup.find_all("b", text="bar"), ["bar"])
+
+    def test_find_by_name_and_containing_string_when_string_is_buried(self):
+        soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
+        self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))
+
+    def test_find_by_attribute_and_containing_string(self):
+        soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
+        a = soup.a
+
+        self.assertEqual([a], soup.find_all(id=2, text="foo"))
+        self.assertEqual([], soup.find_all(id=1, text="bar"))
+
+
+
+
+class TestIndex(TreeTest):
+    """Test Tag.index"""
+    def test_index(self):
+        tree = self.soup("""<div>
+                            <a>Identical</a>
+                            <b>Not identical</b>
+                            <a>Identical</a>
+
+                            <c><d>Identical with child</d></c>
+                            <b>Also not identical</b>
+                            <c><d>Identical with child</d></c>
+                            </div>""")
+        div = tree.div
+        for i, element in enumerate(div.contents):
+            self.assertEqual(i, div.index(element))
+        self.assertRaises(ValueError, tree.index, 1)
+
+
+class TestParentOperations(TreeTest):
+    """Test navigation and searching through an element's parents."""
+
+    def setUp(self):
+        super(TestParentOperations, self).setUp()
+        self.tree = self.soup('''<ul id="empty"></ul>
+                                 <ul id="top">
+                                  <ul id="middle">
+                                   <ul id="bottom">
+                                    <b>Start here</b>
+                                   </ul>
+                                  </ul>''')
+        self.start = self.tree.b
+
+
+    def test_parent(self):
+        self.assertEqual(self.start.parent['id'], 'bottom')
+        self.assertEqual(self.start.parent.parent['id'], 'middle')
+        self.assertEqual(self.start.parent.parent.parent['id'], 'top')
+
+    def test_parent_of_top_tag_is_soup_object(self):
+        top_tag = self.tree.contents[0]
+        self.assertEqual(top_tag.parent, self.tree)
+
+    def test_soup_object_has_no_parent(self):
+        self.assertEqual(None, self.tree.parent)
+
+    def test_find_parents(self):
+        self.assertSelectsIDs(
+            self.start.find_parents('ul'), ['bottom', 'middle', 'top'])
+        self.assertSelectsIDs(
+            self.start.find_parents('ul', id="middle"), ['middle'])
+
+    def test_find_parent(self):
+        self.assertEqual(self.start.find_parent('ul')['id'], 'bottom')
+        self.assertEqual(self.start.find_parent('ul', id='top')['id'], 'top')
+
+    def test_parent_of_text_element(self):
+        text = self.tree.find(text="Start here")
+        self.assertEqual(text.parent.name, 'b')
+
+    def test_text_element_find_parent(self):
+        text = self.tree.find(text="Start here")
+        self.assertEqual(text.find_parent('ul')['id'], 'bottom')
+
+    def test_parent_generator(self):
+        parents = [parent['id'] for parent in self.start.parents
+                   if parent is not None and 'id' in parent.attrs]
+        self.assertEqual(parents, ['bottom', 'middle', 'top'])
+
+
+class ProximityTest(TreeTest):
+    """Shared fixture: a document with three consecutive <b> tags."""
+    def setUp(self):
+        super(ProximityTest, self).setUp()
+        self.tree = self.soup(
+            '<html id="start"><head></head><body><b id="1">One</b><b id="2">Two</b><b id="3">Three</b></body></html>')
+
+
+class TestNextOperations(ProximityTest):
+    """Test navigation and searching forward from the first <b> tag."""
+    def setUp(self):
+        super(TestNextOperations, self).setUp()
+        self.start = self.tree.b
+
+    def test_next(self):
+        self.assertEqual(self.start.next_element, "One")
+        self.assertEqual(self.start.next_element.next_element['id'], "2")
+
+    def test_next_of_last_item_is_none(self):
+        last = self.tree.find(text="Three")
+        self.assertEqual(last.next_element, None)
+
+    def test_next_of_root_is_none(self):
+        # The document root is outside the next/previous chain.
+        self.assertEqual(self.tree.next_element, None)
+
+    def test_find_all_next(self):
+        self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
+        # Keyword arguments narrow the matches by attribute value.
+        self.assertSelects(self.start.find_all_next(id=3), ["Three"])
+
+    def test_find_next(self):
+        self.assertEqual(self.start.find_next('b')['id'], '2')
+        self.assertEqual(self.start.find_next(text="Three"), "Three")
+
+    def test_find_next_for_text_element(self):
+        text = self.tree.find(text="One")
+        self.assertEqual(text.find_next("b").string, "Two")
+        self.assertSelects(text.find_all_next("b"), ["Two", "Three"])
+
+    def test_next_generator(self):
+        start = self.tree.find(text="Two")
+        successors = list(start.next_elements)
+        # There are two successors: the final <b> tag and its text contents.
+        tag, contents = successors
+        self.assertEqual(tag['id'], '3')
+        self.assertEqual(contents, "Three")
+
+class TestPreviousOperations(ProximityTest):
+
+    def setUp(self):
+        super(TestPreviousOperations, self).setUp()
+        self.end = self.tree.find(text="Three")
+
+    def test_previous(self):
+        self.assertEqual(self.end.previous_element['id'], "3")
+        self.assertEqual(self.end.previous_element.previous_element, "Two")
+
+    def test_previous_of_first_item_is_none(self):
+        first = self.tree.find('html')
+        self.assertEqual(first.previous_element, None)
+
+    def test_previous_of_root_is_none(self):
+        # The document root is outside the next/previous chain.
+        # XXX This is broken!
+        #self.assertEqual(self.tree.previous_element, None)
+        pass
+
+    def test_find_all_previous(self):
+        # The <b> tag containing the "Three" node is the predecessor
+        # of the "Three" node itself, which is why "Three" shows up
+        # here.
+        self.assertSelects(
+            self.end.find_all_previous('b'), ["Three", "Two", "One"])
+        self.assertSelects(self.end.find_all_previous(id=1), ["One"])
+
+    def test_find_previous(self):
+        self.assertEqual(self.end.find_previous('b')['id'], '3')
+        self.assertEqual(self.end.find_previous(text="One"), "One")
+
+    def test_find_previous_for_text_element(self):
+        text = self.tree.find(text="Three")
+        self.assertEqual(text.find_previous("b").string, "Three")
+        self.assertSelects(
+            text.find_all_previous("b"), ["Three", "Two", "One"])
+
+    def test_previous_generator(self):
+        start = self.tree.find(text="One")
+        predecessors = [node for node in start.previous_elements]
+
+        # There are four predecessors: the <b> tag containing "One",
+        # the <body> tag, the <head> tag, and the <html> tag.
+        b, body, head, html = predecessors
+        self.assertEqual(b['id'], '1')
+        self.assertEqual(body.name, "body")
+        self.assertEqual(head.name, "head")
+        self.assertEqual(html.name, "html")
+
+
+class SiblingTest(TreeTest):
+    """Shared fixture: four sibling <span> tags, three with a nested span."""
+    def setUp(self):
+        super(SiblingTest, self).setUp()
+        markup = '''<html>
+                    <span id="1">
+                     <span id="1.1"></span>
+                    </span>
+                    <span id="2">
+                     <span id="2.1"></span>
+                    </span>
+                    <span id="3">
+                     <span id="3.1"></span>
+                    </span>
+                    <span id="4"></span>
+                    </html>'''
+        # All that whitespace looks good but makes the tests more
+        # difficult. Get rid of it.
+        markup = re.compile(r"\n\s*").sub("", markup)
+        self.tree = self.soup(markup)
+
+
+class TestNextSibling(SiblingTest):
+
+    def setUp(self):
+        super(TestNextSibling, self).setUp()
+        self.start = self.tree.find(id="1")
+
+    def test_next_sibling_of_root_is_none(self):
+        self.assertEqual(self.tree.next_sibling, None)
+
+    def test_next_sibling(self):
+        self.assertEqual(self.start.next_sibling['id'], '2')
+        self.assertEqual(self.start.next_sibling.next_sibling['id'], '3')
+
+        # Note the difference between next_sibling and next_element.
+        self.assertEqual(self.start.next_element['id'], '1.1')
+
+    def test_next_sibling_may_not_exist(self):
+        self.assertEqual(self.tree.html.next_sibling, None)
+
+        nested_span = self.tree.find(id="1.1")
+        self.assertEqual(nested_span.next_sibling, None)
+
+        last_span = self.tree.find(id="4")
+        self.assertEqual(last_span.next_sibling, None)
+
+    def test_find_next_sibling(self):
+        self.assertEqual(self.start.find_next_sibling('span')['id'], '2')
+
+    def test_next_siblings(self):
+        self.assertSelectsIDs(self.start.find_next_siblings("span"),
+                              ['2', '3', '4'])
+
+        self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3'])
+
+    def test_next_sibling_for_text_element(self):
+        soup = self.soup("Foo<b>bar</b>baz")
+        start = soup.find(text="Foo")
+        self.assertEqual(start.next_sibling.name, 'b')
+        self.assertEqual(start.next_sibling.next_sibling, 'baz')
+
+        self.assertSelects(start.find_next_siblings('b'), ['bar'])
+        self.assertEqual(start.find_next_sibling(text="baz"), "baz")
+        self.assertEqual(start.find_next_sibling(text="nonesuch"), None)
+
+
+class TestPreviousSibling(SiblingTest):
+
+    def setUp(self):
+        super(TestPreviousSibling, self).setUp()
+        self.end = self.tree.find(id="4")
+
+    def test_previous_sibling_of_root_is_none(self):
+        self.assertEqual(self.tree.previous_sibling, None)
+
+    def test_previous_sibling(self):
+        self.assertEqual(self.end.previous_sibling['id'], '3')
+        self.assertEqual(self.end.previous_sibling.previous_sibling['id'], '2')
+
+        # Note the difference between previous_sibling and previous_element.
+        self.assertEqual(self.end.previous_element['id'], '3.1')
+
+    def test_previous_sibling_may_not_exist(self):
+        self.assertEqual(self.tree.html.previous_sibling, None)
+
+        nested_span = self.tree.find(id="1.1")
+        self.assertEqual(nested_span.previous_sibling, None)
+
+        first_span = self.tree.find(id="1")
+        self.assertEqual(first_span.previous_sibling, None)
+
+    def test_find_previous_sibling(self):
+        self.assertEqual(self.end.find_previous_sibling('span')['id'], '3')
+
+    def test_previous_siblings(self):
+        self.assertSelectsIDs(self.end.find_previous_siblings("span"),
+                              ['3', '2', '1'])
+
+        self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])
+
+    def test_previous_sibling_for_text_element(self):
+        soup = self.soup("Foo<b>bar</b>baz")
+        start = soup.find(text="baz")
+        self.assertEqual(start.previous_sibling.name, 'b')
+        self.assertEqual(start.previous_sibling.previous_sibling, 'Foo')
+
+        self.assertSelects(start.find_previous_siblings('b'), ['bar'])
+        self.assertEqual(start.find_previous_sibling(text="Foo"), "Foo")
+        self.assertEqual(start.find_previous_sibling(text="nonesuch"), None)
+
+
+class TestTagCreation(SoupTest):
+    """Test the ability to create new tags."""
+    def test_new_tag(self):
+        soup = self.soup("")
+        new_tag = soup.new_tag("foo", bar="baz")
+        self.assertTrue(isinstance(new_tag, Tag))
+        self.assertEqual("foo", new_tag.name)
+        self.assertEqual(dict(bar="baz"), new_tag.attrs)
+        self.assertEqual(None, new_tag.parent)
+
+    def test_tag_inherits_self_closing_rules_from_builder(self):
+        if XML_BUILDER_PRESENT:
+            xml_soup = BeautifulSoup("", "lxml-xml")
+            xml_br = xml_soup.new_tag("br")
+            xml_p = xml_soup.new_tag("p")
+
+            # Both the <br> and <p> tag are empty-element, just because
+            # they have no contents.
+            self.assertEqual(b"<br/>", xml_br.encode())
+            self.assertEqual(b"<p/>", xml_p.encode())
+
+        html_soup = BeautifulSoup("", "html.parser")
+        html_br = html_soup.new_tag("br")
+        html_p = html_soup.new_tag("p")
+
+        # The HTML builder uses HTML's rules about which tags are
+        # empty-element tags, and the new tags reflect these rules.
+        self.assertEqual(b"<br/>", html_br.encode())
+        self.assertEqual(b"<p></p>", html_p.encode())
+
+    def test_new_string_creates_navigablestring(self):
+        soup = self.soup("")
+        s = soup.new_string("foo")
+        self.assertEqual("foo", s)
+        self.assertTrue(isinstance(s, NavigableString))
+
+    def test_new_string_can_create_navigablestring_subclass(self):
+        soup = self.soup("")
+        s = soup.new_string("foo", Comment)
+        self.assertEqual("foo", s)
+        self.assertTrue(isinstance(s, Comment))
+
+class TestTreeModification(SoupTest):
+
+    def test_attribute_modification(self):
+        soup = self.soup('<a id="1"></a>')
+        soup.a['id'] = 2
+        self.assertEqual(soup.decode(), self.document_for('<a id="2"></a>'))
+        del(soup.a['id'])
+        self.assertEqual(soup.decode(), self.document_for('<a></a>'))
+        soup.a['id2'] = 'foo'
+        self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
+
+    def test_new_tag_creation(self):
+        builder = builder_registry.lookup('html')()
+        soup = self.soup("<body></body>", builder=builder)
+        a = Tag(soup, builder, 'a')
+        ol = Tag(soup, builder, 'ol')
+        a['href'] = 'http://foo.com/'
+        soup.body.insert(0, a)
+        soup.body.insert(1, ol)
+        self.assertEqual(
+            soup.body.encode(),
+            b'<body><a href="http://foo.com/"></a><ol></ol></body>')
+
+    def test_append_to_contents_moves_tag(self):
+        doc = """<p id="1">Don't leave me <b>here</b>.</p>
+                <p id="2">Don\'t leave!</p>"""
+        soup = self.soup(doc)
+        second_para = soup.find(id='2')
+        bold = soup.b
+
+        # Move the <b> tag to the end of the second paragraph.
+        soup.find(id='2').append(soup.b)
+
+        # The <b> tag is now a child of the second paragraph.
+        self.assertEqual(bold.parent, second_para)
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                '<p id="1">Don\'t leave me .</p>\n'
+                '<p id="2">Don\'t leave!<b>here</b></p>'))
+
+    def test_replace_with_returns_thing_that_was_replaced(self):
+        text = "<a></a><b><c></c></b>"
+        soup = self.soup(text)
+        a = soup.a
+        new_a = a.replace_with(soup.c)
+        self.assertEqual(a, new_a)
+
+    def test_unwrap_returns_thing_that_was_replaced(self):
+        text = "<a><b></b><c></c></a>"
+        soup = self.soup(text)
+        a = soup.a
+        new_a = a.unwrap()
+        self.assertEqual(a, new_a)
+
+    def test_replace_with_and_unwrap_give_useful_exception_when_tag_has_no_parent(self):
+        soup = self.soup("<a><b>Foo</b></a><c>Bar</c>")
+        a = soup.a
+        a.extract()
+        self.assertEqual(None, a.parent)
+        self.assertRaises(ValueError, a.unwrap)
+        self.assertRaises(ValueError, a.replace_with, soup.c)
+
+    def test_replace_tag_with_itself(self):
+        text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
+        soup = self.soup(text)
+        c = soup.c
+        soup.c.replace_with(c)
+        self.assertEqual(soup.decode(), self.document_for(text))
+
+    def test_replace_tag_with_its_parent_raises_exception(self):
+        text = "<a><b></b></a>"
+        soup = self.soup(text)
+        self.assertRaises(ValueError, soup.b.replace_with, soup.a)
+
+    def test_insert_tag_into_itself_raises_exception(self):
+        text = "<a><b></b></a>"
+        soup = self.soup(text)
+        self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
+
+    def test_replace_with_maintains_next_element_throughout(self):
+        soup = self.soup('<p><a>one</a><b>three</b></p>')
+        a = soup.a
+        # Make it so the <a> tag has two text children.
+        a.insert(1, "two")
+
+        # Now replace each one with the empty string.
+        left, right = a.contents
+        left.replace_with('')
+        right.replace_with('')
+
+        # The <b> tag is still connected to the tree, even though both
+        # of the strings that preceded it were replaced.
+        self.assertEqual("three", soup.b.string)
+
+    def test_replace_final_node(self):
+        soup = self.soup("<b>Argh!</b>")
+        soup.find(text="Argh!").replace_with("Hooray!")
+        new_text = soup.find(text="Hooray!")
+        b = soup.b
+        self.assertEqual(new_text.previous_element, b)
+        self.assertEqual(new_text.parent, b)
+        self.assertEqual(new_text.previous_element.next_element, new_text)
+        self.assertEqual(new_text.next_element, None)
+
+    def test_consecutive_text_nodes(self):
+        # A builder should never create two consecutive text nodes,
+        # but if you insert one next to another, Beautiful Soup will
+        # handle it correctly.
+        soup = self.soup("<a><b>Argh!</b><c></c></a>")
+        soup.b.insert(1, "Hooray!")
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<a><b>Argh!Hooray!</b><c></c></a>"))
+
+        new_text = soup.find(text="Hooray!")
+        self.assertEqual(new_text.previous_element, "Argh!")
+        self.assertEqual(new_text.previous_element.next_element, new_text)
+
+        self.assertEqual(new_text.previous_sibling, "Argh!")
+        self.assertEqual(new_text.previous_sibling.next_sibling, new_text)
+
+        self.assertEqual(new_text.next_sibling, None)
+        self.assertEqual(new_text.next_element, soup.c)
+
+    def test_insert_string(self):
+        soup = self.soup("<a></a>")
+        soup.a.insert(0, "bar")
+        soup.a.insert(0, "foo")
+        # The strings were added to the tag.
+        self.assertEqual(["foo", "bar"], soup.a.contents)
+        # And they were converted to NavigableStrings.
+        self.assertEqual(soup.a.contents[0].next_element, "bar")
+
+    def test_insert_tag(self):
+        builder = self.default_builder
+        soup = self.soup(
+            "<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder)
+        magic_tag = Tag(soup, builder, 'magictag')
+        magic_tag.insert(0, "the")
+        soup.a.insert(1, magic_tag)
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<a><b>Find</b><magictag>the</magictag><c>lady!</c><d></d></a>"))
+
+        # Make sure all the relationships are hooked up correctly.
+        b_tag = soup.b
+        self.assertEqual(b_tag.next_sibling, magic_tag)
+        self.assertEqual(magic_tag.previous_sibling, b_tag)
+
+        find = b_tag.find(text="Find")
+        self.assertEqual(find.next_element, magic_tag)
+        self.assertEqual(magic_tag.previous_element, find)
+
+        c_tag = soup.c
+        self.assertEqual(magic_tag.next_sibling, c_tag)
+        self.assertEqual(c_tag.previous_sibling, magic_tag)
+
+        the = magic_tag.find(text="the")
+        self.assertEqual(the.parent, magic_tag)
+        self.assertEqual(the.next_element, c_tag)
+        self.assertEqual(c_tag.previous_element, the)
+
+    def test_append_child_thats_already_at_the_end(self):
+        data = "<a><b></b></a>"
+        soup = self.soup(data)
+        soup.a.append(soup.b)
+        self.assertEqual(data, soup.decode())
+
+    def test_move_tag_to_beginning_of_parent(self):
+        data = "<a><b></b><c></c><d></d></a>"
+        soup = self.soup(data)
+        soup.a.insert(0, soup.d)
+        self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode())
+
+    def test_insert_works_on_empty_element_tag(self):
+        # This is a little strange, since most HTML parsers don't allow
+        # markup like this to come through. But in general, we don't
+        # know what the parser would or wouldn't have allowed, so
+        # I'm letting this succeed for now.
+        soup = self.soup("<br/>")
+        soup.br.insert(1, "Contents")
+        self.assertEqual(str(soup.br), "<br>Contents</br>")
+
+    def test_insert_before(self):
+        soup = self.soup("<a>foo</a><b>bar</b>")
+        soup.b.insert_before("BAZ")
+        soup.a.insert_before("QUUX")
+        self.assertEqual(
+            soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))
+
+        soup.a.insert_before(soup.b)
+        self.assertEqual(
+            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
+
+    def test_insert_after(self):
+        soup = self.soup("<a>foo</a><b>bar</b>")
+        soup.b.insert_after("BAZ")
+        soup.a.insert_after("QUUX")
+        self.assertEqual(
+            soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))
+        soup.b.insert_after(soup.a)
+        self.assertEqual(
+            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
+
+    def test_insert_after_raises_exception_if_after_has_no_meaning(self):
+        soup = self.soup("")
+        tag = soup.new_tag("a")
+        string = soup.new_string("")
+        self.assertRaises(ValueError, string.insert_after, tag)
+        self.assertRaises(NotImplementedError, soup.insert_after, tag)
+        self.assertRaises(ValueError, tag.insert_after, tag)
+
+    def test_insert_before_raises_notimplementederror_if_before_has_no_meaning(self):
+        soup = self.soup("")
+        tag = soup.new_tag("a")
+        string = soup.new_string("")
+        self.assertRaises(ValueError, string.insert_before, tag)
+        self.assertRaises(NotImplementedError, soup.insert_before, tag)
+        self.assertRaises(ValueError, tag.insert_before, tag)
+
+    def test_replace_with(self):
+        soup = self.soup(
+                "<p>There's <b>no</b> business like <b>show</b> business</p>")
+        no, show = soup.find_all('b')
+        show.replace_with(no)
+        self.assertEqual(
+            soup.decode(),
+            self.document_for(
+                "<p>There's  business like <b>no</b> business</p>"))
+
+        self.assertEqual(show.parent, None)
+        self.assertEqual(no.parent, soup.p)
+        self.assertEqual(no.next_element, "no")
+        self.assertEqual(no.next_sibling, " business")
+
+    def test_replace_first_child(self):
+        data = "<a><b></b><c></c></a>"
+        soup = self.soup(data)
+        soup.b.replace_with(soup.c)
+        self.assertEqual("<a><c></c></a>", soup.decode())
+
+    def test_replace_last_child(self):
+        data = "<a><b></b><c></c></a>"
+        soup = self.soup(data)
+        soup.c.replace_with(soup.b)
+        self.assertEqual("<a><b></b></a>", soup.decode())
+
+    def test_nested_tag_replace_with(self):
+        soup = self.soup(
+            """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")
+
+        # Replace the entire <b> tag and its contents ("reserve the
+        # right") with the <f> tag ("refuse").
+        remove_tag = soup.b
+        move_tag = soup.f
+        remove_tag.replace_with(move_tag)
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<a>We<f>refuse</f></a><e>to<g>service</g></e>"))
+
+        # The <b> tag is now an orphan.
+        self.assertEqual(remove_tag.parent, None)
+        self.assertEqual(remove_tag.find(text="right").next_element, None)
+        self.assertEqual(remove_tag.previous_element, None)
+        self.assertEqual(remove_tag.next_sibling, None)
+        self.assertEqual(remove_tag.previous_sibling, None)
+
+        # The <f> tag is now connected to the <a> tag.
+        self.assertEqual(move_tag.parent, soup.a)
+        self.assertEqual(move_tag.previous_element, "We")
+        self.assertEqual(move_tag.next_element.next_element, soup.e)
+        self.assertEqual(move_tag.next_sibling, None)
+
+        # The gap where the <f> tag used to be has been mended, and
+        # the word "to" is now connected to the <g> tag.
+        to_text = soup.find(text="to")
+        g_tag = soup.g
+        self.assertEqual(to_text.next_element, g_tag)
+        self.assertEqual(to_text.next_sibling, g_tag)
+        self.assertEqual(g_tag.previous_element, to_text)
+        self.assertEqual(g_tag.previous_sibling, to_text)
+
+    def test_unwrap(self):
+        tree = self.soup("""
+            <p>Unneeded <em>formatting</em> is unneeded</p>
+            """)
+        tree.em.unwrap()
+        self.assertEqual(tree.em, None)
+        self.assertEqual(tree.p.text, "Unneeded formatting is unneeded")
+
+    def test_wrap(self):
+        soup = self.soup("I wish I was bold.")
+        value = soup.string.wrap(soup.new_tag("b"))
+        self.assertEqual(value.decode(), "<b>I wish I was bold.</b>")
+        self.assertEqual(
+            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
+
+    def test_wrap_extracts_tag_from_elsewhere(self):
+        soup = self.soup("<b></b>I wish I was bold.")
+        soup.b.next_sibling.wrap(soup.b)
+        self.assertEqual(
+            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
+
+    def test_wrap_puts_new_contents_at_the_end(self):
+        soup = self.soup("<b>I like being bold.</b>I wish I was bold.")
+        soup.b.next_sibling.wrap(soup.b)
+        self.assertEqual(2, len(soup.b.contents))
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<b>I like being bold.I wish I was bold.</b>"))
+
+    def test_extract(self):
+        tree = self.soup(
+            '<html><body>Some content. <div id="nav">Nav crap</div> More content.</body></html>')
+        # Before extraction <body> holds three nodes: text, <div>, text.
+        self.assertEqual(len(tree.body.contents), 3)
+        nav = tree.find(id="nav").extract()
+
+        remaining = "<html><body>Some content.  More content.</body></html>"
+        self.assertEqual(tree.decode(), remaining)
+        self.assertEqual(nav.decode(), '<div id="nav">Nav crap</div>')
+
+        # Having been extracted, the tag has no parent or predecessor.
+        self.assertEqual(len(tree.body.contents), 2)
+        self.assertEqual(nav.parent, None)
+        self.assertEqual(nav.previous_element, None)
+        self.assertEqual(nav.next_element.next_element, None)
+
+        # The two strings around the old <div> are now linked directly.
+        before = tree.find(text="Some content. ")
+        after = tree.find(text=" More content.")
+        self.assertEqual(before.next_element, after)
+        self.assertEqual(before.next_sibling, after)
+        self.assertEqual(after.previous_element, before)
+        self.assertEqual(after.previous_sibling, before)
+
+    def test_extract_distinguishes_between_identical_strings(self):
+        tree = self.soup("<a>foo</a><b>bar</b>")
+        first_foo = tree.a.string
+        first_bar = tree.b.string
+        second_foo = tree.new_string("foo")
+        second_bar = tree.new_string("bar")
+        tree.a.append(second_foo)
+        tree.b.append(second_bar)
+
+        # <a> and <b> each hold two equal-valued strings at this
+        # point. Extract the first "foo" and the second "bar"; each tag
+        # is left with a single string again.
+        first_foo.extract()
+        second_bar.extract()
+        self.assertEqual(second_foo, tree.a.string)
+        self.assertEqual(second_bar, tree.b.string)
+
+    def test_extract_multiples_of_same_tag(self):
+        soup = self.soup("""
+<html>
+<head>
+<script>foo</script>
+</head>
+<body>
+ <script>bar</script>
+ <a></a>
+</body>
+<script>baz</script>
+</html>""")
+        for _ in soup.find_all("script"):  # one extraction per <script> found
+            soup.script.extract()  # always removes the first one remaining
+        self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body))
+
+    def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
+        # In this markup <body> has an identical "\n" on either side.
+        markup = '<html>\n<body>hi</body>\n</html>'
+        tree = self.soup(markup)
+        body = tree.find('body')
+        body.extract()
+        self.assertEqual(None, tree.find('body'))
+
+
+    def test_clear(self):
+        """Tag.clear()"""
+        tree = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")
+        # Default clear(): <p> is emptied, but the removed <a> is still intact.
+        link = tree.a
+        tree.p.clear()
+        self.assertEqual(len(tree.p.contents), 0)
+        self.assertTrue(hasattr(link, "contents"))
+
+        # clear(decompose=True): the removed <em> has lost its own contents.
+        emphasis = link.em
+        link.clear(decompose=True)
+        self.assertEqual(0, len(emphasis.contents))
+
+    def test_string_set(self):
+        """Tag.string = 'string'"""
+        tree = self.soup("<a></a> <b><c></c></b>")
+        tree.a.string = "foo"  # empty tag gains a single text child
+        self.assertEqual(tree.a.contents, ["foo"])
+        tree.b.string = "bar"  # the existing <c> child is replaced by the text
+        self.assertEqual(tree.b.contents, ["bar"])
+
+    def test_string_set_does_not_affect_original_string(self):
+        tree = self.soup("<a><b>foo</b><c>bar</c>")
+        tree.b.string = tree.c.string  # <c> must keep its own "bar" afterwards
+        self.assertEqual(tree.a.encode(), b"<a><b>bar</b><c>bar</c></a>")
+
+    def test_set_string_preserves_class_of_string(self):
+        # A CData assigned to .string must still be a CData when read back.
+        tree = self.soup("<a></a>")
+        tree.a.string = CData("foo")
+        self.assertTrue(isinstance(tree.a.string, CData))
+
+class TestElementObjects(SoupTest):
+    """Test various features of element objects."""
+
+    def test_len(self):
+        """The length of an element is its number of children."""
+        soup = self.soup("<top>1<b>2</b>3</top>")
+
+        # The BeautifulSoup object itself contains one element: the
+        # <top> tag.
+        self.assertEqual(len(soup.contents), 1)
+        self.assertEqual(len(soup), 1)
+
+        # The <top> tag contains three elements: the text node "1", the
+        # <b> tag, and the text node "3".
+        self.assertEqual(len(soup.top), 3)
+        self.assertEqual(len(soup.top.contents), 3)
+
+    def test_member_access_invokes_find(self):
+        """Accessing a Python member .foo invokes find('foo')"""
+        soup = self.soup('<b><i></i></b>')
+        self.assertEqual(soup.b, soup.find('b'))
+        self.assertEqual(soup.b.i, soup.find('b').find('i'))
+        self.assertEqual(soup.a, None)
+
+    def test_deprecated_member_access(self):
+        soup = self.soup('<b><i></i></b>')
+        with warnings.catch_warnings(record=True) as w:
+            # Record every warning, even one already seen in this process.
+            warnings.simplefilter("always")
+            tag = soup.bTag
+        self.assertEqual(soup.b, tag)
+        self.assertEqual(
+            '.bTag is deprecated, use .find("b") instead.', str(w[0].message))
+
+    def test_has_attr(self):
+        """has_attr() checks for the presence of an attribute.
+
+        Please note: has_attr() is different from
+        __contains__. has_attr() checks the tag's attributes and
+        __contains__ checks the tag's children.
+        """
+        soup = self.soup("<foo attr='bar'>")
+        self.assertTrue(soup.foo.has_attr('attr'))
+        self.assertFalse(soup.foo.has_attr('attr2'))
+
+    def test_attributes_come_out_in_alphabetical_order(self):
+        markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'
+        self.assertSoupEquals(markup, '<b a="1" f="2" m="3" y="4" z="5"></b>')
+
+    def test_string(self):
+        # A tag that contains only a text node makes that node
+        # available as .string.
+        soup = self.soup("<b>foo</b>")
+        self.assertEqual(soup.b.string, 'foo')
+
+    def test_empty_tag_has_no_string(self):
+        # A tag with no children has no .string.
+        soup = self.soup("<b></b>")
+        self.assertEqual(soup.b.string, None)
+
+    def test_tag_with_multiple_children_has_no_string(self):
+        # A tag with more than one child (here <a>) has no .string.
+        soup = self.soup("<a>foo<b></b><b></b></b>")
+        self.assertEqual(soup.a.string, None)
+
+        soup = self.soup("<a>foo<b></b>bar</b>")
+        self.assertEqual(soup.a.string, None)
+
+        # Even if all the children are strings, due to trickery,
+        # it won't work--but this would be a good optimization.
+        soup = self.soup("<a>foo</b>")
+        soup.a.insert(1, "bar")
+        self.assertEqual(soup.a.string, None)
+
+    def test_tag_with_recursive_string_has_string(self):
+        # A tag with a single child which has a .string inherits that
+        # .string.
+        soup = self.soup("<a><b>foo</b></a>")
+        self.assertEqual(soup.a.string, "foo")
+        self.assertEqual(soup.string, "foo")
+
+    def test_lack_of_string(self):
+        """Only a tag containing a single text node has a .string."""
+        soup = self.soup("<b>f<i>e</i>o</b>")
+        self.assertFalse(soup.b.string)
+
+        soup = self.soup("<b></b>")
+        self.assertFalse(soup.b.string)
+
+    def test_all_text(self):
+        """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated"""
+        soup = self.soup("<a>a<b>r</b>   <r> t </r></a>")
+        self.assertEqual(soup.a.text, "ar  t ")
+        self.assertEqual(soup.a.get_text(strip=True), "art")
+        self.assertEqual(soup.a.get_text(","), "a,r, , t ")
+        self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
+
+    def test_get_text_ignores_comments(self):
+        soup = self.soup("foo<!--IGNORE-->bar")
+        self.assertEqual(soup.get_text(), "foobar")
+
+        self.assertEqual(
+            soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar")
+        self.assertEqual(
+            soup.get_text(types=None), "fooIGNOREbar")
+
+    def test_all_strings_ignores_comments(self):
+        soup = self.soup("foo<!--IGNORE-->bar")
+        self.assertEqual(['foo', 'bar'], list(soup.strings))
+
+class TestCDAtaListAttributes(SoupTest):
+    """Tests for whitespace-separated (cdata-list) attributes such as
+    'class'."""
+
+    def test_single_value_becomes_list(self):
+        tree = self.soup("<a class='foo'>")
+        self.assertEqual(["foo"], tree.a['class'])
+
+    def test_multiple_values_becomes_list(self):
+        tree = self.soup("<a class='foo bar'>")
+        self.assertEqual(["foo", "bar"], tree.a['class'])
+
+    def test_multiple_values_separated_by_weird_whitespace(self):
+        tree = self.soup("<a class='foo\tbar\nbaz'>")
+        self.assertEqual(["foo", "bar", "baz"], tree.a['class'])
+
+    def test_attributes_joined_into_string_on_output(self):
+        tree = self.soup("<a class='foo\tbar'>")
+        self.assertEqual(b'<a class="foo bar"></a>', tree.a.encode())
+
+    def test_accept_charset(self):
+        tree = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
+        self.assertEqual(['ISO-8859-1', 'UTF-8'], tree.form['accept-charset'])
+
+    def test_cdata_attribute_applying_only_to_one_tag(self):
+        # accept-charset is split into a list on <form> only; on any
+        # other tag, such as <a>, the value stays a single string.
+        markup = '<a accept-charset="ISO-8859-1 UTF-8"></a>'
+        tree = self.soup(markup)
+        charsets = tree.a['accept-charset']
+        self.assertEqual('ISO-8859-1 UTF-8', charsets)
+
+    def test_string_has_immutable_name_property(self):
+        # A string node reports no name, and trying to assign one
+        # raises AttributeError.
+        text = self.soup("s").string
+        self.assertEqual(None, text.name)
+        self.assertRaises(AttributeError, setattr, text, 'name', 'foo')
+
+class TestPersistence(SoupTest):
+    "Testing features like pickle and deepcopy."
+
+    def setUp(self):
+        super(TestPersistence, self).setUp()
+        self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+"http://www.w3.org/TR/REC-html40/transitional.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
+<link rev="made" href="mailto:leonardr@segfault.org">
+<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
+<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
+<meta name="author" content="Leonard Richardson">
+</head>
+<body>
+<a href="foo">foo</a>
+<a href="foo"><b>bar</b></a>
+</body>
+</html>"""
+        self.tree = self.soup(self.page)
+
+    def test_pickle_and_unpickle_identity(self):
+        # Pickling a tree, then unpickling it, yields a tree identical
+        # to the original.
+        dumped = pickle.dumps(self.tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.__class__, BeautifulSoup)
+        self.assertEqual(loaded.decode(), self.tree.decode())
+
+    def test_deepcopy_identity(self):
+        # Making a deepcopy of a tree yields an identical tree.
+        copied = copy.deepcopy(self.tree)
+        self.assertEqual(copied.decode(), self.tree.decode())
+
+    def test_copy_preserves_encoding(self):
+        soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
+        encoding = soup.original_encoding
+        soup_copy = soup.__copy__()  # named so the copy module is not shadowed
+        self.assertEqual(u"<p>\N{NO-BREAK SPACE}</p>", unicode(soup_copy))
+        self.assertEqual(encoding, soup_copy.original_encoding)
+
+    def test_unicode_pickle(self):
+        # A tree containing Unicode characters can be pickled.
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.decode(), soup.decode())
+
+    def test_copy_navigablestring_is_not_attached_to_tree(self):
+        html = u"<b>Foo<a></a></b><b>Bar</b>"
+        soup = self.soup(html)
+        s1 = soup.find(string="Foo")
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertEqual(None, s2.parent)
+        self.assertEqual(None, s2.next_element)
+        self.assertNotEqual(None, s1.next_sibling)
+        self.assertEqual(None, s2.next_sibling)
+        self.assertEqual(None, s2.previous_element)
+
+    def test_copy_navigablestring_subclass_has_same_type(self):
+        html = u"<b><!--Foo--></b>"
+        soup = self.soup(html)
+        s1 = soup.string
+        s2 = copy.copy(s1)
+        self.assertEqual(s1, s2)
+        self.assertTrue(isinstance(s2, Comment))
+
+    def test_copy_entire_soup(self):
+        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        soup = self.soup(html)
+        soup_copy = copy.copy(soup)
+        self.assertEqual(soup, soup_copy)
+
+    def test_copy_tag_copies_contents(self):
+        html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end"
+        soup = self.soup(html)
+        div = soup.div
+        div_copy = copy.copy(div)
+
+        # The two tags look the same, and evaluate to equal.
+        self.assertEqual(unicode(div), unicode(div_copy))
+        self.assertEqual(div, div_copy)
+
+        # But they're not the same object.
+        self.assertFalse(div is div_copy)
+
+        # And they don't have the same relation to the parse tree. The
+        # copy is not associated with a parse tree at all.
+        self.assertEqual(None, div_copy.parent)
+        self.assertEqual(None, div_copy.previous_element)
+        self.assertEqual(None, div_copy.find(string='Bar').next_element)
+        self.assertNotEqual(None, div.find(string='Bar').next_element)
+
+class TestSubstitutions(SoupTest):
+
+    def test_default_formatter_is_minimal(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode()
+        # With no formatter given, < becomes &lt; but the e-with-acute is left alone.
+        self.assertEqual(
+            decoded,
+            self.document_for(
+                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
+
+    def test_formatter_html(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter="html")
+        self.assertEqual(
+            decoded,
+            self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+
+    def test_formatter_minimal(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter="minimal")
+        # The < is converted back into &lt; but the e-with-acute is left alone.
+        self.assertEqual(
+            decoded,
+            self.document_for(
+                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
+
+    def test_formatter_null(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter=None)
+        # Neither the angle brackets nor the e-with-acute are converted.
+        # This is not valid HTML, but it's what the user wanted.
+        self.assertEqual(decoded,
+                          self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
+
+    def test_formatter_custom(self):
+        markup = u"<b>&lt;foo&gt;</b><b>bar</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter=lambda x: x.upper())
+        # Instead of normal entity conversion code, the custom
+        # callable is called on every string.
+        self.assertEqual(
+            decoded,
+            self.document_for(u"<b><FOO></b><b>BAR</b>"))
+
+    def test_formatter_is_run_on_attribute_values(self):
+        markup = u'<a href="http://a.com?a=b&c=é">e</a>'
+        soup = self.soup(markup)
+        a = soup.a
+
+        expect_minimal = u'<a href="http://a.com?a=b&amp;c=é">e</a>'
+
+        self.assertEqual(expect_minimal, a.decode())
+        self.assertEqual(expect_minimal, a.decode(formatter="minimal"))
+
+        expect_html = u'<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
+        self.assertEqual(expect_html, a.decode(formatter="html"))
+
+        self.assertEqual(markup, a.decode(formatter=None))
+        expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
+        self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
+
+    def test_formatter_skips_script_tag_for_html_documents(self):
+        doc = """
+  <script type="text/javascript">
+   console.log("< < hey > > ");
+  </script>
+"""
+        encoded = BeautifulSoup(doc, 'html.parser').encode()
+        self.assertTrue(b"< < hey > >" in encoded)
+
+    def test_formatter_skips_style_tag_for_html_documents(self):
+        doc = """
+  <style type="text/css">
+   console.log("< < hey > > ");
+  </style>
+"""
+        encoded = BeautifulSoup(doc, 'html.parser').encode()
+        self.assertTrue(b"< < hey > >" in encoded)
+
+    def test_prettify_leaves_preformatted_text_alone(self):
+        soup = self.soup("<div>  foo  <pre>  \tbar\n  \n  </pre>  baz  ")
+        # Everything outside the <pre> tag is reformatted, but everything
+        # inside is left alone.
+        self.assertEqual(
+            u'<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n</div>',
+            soup.div.prettify())
+
+    def test_prettify_accepts_formatter(self):
+        soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
+        pretty = soup.prettify(formatter=lambda x: x.upper())
+        self.assertTrue("FOO" in pretty)
+
+    def test_prettify_outputs_unicode_by_default(self):
+        soup = self.soup("<a></a>")
+        self.assertEqual(unicode, type(soup.prettify()))
+
+    def test_prettify_can_encode_data(self):
+        soup = self.soup("<a></a>")
+        self.assertEqual(bytes, type(soup.prettify("utf-8")))
+
+    def test_html_entity_substitution_off_by_default(self):
+        markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
+        soup = self.soup(markup)
+        encoded = soup.b.encode("utf-8")
+        self.assertEqual(encoded, markup.encode('utf-8'))
+
+    def test_encoding_substitution(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type"/>')
+        soup = self.soup(meta_tag)
+
+        # Parse the document, and the charset appears unchanged.
+        self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis')
+
+        # Encode the document into some encoding, and the encoding is
+        # substituted into the meta tag.
+        utf_8 = soup.encode("utf-8")
+        self.assertTrue(b"charset=utf-8" in utf_8)
+
+        euc_jp = soup.encode("euc_jp")
+        self.assertTrue(b"charset=euc_jp" in euc_jp)
+
+        shift_jis = soup.encode("shift-jis")
+        self.assertTrue(b"charset=shift-jis" in shift_jis)
+
+        utf_16_u = soup.encode("utf-16").decode("utf-16")
+        self.assertTrue("charset=utf-16" in utf_16_u)
+
+    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
+        markup = ('<head><meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type"/></head><pre>foo</pre>')
+
+        # Beautiful Soup used to try to rewrite the meta tag even if the
+        # meta tag got filtered out by the strainer. This test makes
+        # sure that doesn't happen.
+        strainer = SoupStrainer('pre')
+        soup = self.soup(markup, parse_only=strainer)
+        self.assertEqual(soup.contents[0].name, 'pre')
+
+class TestEncoding(SoupTest):
+    """Test the ability to encode objects into strings."""
+
+    def test_unicode_string_can_be_encoded(self):
+        # The text node alone encodes to the UTF-8 bytes of the snowman.
+        tree = self.soup(u"<b>\N{SNOWMAN}</b>")
+        snowman_bytes = u"\N{SNOWMAN}".encode("utf-8")
+        self.assertEqual(tree.b.string.encode("utf-8"), snowman_bytes)
+
+    def test_tag_containing_unicode_string_can_be_encoded(self):
+        # Encoding the whole tag reproduces the original markup as UTF-8.
+        markup = u"<b>\N{SNOWMAN}</b>"
+        tree = self.soup(markup)
+        self.assertEqual(tree.b.encode("utf-8"), markup.encode("utf-8"))
+
+    def test_encoding_substitutes_unrecognized_characters_by_default(self):
+        # Encoded as ASCII, the snowman comes out as a numeric reference.
+        tree = self.soup(u"<b>\N{SNOWMAN}</b>")
+        self.assertEqual(tree.b.encode("ascii"), b"<b>&#9731;</b>")
+
+    def test_encoding_can_be_made_strict(self):
+        # With errors="strict" the same ASCII encoding raises
+        # UnicodeEncodeError instead.
+        tree = self.soup(u"<b>\N{SNOWMAN}</b>")
+        self.assertRaises(UnicodeEncodeError, tree.encode, "ascii", errors="strict")
+
+    def test_decode_contents(self):
+        # decode_contents() renders the children only, without the <b> itself.
+        tree = self.soup(u"<b>\N{SNOWMAN}</b>")
+        self.assertEqual(u"\N{SNOWMAN}", tree.b.decode_contents())
+
+    def test_encode_contents(self):
+        # encode_contents() gives the children only, as bytes in the
+        # requested encoding.
+        tree = self.soup(u"<b>\N{SNOWMAN}</b>")
+        expected = u"\N{SNOWMAN}".encode("utf8")
+        self.assertEqual(expected, tree.b.encode_contents(encoding="utf8"))
+
+    def test_deprecated_renderContents(self):
+        # Called without arguments, renderContents() yields UTF-8 bytes.
+        tree = self.soup(u"<b>\N{SNOWMAN}</b>")
+        expected = u"\N{SNOWMAN}".encode("utf8")
+        self.assertEqual(expected, tree.b.renderContents())
+
+    def test_repr(self):
+        # When PY3K is set the repr is the markup itself; otherwise it
+        # is a byte string with the snowman as a \u escape.
+        markup = u"<b>\N{SNOWMAN}</b>"
+        tree = self.soup(markup)
+        expected = markup if PY3K else b'<b>\\u2603</b>'
+        self.assertEqual(expected, repr(tree))
+
+class TestNavigableStringSubclasses(SoupTest):
+
+    def test_cdata(self):
+        # None of the current builders turn CDATA sections into CData
+        # objects, so one is built by hand and inserted into an empty tree.
+        tree = self.soup("")
+        section = CData("foo")
+        tree.insert(1, section)
+        self.assertEqual(str(tree), "<![CDATA[foo]]>")
+        self.assertEqual(tree.find(text="foo"), "foo")
+        self.assertEqual(tree.contents[0], "foo")
+
+    def test_cdata_is_never_formatted(self):
+        """Text inside a CData object is passed into the formatter.
+
+        But the return value is ignored.
+        """
+
+        self.count = 0
+        def counting_formatter(*args):
+            self.count += 1
+            return "BITTER FAILURE"
+
+        tree = self.soup("")
+        section = CData("<><><>")
+        tree.insert(1, section)
+        encoded = tree.encode(formatter=counting_formatter)
+        self.assertEqual(b"<![CDATA[<><><>]]>", encoded)
+        self.assertEqual(1, self.count)
+
+    def test_doctype_ends_in_newline(self):
+        # Unlike other NavigableString subclasses, a DOCTYPE always ends
+        # in a newline.
+        declaration = Doctype("foo")
+        tree = self.soup("")
+        tree.insert(1, declaration)
+        self.assertEqual(tree.encode(), b"<!DOCTYPE foo>\n")
+
+    def test_declaration(self):
+        declaration = Declaration("foo")
+        self.assertEqual("<?foo?>", declaration.output_ready())
+
+class TestSoupSelector(TreeTest):
+
+    HTML = """
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+"http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>The title</title>
+<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
+</head>
+<body>
+<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
+<div id="main" class="fancy">
+<div id="inner">
+<h1 id="header1">An H1</h1>
+<p>Some text</p>
+<p class="onep" id="p1">Some more text</p>
+<h2 id="header2">An H2</h2>
+<p class="class1 class2 class3" id="pmulti">Another</p>
+<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
+<h2 id="header3">Another H2</h2>
+<a id="me" href="http://simonwillison.net/" rel="me">me</a>
+<span class="s1">
+<a href="#" id="s1a1">span1a1</a>
+<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
+<span class="span2">
+<a href="#" id="s2a1">span2a1</a>
+</span>
+<span class="span3"></span>
+<custom-dashed-tag class="dashed" id="dash2"/>
+<div data-tag="dashedvalue" id="data1"/>
+</span>
+</div>
+<x id="xid">
+<z id="zida"/>
+<z id="zidab"/>
+<z id="zidac"/>
+</x>
+<y id="yid">
+<z id="zidb"/>
+</y>
+<p lang="en" id="lang-en">English</p>
+<p lang="en-gb" id="lang-en-gb">English UK</p>
+<p lang="en-us" id="lang-en-us">English US</p>
+<p lang="fr" id="lang-fr">French</p>
+</div>
+
+<div id="footer">
+</div>
+"""
+
+    def setUp(self):
+        self.soup = BeautifulSoup(self.HTML, 'html.parser')  # parse the HTML fixture
+
+    def assertSelects(self, selector, expected_ids, **kwargs):
+        """Assert select(selector, **kwargs) yields exactly expected_ids, in any order."""
+        found = sorted(el['id'] for el in self.soup.select(selector, **kwargs))
+        # sorted() copies, so the caller's expected_ids list is left untouched.
+        wanted = sorted(expected_ids)
+        self.assertEqual(
+            wanted, found,
+            "Selector %s, expected [%s], got [%s]" % (
+                selector, ', '.join(wanted), ', '.join(found)))
+
+    assertSelect = assertSelects
+
+    def assertSelectMultiple(self, *tests):
+        for css, ids in tests:  # one (selector, expected ids) pair per case
+            self.assertSelect(css, ids)
+
+    def test_one_tag_one(self):
+        titles = self.soup.select('title')  # the fixture has a single <title>
+        self.assertEqual(len(titles), 1)
+        self.assertEqual(titles[0].name, 'title')
+        self.assertEqual(titles[0].contents, [u'The title'])
+
+    def test_one_tag_many(self):
+        divs = self.soup.select('div')
+        self.assertEqual(len(divs), 4)
+        for match in divs:
+            self.assertEqual(match.name, 'div')
+        # select_one() gives back a single element: here the "main" <div>.
+        first = self.soup.select_one('div')
+        self.assertEqual('main', first['id'])
+
+    def test_select_one_returns_none_if_no_match(self):
+        # No element in the fixture has this tag name.
+        self.assertEqual(None, self.soup.select_one('nonexistenttag'))
+
+
+    def test_tag_in_tag_one(self):
+        # Only <div> elements nested inside another <div> are selected.
+        self.assertSelects('div div', ['inner', 'data1'])
+
+    def test_tag_in_tag_many(self):
+        for css in ('html div', 'html body div', 'body div'):  # same four <div>s each time
+            self.assertSelects(css, ['data1', 'main', 'inner', 'footer'])
+
+
+    def test_limit(self):
+        # `limit` caps the number of matches; a limit above the total changes nothing.
+        self.assertSelects('html div', ['main'], limit=1)
+        self.assertSelects('html body div', ['inner', 'main'], limit=2)
+        self.assertSelects('body div', ['data1', 'main', 'inner', 'footer'], limit=10)
+
+    def test_tag_no_match(self):  # the fixture contains no <del> element
+        self.assertEqual(len(self.soup.select('del')), 0)
+
+    def test_invalid_tag(self):  # the selector 'tag%t' is rejected with ValueError
+        self.assertRaises(ValueError, self.soup.select, 'tag%t')
+
+    def test_select_dashed_tag_ids(self):  # a tag name containing dashes is selectable
+        self.assertSelects('custom-dashed-tag', ['dash1', 'dash2'])
+
+    def test_select_dashed_by_id(self):
+        matches = self.soup.select('custom-dashed-tag[id="dash2"]')
+        self.assertEqual(matches[0].name, 'custom-dashed-tag')
+        self.assertEqual(matches[0]['id'], 'dash2')
+
+    def test_dashed_tag_text(self):  # the first match is the "dash1" element in the fixture
+        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.')
+
+    def test_select_dashed_matches_find_all(self):  # select() and find_all() must agree here
+        self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))
+
+    def test_header_tags(self):
+        self.assertSelectMultiple(  # plain tag-name selectors for the headings
+            ('h1', ['header1']),
+            ('h2', ['header2', 'header3']),
+        )
+
+    def test_class_one(self):
+        for css in ('.onep', 'p.onep', 'html p.onep'):
+            found = self.soup.select(css)
+            self.assertEqual(len(found), 1)
+            self.assertEqual(found[0].name, 'p')
+            self.assertEqual(found[0]['class'], ['onep'])
+
+    def test_class_mismatched_tag(self):
+        # The "onep" class sits on a <p>, so no <div> matches.
+        self.assertEqual(len(self.soup.select('div.onep')), 0)
+
+    def test_one_id(self):
+        for css in ('div#inner', '#inner', 'div div#inner'):  # three routes to one element
+            self.assertSelects(css, ['inner'])
+
+    def test_bad_id(self):
+        # An id that is not in the fixture selects nothing.
+        self.assertEqual(len(self.soup.select('#doesnotexist')), 0)
+
+    def test_items_in_id(self):
+        paragraphs = self.soup.select('div#inner p')
+        self.assertEqual(len(paragraphs), 3)
+        for para in paragraphs:
+            self.assertEqual(para.name, 'p')
+        self.assertEqual(paragraphs[1]['class'], ['onep'])
+        self.assertFalse(paragraphs[0].has_attr('class'))
+
+    def test_a_bunch_of_emptys(self):
+        for css in ('div#main del', 'div#main div.oops', 'div div#main'):
+            self.assertEqual(len(self.soup.select(css)), 0)  # nothing matches
+
+    def test_multi_class_support(self):
+        for css in ('.class1', 'p.class1', '.class2', 'p.class2',
+                    '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
+            self.assertSelects(css, ['pmulti'])  # any one of its three classes
+
+    def test_multi_class_selection(self):
+        chained = ('.class1.class3', '.class3.class2', '.class1.class2.class3')
+        for css in chained:  # several classes required at once, in any order
+            self.assertSelects(css, ['pmulti'])
+
+    def test_child_selector(self):
+        self.assertSelects('.s1 > a', ['s1a1', 's1a2'])  # s2a1 is nested deeper, so excluded
+        self.assertSelects('.s1 > a span', ['s1a2s1'])  # then any <span> below those <a>s
+
+    def test_child_selector_id(self):  # child combinator followed by an id-qualified tag
+        self.assertSelects('.s1 > a#s1a2 span', ['s1a2s1'])
+
+    def test_attribute_equals(self):
+        # [attr="value"] selects elements whose attribute equals the value.
+        self.assertSelectMultiple(
+            ('p[class="onep"]', ['p1']),
+            ('p[id="p1"]', ['p1']),
+            ('[class="onep"]', ['p1']),
+            ('[id="p1"]', ['p1']),
+            ('link[rel="stylesheet"]', ['l1']),
+            ('link[type="text/css"]', ['l1']),
+            ('link[href="blah.css"]', ['l1']),
+            ('link[href="no-blah.css"]', []),
+            ('[rel="stylesheet"]', ['l1']),
+            ('[type="text/css"]', ['l1']),
+            ('[href="blah.css"]', ['l1']),
+            ('[href="no-blah.css"]', []),
+            ('p[href="no-blah.css"]', []),
+        )
+
+    def test_attribute_tilde(self):
+        self.assertSelectMultiple(  # [attr~="w"]: w is one of the attribute's space-separated words
+            ('p[class~="class1"]', ['pmulti']),
+            ('p[class~="class2"]', ['pmulti']),
+            ('p[class~="class3"]', ['pmulti']),
+            ('[class~="class1"]', ['pmulti']),
+            ('[class~="class2"]', ['pmulti']),
+            ('[class~="class3"]', ['pmulti']),
+            ('a[rel~="friend"]', ['bob']),
+            ('a[rel~="met"]', ['bob']),
+            ('[rel~="friend"]', ['bob']),
+            ('[rel~="met"]', ['bob']),
+        )
+
+    def test_attribute_startswith(self):
+        self.assertSelectMultiple(  # [attr^="x"]: the attribute value starts with x
+            ('[rel^="style"]', ['l1']),
+            ('link[rel^="style"]', ['l1']),
+            ('notlink[rel^="notstyle"]', []),
+            ('[rel^="notstyle"]', []),
+            ('link[rel^="notstyle"]', []),
+            ('link[href^="bla"]', ['l1']),
+            ('a[href^="http://"]', ['bob', 'me']),
+            ('[href^="http://"]', ['bob', 'me']),
+            ('[id^="p"]', ['pmulti', 'p1']),
+            ('[id^="m"]', ['me', 'main']),
+            ('div[id^="m"]', ['main']),
+            ('a[id^="m"]', ['me']),
+            ('div[data-tag^="dashed"]', ['data1'])
+        )
+
+    def test_attribute_endswith(self):
+        self.assertSelectMultiple(  # [attr$="x"]: the attribute value ends with x
+            ('[href$=".css"]', ['l1']),
+            ('link[href$=".css"]', ['l1']),
+            ('link[id$="1"]', ['l1']),
+            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
+            ('div[id$="1"]', ['data1']),
+            ('[id$="noending"]', []),
+        )
+
+    def test_attribute_contains(self):
+        self.assertSelectMultiple(  # [attr*="x"]: the attribute value contains x anywhere
+            # From test_attribute_startswith
+            ('[rel*="style"]', ['l1']),
+            ('link[rel*="style"]', ['l1']),
+            ('notlink[rel*="notstyle"]', []),
+            ('[rel*="notstyle"]', []),
+            ('link[rel*="notstyle"]', []),
+            ('link[href*="bla"]', ['l1']),
+            ('[href*="http://"]', ['bob', 'me']),
+            ('[id*="p"]', ['pmulti', 'p1']),
+            ('div[id*="m"]', ['main']),
+            ('a[id*="m"]', ['me']),
+            # From test_attribute_endswith
+            ('[href*=".css"]', ['l1']),
+            ('link[href*=".css"]', ['l1']),
+            ('link[id*="1"]', ['l1']),
+            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
+            ('div[id*="1"]', ['data1']),
+            ('[id*="noending"]', []),
+            # New for this test
+            ('[href*="."]', ['bob', 'me', 'l1']),
+            ('a[href*="."]', ['bob', 'me']),
+            ('link[href*="."]', ['l1']),
+            ('div[id*="n"]', ['main', 'inner']),
+            ('div[id*="nn"]', ['inner']),
+            ('div[data-tag*="edval"]', ['data1'])
+        )
+
+    def test_attribute_exact_or_hypen(self):
+        self.assertSelectMultiple(  # [attr|="x"]: the value is x itself or begins with "x-"
+            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
+            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
+            ('p[lang|="fr"]', ['lang-fr']),
+            ('p[lang|="gb"]', []),
+        )
+
+    def test_attribute_exists(self):
+        self.assertSelectMultiple(  # [attr]: the attribute is present, whatever its value
+            ('[rel]', ['l1', 'bob', 'me']),
+            ('link[rel]', ['l1']),
+            ('a[rel]', ['bob', 'me']),
+            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
+            ('p[class]', ['p1', 'pmulti']),
+            ('[blah]', []),
+            ('p[blah]', []),
+            ('div[data-tag]', ['data1'])
+        )
+
+    def test_quoted_space_in_selector_name(self):
+        html = """<div style="display: wrong">nope</div>
+        <div style="display: right">yes</div>
+        """
+        soup = BeautifulSoup(html, 'html.parser')
+        [chosen] = soup.select('div[style="display: right"]')
+        self.assertEqual("yes", chosen.string)
+
+    def test_unsupported_pseudoclass(self):
+        self.assertRaises(
+            NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
+
+        self.assertRaises(
+            NotImplementedError, self.soup.select, "a:nth-of-type(a)")
+
+
+    def test_nth_of_type(self):
+        # Try to select first paragraph
+        els = self.soup.select('div#inner p:nth-of-type(1)')
+        self.assertEqual(len(els), 1)
+        self.assertEqual(els[0].string, u'Some text')
+
+        # Try to select third paragraph
+        els = self.soup.select('div#inner p:nth-of-type(3)')
+        self.assertEqual(len(els), 1)
+        self.assertEqual(els[0].string, u'Another')
+
+        # Try to select (non-existent!) fourth paragraph
+        els = self.soup.select('div#inner p:nth-of-type(4)')
+        self.assertEqual(len(els), 0)
+
+        # Pass in an invalid value.
+        self.assertRaises(
+            ValueError, self.soup.select, 'div p:nth-of-type(0)')
+
+    def test_nth_of_type_direct_descendant(self):
+        els = self.soup.select('div#inner > p:nth-of-type(1)')
+        self.assertEqual(len(els), 1)
+        self.assertEqual(els[0].string, u'Some text')
+
+    def test_id_child_selector_nth_of_type(self):
+        self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])  # ID selector + child combinator + nth-of-type
+
+    def test_select_on_element(self):
+        # Other tests operate on the tree; this operates on an element
+        # within the tree.
+        inner = self.soup.find("div", id="main")
+        selected = inner.select("div")
+        # The <div id="inner"> tag was selected. The <div id="footer">
+        # tag was not.
+        self.assertSelectsIDs(selected, ['inner', 'data1'])
+
+    def test_overspecified_child_id(self):
+        self.assertSelects(".fancy #inner", ['inner'])  # class on an ancestor plus the ID
+        self.assertSelects(".normal #inner", [])  # same ID, but the class part is expected not to match
+
+    def test_adjacent_sibling_selector(self):
+        self.assertSelects('#p1 + h2', ['header2'])
+        self.assertSelects('#p1 + h2 + p', ['pmulti'])  # '+' can be chained
+        self.assertSelects('#p1 + #header2 + .class1', ['pmulti'])  # mixing ID and class selectors
+        self.assertEqual([], self.soup.select('#p1 + p'))  # expected to match nothing
+
+    def test_general_sibling_selector(self):
+        self.assertSelects('#p1 ~ h2', ['header2', 'header3'])
+        self.assertSelects('#p1 ~ #header2', ['header2'])
+        self.assertSelects('#p1 ~ h2 + a', ['me'])  # '~' followed by '+'
+        self.assertSelects('#p1 ~ h2 + [rel="me"]', ['me'])  # attribute selector on the right-hand side
+        self.assertEqual([], self.soup.select('#inner ~ h2'))  # expected to match nothing
+
+    def test_dangling_combinator(self):
+        self.assertRaises(ValueError, self.soup.select, 'h1 >')  # '>' with nothing after it
+
+    def test_sibling_combinator_wont_select_same_tag_twice(self):
+        self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])  # each ID is expected only once
+
+    # The tests below exercise the selector grouping operator (the comma)
+    def test_multiple_select(self):
+        self.assertSelects('x, y', ['xid', 'yid'])  # comma followed by one space
+
+    def test_multiple_select_with_no_space(self):
+        self.assertSelects('x,y', ['xid', 'yid'])  # no whitespace around the comma
+
+    def test_multiple_select_with_more_space(self):
+        self.assertSelects('x,    y', ['xid', 'yid'])  # several spaces after the comma
+
+    def test_multiple_select_duplicated(self):
+        self.assertSelects('x, x', ['xid'])  # a repeated selector yields the tag once
+
+    def test_multiple_select_sibling(self):
+        self.assertSelects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])  # second group uses the '~' combinator
+
+    def test_multiple_select_tag_and_direct_descendant(self):
+        self.assertSelects('x, y > z', ['xid', 'zidb'])  # plain tag, then a '>' selector
+
+    def test_multiple_select_direct_descendant_and_tags(self):
+        self.assertSelects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])  # '>' selector first, then two plain tags
+
+    def test_multiple_select_indirect_descendant(self):
+        self.assertSelects('div x,y,  z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])  # descendant selector first; uneven spacing
+
+    def test_invalid_multiple_select(self):
+        self.assertRaises(ValueError, self.soup.select, ',x, y')  # leading comma
+        self.assertRaises(ValueError, self.soup.select, 'x,,y')  # empty group between commas
+
+    def test_multiple_select_attrs(self):
+        self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])  # both groups use attribute selectors
+
+    def test_multiple_select_ids(self):
+        self.assertSelects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])  # 'y > z[id=zida]' contributes nothing to the expected IDs
+
+    def test_multiple_select_nested(self):
+        self.assertSelects('body > div > x, y > z', ['xid', 'zidb'])  # a chained '>' selector in the first group
+
+
+
diff --git a/src/icon.png b/src/icon.png
new file mode 100644
index 0000000..fa9b601
Binary files /dev/null and b/src/icon.png differ
diff --git a/src/info.plist b/src/info.plist
new file mode 100644
index 0000000..4750481
--- /dev/null
+++ b/src/info.plist
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>bundleid</key>
+	<string>me.clarencecastillo.alfred-powerthesaurus</string>
+	<key>category</key>
+	<string>Internet</string>
+	<key>connections</key>
+	<dict>
+		<key>819E593B-43C3-4EBF-9000-3D3C0F5AB6D2</key>
+		<array>
+			<dict>
+				<key>destinationuid</key>
+				<string>BFD7FE38-61C5-4AAB-87E4-20ADA5CBDA67</string>
+				<key>modifiers</key>
+				<integer>0</integer>
+				<key>modifiersubtext</key>
+				<string></string>
+				<key>vitoclose</key>
+				<false/>
+			</dict>
+		</array>
+	</dict>
+	<key>createdby</key>
+	<string>Clarence Castillo</string>
+	<key>description</key>
+	<string>Search Powerthesaurus Synonyms</string>
+	<key>disabled</key>
+	<false/>
+	<key>name</key>
+	<string>Powerthesaurus</string>
+	<key>objects</key>
+	<array>
+		<dict>
+			<key>config</key>
+			<dict>
+				<key>autopaste</key>
+				<false/>
+				<key>clipboardtext</key>
+				<string>{query}</string>
+				<key>transient</key>
+				<false/>
+			</dict>
+			<key>type</key>
+			<string>alfred.workflow.output.clipboard</string>
+			<key>uid</key>
+			<string>BFD7FE38-61C5-4AAB-87E4-20ADA5CBDA67</string>
+			<key>version</key>
+			<integer>2</integer>
+		</dict>
+		<dict>
+			<key>config</key>
+			<dict>
+				<key>alfredfiltersresults</key>
+				<false/>
+				<key>argumenttype</key>
+				<integer>0</integer>
+				<key>escaping</key>
+				<integer>102</integer>
+				<key>keyword</key>
+				<string>pow</string>
+				<key>queuedelaycustom</key>
+				<integer>3</integer>
+				<key>queuedelayimmediatelyinitially</key>
+				<true/>
+				<key>queuedelaymode</key>
+				<integer>0</integer>
+				<key>queuemode</key>
+				<integer>1</integer>
+				<key>runningsubtext</key>
+				<string>Querying Powerthesaurus.org…</string>
+				<key>script</key>
+				<string>/usr/bin/python powerthesaurus.py "{query}"</string>
+				<key>scriptargtype</key>
+				<integer>0</integer>
+				<key>scriptfile</key>
+				<string></string>
+				<key>subtext</key>
+				<string></string>
+				<key>title</key>
+				<string>Search Powerthesaurus</string>
+				<key>type</key>
+				<integer>0</integer>
+				<key>withspace</key>
+				<true/>
+			</dict>
+			<key>type</key>
+			<string>alfred.workflow.input.scriptfilter</string>
+			<key>uid</key>
+			<string>819E593B-43C3-4EBF-9000-3D3C0F5AB6D2</string>
+			<key>version</key>
+			<integer>2</integer>
+		</dict>
+	</array>
+	<key>readme</key>
+	<string></string>
+	<key>uidata</key>
+	<dict>
+		<key>819E593B-43C3-4EBF-9000-3D3C0F5AB6D2</key>
+		<dict>
+			<key>xpos</key>
+			<integer>120</integer>
+			<key>ypos</key>
+			<integer>70</integer>
+		</dict>
+		<key>BFD7FE38-61C5-4AAB-87E4-20ADA5CBDA67</key>
+		<dict>
+			<key>xpos</key>
+			<integer>500</integer>
+			<key>ypos</key>
+			<integer>70</integer>
+		</dict>
+	</dict>
+	<key>webaddress</key>
+	<string></string>
+</dict>
+</plist>
diff --git a/src/powerthesaurus.py b/src/powerthesaurus.py
new file mode 100644
index 0000000..102da1a
--- /dev/null
+++ b/src/powerthesaurus.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python
+# encoding: utf-8
+#
+# Copyright © 2017 hello@clarencecastillo.me
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2017-03-06
+#
+
+"""
+Powerthesaurus API
+"""
+
+from bs4 import BeautifulSoup
+from HTMLParser import HTMLParser
+import re
+import sys
+import functools
+
+from workflow import Workflow, web
+
+ICON = 'icon.png'
+HELP_URL = '/~https://github.com/clarencecastillo/alfred-powerthesaurus'
+API_URL = 'https://www.powerthesaurus.org/'
+
+# How long to cache results for
+CACHE_MAX_AGE = 20  # seconds
+
+# h.unescape() turns HTML escapes back into real characters (used by parse_word)
+h = HTMLParser()
+
+log = None  # replaced by the workflow's logger in the __main__ block before main() runs
+
+def cache_key(query, tags):
+    """Make filesystem-friendly cache key"""
+
+    key = query + '_' + ';'.join(tags)
+    key = key.lower()
+    key = re.sub(r'[^a-z0-9-_;\.]', '-', key)
+    key = re.sub(r'-+', '-', key)
+    log.debug('cache key : {!r} {!r} -> {!r}'.format(query, tags, key))
+    return key
+
+def parse_word(word):
+    """Extract info from crawling results"""
+
+    parsers = {
+        'word' : {
+            'tag' : 'a',
+            'class' : 'topic-link'
+        },
+        'rating' : {
+            'tag' : 'div',
+            'class' : 'rating'
+        },
+        'tags' : {
+            'tag' : 'span',
+            'class' : 't',
+            # adds ' | ' between kind of word and other related words
+            'sanitize' : lambda t: t[::-1].replace('.', ' | .', 1)[::-1]
+        }
+    }
+
+    result = { }
+    for key, parser in parsers.iteritems():
+        element = word.find(parser['tag'], parser['class'])
+        result[key] = h.unescape(element.getText()) if element else ""
+        result[key] = parser['sanitize'](result[key]) if 'sanitize' in parser else result[key]
+
+    return result
+
+def get_words(query, query_type):
+    """Extract list of words from query"""
+
+    r = web.get(API_URL + query.replace(" ", "_") + "/" + query_type)
+    log.debug('response : [{}] {}'.format(r.status_code, r.url))
+    r.raise_for_status()
+    soup = BeautifulSoup(r.text, 'html.parser')
+    results = [parse_word(s) for s in soup.find_all('tr', 'theentry')]
+    return results
+
+def main(wf):
+
+    query_type, _, query = wf.args[0].strip().partition(" ")
+    log.debug('query : {!r} {!r}'.format(query, query_type))
+
+    query_type = {
+        'ant': 'antonyms',
+        'syn': 'synonyms'
+    }[query_type]
+
+    if not query:
+        wf.add_item('Search Powerthesaurus')
+        wf.send_feedback()
+        return 0
+
+    key = cache_key(query, [query_type])
+
+    # Fetch words from API
+    words = wf.cached_data(key, functools.partial(get_words, query, query_type),
+                               max_age=CACHE_MAX_AGE)
+    log.debug('count : {} words for {!r}'.format(len(words), query))
+
+    # Show results
+    if not words:
+        wf.add_item('No ' + query_type + 's found',
+                    'Try a different word...',
+                    icon=ICON)
+
+    for word in words:
+        wf.add_item(word['word'],
+                    word['rating'] + ' | ' + word['tags'],
+                    valid=True,
+                    largetext=word['word'],
+                    copytext=word['word'],
+                    icon=ICON,
+                    arg=word['word'])
+
+    wf.send_feedback()
+
+if __name__ == '__main__':
+    wf = Workflow(help_url=HELP_URL)
+    log = wf.logger  # module-level logger used by main() and the helpers above
+    sys.exit(wf.run(main))  # exit status is whatever wf.run() returns
diff --git a/src/powerthesaurus_update.png b/src/powerthesaurus_update.png
new file mode 100644
index 0000000..f909099
Binary files /dev/null and b/src/powerthesaurus_update.png differ
diff --git a/src/version b/src/version
new file mode 100644
index 0000000..d3827e7
--- /dev/null
+++ b/src/version
@@ -0,0 +1 @@
+1.0
diff --git a/src/workflow/Notify.tgz b/src/workflow/Notify.tgz
new file mode 100644
index 0000000..174e9a7
Binary files /dev/null and b/src/workflow/Notify.tgz differ
diff --git a/src/workflow/__init__.py b/src/workflow/__init__.py
new file mode 100644
index 0000000..632f1f5
--- /dev/null
+++ b/src/workflow/__init__.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (c) 2014 Dean Jackson <deanishe@deanishe.net>
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2014-02-15
+#
+
+"""A helper library for `Alfred <http://www.alfredapp.com/>`_ workflows."""
+
+import os
+
+# Workflow objects
+from .workflow import Workflow, manager
+from .workflow3 import Workflow3
+
+# Exceptions
+from .workflow import PasswordNotFound, KeychainError
+
+# Icons
+from .workflow import (
+    ICON_ACCOUNT,
+    ICON_BURN,
+    ICON_CLOCK,
+    ICON_COLOR,
+    ICON_COLOUR,
+    ICON_EJECT,
+    ICON_ERROR,
+    ICON_FAVORITE,
+    ICON_FAVOURITE,
+    ICON_GROUP,
+    ICON_HELP,
+    ICON_HOME,
+    ICON_INFO,
+    ICON_NETWORK,
+    ICON_NOTE,
+    ICON_SETTINGS,
+    ICON_SWIRL,
+    ICON_SWITCH,
+    ICON_SYNC,
+    ICON_TRASH,
+    ICON_USER,
+    ICON_WARNING,
+    ICON_WEB,
+)
+
+# Filter matching rules
+from .workflow import (
+    MATCH_ALL,
+    MATCH_ALLCHARS,
+    MATCH_ATOM,
+    MATCH_CAPITALS,
+    MATCH_INITIALS,
+    MATCH_INITIALS_CONTAIN,
+    MATCH_INITIALS_STARTSWITH,
+    MATCH_STARTSWITH,
+    MATCH_SUBSTRING,
+)
+
+
+__title__ = 'Alfred-Workflow'
+__version__ = open(os.path.join(os.path.dirname(__file__), 'version')).read()  # contents of the `version` file beside this module
+__author__ = 'Dean Jackson'
+__licence__ = 'MIT'
+__copyright__ = 'Copyright 2014 Dean Jackson'
+
+__all__ = [
+    'Workflow',
+    'Workflow3',
+    'manager',
+    'PasswordNotFound',
+    'KeychainError',
+    'ICON_ACCOUNT',
+    'ICON_BURN',
+    'ICON_CLOCK',
+    'ICON_COLOR',
+    'ICON_COLOUR',
+    'ICON_EJECT',
+    'ICON_ERROR',
+    'ICON_FAVORITE',
+    'ICON_FAVOURITE',
+    'ICON_GROUP',
+    'ICON_HELP',
+    'ICON_HOME',
+    'ICON_INFO',
+    'ICON_NETWORK',
+    'ICON_NOTE',
+    'ICON_SETTINGS',
+    'ICON_SWIRL',
+    'ICON_SWITCH',
+    'ICON_SYNC',
+    'ICON_TRASH',
+    'ICON_USER',
+    'ICON_WARNING',
+    'ICON_WEB',
+    'MATCH_ALL',
+    'MATCH_ALLCHARS',
+    'MATCH_ATOM',
+    'MATCH_CAPITALS',
+    'MATCH_INITIALS',
+    'MATCH_INITIALS_CONTAIN',
+    'MATCH_INITIALS_STARTSWITH',
+    'MATCH_STARTSWITH',
+    'MATCH_SUBSTRING',
+]
diff --git a/src/workflow/background.py b/src/workflow/background.py
new file mode 100644
index 0000000..cf883ed
--- /dev/null
+++ b/src/workflow/background.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (c) 2014 deanishe@deanishe.net
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2014-04-06
+#
+
+"""Run background tasks."""
+
+from __future__ import print_function, unicode_literals
+
+import sys
+import os
+import subprocess
+import pickle
+
+from workflow import Workflow
+
+__all__ = ['is_running', 'run_in_background']
+
+_wf = None
+
+
+def wf():
+    global _wf
+    if _wf is None:
+        _wf = Workflow()
+    return _wf
+
+
+def _arg_cache(name):
+    """Return path to pickle cache file for arguments.
+
+    :param name: name of task
+    :type name: ``unicode``
+    :returns: Path to cache file
+    :rtype: ``unicode`` filepath
+
+    """
+    return wf().cachefile('{0}.argcache'.format(name))
+
+
+def _pid_file(name):
+    """Return path to PID file for ``name``.
+
+    :param name: name of task
+    :type name: ``unicode``
+    :returns: Path to PID file for task
+    :rtype: ``unicode`` filepath
+
+    """
+    return wf().cachefile('{0}.pid'.format(name))
+
+
+def _process_exists(pid):
+    """Check if a process with PID ``pid`` exists.
+
+    :param pid: PID to check
+    :type pid: ``int``
+    :returns: ``True`` if process exists, else ``False``
+    :rtype: ``Boolean``
+
+    """
+    try:
+        os.kill(pid, 0)
+    except OSError:  # not running
+        return False
+    return True
+
+
+def is_running(name):
+    """Test whether task is running under ``name``.
+
+    :param name: name of task
+    :type name: ``unicode``
+    :returns: ``True`` if task with name ``name`` is running, else ``False``
+    :rtype: ``Boolean``
+
+    """
+    pidfile = _pid_file(name)
+    if not os.path.exists(pidfile):
+        return False
+
+    with open(pidfile, 'rb') as file_obj:
+        pid = int(file_obj.read().strip())
+
+    if _process_exists(pid):
+        return True
+
+    elif os.path.exists(pidfile):
+        os.unlink(pidfile)
+
+    return False
+
+
+def _background(stdin='/dev/null', stdout='/dev/null',
+                stderr='/dev/null'):  # pragma: no cover
+    """Fork the current process twice so it continues as a background daemon.
+
+    :param stdin: where to read input
+    :type stdin: filepath
+    :param stdout: where to write stdout output
+    :type stdout: filepath
+    :param stderr: where to write stderr output
+    :type stderr: filepath
+
+    """
+    # Do first fork; the parent exits and only the child carries on.
+    try:
+        pid = os.fork()
+        if pid > 0:
+            sys.exit(0)  # Exit first parent.
+    except OSError as e:
+        wf().logger.critical("fork #1 failed: ({0:d}) {1}".format(
+                             e.errno, e.strerror))
+        sys.exit(1)
+    # Decouple from parent environment: working directory, umask, session.
+    os.chdir(wf().workflowdir)
+    os.umask(0)
+    os.setsid()
+    # Do second fork; again only the child carries on.
+    try:
+        pid = os.fork()
+        if pid > 0:
+            sys.exit(0)  # Exit second parent.
+    except OSError as e:
+        wf().logger.critical("fork #2 failed: ({0:d}) {1}".format(
+                             e.errno, e.strerror))
+        sys.exit(1)
+    # Now I am a daemon!
+    # Redirect standard file descriptors to the given paths (opened unbuffered).
+    si = file(stdin, 'r', 0)
+    so = file(stdout, 'a+', 0)
+    se = file(stderr, 'a+', 0)
+    if hasattr(sys.stdin, 'fileno'):
+        os.dup2(si.fileno(), sys.stdin.fileno())
+    if hasattr(sys.stdout, 'fileno'):
+        os.dup2(so.fileno(), sys.stdout.fileno())
+    if hasattr(sys.stderr, 'fileno'):
+        os.dup2(se.fileno(), sys.stderr.fileno())
+
+
+def run_in_background(name, args, **kwargs):
+    r"""Cache arguments then call this script again via :func:`subprocess.call`.
+
+    :param name: name of task
+    :type name: ``unicode``
+    :param args: arguments passed as first argument to :func:`subprocess.call`
+    :param \**kwargs: keyword arguments to :func:`subprocess.call`
+    :returns: exit code of sub-process, or ``None`` if the task already runs
+    :rtype: ``int`` or ``None``
+
+    When you call this function, it caches its arguments and then calls
+    ``background.py`` in a subprocess. The Python subprocess will load the
+    cached arguments, fork into the background, and then run the command you
+    specified.
+
+    This function will return as soon as the ``background.py`` subprocess has
+    forked, returning the exit code of *that* process (i.e. not of the command
+    you're trying to run).
+
+    If that process fails, an error will be written to the log file.
+
+    If a process is already running under the same name, this function will
+    return immediately and will not run the specified command.
+
+    """
+    if is_running(name):
+        wf().logger.info('Task `{0}` is already running'.format(name))
+        return  # nothing is started; the caller receives None
+
+    argcache = _arg_cache(name)
+
+    # Cache arguments so that main() in the subprocess can load them
+    with open(argcache, 'wb') as file_obj:
+        pickle.dump({'args': args, 'kwargs': kwargs}, file_obj)
+        wf().logger.debug('Command arguments cached to `{0}`'.format(argcache))
+
+    # Call this script, passing only the task name on the command line
+    cmd = ['/usr/bin/python', __file__, name]
+    wf().logger.debug('Calling {0!r} ...'.format(cmd))
+    retcode = subprocess.call(cmd)
+    if retcode:  # pragma: no cover
+        wf().logger.error('Failed to call task in background')
+    else:
+        wf().logger.debug('Executing task `{0}` in background...'.format(name))
+    return retcode
+
+
+def main(wf):  # pragma: no cover
+    """Run command in a background process.
+
+    Load cached arguments, fork into background, then call
+    :meth:`subprocess.call` with cached arguments.
+
+    """
+    name = wf.args[0]
+    argcache = _arg_cache(name)
+    if not os.path.exists(argcache):
+        wf.logger.critical('No arg cache found : {0!r}'.format(argcache))
+        return 1
+
+    # Load cached arguments (pickled by run_in_background())
+    with open(argcache, 'rb') as file_obj:
+        data = pickle.load(file_obj)
+
+    # Cached arguments
+    args = data['args']
+    kwargs = data['kwargs']
+
+    # Delete argument cache file now that its contents are in memory
+    os.unlink(argcache)
+
+    pidfile = _pid_file(name)
+
+    # Fork to background
+    _background()
+
+    # Write PID to file (this is the file is_running() reads)
+    with open(pidfile, 'wb') as file_obj:
+        file_obj.write('{0}'.format(os.getpid()))
+
+    # Run the command
+    try:
+        wf.logger.debug('Task `{0}` running'.format(name))
+        wf.logger.debug('cmd : {0!r}'.format(args))
+
+        retcode = subprocess.call(args, **kwargs)
+
+        if retcode:
+            wf.logger.error('Command failed with [{0}] : {1!r}'.format(
+                            retcode, args))
+
+    finally:  # remove the PID file even if the command raised
+        if os.path.exists(pidfile):
+            os.unlink(pidfile)
+        wf.logger.debug('Task `{0}` finished'.format(name))
+
+
+if __name__ == '__main__':  # pragma: no cover
+    wf().run(main)  # reached when run_in_background() calls this file as a script
diff --git a/src/workflow/notify.py b/src/workflow/notify.py
new file mode 100644
index 0000000..3ed1e5e
--- /dev/null
+++ b/src/workflow/notify.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (c) 2015 deanishe@deanishe.net
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2015-11-26
+#
+
+# TODO: Exclude this module from test and code coverage in py2.6
+
+"""
+Post notifications via the OS X Notification Center. This feature
+is only available on Mountain Lion (10.8) and later. It will
+silently fail on older systems.
+
+The main API is a single function, :func:`~workflow.notify.notify`.
+
+It works by copying a simple application to your workflow's data
+directory. It replaces the application's icon with your workflow's
+icon and then calls the application to post notifications.
+"""
+
+from __future__ import print_function, unicode_literals
+
+import os
+import plistlib
+import shutil
+import subprocess
+import sys
+import tarfile
+import tempfile
+import uuid
+
+import workflow
+
+
+_wf = None
+_log = None
+
+
+#: Available system sounds from System Preferences > Sound > Sound Effects
+SOUNDS = (
+    'Basso',
+    'Blow',
+    'Bottle',
+    'Frog',
+    'Funk',
+    'Glass',
+    'Hero',
+    'Morse',
+    'Ping',
+    'Pop',
+    'Purr',
+    'Sosumi',
+    'Submarine',
+    'Tink',
+)
+
+
+def wf():
+    """Return `Workflow` object for this module.
+
+    Returns:
+        workflow.Workflow: `Workflow` object for current workflow.
+    """
+    global _wf
+    if _wf is None:
+        _wf = workflow.Workflow()
+    return _wf
+
+
+def log():
+    """Return logger for this module.
+
+    Returns:
+        logging.Logger: Logger for this module.
+    """
+    global _log
+    if _log is None:
+        _log = wf().logger
+    return _log
+
+
+def notifier_program():
+    """Return path to notifier applet executable (it may not exist yet).
+
+    Returns:
+        unicode: Path to Notify.app `applet` executable.
+    """
+    return wf().datafile('Notify.app/Contents/MacOS/applet')  # notify() installs the app if this path is missing
+
+
+def notifier_icon_path():
+    """Return path to icon file in installed Notify.app.
+
+    Returns:
+        unicode: Path to `applet.icns` within the app bundle.
+    """
+    return wf().datafile('Notify.app/Contents/Resources/applet.icns')  # install_notifier() replaces this file
+
+
+def install_notifier():
+    """Extract `Notify.app` from the workflow to data directory.
+
+    Changes the bundle ID of the installed app and gives it the
+    workflow's icon.
+    """
+    archive = os.path.join(os.path.dirname(__file__), 'Notify.tgz')
+    destdir = wf().datadir
+    app_path = os.path.join(destdir, 'Notify.app')
+    n = notifier_program()
+    log().debug("Installing Notify.app to %r ...", destdir)
+    # z = zipfile.ZipFile(archive, 'r')
+    # z.extractall(destdir)
+    tgz = tarfile.open(archive, 'r:gz')
+    tgz.extractall(destdir)
+    assert os.path.exists(n), (
+        "Notify.app could not be installed in {0!r}.".format(destdir))
+
+    # Replace applet icon
+    icon = notifier_icon_path()
+    workflow_icon = wf().workflowfile('icon.png')
+    if os.path.exists(icon):
+        os.unlink(icon)
+
+    png_to_icns(workflow_icon, icon)
+
+    # Set file icon
+    # PyObjC isn't available for 2.6, so this is 2.7 only. Actually,
+    # none of this code will "work" on pre-10.8 systems. Let it run
+    # until I figure out a better way of excluding this module
+    # from coverage in py2.6.
+    if sys.version_info >= (2, 7):  # pragma: no cover
+        from AppKit import NSWorkspace, NSImage
+
+        ws = NSWorkspace.sharedWorkspace()
+        img = NSImage.alloc().init()
+        img.initWithContentsOfFile_(icon)
+        ws.setIcon_forFile_options_(img, app_path, 0)
+
+    # Change bundle ID of installed app
+    ip_path = os.path.join(app_path, 'Contents/Info.plist')
+    bundle_id = '{0}.{1}'.format(wf().bundleid, uuid.uuid4().hex)
+    data = plistlib.readPlist(ip_path)
+    log().debug('Changing bundle ID to {0!r}'.format(bundle_id))
+    data['CFBundleIdentifier'] = bundle_id
+    plistlib.writePlist(data, ip_path)
+
+
+def validate_sound(sound):
+    """Coerce `sound` to valid sound name.
+
+    Returns `None` for invalid sounds. Sound names can be found
+    in `System Preferences > Sound > Sound Effects`.
+
+    Args:
+        sound (str): Name of system sound.
+
+    Returns:
+        str: Proper name of sound or `None`.
+    """
+    if not sound:
+        return None
+
+    # Case-insensitive comparison of `sound`
+    if sound.lower() in [s.lower() for s in SOUNDS]:
+        # Title-case is correct for all system sounds as of OS X 10.11
+        return sound.title()
+    return None
+
+
+def notify(title='', text='', sound=None):
+    """Post notification via Notify.app helper.
+
+    Args:
+        title (str, optional): Notification title.
+        text (str, optional): Notification body text.
+        sound (str, optional): Name of sound to play.
+
+    Raises:
+        ValueError: Raised if both `title` and `text` are empty.
+
+    Returns:
+        bool: `True` if notification was posted, else `False`.
+    """
+    if title == text == '':
+        raise ValueError('Empty notification')
+
+    sound = validate_sound(sound) or ''
+
+    n = notifier_program()
+
+    if not os.path.exists(n):
+        install_notifier()
+
+    env = os.environ.copy()
+    enc = 'utf-8'
+    env['NOTIFY_TITLE'] = title.encode(enc)
+    env['NOTIFY_MESSAGE'] =  text.encode(enc)
+    env['NOTIFY_SOUND'] = sound.encode(enc)
+    cmd = [n]
+    retcode = subprocess.call(cmd, env=env)
+    if retcode == 0:
+        return True
+
+    log().error('Notify.app exited with status {0}.'.format(retcode))
+    return False
+
+
+def convert_image(inpath, outpath, size):
+    """Convert an image file using `sips`.
+
+    Args:
+        inpath (str): Path of source file.
+        outpath (str): Path to destination file.
+        size (int): Width and height of destination image in pixels.
+
+    Raises:
+        RuntimeError: Raised if `sips` exits with non-zero status.
+    """
+    cmd = [
+        b'sips',
+        b'-z', b'{0}'.format(size), b'{0}'.format(size),
+        inpath,
+        b'--out', outpath]
+    # log().debug(cmd)
+    with open(os.devnull, 'w') as pipe:
+        retcode = subprocess.call(cmd, stdout=pipe, stderr=subprocess.STDOUT)
+
+    if retcode != 0:
+        raise RuntimeError('sips exited with {0}'.format(retcode))
+
+
+def png_to_icns(png_path, icns_path):
+    """Convert PNG file to ICNS using `iconutil`.
+
+    Create an iconset from the source PNG file. Generate PNG files
+    in each size required by OS X, then call `iconutil` to turn
+    them into a single ICNS file.
+
+    Args:
+        png_path (str): Path to source PNG file.
+        icns_path (str): Path to destination ICNS file.
+
+    Raises:
+        RuntimeError: Raised if `iconutil` or `sips` fail.
+    """
+    tempdir = tempfile.mkdtemp(prefix='aw-', dir=wf().datadir)
+
+    try:
+        iconset = os.path.join(tempdir, 'Icon.iconset')
+
+        assert not os.path.exists(iconset), (
+            "Iconset path already exists : {0!r}".format(iconset))
+        os.makedirs(iconset)
+
+        # Copy source icon to icon set and generate all the other
+        # sizes needed
+        configs = []
+        for i in (16, 32, 128, 256, 512):
+            configs.append(('icon_{0}x{0}.png'.format(i), i))
+            configs.append((('icon_{0}x{0}@2x.png'.format(i), i*2)))
+
+        shutil.copy(png_path, os.path.join(iconset, 'icon_256x256.png'))
+        shutil.copy(png_path, os.path.join(iconset, 'icon_128x128@2x.png'))
+
+        for name, size in configs:
+            outpath = os.path.join(iconset, name)
+            if os.path.exists(outpath):
+                continue
+            convert_image(png_path, outpath, size)
+
+        cmd = [
+            b'iconutil',
+            b'-c', b'icns',
+            b'-o', icns_path,
+            iconset]
+
+        retcode = subprocess.call(cmd)
+        if retcode != 0:
+            raise RuntimeError("iconset exited with {0}".format(retcode))
+
+        assert os.path.exists(icns_path), (
+            "Generated ICNS file not found : {0!r}".format(icns_path))
+    finally:
+        try:
+            shutil.rmtree(tempdir)
+        except OSError:  # pragma: no cover
+            pass
+
+
+# def notify_native(title='', text='', sound=''):
+#     """Post notification via the native API (via pyobjc).
+
+#     At least one of `title` or `text` must be specified.
+
+#     This method will *always* show the Python launcher icon (i.e. the
+#     rocket with the snakes on it).
+
+#     Args:
+#         title (str, optional): Notification title.
+#         text (str, optional): Notification body text.
+#         sound (str, optional): Name of sound to play.
+
+#     """
+
+#     if title == text == '':
+#         raise ValueError('Empty notification')
+
+#     import Foundation
+
+#     sound = sound or Foundation.NSUserNotificationDefaultSoundName
+
+#     n = Foundation.NSUserNotification.alloc().init()
+#     n.setTitle_(title)
+#     n.setInformativeText_(text)
+#     n.setSoundName_(sound)
+#     nc = Foundation.NSUserNotificationCenter.defaultUserNotificationCenter()
+#     nc.deliverNotification_(n)
+
+
+if __name__ == '__main__':  # pragma: nocover
+    # Simple command-line script for trying out this module by hand.
+    # This won't work on 2.6, as `argparse` isn't available
+    # by default.
+    import argparse
+
+    from unicodedata import normalize
+
+    def uni(s):
+        """Coerce `s` to normalised Unicode."""
+        ustr = s.decode('utf-8')  # assumes arguments arrive UTF-8 encoded
+        return normalize('NFD', ustr)
+
+    p = argparse.ArgumentParser()
+    p.add_argument('-p', '--png', help="PNG image to convert to ICNS.")
+    p.add_argument('-l', '--list-sounds', help="Show available sounds.",
+                   action='store_true')
+    p.add_argument('-t', '--title',
+                   help="Notification title.", type=uni,
+                   default='')
+    p.add_argument('-s', '--sound', type=uni,
+                   help="Optional notification sound.", default='')
+    p.add_argument('text', type=uni,
+                   help="Notification body text.", default='', nargs='?')
+    o = p.parse_args()
+
+    # List available sounds, one per line, then exit
+    if o.list_sounds:
+        for sound in SOUNDS:
+            print(sound)
+        sys.exit(0)
+
+    # Convert PNG to ICNS, written beside the source with an .icns extension
+    if o.png:
+        icns = os.path.join(
+            os.path.dirname(o.png),
+            b'{0}{1}'.format(os.path.splitext(os.path.basename(o.png))[0],
+                             '.icns'))
+
+        print('Converting {0!r} to {1!r} ...'.format(o.png, icns),
+              file=sys.stderr)
+
+        assert not os.path.exists(icns), (
+            "Destination file already exists : {0}".format(icns))
+
+        png_to_icns(o.png, icns)
+        sys.exit(0)
+
+    # Post notification; at least one of title and text is required
+    if o.title == o.text == '':
+        print('ERROR: Empty notification.', file=sys.stderr)
+        sys.exit(1)
+    else:
+        notify(o.title, o.text, o.sound)
diff --git a/src/workflow/update.py b/src/workflow/update.py
new file mode 100644
index 0000000..468d024
--- /dev/null
+++ b/src/workflow/update.py
@@ -0,0 +1,428 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright (c) 2014 Fabio Niephaus <fabio.niephaus@gmail.com>,
+#       Dean Jackson <deanishe@deanishe.net>
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2014-08-16
+#
+
+"""Self-updating from GitHub.
+
+.. versionadded:: 1.9
+
+.. note::
+
+   This module is not intended to be used directly. Automatic updates
+   are controlled by the ``update_settings`` :class:`dict` passed to
+   :class:`~workflow.workflow.Workflow` objects.
+
+"""
+
+from __future__ import print_function, unicode_literals
+
+import os
+import tempfile
+import re
+import subprocess
+
+import workflow
+import web
+
+# __all__ = []
+
+
+RELEASES_BASE = 'https://api.github.com/repos/{0}/releases'
+
+
+_wf = None
+
+
+def wf():
+    """Lazy `Workflow` object."""
+    global _wf
+    if _wf is None:
+        _wf = workflow.Workflow()
+    return _wf
+
+
+class Version(object):
+    """Mostly semantic versioning.
+
+    The main difference to proper :ref:`semantic versioning <semver>`
+    is that this implementation doesn't require a minor or patch version.
+
+    Version strings may also be prefixed with "v", e.g.:
+
+    >>> v = Version('v1.1.1')
+    >>> v.tuple
+    (1, 1, 1, '')
+
+    >>> v = Version('2.0')
+    >>> v.tuple
+    (2, 0, 0, '')
+
+    >>> Version('3.1-beta').tuple
+    (3, 1, 0, 'beta')
+
+    >>> Version('1.0.1') > Version('0.0.1')
+    True
+    """
+
+    #: Match version and pre-release/build information in version strings
+    match_version = re.compile(r'([0-9\.]+)(.+)?').match
+
+    def __init__(self, vstr):
+        """Create new `Version` object.
+
+        Args:
+            vstr (basestring): Semantic version string.
+        """
+        self.vstr = vstr
+        self.major = 0
+        self.minor = 0
+        self.patch = 0
+        self.suffix = ''
+        self.build = ''
+        self._parse(vstr)
+
+    def _parse(self, vstr):
+        if vstr.startswith('v'):
+            m = self.match_version(vstr[1:])
+        else:
+            m = self.match_version(vstr)
+        if not m:
+            raise ValueError('Invalid version number: {0}'.format(vstr))
+
+        version, suffix = m.groups()
+        parts = self._parse_dotted_string(version)
+        self.major = parts.pop(0)
+        if len(parts):
+            self.minor = parts.pop(0)
+        if len(parts):
+            self.patch = parts.pop(0)
+        if not len(parts) == 0:
+            raise ValueError('Invalid version (too long) : {0}'.format(vstr))
+
+        if suffix:
+            # Build info
+            idx = suffix.find('+')
+            if idx > -1:
+                self.build = suffix[idx+1:]
+                suffix = suffix[:idx]
+            if suffix:
+                if not suffix.startswith('-'):
+                    raise ValueError(
+                        'Invalid suffix : `{0}`. Must start with `-`'.format(
+                            suffix))
+                self.suffix = suffix[1:]
+
+        # wf().logger.debug('version str `{}` -> {}'.format(vstr, repr(self)))
+
+    def _parse_dotted_string(self, s):
+        """Parse string ``s`` into list of ints and strings."""
+        parsed = []
+        parts = s.split('.')
+        for p in parts:
+            if p.isdigit():
+                p = int(p)
+            parsed.append(p)
+        return parsed
+
+    @property
+    def tuple(self):
+        """Version number as a tuple of major, minor, patch, pre-release."""
+        return (self.major, self.minor, self.patch, self.suffix)
+
+    def __lt__(self, other):
+        """Implement comparison."""
+        if not isinstance(other, Version):
+            raise ValueError('Not a Version instance: {0!r}'.format(other))
+        t = self.tuple[:3]
+        o = other.tuple[:3]
+        if t < o:
+            return True
+        if t == o:  # We need to compare suffixes
+            if self.suffix and not other.suffix:
+                return True
+            if other.suffix and not self.suffix:
+                return False
+            return (self._parse_dotted_string(self.suffix) <
+                    self._parse_dotted_string(other.suffix))
+        # t > o
+        return False
+
+    def __eq__(self, other):
+        """Implement comparison."""
+        if not isinstance(other, Version):
+            raise ValueError('Not a Version instance: {0!r}'.format(other))
+        return self.tuple == other.tuple
+
+    def __ne__(self, other):
+        """Implement comparison."""
+        return not self.__eq__(other)
+
+    def __gt__(self, other):
+        """Implement comparison."""
+        if not isinstance(other, Version):
+            raise ValueError('Not a Version instance: {0!r}'.format(other))
+        return other.__lt__(self)
+
+    def __le__(self, other):
+        """Implement comparison."""
+        if not isinstance(other, Version):
+            raise ValueError('Not a Version instance: {0!r}'.format(other))
+        return not other.__lt__(self)
+
+    def __ge__(self, other):
+        """Implement comparison."""
+        return not self.__lt__(other)
+
+    def __str__(self):
+        """Return semantic version string."""
+        vstr = '{0}.{1}.{2}'.format(self.major, self.minor, self.patch)
+        if self.suffix:
+            vstr += '-{0}'.format(self.suffix)
+        if self.build:
+            vstr += '+{0}'.format(self.build)
+        return vstr
+
+    def __repr__(self):
+        """Return 'code' representation of `Version`."""
+        return "Version('{0}')".format(str(self))
+
+
+def download_workflow(url):
+    """Download workflow at ``url`` to a local temporary file.
+
+    :param url: URL to .alfredworkflow/.alfred3workflow file in GitHub repo
+    :returns: path to downloaded file
+    :raises ValueError: if ``url`` has neither of those extensions
+    """
+    filename = url.split("/")[-1]
+
+    # `_validate_release` may pick an `.alfred3workflow` asset; accept both.
+    if not filename.endswith(('.alfredworkflow', '.alfred3workflow')):
+        raise ValueError('Attachment `{0}` not a workflow'.format(filename))
+
+    local_path = os.path.join(tempfile.gettempdir(), filename)
+
+    wf().logger.debug(
+        'Downloading updated workflow from `%s` to `%s` ...', url, local_path)
+
+    response = web.get(url)
+
+    with open(local_path, 'wb') as output:
+        output.write(response.content)
+
+    return local_path
+
+
+def build_api_url(slug):
+    """Generate releases URL from GitHub slug.
+
+    :param slug: Repo name in form ``username/repo``
+    :returns: URL to the API endpoint for the repo's releases
+
+    """
+    if len(slug.split('/')) != 2:
+        raise ValueError('Invalid GitHub slug : {0}'.format(slug))
+
+    return RELEASES_BASE.format(slug)
+
+
+def _validate_release(release):
+    """Return release for running version of Alfred."""
+    alf3 = wf().alfred_version.major == 3
+
+    downloads = {'.alfredworkflow': [], '.alfred3workflow': []}
+    dl_count = 0
+    version = release['tag_name']
+
+    for asset in release.get('assets', []):
+        url = asset.get('browser_download_url')
+        if not url:  # pragma: nocover
+            continue
+
+        ext = os.path.splitext(url)[1].lower()
+        if ext not in downloads:
+            continue
+
+        # Ignore Alfred 3-only files if Alfred 2 is running
+        if ext == '.alfred3workflow' and not alf3:
+            continue
+
+        downloads[ext].append(url)
+        dl_count += 1
+
+        # download_urls.append(url)
+
+    if dl_count == 0:
+        wf().logger.warning(
+            'Invalid release %s : No workflow file', version)
+        return None
+
+    for k in downloads:
+        if len(downloads[k]) > 1:
+            wf().logger.warning(
+                'Invalid release %s : multiple %s files', version, k)
+            return None
+
+    # Prefer .alfred3workflow file if there is one and Alfred 3 is
+    # running.
+    if alf3 and len(downloads['.alfred3workflow']):
+        download_url = downloads['.alfred3workflow'][0]
+
+    else:
+        download_url = downloads['.alfredworkflow'][0]
+
+    wf().logger.debug('Release `%s` : %s', version, download_url)
+
+    return {
+        'version': version,
+        'download_url': download_url,
+        'prerelease': release['prerelease']
+    }
+
+
+def get_valid_releases(github_slug, prereleases=False):
+    """Return list of all valid releases.
+
+    :param github_slug: ``username/repo`` for workflow's GitHub repo
+    :param prereleases: Whether to include pre-releases.
+    :returns: list of dicts. Each :class:`dict` has the form
+        ``{'version': '1.1', 'download_url': 'http://github.com/...',
+        'prerelease': False }``
+
+    Releases are obtained through ``wf().cached_data()``, which is given
+    ``retrieve_releases`` (a GitHub API request) as its data function.
+    Each one is checked with :func:`_validate_release`; releases it
+    rejects are skipped. ``version`` is the release's tag name exactly
+    as returned by GitHub: a leading ``v`` is *not* stripped here.
+
+    """
+    api_url = build_api_url(github_slug)
+    releases = []
+
+    wf().logger.debug('Retrieving releases list from `%s` ...', api_url)
+
+    def retrieve_releases():
+        wf().logger.info(
+            'Retrieving releases for `%s` ...', github_slug)
+        return web.get(api_url).json()
+
+    slug = github_slug.replace('/', '-')
+    for release in wf().cached_data('gh-releases-{0}'.format(slug),
+                                    retrieve_releases):
+
+        wf().logger.debug('Release : %r', release)
+
+        release = _validate_release(release)
+        if release is None:
+            wf().logger.debug('Invalid release')
+            continue
+
+        elif release['prerelease'] and not prereleases:
+            wf().logger.debug('Ignoring prerelease : %s', release['version'])
+            continue
+
+        releases.append(release)
+
+    return releases
+
+
+def check_update(github_slug, current_version, prereleases=False):
+    """Check whether a newer release is available on GitHub.
+
+    :param github_slug: ``username/repo`` for workflow's GitHub repo
+    :param current_version: the currently installed version of the
+        workflow. :ref:`Semantic versioning <semver>` is required.
+    :param prereleases: Whether to include pre-releases.
+    :type current_version: ``unicode``
+    :returns: ``True`` if an update is available, else ``False``
+    :raises ValueError: if no valid releases are found
+
+    If an update is available, its version number and download URL will
+    be cached.
+
+    """
+    releases = get_valid_releases(github_slug, prereleases)
+
+    wf().logger.info('%d releases for %s', len(releases), github_slug)
+
+    if not releases:
+        raise ValueError('No valid releases for {0}'.format(github_slug))
+
+    # GitHub returns releases newest-first
+    latest_release = releases[0]
+
+    vr = Version(latest_release['version'])
+    vl = Version(current_version)
+    wf().logger.debug('Latest : %r Installed : %r', vr, vl)
+    if vr > vl:
+
+        wf().cache_data('__workflow_update_status', {
+            'version': latest_release['version'],
+            'download_url': latest_release['download_url'],
+            'available': True
+        })
+
+        return True
+
+    wf().cache_data('__workflow_update_status', {
+        'available': False
+    })
+    return False
+
+
+def install_update():
+    """If a newer release is available, download and install it.
+
+    :returns: ``True`` if an update is installed, else ``False``
+
+    """
+    update_data = wf().cached_data('__workflow_update_status', max_age=0)
+
+    if not update_data or not update_data.get('available'):
+        wf().logger.info('No update available')
+        return False
+
+    local_file = download_workflow(update_data['download_url'])
+
+    wf().logger.info('Installing updated workflow ...')
+    subprocess.call(['open', local_file])
+
+    update_data['available'] = False
+    wf().cache_data('__workflow_update_status', update_data)
+    return True
+
+
+if __name__ == '__main__':  # pragma: nocover
+    import sys
+
+    def show_help():
+        """Print help message."""
+        print('Usage : update.py (check|install) github_slug version '
+              '[--prereleases]')
+        sys.exit(1)
+
+    argv = sys.argv[:]
+    prereleases = '--prereleases' in argv
+
+    if prereleases:
+        argv.remove('--prereleases')
+
+    if len(argv) != 4:
+        show_help()
+
+    action, github_slug, version = argv[1:]
+
+    if action not in ('check', 'install'):
+        show_help()
+
+    if action == 'check':
+        check_update(github_slug, version, prereleases)
+    elif action == 'install':
+        install_update()
diff --git a/src/workflow/version b/src/workflow/version
new file mode 100644
index 0000000..614245e
--- /dev/null
+++ b/src/workflow/version
@@ -0,0 +1 @@
+1.24
\ No newline at end of file
diff --git a/src/workflow/web.py b/src/workflow/web.py
new file mode 100644
index 0000000..748b199
--- /dev/null
+++ b/src/workflow/web.py
@@ -0,0 +1,671 @@
+# encoding: utf-8
+#
+# Copyright (c) 2014 Dean Jackson <deanishe@deanishe.net>
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2014-02-15
+#
+
+"""Lightweight HTTP library with a requests-like interface."""
+
+import codecs
+import json
+import mimetypes
+import os
+import random
+import re
+import socket
+import string
+import unicodedata
+import urllib
+import urllib2
+import urlparse
+import zlib
+
+
+USER_AGENT = u'Alfred-Workflow/1.19 (+http://www.deanishe.net/alfred-workflow)'
+
+# Valid characters for multipart form data boundaries
+BOUNDARY_CHARS = string.digits + string.ascii_letters
+
+# HTTP response codes
+RESPONSES = {
+    100: 'Continue',
+    101: 'Switching Protocols',
+    200: 'OK',
+    201: 'Created',
+    202: 'Accepted',
+    203: 'Non-Authoritative Information',
+    204: 'No Content',
+    205: 'Reset Content',
+    206: 'Partial Content',
+    300: 'Multiple Choices',
+    301: 'Moved Permanently',
+    302: 'Found',
+    303: 'See Other',
+    304: 'Not Modified',
+    305: 'Use Proxy',
+    307: 'Temporary Redirect',
+    400: 'Bad Request',
+    401: 'Unauthorized',
+    402: 'Payment Required',
+    403: 'Forbidden',
+    404: 'Not Found',
+    405: 'Method Not Allowed',
+    406: 'Not Acceptable',
+    407: 'Proxy Authentication Required',
+    408: 'Request Timeout',
+    409: 'Conflict',
+    410: 'Gone',
+    411: 'Length Required',
+    412: 'Precondition Failed',
+    413: 'Request Entity Too Large',
+    414: 'Request-URI Too Long',
+    415: 'Unsupported Media Type',
+    416: 'Requested Range Not Satisfiable',
+    417: 'Expectation Failed',
+    500: 'Internal Server Error',
+    501: 'Not Implemented',
+    502: 'Bad Gateway',
+    503: 'Service Unavailable',
+    504: 'Gateway Timeout',
+    505: 'HTTP Version Not Supported'
+}
+
+
+def str_dict(dic):
+    """Convert keys and values in ``dic`` into UTF-8-encoded :class:`str`.
+
+    :param dic: :class:`dict` of Unicode strings
+    :returns: :class:`dict`
+
+    """
+    if isinstance(dic, CaseInsensitiveDictionary):
+        dic2 = CaseInsensitiveDictionary()
+    else:
+        dic2 = {}
+    for k, v in dic.items():
+        if isinstance(k, unicode):
+            k = k.encode('utf-8')
+        if isinstance(v, unicode):
+            v = v.encode('utf-8')
+        dic2[k] = v
+    return dic2
+
+
+class NoRedirectHandler(urllib2.HTTPRedirectHandler):
+    """Prevent redirections."""
+
+    def redirect_request(self, *args):
+        return None
+
+
+# Adapted from https://gist.github.com/babakness/3901174
+class CaseInsensitiveDictionary(dict):
+    """Dictionary with caseless key search.
+
+    Enables case insensitive searching while preserving case sensitivity
+    when keys are listed, ie, via keys() or items() methods.
+
+    Works by storing a lowercase version of the key as the new key and
+    stores the original key-value pair as the key's value
+    (values become dictionaries).
+
+    """
+
+    def __init__(self, initval=None):
+        """Create new case-insensitive dictionary."""
+        if isinstance(initval, dict):
+            for key, value in initval.iteritems():
+                self.__setitem__(key, value)
+
+        elif isinstance(initval, list):
+            for (key, value) in initval:
+                self.__setitem__(key, value)
+
+    def __contains__(self, key):
+        return dict.__contains__(self, key.lower())
+
+    def __getitem__(self, key):
+        return dict.__getitem__(self, key.lower())['val']
+
+    def __setitem__(self, key, value):
+        return dict.__setitem__(self, key.lower(), {'key': key, 'val': value})
+
+    def get(self, key, default=None):
+        try:
+            v = dict.__getitem__(self, key.lower())
+        except KeyError:
+            return default
+        else:
+            return v['val']
+
+    def update(self, other):
+        for k, v in other.items():
+            self[k] = v
+
+    def items(self):
+        return [(v['key'], v['val']) for v in dict.itervalues(self)]
+
+    def keys(self):
+        return [v['key'] for v in dict.itervalues(self)]
+
+    def values(self):
+        return [v['val'] for v in dict.itervalues(self)]
+
+    def iteritems(self):
+        for v in dict.itervalues(self):
+            yield v['key'], v['val']
+
+    def iterkeys(self):
+        for v in dict.itervalues(self):
+            yield v['key']
+
+    def itervalues(self):
+        for v in dict.itervalues(self):
+            yield v['val']
+
+
+class Response(object):
+    """
+    Returned by :func:`request` / :func:`get` / :func:`post` functions.
+
+    Simplified version of the ``Response`` object in the ``requests`` library.
+
+    >>> r = request('http://www.google.com')
+    >>> r.status_code
+    200
+    >>> r.encoding
+    ISO-8859-1
+    >>> r.content  # bytes
+    <html> ...
+    >>> r.text  # unicode, decoded according to charset in HTTP header/meta tag
+    u'<html> ...'
+    >>> r.json()  # content parsed as JSON
+
+    """
+
+    def __init__(self, request, stream=False):
+        """Call `request` with :mod:`urllib2` and process results.
+
+        :param request: :class:`urllib2.Request` instance
+        :param stream: Whether to stream response or retrieve it all at once
+        :type stream: ``bool``
+
+        """
+        self.request = request
+        self._stream = stream
+        self.url = None
+        self.raw = None
+        self._encoding = None
+        self.error = None
+        self.status_code = None
+        self.reason = None
+        self.headers = CaseInsensitiveDictionary()
+        self._content = None
+        self._content_loaded = False
+        self._gzipped = False
+
+        # Execute query
+        try:
+            self.raw = urllib2.urlopen(request)
+        except urllib2.HTTPError as err:
+            self.error = err
+            try:
+                self.url = err.geturl()
+            # sometimes (e.g. when authentication fails)
+            # urllib can't get a URL from an HTTPError
+            # This behaviour changes across Python versions,
+            # so no test cover (it isn't important).
+            except AttributeError:  # pragma: no cover
+                pass
+            self.status_code = err.code
+        else:
+            self.status_code = self.raw.getcode()
+            self.url = self.raw.geturl()
+        self.reason = RESPONSES.get(self.status_code)
+
+        # Parse additional info if request succeeded
+        if not self.error:
+            headers = self.raw.info()
+            self.transfer_encoding = headers.getencoding()
+            self.mimetype = headers.gettype()
+            for key in headers.keys():
+                self.headers[key.lower()] = headers.get(key)
+
+            # Is content gzipped?
+            # Transfer-Encoding appears to not be used in the wild
+            # (contrary to the HTTP standard), but no harm in testing
+            # for it
+            if ('gzip' in headers.get('content-encoding', '') or
+                    'gzip' in headers.get('transfer-encoding', '')):
+                self._gzipped = True
+
+    @property
+    def stream(self):
+        """Whether response is streamed.
+
+        Returns:
+            bool: `True` if response is streamed.
+        """
+        return self._stream
+
+    @stream.setter
+    def stream(self, value):
+        if self._content_loaded:
+            raise RuntimeError("`content` has already been read from "
+                               "this Response.")
+
+        self._stream = value
+
+    def json(self):
+        """Decode response contents as JSON.
+
+        :returns: object decoded from JSON
+        :rtype: :class:`list` / :class:`dict`
+
+        """
+        return json.loads(self.content, self.encoding or 'utf-8')
+
+    @property
+    def encoding(self):
+        """Text encoding of document or ``None``.
+
+        :returns: :class:`str` or ``None``
+
+        """
+        if not self._encoding:
+            self._encoding = self._get_encoding()
+
+        return self._encoding
+
+    @property
+    def content(self):
+        """Raw content of response (i.e. bytes).
+
+        :returns: Body of HTTP response
+        :rtype: :class:`str`
+
+        """
+        if not self._content:
+
+            # Decompress gzipped content
+            if self._gzipped:
+                decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
+                self._content = decoder.decompress(self.raw.read())
+
+            else:
+                self._content = self.raw.read()
+
+            self._content_loaded = True
+
+        return self._content
+
+    @property
+    def text(self):
+        """Unicode-decoded content of response body.
+
+        If no encoding can be determined from HTTP headers or the content
+        itself, the encoded response body will be returned instead.
+
+        :returns: Body of HTTP response
+        :rtype: :class:`unicode` or :class:`str`
+
+        """
+        if self.encoding:
+            return unicodedata.normalize('NFC', unicode(self.content,
+                                                        self.encoding))
+        return self.content
+
+    def iter_content(self, chunk_size=4096, decode_unicode=False):
+        """Iterate over response data.
+
+        .. versionadded:: 1.6
+
+        :param chunk_size: Number of bytes to read into memory
+        :type chunk_size: ``int``
+        :param decode_unicode: Decode to Unicode using detected encoding
+        :type decode_unicode: ``Boolean``
+        :returns: iterator
+
+        """
+        if not self.stream:
+            raise RuntimeError("You cannot call `iter_content` on a "
+                               "Response unless you passed `stream=True`"
+                               " to `get()`/`post()`/`request()`.")
+
+        if self._content_loaded:
+            raise RuntimeError(
+                "`content` has already been read from this Response.")
+
+        def decode_stream(iterator, r):
+
+            decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
+
+            for chunk in iterator:
+                data = decoder.decode(chunk)
+                if data:
+                    yield data
+
+            data = decoder.decode(b'', final=True)
+            if data:  # pragma: no cover
+                yield data
+
+        def generate():
+
+            if self._gzipped:
+                decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+            while True:
+                chunk = self.raw.read(chunk_size)
+                if not chunk:
+                    break
+
+                if self._gzipped:
+                    chunk = decoder.decompress(chunk)
+
+                yield chunk
+
+        chunks = generate()
+
+        if decode_unicode and self.encoding:
+            chunks = decode_stream(chunks, self)
+
+        return chunks
+
+    def save_to_path(self, filepath):
+        """Save retrieved data to file at ``filepath``.
+
+        .. versionadded:: 1.9.6
+
+        :param filepath: Path to save retrieved data.
+            Its parent directory is created if it does not exist.
+        """
+        filepath = os.path.abspath(filepath)
+        dirname = os.path.dirname(filepath)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        # iter_content() refuses to run unless streaming is enabled
+        self.stream = True
+
+        with open(filepath, 'wb') as fileobj:
+            for data in self.iter_content():
+                fileobj.write(data)
+
+    def raise_for_status(self):
+        """Raise stored error if one occurred.
+
+        error will be instance of :class:`urllib2.HTTPError`
+        """
+        if self.error is not None:
+            raise self.error
+        return
+
+    def _get_encoding(self):
+        """Get encoding from HTTP headers or content.
+
+        :returns: lower-cased encoding name or `None`
+        :rtype: ``unicode`` or ``None``
+
+        """
+        headers = self.raw.info()
+        encoding = None
+
+        if headers.getparam('charset'):
+            encoding = headers.getparam('charset')
+
+        # HTTP Content-Type header
+        for param in headers.getplist():
+            if param.startswith('charset='):
+                encoding = param[8:]
+                break
+
+        if not self.stream:  # Try sniffing response content
+            # Encoding declared in document should override HTTP headers
+            if self.mimetype == 'text/html':  # sniff HTML headers
+                m = re.search("""<meta.+charset=["']{0,1}(.+?)["'].*>""",
+                              self.content)
+                if m:
+                    encoding = m.group(1)
+
+            elif (self.mimetype.startswith(('application/', 'text/')) and
+                  'xml' in self.mimetype):
+                # `<\?` matches a literal `<?`; a bare `?` made `<` optional
+                m = re.search(r"""<\?xml.+encoding=["'](.+?)["'][^>]*\?>""",
+                              self.content)
+                if m:
+                    encoding = m.group(1)
+
+        # Format defaults
+        if self.mimetype == 'application/json' and not encoding:
+            # The default encoding for JSON
+            encoding = 'utf-8'
+
+        elif self.mimetype == 'application/xml' and not encoding:
+            # The default for 'application/xml'
+            encoding = 'utf-8'
+
+        if encoding:
+            encoding = encoding.lower()
+
+        return encoding
+
+
+def request(method, url, params=None, data=None, headers=None, cookies=None,
+            files=None, auth=None, timeout=60, allow_redirects=False,
+            stream=False):
+    """Initiate an HTTP(S) request. Returns :class:`Response` object.
+
+    :param method: 'GET' or 'POST'
+    :type method: ``unicode``
+    :param url: URL to open
+    :type url: ``unicode``
+    :param params: mapping of URL parameters
+    :type params: :class:`dict`
+    :param data: mapping of form data ``{'field_name': 'value'}`` or
+        :class:`str`
+    :type data: :class:`dict` or :class:`str`
+    :param headers: HTTP headers
+    :type headers: :class:`dict`
+    :param cookies: cookies to send to server
+    :type cookies: :class:`dict`
+    :param files: files to upload (see below).
+    :type files: :class:`dict`
+    :param auth: username, password
+    :type auth: ``tuple``
+    :param timeout: connection timeout limit in seconds
+    :type timeout: ``int``
+    :param allow_redirects: follow redirections
+    :type allow_redirects: ``Boolean``
+    :param stream: Stream content instead of fetching it all at once.
+    :type stream: ``bool``
+    :returns: :class:`Response` object
+
+
+    The ``files`` argument is a dictionary::
+
+        {'fieldname' : { 'filename': 'blah.txt',
+                         'content': '<binary data>',
+                         'mimetype': 'text/plain'}
+        }
+
+    * ``fieldname`` is the name of the field in the HTML form.
+    * ``mimetype`` is optional. If not provided, :mod:`mimetypes` will
+      be used to guess the mimetype, or ``application/octet-stream``
+      will be used.
+
+    """
+    # TODO: cookies
+    socket.setdefaulttimeout(timeout)
+
+    # Default handlers
+    openers = []
+
+    if not allow_redirects:
+        openers.append(NoRedirectHandler())
+
+    if auth is not None:  # Add authorisation handler
+        username, password = auth
+        password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
+        password_manager.add_password(None, url, username, password)
+        auth_manager = urllib2.HTTPBasicAuthHandler(password_manager)
+        openers.append(auth_manager)
+
+    # Install our custom chain of openers
+    opener = urllib2.build_opener(*openers)
+    urllib2.install_opener(opener)
+
+    if not headers:
+        headers = CaseInsensitiveDictionary()
+    else:
+        headers = CaseInsensitiveDictionary(headers)
+
+    if 'user-agent' not in headers:
+        headers['user-agent'] = USER_AGENT
+
+    # Accept gzip-encoded content
+    encodings = [s.strip() for s in
+                 headers.get('accept-encoding', '').split(',')]
+    if 'gzip' not in encodings:
+        encodings.append('gzip')
+
+    headers['accept-encoding'] = ', '.join(encodings)
+
+    # Force POST by providing an empty data string
+    if method == 'POST' and not data:
+        data = ''
+
+    if files:
+        if not data:
+            data = {}
+        new_headers, data = encode_multipart_formdata(data, files)
+        headers.update(new_headers)
+    elif data and isinstance(data, dict):
+        data = urllib.urlencode(str_dict(data))
+
+    # Make sure everything is encoded text
+    headers = str_dict(headers)
+
+    if isinstance(url, unicode):
+        url = url.encode('utf-8')
+
+    if params:  # GET args (POST args are handled in encode_multipart_formdata)
+
+        scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
+
+        if query:  # Combine query string and `params`
+            url_params = urlparse.parse_qs(query)
+            # `params` take precedence over URL query string
+            url_params.update(params)
+            params = url_params
+
+        query = urllib.urlencode(str_dict(params), doseq=True)
+        url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
+
+    req = urllib2.Request(url, data, headers)
+    return Response(req, stream)
+
+
+def get(url, params=None, headers=None, cookies=None, auth=None,
+        timeout=60, allow_redirects=True, stream=False):
+    """Initiate a GET request. Arguments as for :func:`request`.
+
+    :returns: :class:`Response` instance
+
+    """
+    return request('GET', url, params, headers=headers, cookies=cookies,
+                   auth=auth, timeout=timeout, allow_redirects=allow_redirects,
+                   stream=stream)
+
+
+def post(url, params=None, data=None, headers=None, cookies=None, files=None,
+         auth=None, timeout=60, allow_redirects=False, stream=False):
+    """Initiate a POST request. Arguments as for :func:`request`.
+
+    :returns: :class:`Response` instance
+
+    """
+    return request('POST', url, params, data, headers, cookies, files, auth,
+                   timeout, allow_redirects, stream)
+
+
+def encode_multipart_formdata(fields, files):
+    """Encode form data (``fields``) and ``files`` for POST request.
+
+    :param fields: mapping of ``{name : value}`` pairs for normal form fields.
+    :type fields: :class:`dict`
+    :param files: dictionary of fieldnames/files elements for file data.
+                  See below for details.
+    :type files: :class:`dict` of :class:`dicts`
+    :returns: ``(headers, body)`` ``headers`` is a :class:`dict` of HTTP headers
+    :rtype: 2-tuple ``(dict, str)``
+
+    The ``files`` argument is a dictionary::
+
+        {'fieldname' : { 'filename': 'blah.txt',
+                         'content': '<binary data>',
+                         'mimetype': 'text/plain'}
+        }
+
+    - ``fieldname`` is the name of the field in the HTML form.
+    - ``mimetype`` is optional. If not provided, :mod:`mimetypes` will be used to guess the mimetype, or ``application/octet-stream`` will be used.
+
+    """
+    def get_content_type(filename):
+        """Return or guess mimetype of ``filename``.
+
+        :param filename: filename of file
+        :type filename: unicode/string
+        :returns: mime-type, e.g. ``text/html``
+        :rtype: :class::class:`str`
+
+        """
+
+        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
+
+    boundary = '-----' + ''.join(random.choice(BOUNDARY_CHARS)
+                                 for i in range(30))
+    CRLF = '\r\n'
+    output = []
+
+    # Normal form fields
+    for (name, value) in fields.items():
+        if isinstance(name, unicode):
+            name = name.encode('utf-8')
+        if isinstance(value, unicode):
+            value = value.encode('utf-8')
+        output.append('--' + boundary)
+        output.append('Content-Disposition: form-data; name="%s"' % name)
+        output.append('')
+        output.append(value)
+
+    # Files to upload
+    for name, d in files.items():
+        filename = d[u'filename']
+        content = d[u'content']
+        if u'mimetype' in d:
+            mimetype = d[u'mimetype']
+        else:
+            mimetype = get_content_type(filename)
+        if isinstance(name, unicode):
+            name = name.encode('utf-8')
+        if isinstance(filename, unicode):
+            filename = filename.encode('utf-8')
+        if isinstance(mimetype, unicode):
+            mimetype = mimetype.encode('utf-8')
+        output.append('--' + boundary)
+        output.append('Content-Disposition: form-data; '
+                      'name="%s"; filename="%s"' % (name, filename))
+        output.append('Content-Type: %s' % mimetype)
+        output.append('')
+        output.append(content)
+
+    output.append('--' + boundary + '--')
+    output.append('')
+    body = CRLF.join(output)
+    headers = {
+        'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
+        'Content-Length': str(len(body)),
+    }
+    return (headers, body)
diff --git a/src/workflow/workflow.py b/src/workflow/workflow.py
new file mode 100644
index 0000000..d824b1a
--- /dev/null
+++ b/src/workflow/workflow.py
@@ -0,0 +1,2953 @@
+# encoding: utf-8
+#
+# Copyright (c) 2014 Dean Jackson <deanishe@deanishe.net>
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2014-02-15
+#
+
+"""The :class:`Workflow` object is the main interface to this library.
+
+:class:`Workflow` is targeted at Alfred 2. Use
+:class:`~workflow.workflow3.Workflow3` if you want to use Alfred 3's new
+features, such as :ref:`workflow variables <workflow-variables>` or
+more powerful modifiers.
+
+See :ref:`setup` in the :ref:`user-manual` for an example of how to set
+up your Python script to best utilise the :class:`Workflow` object.
+
+"""
+
+from __future__ import print_function, unicode_literals
+
+import binascii
+from contextlib import contextmanager
+import cPickle
+from copy import deepcopy
+import errno
+import json
+import logging
+import logging.handlers
+import os
+import pickle
+import plistlib
+import re
+import shutil
+import signal
+import string
+import subprocess
+import sys
+import time
+import unicodedata
+
+try:
+    import xml.etree.cElementTree as ET
+except ImportError:  # pragma: no cover
+    import xml.etree.ElementTree as ET
+
+
+#: Sentinel for properties that haven't been set yet (and for which
+#: ``None`` may itself be a correct value)
+UNSET = object()
+
+####################################################################
+# Standard system icons
+####################################################################
+
+# These icons are default OS X icons. They are super-high quality, and
+# will be familiar to users.
+# This library uses `ICON_ERROR` when a workflow dies in flames, so
+# in my own workflows, I use `ICON_WARNING` for less fatal errors
+# (e.g. bad user input, no results etc.)
+
+# The system icons are all in this directory. There are many more
+# icons there than are listed here.
+
+ICON_ROOT = '/System/Library/CoreServices/CoreTypes.bundle/Contents/Resources'
+
+ICON_ACCOUNT = os.path.join(ICON_ROOT, 'Accounts.icns')
+ICON_BURN = os.path.join(ICON_ROOT, 'BurningIcon.icns')
+ICON_CLOCK = os.path.join(ICON_ROOT, 'Clock.icns')
+ICON_COLOR = os.path.join(ICON_ROOT, 'ProfileBackgroundColor.icns')
+ICON_COLOUR = ICON_COLOR  # Queen's English, if you please
+ICON_EJECT = os.path.join(ICON_ROOT, 'EjectMediaIcon.icns')
+# Shown when a workflow throws an error
+ICON_ERROR = os.path.join(ICON_ROOT, 'AlertStopIcon.icns')
+ICON_FAVORITE = os.path.join(ICON_ROOT, 'ToolbarFavoritesIcon.icns')
+ICON_FAVOURITE = ICON_FAVORITE  # alias, same icon as ICON_FAVORITE
+ICON_GROUP = os.path.join(ICON_ROOT, 'GroupIcon.icns')
+ICON_HELP = os.path.join(ICON_ROOT, 'HelpIcon.icns')
+ICON_HOME = os.path.join(ICON_ROOT, 'HomeFolderIcon.icns')
+ICON_INFO = os.path.join(ICON_ROOT, 'ToolbarInfo.icns')
+ICON_NETWORK = os.path.join(ICON_ROOT, 'GenericNetworkIcon.icns')
+ICON_NOTE = os.path.join(ICON_ROOT, 'AlertNoteIcon.icns')
+ICON_SETTINGS = os.path.join(ICON_ROOT, 'ToolbarAdvanced.icns')
+ICON_SWIRL = os.path.join(ICON_ROOT, 'ErasingIcon.icns')
+ICON_SWITCH = os.path.join(ICON_ROOT, 'General.icns')
+ICON_SYNC = os.path.join(ICON_ROOT, 'Sync.icns')
+ICON_TRASH = os.path.join(ICON_ROOT, 'TrashIcon.icns')
+ICON_USER = os.path.join(ICON_ROOT, 'UserIcon.icns')
+ICON_WARNING = os.path.join(ICON_ROOT, 'AlertCautionIcon.icns')
+ICON_WEB = os.path.join(ICON_ROOT, 'BookmarkIcon.icns')
+
+####################################################################
+# Non-ASCII to ASCII diacritic folding (character -> ASCII replacement).
+# Used by `fold_to_ascii` method
+####################################################################
+
+ASCII_REPLACEMENTS = {
+    'À': 'A',
+    'Á': 'A',
+    'Â': 'A',
+    'Ã': 'A',
+    'Ä': 'A',
+    'Å': 'A',
+    'Æ': 'AE',
+    'Ç': 'C',
+    'È': 'E',
+    'É': 'E',
+    'Ê': 'E',
+    'Ë': 'E',
+    'Ì': 'I',
+    'Í': 'I',
+    'Î': 'I',
+    'Ï': 'I',
+    'Ð': 'D',
+    'Ñ': 'N',
+    'Ò': 'O',
+    'Ó': 'O',
+    'Ô': 'O',
+    'Õ': 'O',
+    'Ö': 'O',
+    'Ø': 'O',
+    'Ù': 'U',
+    'Ú': 'U',
+    'Û': 'U',
+    'Ü': 'U',
+    'Ý': 'Y',
+    'Þ': 'Th',
+    'ß': 'ss',
+    'à': 'a',
+    'á': 'a',
+    'â': 'a',
+    'ã': 'a',
+    'ä': 'a',
+    'å': 'a',
+    'æ': 'ae',
+    'ç': 'c',
+    'è': 'e',
+    'é': 'e',
+    'ê': 'e',
+    'ë': 'e',
+    'ì': 'i',
+    'í': 'i',
+    'î': 'i',
+    'ï': 'i',
+    'ð': 'd',
+    'ñ': 'n',
+    'ò': 'o',
+    'ó': 'o',
+    'ô': 'o',
+    'õ': 'o',
+    'ö': 'o',
+    'ø': 'o',
+    'ù': 'u',
+    'ú': 'u',
+    'û': 'u',
+    'ü': 'u',
+    'ý': 'y',
+    'þ': 'th',
+    'ÿ': 'y',
+    'Ł': 'L',
+    'ł': 'l',
+    'Ń': 'N',
+    'ń': 'n',
+    'Ņ': 'N',
+    'ņ': 'n',
+    'Ň': 'N',
+    'ň': 'n',
+    'Ŋ': 'NG',  # capital eng folds to upper case ...
+    'ŋ': 'ng',  # ... and small eng to lower case
+    'Ō': 'O',
+    'ō': 'o',
+    'Ŏ': 'O',
+    'ŏ': 'o',
+    'Ő': 'O',
+    'ő': 'o',
+    'Œ': 'OE',
+    'œ': 'oe',
+    'Ŕ': 'R',
+    'ŕ': 'r',
+    'Ŗ': 'R',
+    'ŗ': 'r',
+    'Ř': 'R',
+    'ř': 'r',
+    'Ś': 'S',
+    'ś': 's',
+    'Ŝ': 'S',
+    'ŝ': 's',
+    'Ş': 'S',
+    'ş': 's',
+    'Š': 'S',
+    'š': 's',
+    'Ţ': 'T',
+    'ţ': 't',
+    'Ť': 'T',
+    'ť': 't',
+    'Ŧ': 'T',
+    'ŧ': 't',
+    'Ũ': 'U',
+    'ũ': 'u',
+    'Ū': 'U',
+    'ū': 'u',
+    'Ŭ': 'U',
+    'ŭ': 'u',
+    'Ů': 'U',
+    'ů': 'u',
+    'Ű': 'U',
+    'ű': 'u',
+    'Ŵ': 'W',
+    'ŵ': 'w',
+    'Ŷ': 'Y',
+    'ŷ': 'y',
+    'Ÿ': 'Y',
+    'Ź': 'Z',
+    'ź': 'z',
+    'Ż': 'Z',
+    'ż': 'z',
+    'Ž': 'Z',
+    'ž': 'z',
+    'ſ': 's',
+    'Α': 'A',
+    'Β': 'B',
+    'Γ': 'G',
+    'Δ': 'D',
+    'Ε': 'E',
+    'Ζ': 'Z',
+    'Η': 'E',
+    'Θ': 'Th',
+    'Ι': 'I',
+    'Κ': 'K',
+    'Λ': 'L',
+    'Μ': 'M',
+    'Ν': 'N',
+    'Ξ': 'Ks',
+    'Ο': 'O',
+    'Π': 'P',
+    'Ρ': 'R',
+    'Σ': 'S',
+    'Τ': 'T',
+    'Υ': 'U',
+    'Φ': 'Ph',
+    'Χ': 'Kh',
+    'Ψ': 'Ps',
+    'Ω': 'O',
+    'α': 'a',
+    'β': 'b',
+    'γ': 'g',
+    'δ': 'd',
+    'ε': 'e',
+    'ζ': 'z',
+    'η': 'e',
+    'θ': 'th',
+    'ι': 'i',
+    'κ': 'k',
+    'λ': 'l',
+    'μ': 'm',
+    'ν': 'n',
+    'ξ': 'x',
+    'ο': 'o',
+    'π': 'p',
+    'ρ': 'r',
+    'ς': 's',
+    'σ': 's',
+    'τ': 't',
+    'υ': 'u',
+    'φ': 'ph',
+    'χ': 'kh',
+    'ψ': 'ps',
+    'ω': 'o',
+    'А': 'A',
+    'Б': 'B',
+    'В': 'V',
+    'Г': 'G',
+    'Д': 'D',
+    'Е': 'E',
+    'Ж': 'Zh',
+    'З': 'Z',
+    'И': 'I',
+    'Й': 'I',
+    'К': 'K',
+    'Л': 'L',
+    'М': 'M',
+    'Н': 'N',
+    'О': 'O',
+    'П': 'P',
+    'Р': 'R',
+    'С': 'S',
+    'Т': 'T',
+    'У': 'U',
+    'Ф': 'F',
+    'Х': 'Kh',
+    'Ц': 'Ts',
+    'Ч': 'Ch',
+    'Ш': 'Sh',
+    'Щ': 'Shch',
+    'Ъ': "'",
+    'Ы': 'Y',
+    'Ь': "'",
+    'Э': 'E',
+    'Ю': 'Iu',
+    'Я': 'Ia',
+    'а': 'a',
+    'б': 'b',
+    'в': 'v',
+    'г': 'g',
+    'д': 'd',
+    'е': 'e',
+    'ж': 'zh',
+    'з': 'z',
+    'и': 'i',
+    'й': 'i',
+    'к': 'k',
+    'л': 'l',
+    'м': 'm',
+    'н': 'n',
+    'о': 'o',
+    'п': 'p',
+    'р': 'r',
+    'с': 's',
+    'т': 't',
+    'у': 'u',
+    'ф': 'f',
+    'х': 'kh',
+    'ц': 'ts',
+    'ч': 'ch',
+    'ш': 'sh',
+    'щ': 'shch',
+    'ъ': "'",
+    'ы': 'y',
+    'ь': "'",
+    'э': 'e',
+    'ю': 'iu',
+    'я': 'ia',
+    # 'ᴀ': '',
+    # 'ᴁ': '',
+    # 'ᴂ': '',
+    # 'ᴃ': '',
+    # 'ᴄ': '',
+    # 'ᴅ': '',
+    # 'ᴆ': '',
+    # 'ᴇ': '',
+    # 'ᴈ': '',
+    # 'ᴉ': '',
+    # 'ᴊ': '',
+    # 'ᴋ': '',
+    # 'ᴌ': '',
+    # 'ᴍ': '',
+    # 'ᴎ': '',
+    # 'ᴏ': '',
+    # 'ᴐ': '',
+    # 'ᴑ': '',
+    # 'ᴒ': '',
+    # 'ᴓ': '',
+    # 'ᴔ': '',
+    # 'ᴕ': '',
+    # 'ᴖ': '',
+    # 'ᴗ': '',
+    # 'ᴘ': '',
+    # 'ᴙ': '',
+    # 'ᴚ': '',
+    # 'ᴛ': '',
+    # 'ᴜ': '',
+    # 'ᴝ': '',
+    # 'ᴞ': '',
+    # 'ᴟ': '',
+    # 'ᴠ': '',
+    # 'ᴡ': '',
+    # 'ᴢ': '',
+    # 'ᴣ': '',
+    # 'ᴤ': '',
+    # 'ᴥ': '',
+    'ᴦ': 'G',
+    'ᴧ': 'L',
+    'ᴨ': 'P',
+    'ᴩ': 'R',
+    'ᴪ': 'PS',
+    'ẞ': 'Ss',
+    'Ỳ': 'Y',
+    'ỳ': 'y',
+    'Ỵ': 'Y',
+    'ỵ': 'y',
+    'Ỹ': 'Y',
+    'ỹ': 'y',
+}
+
+####################################################################
+# Mapping of "smart" punctuation to its plain ASCII equivalent
+####################################################################
+
+DUMB_PUNCTUATION = {
+    '‘': "'",  # curly and low-9 single quotes
+    '’': "'",
+    '‚': "'",
+    '“': '"',  # curly and low-9 double quotes
+    '”': '"',
+    '„': '"',
+    '–': '-',  # en dash and em dash
+    '—': '-',
+}
+
+
+####################################################################
+# Used by `Workflow.filter`
+####################################################################
+
+# Anchor characters in a name
+#: Characters that mark the start of a "word" in CamelCase
+INITIALS = string.ascii_uppercase + string.digits
+
+#: Split a string on every character that is not an ASCII letter or digit
+split_on_delimiters = re.compile('[^a-zA-Z0-9]').split
+
+# Match filter flags (bit flags: one bit each, combinable with ``|``)
+#: Match items that start with ``query``
+MATCH_STARTSWITH = 1 << 0
+#: Match items whose capital letters start with ``query``
+MATCH_CAPITALS = 1 << 1
+#: Match items with a component "word" that matches ``query``
+MATCH_ATOM = 1 << 2
+#: Match items whose initials (based on atoms) start with ``query``
+MATCH_INITIALS_STARTSWITH = 1 << 3
+#: Match items whose initials (based on atoms) contain ``query``
+MATCH_INITIALS_CONTAIN = 1 << 4
+#: Combination of :const:`MATCH_INITIALS_STARTSWITH` and
+#: :const:`MATCH_INITIALS_CONTAIN`
+MATCH_INITIALS = MATCH_INITIALS_STARTSWITH | MATCH_INITIALS_CONTAIN
+#: Match items if ``query`` is a substring
+MATCH_SUBSTRING = 1 << 5
+#: Match items if all characters in ``query`` appear in the item in order
+MATCH_ALLCHARS = 1 << 6
+#: Combination of all other ``MATCH_*`` constants (the seven low bits)
+MATCH_ALL = (1 << 7) - 1
+
+
+####################################################################
+# Used by `Workflow.check_update`
+####################################################################
+
+# Default interval, in days, between checks for workflow updates
+DEFAULT_UPDATE_FREQUENCY = 1
+
+
+####################################################################
+# Lockfile and Keychain access errors
+####################################################################
+
+class AcquisitionError(Exception):
+    """Raised by :meth:`LockFile.acquire` when acquiring the lock times out."""
+
+
+class KeychainError(Exception):
+    """Base class for Keychain errors; raised itself for unknown ones.
+
+    Raised by methods :meth:`Workflow.save_password`,
+    :meth:`Workflow.get_password` and :meth:`Workflow.delete_password`
+    when the ``security`` CLI app returns an unknown error code.
+    """
+
+
+class PasswordNotFound(KeychainError):
+    """Raised when the requested password is not in the Keychain.
+
+    Raised by method :meth:`Workflow.get_password` when ``account``
+    is unknown to the Keychain.
+    """
+
+
+class PasswordExists(KeychainError):
+    """Raised when trying to overwrite an existing account password.
+
+    You should never receive this error: it is used internally
+    by the :meth:`Workflow.save_password` method to find out whether it
+    must delete the old password first (a Keychain implementation detail).
+    """
+
+
+####################################################################
+# Helper functions
+####################################################################
+
+def isascii(text):
+    """Test if ``text`` contains only ASCII characters.
+
+    :param text: text to test for ASCII-ness
+    :type text: ``unicode`` (a non-ASCII byte string also gives ``False``)
+    :returns: ``True`` if ``text`` contains only ASCII characters
+    :rtype: ``Boolean``
+
+    """
+    try:
+        text.encode('ascii')
+    except UnicodeError:  # Python 2 ``str`` raises UnicodeDecodeError here
+        return False
+    return True
+
+
+####################################################################
+# Implementation classes
+####################################################################
+
+class SerializerManager(object):
+    """Registry that maps names to serializer objects.
+
+    .. versionadded:: 1.8
+
+    A ready-configured instance of this class is available at
+    ``workflow.manager``.
+
+    Call :meth:`register()` to add a new serializer (or to replace
+    an existing one). Registered names can then be specified when
+    calling the data storage methods of :class:`Workflow`.
+
+    See :ref:`manual-serialization` and :ref:`manual-persistent-data`
+    for further information.
+
+    """
+
+    def __init__(self):
+        """Start with an empty name -> serializer mapping."""
+        self._serializers = {}
+
+    def register(self, name, serializer):
+        """Store ``serializer`` in the registry under ``name``.
+
+        Raises :class:`AttributeError` if ``serializer`` is invalid,
+        i.e. lacks a ``load`` or ``dump`` attribute.
+
+        .. note::
+
+            ``name`` will be used as the file extension of the saved files.
+
+        :param name: Name to register ``serializer`` under
+        :type name: ``unicode`` or ``str``
+        :param serializer: object with ``load()`` and ``dump()`` methods
+
+        """
+        # The lookup itself is the validation: a missing attribute raises
+        for required in ('load', 'dump'):
+            getattr(serializer, required)
+
+        self._serializers[name] = serializer
+
+    def serializer(self, name):
+        """Look up the serializer registered as ``name``.
+
+        :param name: Name of serializer to return
+        :type name: ``unicode`` or ``str``
+        :returns: serializer object, or ``None`` when nothing is
+            registered under ``name``.
+
+        """
+        return self._serializers.get(name)
+
+    def unregister(self, name):
+        """Drop the serializer registered as ``name`` and return it.
+
+        Raises a :class:`ValueError` if there is no such registered
+        serializer.
+
+        :param name: Name of serializer to remove
+        :type name: ``unicode`` or ``str``
+        :returns: serializer object
+
+        """
+        try:
+            removed = self._serializers.pop(name)
+        except KeyError:
+            raise ValueError('No such serializer registered : {0}'.format(
+                             name))
+
+        return removed
+
+    @property
+    def serializers(self):
+        """Sorted list of the names of all registered serializers."""
+        names = list(self._serializers)
+        return sorted(names)
+
+
+class JSONSerializer(object):
+    """Serializer backed by :mod:`json` with fixed ``indent``/``encoding``.
+
+    .. versionadded:: 1.8
+
+    Pick this serializer when the data files should be human-readable.
+    JSON supports fewer Python objects than ``cPickle``/``pickle``,
+    so take care over which data you serialize with it.
+
+    """
+
+    @classmethod
+    def load(cls, file_obj):
+        """Deserialize and return the contents of an open JSON file.
+
+        .. versionadded:: 1.8
+
+        :param file_obj: file handle
+        :type file_obj: ``file`` object
+        :returns: object loaded from JSON file
+        :rtype: object
+        """
+        loaded = json.load(file_obj)
+        return loaded
+
+    @classmethod
+    def dump(cls, obj, file_obj):
+        """Write ``obj`` as JSON (``indent=2``, UTF-8) to an open file.
+
+        .. versionadded:: 1.8
+
+        :param obj: Python object to serialize
+        :type obj: JSON-serializable data structure
+        :param file_obj: file handle
+        :type file_obj: ``file`` object
+
+        """
+        json.dump(obj, file_obj, encoding='utf-8', indent=2)
+
+
+class CPickleSerializer(object):
+    """Serializer backed by :mod:`cPickle` using the highest protocol.
+
+    .. versionadded:: 1.8
+
+    This is the default serializer and the best combination of speed and
+    flexibility.
+
+    """
+
+    @classmethod
+    def load(cls, file_obj):
+        """Unpickle and return the object stored in an open pickle file.
+
+        .. versionadded:: 1.8
+
+        :param file_obj: file handle
+        :type file_obj: ``file`` object
+        :returns: object loaded from pickle file
+        :rtype: object
+        """
+        loaded = cPickle.load(file_obj)
+        return loaded
+
+    @classmethod
+    def dump(cls, obj, file_obj):
+        """Pickle ``obj`` into an open file using the highest protocol.
+
+        .. versionadded:: 1.8
+
+        :param obj: Python object to serialize
+        :type obj: Python object
+        :param file_obj: file handle
+        :type file_obj: ``file`` object
+
+        """
+        cPickle.dump(obj, file_obj, protocol=cPickle.HIGHEST_PROTOCOL)
+
+
+class PickleSerializer(object):
+    """Serializer backed by :mod:`pickle` using the highest protocol.
+
+    .. versionadded:: 1.8
+
+    Use this serializer if you need to add custom pickling.
+
+    """
+
+    @classmethod
+    def load(cls, file_obj):
+        """Unpickle and return the object stored in an open pickle file.
+
+        .. versionadded:: 1.8
+
+        :param file_obj: file handle
+        :type file_obj: ``file`` object
+        :returns: object loaded from pickle file
+        :rtype: object
+        """
+        loaded = pickle.load(file_obj)
+        return loaded
+
+    @classmethod
+    def dump(cls, obj, file_obj):
+        """Pickle ``obj`` into an open file using the highest protocol.
+
+        .. versionadded:: 1.8
+
+        :param obj: Python object to serialize
+        :type obj: Python object
+        :param file_obj: file handle
+        :type file_obj: ``file`` object
+
+        """
+        pickle.dump(obj, file_obj, protocol=pickle.HIGHEST_PROTOCOL)
+
+
+# Module-level default manager, pre-loaded with the built-in serializers
+manager = SerializerManager()
+manager.register('cpickle', CPickleSerializer)
+manager.register('pickle', PickleSerializer)
+manager.register('json', JSONSerializer)
+
+
+class Item(object):
+    """A single feedback item for Alfred.
+
+    Produces the Alfred-compliant XML for one item.
+
+    You normally won't instantiate this class yourself: items are
+    created via :meth:`Workflow.add_item`. See :meth:`~Workflow.add_item`
+    for details of arguments.
+
+    """
+
+    def __init__(self, title, subtitle='', modifier_subtitles=None,
+                 arg=None, autocomplete=None, valid=False, uid=None,
+                 icon=None, icontype=None, type=None, largetext=None,
+                 copytext=None, quicklookurl=None):
+        """Same arguments as :meth:`Workflow.add_item`."""
+        # Text fields
+        self.title = title
+        self.subtitle = subtitle
+        self.modifier_subtitles = modifier_subtitles or {}
+        self.largetext = largetext
+        self.copytext = copytext
+        # Everything else
+        self.arg = arg
+        self.autocomplete = autocomplete
+        self.valid = valid
+        self.uid = uid
+        self.type = type
+        self.icon = icon
+        self.icontype = icontype
+        self.quicklookurl = quicklookurl
+
+    @property
+    def elem(self):
+        """Build the XML element describing this item.
+
+        :returns: :class:`ElementTree.Element <xml.etree.ElementTree.Element>`
+            instance for this :class:`Item` instance.
+
+        """
+        # Attributes on <item> element
+        attr = {'valid': 'yes' if self.valid else 'no'}
+        # Only ``None`` means "no autocomplete": the empty string is a
+        # useful value, as TABing the result will revert the query back
+        # to just the keyword
+        if self.autocomplete is not None:
+            attr['autocomplete'] = self.autocomplete
+
+        # Optional attributes, left out when falsy
+        for name in ('uid', 'type'):
+            value = getattr(self, name, None)
+            if value:
+                attr[name] = value
+
+        root = ET.Element('item', attr)
+        title = ET.SubElement(root, 'title')
+        title.text = self.title
+        subtitle = ET.SubElement(root, 'subtitle')
+        subtitle.text = self.subtitle
+
+        # One <subtitle mod="..."> per modifier that has a subtitle
+        for mod in ('cmd', 'ctrl', 'alt', 'shift', 'fn'):
+            if mod not in self.modifier_subtitles:
+                continue
+            mod_elem = ET.SubElement(root, 'subtitle', {'mod': mod})
+            mod_elem.text = self.modifier_subtitles[mod]
+
+        # arg goes in a child element, not an attribute on <item>, as
+        # that's more flexible (newlines aren't allowed in attributes)
+        if self.arg:
+            ET.SubElement(root, 'arg').text = self.arg
+
+        # Icon, with a ``type`` attribute only if icontype is set
+        if self.icon:
+            icon_attr = dict(type=self.icontype) if self.icontype else {}
+            ET.SubElement(root, 'icon', icon_attr).text = self.icon
+
+        # <text> elements, emitted in this order when truthy
+        texts = (
+            ('largetype', self.largetext),
+            ('copy', self.copytext),
+        )
+        for text_type, text in texts:
+            if text:
+                ET.SubElement(root, 'text', {'type': text_type}).text = text
+
+        # Optional Quick Look URL
+        if self.quicklookurl:
+            ET.SubElement(root, 'quicklookurl').text = self.quicklookurl
+
+        return root
+
+
+class LockFile(object):
+    """Lock a path via a ``<path>.lock`` file; usable as a context manager."""
+
+    def __init__(self, protected_path, timeout=0, delay=0.05):
+        """Create new :class:`LockFile` object."""
+        self.lockfile = protected_path + '.lock'
+        self.timeout = timeout
+        self.delay = delay
+        self._locked = False
+
+    @property
+    def locked(self):
+        """`True` if file is locked by this instance."""
+        return self._locked
+
+    def acquire(self, blocking=True):
+        """Acquire the lock by exclusively creating `self.lockfile`.
+
+        Return ``False`` if the lock is in use and ``blocking`` is
+        ``False``. Otherwise retry every `self.delay` seconds until the
+        lock is acquired (return ``True``) or a non-zero `self.timeout`
+        is exceeded (raise :class:`AcquisitionError`).
+        """
+        start = time.time()
+        while True:
+            try:
+                fd = os.open(self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR)
+                with os.fdopen(fd, 'w') as lock_file:
+                    lock_file.write('{0}'.format(os.getpid()))
+                break
+            except OSError as err:
+                if err.errno != errno.EEXIST:  # pragma: no cover
+                    raise
+                if self.timeout and (time.time() - start) >= self.timeout:
+                    raise AcquisitionError('Lock acquisition timed out.')
+                if not blocking:
+                    return False
+                time.sleep(self.delay)
+
+        self._locked = True
+        return True
+
+    def release(self):
+        """Delete `self.lockfile` if (and only if) this instance holds it."""
+        if not self._locked:  # not ours: leave another holder's lock alone
+            return
+        self._locked = False
+        os.unlink(self.lockfile)
+
+    def __enter__(self):
+        """Acquire lock."""
+        self.acquire()
+        return self
+
+    def __exit__(self, typ, value, traceback):
+        """Release lock."""
+        self.release()
+
+    def __del__(self):
+        """Clear up `self.lockfile`."""
+        if self._locked:  # pragma: no cover
+            self.release()
+
+
+@contextmanager
+def atomic_writer(file_path, mode):
+    """Atomic file writer.
+
+    :param file_path: path of file to write to.
+    :type file_path: ``unicode``
+    :param mode: same as for :func:`open`
+    :type mode: string
+
+    .. versionadded:: 1.12
+
+    Context manager that writes to a temporary file and only moves it
+    over ``file_path`` once the ``with`` body has finished without error.
+
+    """
+    temp_file_path = file_path + '.aw.temp'
+    try:
+        with open(temp_file_path, mode) as file_obj:
+            yield file_obj
+        # Rename only after the file is closed, so all data is flushed
+        os.rename(temp_file_path, file_path)
+    finally:
+        try:
+            os.remove(temp_file_path)
+        except (OSError, IOError):
+            pass  # nothing left to remove after a successful rename
+
+
+class uninterruptible(object):
+    """Decorator that postpones SIGTERM until wrapped function is complete.
+
+    .. versionadded:: 1.12
+
+    Since version 2.7, Alfred allows Script Filters to be killed. If
+    your workflow is killed in the middle of critical code (e.g.
+    writing data to disk), this may corrupt your workflow's data.
+
+    Use this decorator to wrap critical functions that *must* complete.
+    A SIGTERM received while the wrapped function is executing is
+    handled after it returns. The function's return value is passed
+    through and the previous handler is restored even if it raises.
+
+    Alfred-Workflow uses this internally to ensure its settings, data
+    and cache writes complete.
+
+    .. important::
+
+        This decorator is NOT thread-safe.
+
+    """
+
+    def __init__(self, func, class_name=''):
+        """Decorate `func` (``class_name`` is accepted but unused)."""
+        self.func = func
+        self._caught_signal = None
+
+    def signal_handler(self, signum, frame):
+        """Called when process receives SIGTERM."""
+        self._caught_signal = (signum, frame)
+
+    def __call__(self, *args, **kwargs):
+        """Trap ``SIGTERM``, call wrapped function and return its result."""
+        self._caught_signal = None
+        self.old_signal_handler = signal.getsignal(signal.SIGTERM)
+        signal.signal(signal.SIGTERM, self.signal_handler)
+        try:
+            result = self.func(*args, **kwargs)
+        finally:
+            # Restore old signal handler, even if `self.func` raised
+            signal.signal(signal.SIGTERM, self.old_signal_handler)
+
+        # Handle any signal caught during execution
+        if self._caught_signal is not None:
+            signum, frame = self._caught_signal
+            if callable(self.old_signal_handler):
+                self.old_signal_handler(signum, frame)
+            elif self.old_signal_handler == signal.SIG_DFL:
+                sys.exit(0)
+        return result
+
+    def __get__(self, obj=None, klass=None):
+        """Decorator API."""
+        return self.__class__(self.func.__get__(obj, klass),
+                              klass.__name__)
+
+
+class Settings(dict):
+    """A dictionary that saves itself when changed.
+
+    Dictionary keys & values will be saved as a JSON file
+    at ``filepath``. If the file does not exist, the dictionary
+    (and settings file) will be initialised with ``defaults``.
+
+    :param filepath: where to save the settings
+    :type filepath: :class:`unicode`
+    :param defaults: dict of default settings
+    :type defaults: :class:`dict`
+
+
+    An appropriate instance is provided by :class:`Workflow` instances at
+    :attr:`Workflow.settings`.
+
+    """
+
+    def __init__(self, filepath, defaults=None):
+        """Create new :class:`Settings` object."""
+        super(Settings, self).__init__()
+        self._filepath = filepath
+        self._nosave = False
+        self._original = {}
+        if os.path.exists(self._filepath):
+            self._load()
+        elif defaults:
+            for key, val in defaults.items():
+                self[key] = val
+            self.save()  # save default settings
+
+    def _load(self):
+        """Load cached settings from JSON file `self._filepath`."""
+        self._nosave = True  # `update()` below must not write the file back
+        with open(self._filepath, 'rb') as file_obj:
+            d = dict(json.load(file_obj, encoding='utf-8').items())
+        self.update(d)
+        self._original = deepcopy(d)
+        self._nosave = False
+
+    @uninterruptible
+    def save(self):
+        """Save settings to JSON file specified in ``self._filepath``.
+
+        If you're using this class via :attr:`Workflow.settings`, which
+        you probably are, ``self._filepath`` will be ``settings.json``
+        in your workflow's data directory (see :attr:`~Workflow.datadir`).
+        """
+        if self._nosave:
+            return
+        data = dict(self)
+        with LockFile(self._filepath):
+            with atomic_writer(self._filepath, 'wb') as file_obj:
+                json.dump(data, file_obj, sort_keys=True, indent=2,
+                          encoding='utf-8')
+
+    # dict methods
+    def __setitem__(self, key, value):
+        """Implement :class:`dict` interface."""
+        # Skip the write only when the value matches both the live entry
+        # and the snapshot taken at load time (which catches in-place
+        # mutation). The sentinel keeps a missing key != ``None``.
+        missing = object()
+        if (self.get(key, missing) != value or
+                self._original.get(key, missing) != value):
+            super(Settings, self).__setitem__(key, value)
+            self.save()
+
+    def __delitem__(self, key):
+        """Implement :class:`dict` interface."""
+        super(Settings, self).__delitem__(key)
+        self.save()
+
+    def update(self, *args, **kwargs):
+        """Override :class:`dict` method to save on update."""
+        super(Settings, self).update(*args, **kwargs)
+        self.save()
+
+    def setdefault(self, key, value=None):
+        """Override :class:`dict` method to save on update."""
+        ret = super(Settings, self).setdefault(key, value)
+        self.save()
+        return ret
+
+
+class Workflow(object):
+    """Create new :class:`Workflow` instance.
+
+    :param default_settings: default workflow settings. If no settings file
+        exists, :class:`Workflow.settings` will be pre-populated with
+        ``default_settings``.
+    :type default_settings: :class:`dict`
+    :param update_settings: settings for updating your workflow from GitHub.
+        This must be a :class:`dict` that contains ``github_slug`` and
+        ``version`` keys. ``github_slug`` is of the form ``username/repo``
+        and ``version`` **must** correspond to the tag of a release. The
+        boolean ``prereleases`` key is optional and if ``True`` will
+        override the :ref:`magic argument <magic-arguments>` preference.
+        This is only recommended when the installed workflow is a pre-release.
+        See :ref:`updates` for more information.
+    :type update_settings: :class:`dict`
+    :param input_encoding: encoding of command line arguments
+    :type input_encoding: :class:`unicode`
+    :param normalization: normalisation to apply to CLI args.
+        See :meth:`Workflow.decode` for more details.
+    :type normalization: :class:`unicode`
+    :param capture_args: capture and act on ``workflow:*`` arguments. See
+        :ref:`Magic arguments <magic-arguments>` for details.
+    :type capture_args: :class:`Boolean`
+    :param libraries: sequence of paths to directories containing
+        libraries. These paths will be prepended to ``sys.path``.
+    :type libraries: :class:`tuple` or :class:`list`
+    :param help_url: URL to webpage where a user can ask for help with
+        the workflow, report bugs, etc. This could be the GitHub repo
+        or a page on AlfredForum.com. If your workflow throws an error,
+        this URL will be displayed in the log and Alfred's debugger. It can
+        also be opened directly in a web browser with the ``workflow:help``
+        :ref:`magic argument <magic-arguments>`.
+    :type help_url: :class:`unicode` or :class:`str`
+
+    """
+
+    # Which class to use to generate feedback items. You probably
+    # won't want to change this
+    item_class = Item
+
+    def __init__(self, default_settings=None, update_settings=None,
+                 input_encoding='utf-8', normalization='NFC',
+                 capture_args=True, libraries=None,
+                 help_url=None):
+        """Create new :class:`Workflow` object."""
+        self._default_settings = default_settings or {}
+        self._update_settings = update_settings or {}
+        self._input_encoding = input_encoding
+        self._normalizsation = normalization
+        self._capture_args = capture_args
+        self.help_url = help_url
+        self._workflowdir = None
+        self._settings_path = None
+        self._settings = None
+        self._bundleid = None
+        self._debugging = None
+        self._name = None
+        self._cache_serializer = 'cpickle'
+        self._data_serializer = 'cpickle'
+        self._info = None
+        self._info_loaded = False
+        self._logger = None
+        self._items = []
+        self._alfred_env = None
+        # Version number of the workflow
+        self._version = UNSET
+        # Version from last workflow run
+        self._last_version_run = UNSET
+        # Cache for regex patterns created for filter keys
+        self._search_pattern_cache = {}
+        # Magic arguments
+        #: The prefix for all magic arguments. Default is ``workflow:``
+        self.magic_prefix = 'workflow:'
+        #: Mapping of available magic arguments. The built-in magic
+        #: arguments are registered by default. To add your own magic arguments
+        #: (or override built-ins), add a key:value pair where the key is
+        #: what the user should enter (prefixed with :attr:`magic_prefix`)
+        #: and the value is a callable that will be called when the argument
+        #: is entered. If you would like to display a message in Alfred, the
+        #: function should return a ``unicode`` string.
+        #:
+        #: By default, the magic arguments documented
+        #: :ref:`here <magic-arguments>` are registered.
+        self.magic_arguments = {}
+
+        self._register_default_magic()
+
+        if libraries:
+            sys.path = list(libraries) + sys.path  # tuple or list accepted
+
+    ####################################################################
+    # API methods
+    ####################################################################
+
+    # info.plist contents and alfred_* environment variables  ----------
+
+    @property
+    def alfred_version(self):
+        """Alfred's version as a :class:`~workflow.update.Version` object."""
+        from update import Version
+        return Version(self.alfred_env.get('version'))
+
+    @property
+    def alfred_env(self):
+        """Dict of Alfred's environment variables, ``alfred_`` prefix removed.
+
+        .. versionadded:: 1.7
+
+        The variables Alfred 2.4+ exports are:
+
+        ============================  =========================================
+        Variable                      Description
+        ============================  =========================================
+        alfred_debug                  Set to ``1`` if Alfred's debugger is
+                                      open, otherwise unset.
+        alfred_preferences            Path to Alfred.alfredpreferences
+                                      (where your workflows and settings are
+                                      stored).
+        alfred_preferences_localhash  Machine-specific preferences are stored
+                                      in ``Alfred.alfredpreferences/preferences/local/<hash>``
+                                      (see ``alfred_preferences`` above for
+                                      the path to ``Alfred.alfredpreferences``)
+        alfred_theme                  ID of selected theme
+        alfred_theme_background       Background colour of selected theme in
+                                      format ``rgba(r,g,b,a)``
+        alfred_theme_subtext          Show result subtext.
+                                      ``0`` = Always,
+                                      ``1`` = Alternative actions only,
+                                      ``2`` = Selected result only,
+                                      ``3`` = Never
+        alfred_version                Alfred version number, e.g. ``'2.4'``
+        alfred_version_build          Alfred build number, e.g. ``277``
+        alfred_workflow_bundleid      Bundle ID, e.g.
+                                      ``net.deanishe.alfred-mailto``
+        alfred_workflow_cache         Path to workflow's cache directory
+        alfred_workflow_data          Path to workflow's data directory
+        alfred_workflow_name          Name of current workflow
+        alfred_workflow_uid           UID of workflow
+        alfred_workflow_version       The version number specified in the
+                                      workflow configuration sheet/info.plist
+        ============================  =========================================
+
+        **Note:** all values are Unicode strings except ``debug``,
+        ``version_build`` and ``theme_subtext``, which are integers.
+
+        :returns: ``dict`` keyed without the ``alfred_`` prefix, e.g.
+            ``preferences``, ``workflow_data``. Unset variables are ``None``.
+
+        """
+        if self._alfred_env is not None:
+            return self._alfred_env
+
+        # Variables whose values are converted to integers
+        int_keys = ('alfred_debug', 'alfred_version_build',
+                    'alfred_theme_subtext')
+        env = {}
+
+        for varname in (
+                'alfred_debug',
+                'alfred_preferences',
+                'alfred_preferences_localhash',
+                'alfred_theme',
+                'alfred_theme_background',
+                'alfred_theme_subtext',
+                'alfred_version',
+                'alfred_version_build',
+                'alfred_workflow_bundleid',
+                'alfred_workflow_cache',
+                'alfred_workflow_data',
+                'alfred_workflow_name',
+                'alfred_workflow_uid',
+                'alfred_workflow_version'):
+
+            raw = os.getenv(varname)
+            if isinstance(raw, str):
+                if varname in int_keys:
+                    raw = int(raw)
+                else:
+                    raw = self.decode(raw)
+            # Strip the leading ``alfred_`` (7 characters) from the key
+            env[varname[7:]] = raw
+
+        self._alfred_env = env
+        return env
+
+    @property
+    def info(self):
+        """``info.plist`` contents as a :class:`dict`, loaded when needed."""
+        if not self._info_loaded:
+            self._load_info_plist()
+        return self._info
+
+    @property
+    def bundleid(self):
+        """Workflow bundle ID from environmental vars or ``info.plist``.
+
+        :returns: bundle ID
+        :rtype: ``unicode``
+
+        """
+        if not self._bundleid:
+            bundleid = (self.alfred_env.get('workflow_bundleid') or
+                        self.info['bundleid'])
+            if not isinstance(bundleid, unicode):  # don't re-decode unicode
+                bundleid = unicode(bundleid, 'utf-8')
+            self._bundleid = bundleid
+        return self._bundleid
+
+    @property
+    def debugging(self):
+        """Report whether Alfred's debugger is open.
+
+        The answer is computed once from the ``debug`` entry of
+        :attr:`alfred_env` and then remembered.
+
+        :returns: ``True`` if Alfred's debugger is open.
+        :rtype: ``bool``
+
+        """
+        if self._debugging is None:
+            self._debugging = self.alfred_env.get('debug') == 1
+        return self._debugging
+
+    @property
+    def name(self):
+        """Name of the workflow, taken from Alfred or ``info.plist``.
+
+        :returns: workflow name
+        :rtype: ``unicode``
+
+        """
+        if self._name:
+            return self._name
+
+        # The environment variable wins; ``info.plist`` is the fallback
+        raw_name = self.alfred_env.get('workflow_name') or self.info['name']
+        self._name = self.decode(raw_name)
+        return self._name
+
+    @property
+    def version(self):
+        """Return the version of the workflow.
+
+        .. versionadded:: 1.9.10
+
+        Get the workflow version from environment variable,
+        the ``update_settings`` dict passed on
+        instantiation, the ``version`` file located in the workflow's
+        root directory (surrounding whitespace is ignored) or
+        ``info.plist``. Return ``None`` if none exists or raise
+        :class:`ValueError` if the version number is invalid.
+
+        :returns: Version of the workflow (not Alfred-Workflow)
+        :rtype: :class:`~workflow.update.Version` object
+
+        """
+        if self._version is UNSET:
+
+            version = None
+            # environment variable has priority
+            if self.alfred_env.get('workflow_version'):
+                version = self.alfred_env['workflow_version']
+
+            # Try `update_settings`
+            elif self._update_settings:
+                version = self._update_settings.get('version')
+
+            # `version` file
+            if not version:
+                filepath = self.workflowfile('version')
+
+                if os.path.exists(filepath):
+                    with open(filepath, 'rb') as fileobj:
+                        # Drop the newline a text file usually ends with
+                        version = fileobj.read().strip()
+
+            # info.plist
+            if not version:
+                version = self.info.get('version')
+
+            if version:
+                from update import Version
+                version = Version(version)
+
+            self._version = version
+        return self._version
+
+    # Workflow utility methods -----------------------------------------
+
+    @property
+    def args(self):
+        """Command line arguments, decoded and normalised to unicode.
+
+        Every item of ``sys.argv[1:]`` is passed through
+        :meth:`~Workflow.decode`, which applies the ``input_encoding``
+        and ``normalization`` given to :class:`Workflow` (``UTF-8`` and
+        ``NFC`` are the defaults).
+
+        With ``capture_args=True`` (the default), the arguments are
+        also checked for magic arguments (``workflow:*``). The callable
+        registered for each one present is run; if that produces a
+        message, it is logged, shown in Alfred when stdout is not a
+        terminal, and the workflow exits.
+
+        See :ref:`Magic arguments <magic-arguments>` for details.
+
+        """
+        args = [self.decode(arg) for arg in sys.argv[1:]]
+        if not (args and self._capture_args):
+            return args
+
+        # Run the callable of every magic argument present in ``args``
+        msg = None
+        for name in self.magic_arguments:
+            if '{0}{1}'.format(self.magic_prefix, name) in args:
+                msg = self.magic_arguments[name]()
+
+        if msg:
+            self.logger.debug(msg)
+            if not sys.stdout.isatty():  # Show message in Alfred
+                self.add_item(msg, valid=False, icon=ICON_INFO)
+                self.send_feedback()
+            sys.exit(0)
+        return args
+
+    @property
+    def cachedir(self):
+        """Path to workflow's cache directory.
+
+        Alfred's ``alfred_workflow_cache`` environment variable is used
+        if it is set. Otherwise the directory is the Alfred 2 default,
+        a subdirectory of Alfred's own cache directory in
+        ``~/Library/Caches``:
+
+        ``~/Library/Caches/com.runningwithcrayons.Alfred-2/Workflow Data/<bundle id>``
+
+        The path is handed to ``_create()`` before it is returned.
+
+        :returns: full path to workflow's cache directory
+        :rtype: ``unicode``
+
+        """
+        # ``or`` leaves the default uncomputed when Alfred supplies a path
+        dirpath = (self.alfred_env.get('workflow_cache') or
+                   self._default_cachedir)
+
+        return self._create(dirpath)
+
+    @property
+    def _default_cachedir(self):
+        """Cache directory used when Alfred does not supply one (Alfred 2)."""
+        workflow_data = os.path.expanduser(
+            '~/Library/Caches/com.runningwithcrayons.Alfred-2/'
+            'Workflow Data/')
+        # Each workflow gets a subdirectory named after its bundle ID
+        return os.path.join(workflow_data, self.bundleid)
+
+    @property
+    def datadir(self):
+        """Path to workflow's data directory.
+
+        Alfred's ``alfred_workflow_data`` environment variable is used
+        if it is set. Otherwise the directory is the Alfred 2 default:
+
+        ``~/Library/Application Support/Alfred 2/Workflow Data/<bundle id>``
+
+        The path is handed to ``_create()`` before it is returned.
+
+        :returns: full path to workflow data directory
+        :rtype: ``unicode``
+
+        """
+        # ``or`` leaves the default uncomputed when Alfred supplies a path
+        dirpath = (self.alfred_env.get('workflow_data') or
+                   self._default_datadir)
+
+        return self._create(dirpath)
+
+    @property
+    def _default_datadir(self):
+        """Data directory used when Alfred does not supply one (Alfred 2)."""
+        workflow_data = os.path.expanduser(
+            '~/Library/Application Support/Alfred 2/Workflow Data/')
+        return os.path.join(workflow_data, self.bundleid)
+
+    @property
+    def workflowdir(self):
+        """Path to workflow's root directory (where ``info.plist`` is).
+
+        The search starts in the current working directory and, failing
+        that, in the parent of the directory containing this library.
+        From each starting point the directory tree is climbed until
+        ``info.plist`` is found or ``/`` has been checked. A directory
+        that is found is remembered for later accesses.
+
+        :returns: full path to workflow root directory
+        :rtype: ``unicode``
+        :raises IOError: if no ``info.plist`` is found
+
+        """
+        if self._workflowdir:
+            return self._workflowdir
+
+        # CWD will be the workflow root if a workflow is being run in
+        # Alfred, so it is tried before the library's own location
+        cwd = os.path.abspath(os.getcwdu())
+        library_dir = os.path.abspath(os.path.dirname(__file__))
+
+        for start in (cwd, os.path.dirname(library_dir)):
+            # Ensure directory path is Unicode
+            dirpath = self.decode(start)
+
+            # Climb the directory tree until we find `info.plist`
+            while True:
+                if os.path.exists(os.path.join(dirpath, 'info.plist')):
+                    self._workflowdir = dirpath
+                    return self._workflowdir
+
+                if dirpath == '/':
+                    # no `info.plist` at or above this starting point
+                    break
+
+                # Check the parent directory
+                dirpath = os.path.dirname(dirpath)
+
+        # Both starting points were exhausted
+        raise IOError("'info.plist' not found in directory tree")
+
+    def cachefile(self, filename):
+        """Path to ``filename`` in workflow's cache directory.
+
+        The path is ``filename`` joined onto :attr:`Workflow.cachedir`.
+
+        :param filename: basename of file
+        :type filename: ``unicode``
+        :returns: full path to file within cache directory
+        :rtype: ``unicode``
+
+        """
+        cache_dir = self.cachedir
+        return os.path.join(cache_dir, filename)
+
+    def datafile(self, filename):
+        """Path to ``filename`` in workflow's data directory.
+
+        The path is ``filename`` joined onto :attr:`Workflow.datadir`.
+
+        :param filename: basename of file
+        :type filename: ``unicode``
+        :returns: full path to file within data directory
+        :rtype: ``unicode``
+
+        """
+        data_dir = self.datadir
+        return os.path.join(data_dir, filename)
+
+    def workflowfile(self, filename):
+        """Return full path to ``filename`` in workflow's root directory.
+
+        :param filename: basename of file
+        :type filename: ``unicode``
+        :returns: full path to file within workflow's root directory
+        :rtype: ``unicode``
+
+        """
+        return os.path.join(self.workflowdir, filename)
+
+    @property
+    def logfile(self):
+        """Path to logfile, named ``<bundle id>.log``.
+
+        :returns: path to logfile within workflow's cache directory
+        :rtype: ``unicode``
+
+        """
+        return self.cachefile('%s.log' % self.bundleid)
+
+    @property
+    def logger(self):
+        """Logger that logs to both console and a log file.
+
+        The ``workflow`` logger is given a rotating file handler for
+        :attr:`logfile` and a console handler, unless it already has
+        handlers. Its level is ``DEBUG`` if Alfred's debugger is open,
+        else ``INFO``.
+
+        Use :meth:`open_log` to open the log file in Console.
+
+        :returns: an initialised :class:`~logging.Logger`
+
+        """
+        if self._logger:
+            return self._logger
+
+        # Initialise new logger and optionally handlers
+        logger = logging.getLogger('workflow')
+
+        if not logger.handlers:  # Only add one set of handlers
+            fmt = logging.Formatter(
+                '%(asctime)s %(filename)s:%(lineno)s'
+                ' %(levelname)-8s %(message)s',
+                datefmt='%H:%M:%S')
+
+            # Log file is capped at 1 MiB with one rotated backup
+            handlers = (
+                logging.handlers.RotatingFileHandler(
+                    self.logfile,
+                    maxBytes=1024 * 1024,
+                    backupCount=1),
+                logging.StreamHandler())
+
+            for handler in handlers:
+                handler.setFormatter(fmt)
+                logger.addHandler(handler)
+
+        level = logging.DEBUG if self.debugging else logging.INFO
+        logger.setLevel(level)
+
+        self._logger = logger
+
+        return logger
+
+    @logger.setter
+    def logger(self, logger):
+        """Replace the logger returned by :attr:`logger`.
+
+        :param logger: The logger to use
+        :type logger: `~logging.Logger` instance
+
+        """
+        self._logger = logger
+
+    @property
+    def settings_path(self):
+        """Path to the settings file in the workflow's data directory.
+
+        :returns: path to ``settings.json`` file
+        :rtype: ``unicode``
+
+        """
+        path = self._settings_path or self.datafile('settings.json')
+        self._settings_path = path
+        return path
+
+    @property
+    def settings(self):
+        """Return a dictionary subclass that saves itself when changed.
+
+        See :ref:`manual-settings` in the :ref:`user-manual` for more
+        information on how to use :attr:`settings` and **important
+        limitations** on what it can do.
+
+        :returns: :class:`~workflow.workflow.Settings` instance
+            initialised from the data in JSON file at
+            :attr:`settings_path` or if that doesn't exist, with the
+            ``default_settings`` :class:`dict` passed to
+            :class:`Workflow` on instantiation.
+        :rtype: :class:`~workflow.workflow.Settings` instance
+
+        """
+        if self._settings is None:  # an empty ``Settings`` is falsy
+            self.logger.debug('Reading settings from `{0}` ...'.format(
+                              self.settings_path))
+            self._settings = Settings(self.settings_path,
+                                      self._default_settings)
+        return self._settings
+
+    @property
+    def cache_serializer(self):
+        """Name of default cache serializer.
+
+        .. versionadded:: 1.8
+
+        This serializer is used by :meth:`cache_data()` and
+        :meth:`cached_data()`. It is ``cpickle`` unless changed.
+
+        See :class:`SerializerManager` for details.
+
+        :returns: serializer name
+        :rtype: ``unicode``
+
+        """
+        return self._cache_serializer
+
+    @cache_serializer.setter
+    def cache_serializer(self, serializer_name):
+        """Choose the serializer used by default for cached data.
+
+        .. versionadded:: 1.8
+
+        This serializer is used by :meth:`cache_data()` and
+        :meth:`cached_data()`.
+
+        The specified serializer must already be registered with the
+        :class:`SerializerManager` at `~workflow.workflow.manager`,
+        otherwise a :class:`ValueError` will be raised.
+
+        :param serializer_name: Name of default serializer to use.
+        :raises ValueError: if ``serializer_name`` is not registered
+
+        """
+        registered = manager.serializer(serializer_name)
+        if registered is None:
+            msg = ('Unknown serializer : `{0}`. Register your serializer '
+                   'with `manager` first.')
+            raise ValueError(msg.format(serializer_name))
+
+        self.logger.debug(
+            'default cache serializer set to `{0}`'.format(serializer_name))
+        self._cache_serializer = serializer_name
+
+    @property
+    def data_serializer(self):
+        """Name of default data serializer.
+
+        .. versionadded:: 1.8
+
+        This serializer is used by :meth:`store_data()` when no other
+        is specified. It is ``cpickle`` unless changed.
+
+        See :class:`SerializerManager` for details.
+
+        :returns: serializer name
+        :rtype: ``unicode``
+
+        """
+        return self._data_serializer
+
+    @data_serializer.setter
+    def data_serializer(self, serializer_name):
+        """Choose the serializer used by default for stored data.
+
+        .. versionadded:: 1.8
+
+        This serializer is used by :meth:`store_data()` when no other
+        is specified.
+
+        The specified serializer must already be registered with the
+        :class:`SerializerManager` at `~workflow.workflow.manager`,
+        otherwise a :class:`ValueError` will be raised.
+
+        :param serializer_name: Name of serializer to use by default.
+
+        """
+        registered = manager.serializer(serializer_name)
+        if registered is None:
+            msg = ('Unknown serializer : `{0}`. Register your serializer '
+                   'with `manager` first.')
+            raise ValueError(msg.format(serializer_name))
+
+        self.logger.debug(
+            'default data serializer set to `{0}`'.format(serializer_name))
+        self._data_serializer = serializer_name
+
+    def stored_data(self, name):
+        """Load the data saved under ``name`` in the data directory.
+
+        .. versionadded:: 1.8
+
+        :param name: name of datastore
+        :returns: the stored data, or ``None`` if there are none
+        :raises ValueError: if the data's serializer is not registered
+
+        """
+        meta_path = self.datafile('.{0}.alfred-workflow'.format(name))
+
+        if not os.path.exists(meta_path):
+            self.logger.debug('No data stored for `{0}`'.format(name))
+            return None
+
+        # The metadata file holds the name of the serializer that was used
+        with open(meta_path, 'rb') as fp:
+            fmt = fp.read().strip()
+
+        loader = manager.serializer(fmt)
+
+        if loader is None:
+            raise ValueError(
+                'Unknown serializer `{0}`. Register a corresponding '
+                'serializer with `manager.register()` '
+                'to load this data.'.format(fmt))
+
+        self.logger.debug('Data `{0}` stored in `{1}` format'.format(
+            name, fmt))
+
+        path = self.datafile('{0}.{1}'.format(name, fmt))
+
+        if not os.path.exists(path):
+            self.logger.debug('No data stored for `{0}`'.format(name))
+            # Metadata without a data file is stale, so remove it
+            if os.path.exists(meta_path):
+                os.unlink(meta_path)
+            return None
+
+        with open(path, 'rb') as fp:
+            data = loader.load(fp)
+
+        self.logger.debug('Stored data loaded from : {0}'.format(path))
+
+        return data
+
+    def store_data(self, name, data, serializer=None):
+        """Save data to data directory.
+
+        .. versionadded:: 1.8
+
+        If ``data`` is ``None``, the datastore will be deleted.
+
+        Note that the datastore does NOT support multiple threads.
+
+        :param name: name of datastore
+        :param data: object(s) to store. **Note:** some serializers
+            can only handle certain types of data.
+        :param serializer: name of serializer to use. If no serializer
+            is specified, the default will be used. See
+            :class:`SerializerManager` for more information.
+        :raises ValueError: for an unknown serializer or a settings-file clash
+
+        """
+        # Ensure deletion is not interrupted by SIGTERM
+        @uninterruptible
+        def delete_paths(paths):
+            """Clear one or more data stores"""
+            for path in paths:
+                if os.path.exists(path):
+                    os.unlink(path)
+                    self.logger.debug('Deleted data file : {0}'.format(path))
+
+        serializer_name = serializer or self.data_serializer
+
+        # In order for `stored_data()` to be able to load data stored with
+        # an arbitrary serializer, yet still have meaningful file extensions,
+        # the format (i.e. extension) is saved to an accompanying file
+        metadata_path = self.datafile('.{0}.alfred-workflow'.format(name))
+        filename = '{0}.{1}'.format(name, serializer_name)
+        data_path = self.datafile(filename)
+
+        if data_path == self.settings_path:
+            raise ValueError(
+                'Cannot save data to ' +
+                '`{0}` with format `{1}`. '.format(name, serializer_name) +
+                "This would overwrite Alfred-Workflow's settings file.")
+
+        serializer = manager.serializer(serializer_name)
+
+        if serializer is None:
+            raise ValueError(
+                'Invalid serializer `{0}`. Register your serializer with '
+                '`manager.register()` first.'.format(serializer_name))
+
+        if data is None:  # Delete cached data
+            delete_paths((metadata_path, data_path))
+            return
+
+        # Ensure write is not interrupted by SIGTERM
+        @uninterruptible
+        def _store():
+            # Save file extension
+            with atomic_writer(metadata_path, 'wb') as file_obj:
+                file_obj.write(serializer_name)
+
+            with atomic_writer(data_path, 'wb') as file_obj:
+                serializer.dump(data, file_obj)
+
+        _store()
+
+        self.logger.debug('Stored data saved at : {0}'.format(data_path))
+
+    def cached_data(self, name, data_func=None, max_age=60):
+        """Return cached data if younger than ``max_age`` seconds.
+
+        The cache is used if it exists and is fresh enough; a
+        ``max_age`` of 0 accepts cached data no matter how old.
+        Otherwise the data are re-generated with ``data_func`` and
+        re-cached.
+
+        :param name: name of datastore
+        :param data_func: function to (re-)generate data.
+        :type data_func: ``callable``
+        :param max_age: maximum age of cached data in seconds
+        :type max_age: ``int``
+        :returns: cached data, return value of ``data_func`` or ``None``
+            if ``data_func`` is not set
+
+        """
+        serializer = manager.serializer(self.cache_serializer)
+        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
+        age = self.cached_data_age(name)
+
+        # A ``max_age`` of 0 means the cache never expires
+        fresh_enough = max_age == 0 or age < max_age
+
+        if fresh_enough and os.path.exists(cache_path):
+            with open(cache_path, 'rb') as file_obj:
+                self.logger.debug('Loading cached data from : %s', cache_path)
+                return serializer.load(file_obj)
+
+        # Cache is missing or stale: rebuild it if we know how
+        if data_func:
+            data = data_func()
+            self.cache_data(name, data)
+            return data
+        return None
+
+    def cache_data(self, name, data):
+        """Write ``data`` to the cache under ``name``.
+
+        Passing ``None`` as ``data`` deletes the corresponding cache
+        file instead.
+
+        :param name: name of datastore
+        :param data: data to store. This may be any object supported by
+                the cache serializer
+
+        """
+        serializer = manager.serializer(self.cache_serializer)
+        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
+
+        if data is not None:
+            with atomic_writer(cache_path, 'wb') as file_obj:
+                serializer.dump(data, file_obj)
+
+            self.logger.debug('Cached data saved at : %s', cache_path)
+            return
+
+        # ``None`` means "forget this cache"
+        if os.path.exists(cache_path):
+            os.unlink(cache_path)
+            self.logger.debug('Deleted cache file : %s', cache_path)
+
+    def cached_data_fresh(self, name, max_age):
+        """Whether cache `name` is less than `max_age` seconds old.
+
+        A cache whose age is reported as 0 (e.g. one that does not
+        exist) is never fresh.
+
+        :param name: name of datastore
+        :param max_age: maximum age of data in seconds
+        :type max_age: ``int``
+        :returns: ``True`` if data is less than ``max_age`` old, else
+            ``False``
+
+        """
+        age = self.cached_data_age(name)
+        # An age of 0 is what a missing cache reports, so it is never fresh
+        return age < max_age if age else False
+
+    def cached_data_age(self, name):
+        """Return age in seconds of cache `name` or 0 if cache doesn't exist.
+
+        :param name: name of datastore
+        :type name: ``unicode``
+        :returns: age of datastore in seconds
+        :rtype: ``float``, or ``int`` 0 if the cache doesn't exist
+
+        """
+        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
+
+        if os.path.exists(cache_path):
+            # Seconds elapsed since the cache file was last modified
+            return time.time() - os.stat(cache_path).st_mtime
+        return 0
+
+    def filter(self, query, items, key=lambda x: x, ascending=False,
+               include_score=False, min_score=0, max_results=0,
+               match_on=MATCH_ALL, fold_diacritics=True):
+        """Fuzzy search filter. Returns list of ``items`` that match ``query``.
+
+        ``query`` is case-insensitive. Any item that does not contain the
+        entirety of ``query`` is rejected.
+
+        .. warning::
+
+            If ``query`` is an empty string or contains only whitespace,
+            a :class:`ValueError` will be raised.
+
+        :param query: query to test items against
+        :type query: ``unicode``
+        :param items: iterable of items to test
+        :type items: ``list`` or ``tuple``
+        :param key: function to get comparison key from ``items``.
+            Must return a ``unicode`` string. The default simply returns
+            the item.
+        :type key: ``callable``
+        :param ascending: set to ``True`` to get worst matches first
+        :type ascending: ``Boolean``
+        :param include_score: Useful for debugging the scoring algorithm.
+            If ``True``, results will be a list of tuples
+            ``(item, score, rule)``.
+        :type include_score: ``Boolean``
+        :param min_score: If non-zero, ignore results whose score is not
+            higher than this.
+        :type min_score: ``int``
+        :param max_results: If non-zero, prune results list to this length.
+        :type max_results: ``int``
+        :param match_on: Filter option flags. Bitwise-combined list of
+            ``MATCH_*`` constants (see below).
+        :type match_on: ``int``
+        :param fold_diacritics: Convert search keys to ASCII-only
+            characters if ``query`` only contains ASCII characters.
+        :type fold_diacritics: ``Boolean``
+        :returns: list of ``items`` matching ``query`` or list of
+            ``(item, score, rule)`` `tuples` if ``include_score`` is ``True``.
+            ``rule`` is the ``MATCH_*`` rule that matched the item.
+        :rtype: ``list``
+
+        **Matching rules**
+
+        By default, :meth:`filter` uses all of the following flags (i.e.
+        :const:`MATCH_ALL`). The tests are always run in the given order:
+
+        1. :const:`MATCH_STARTSWITH` : Item search key startswith
+            ``query`` (case-insensitive).
+        2. :const:`MATCH_CAPITALS` : The list of capital letters in item
+            search key starts with ``query`` (``query`` may be
+            lower-case). E.g., ``of`` would match ``OmniFocus``,
+            ``gc`` would match ``Google Chrome``.
+        3. :const:`MATCH_ATOM` : Search key is split into "atoms" on
+            non-word characters (.,-,' etc.). Matches if ``query`` is
+            one of these atoms (case-insensitive).
+        4. :const:`MATCH_INITIALS_STARTSWITH` : Initials are the first
+            characters of the above-described "atoms" (case-insensitive).
+        5. :const:`MATCH_INITIALS_CONTAIN` : ``query`` is a substring of
+            the above-described initials.
+        6. :const:`MATCH_INITIALS` : Combination of (4) and (5).
+        7. :const:`MATCH_SUBSTRING` : Match if ``query`` is a substring
+            of item search key (case-insensitive).
+        8. :const:`MATCH_ALLCHARS` : Matches if all characters in
+            ``query`` appear in item search key in the same order
+            (case-insensitive).
+        9. :const:`MATCH_ALL` : Combination of all the above.
+
+
+        :const:`MATCH_ALLCHARS` is considerably slower than the other
+        tests and provides much less accurate results.
+
+        **Examples:**
+
+        To ignore :const:`MATCH_ALLCHARS` (tends to provide the worst
+        matches and is expensive to run), use
+        ``match_on=MATCH_ALL ^ MATCH_ALLCHARS``.
+
+        To match only on capitals, use ``match_on=MATCH_CAPITALS``.
+
+        To match only on startswith and substring, use
+        ``match_on=MATCH_STARTSWITH | MATCH_SUBSTRING``.
+
+        **Diacritic folding**
+
+        .. versionadded:: 1.3
+
+        If ``fold_diacritics`` is ``True`` (the default), and ``query``
+        contains only ASCII characters, non-ASCII characters in search keys
+        will be converted to ASCII equivalents (e.g. **ü** -> **u**,
+        **ß** -> **ss**, **é** -> **e**).
+
+        See :const:`ASCII_REPLACEMENTS` for all replacements.
+
+        If ``query`` contains non-ASCII characters, search keys will not be
+        altered.
+
+        """
+        if not query:
+            raise ValueError('Empty `query`')
+
+        # Remove preceding/trailing spaces
+        query = query.strip()
+
+        if not query:
+            raise ValueError('`query` contains only whitespace')
+
+        # Use user override if there is one
+        fold_diacritics = self.settings.get('__workflow_diacritic_folding',
+                                            fold_diacritics)
+
+        # The words of the query are the same for every item, so split
+        # the query once instead of once per item
+        words = [w for w in (s.strip() for s in query.split(' ')) if w]
+        results = []
+
+        for item in items:
+            value = key(item).strip()
+            if value == '':
+                continue
+
+            score = 0
+            for word in words:
+                s, rule = self._filter_item(value, word, match_on,
+                                            fold_diacritics)
+
+                if not s:
+                    # Every word must match, so the remaining words
+                    # need not be tested once one has failed
+                    break
+                score += s
+            else:  # no ``break``: every word of the query matched
+                if score:
+                    # use "reversed" `score` (i.e. highest becomes lowest)
+                    # and `value` as sort key. This means items with the
+                    # same score will be sorted in alphabetical not
+                    # reverse alphabetical order
+                    results.append(((100.0 / score, value.lower(), score),
+                                    (item, score, rule)))
+
+        # sort on keys, then discard the keys
+        results.sort(reverse=ascending)
+        results = [t[1] for t in results]
+
+        if min_score:
+            results = [r for r in results if r[1] > min_score]
+
+        if max_results and len(results) > max_results:
+            results = results[:max_results]
+
+        # return list of ``(item, score, rule)``
+        if include_score:
+            return results
+        # just return list of items
+        return [t[0] for t in results]
+
+    def _filter_item(self, value, query, match_on, fold_diacritics):
+        """Filter ``value`` against ``query`` using rules ``match_on``.
+
+        :returns: ``(score, rule)``
+
+        """
+        query = query.lower()
+
+        if not isascii(query):
+            fold_diacritics = False
+
+        if fold_diacritics:
+            value = self.fold_to_ascii(value)
+
+        # pre-filter any items that do not contain all characters
+        # of ``query`` to save on running several more expensive tests
+        if not set(query) <= set(value.lower()):
+
+            return (0, None)
+
+        # item starts with query
+        if match_on & MATCH_STARTSWITH and value.lower().startswith(query):
+            score = 100.0 - (len(value) / len(query))
+
+            return (score, MATCH_STARTSWITH)
+
+        # query matches capitalised letters in item,
+        # e.g. of = OmniFocus
+        if match_on & MATCH_CAPITALS:
+            initials = ''.join([c for c in value if c in INITIALS])
+            if initials.lower().startswith(query):
+                score = 100.0 - (len(initials) / len(query))
+
+                return (score, MATCH_CAPITALS)
+
+        # split the item into "atoms", i.e. words separated by
+        # spaces or other non-word characters
+        if (match_on & MATCH_ATOM or
+                match_on & MATCH_INITIALS_CONTAIN or
+                match_on & MATCH_INITIALS_STARTSWITH):
+            atoms = [s.lower() for s in split_on_delimiters(value)]
+            # print('atoms : %s  -->  %s' % (value, atoms))
+            # initials of the atoms
+            initials = ''.join([s[0] for s in atoms if s])
+
+        if match_on & MATCH_ATOM:
+            # is `query` one of the atoms in item?
+            # similar to substring, but scores more highly, as it's
+            # a word within the item
+            if query in atoms:
+                score = 100.0 - (len(value) / len(query))
+
+                return (score, MATCH_ATOM)
+
+        # `query` matches start (or all) of the initials of the
+        # atoms, e.g. ``himym`` matches "How I Met Your Mother"
+        # *and* "how i met your mother" (the ``capitals`` rule only
+        # matches the former)
+        if (match_on & MATCH_INITIALS_STARTSWITH and
+                initials.startswith(query)):
+            score = 100.0 - (len(initials) / len(query))
+
+            return (score, MATCH_INITIALS_STARTSWITH)
+
+        # `query` is a substring of initials, e.g. ``doh`` matches
+        # "The Dukes of Hazzard"
+        elif (match_on & MATCH_INITIALS_CONTAIN and
+                query in initials):
+            score = 95.0 - (len(initials) / len(query))
+
+            return (score, MATCH_INITIALS_CONTAIN)
+
+        # `query` is a substring of item
+        if match_on & MATCH_SUBSTRING and query in value.lower():
+            score = 90.0 - (len(value) / len(query))
+
+            return (score, MATCH_SUBSTRING)
+
+        # finally, assign a score based on how close together the
+        # characters in `query` are in item.
+        if match_on & MATCH_ALLCHARS:
+            search = self._search_for_query(query)
+            match = search(value)
+            if match:
+                score = 100.0 / ((1 + match.start()) *
+                                 (match.end() - match.start() + 1))
+
+                return (score, MATCH_ALLCHARS)
+
+        # Nothing matched
+        return (0, None)
+
+    def _search_for_query(self, query):
+        if query in self._search_pattern_cache:
+            return self._search_pattern_cache[query]
+
+        # Build pattern: include all characters
+        pattern = []
+        for c in query:
+            # pattern.append('[^{0}]*{0}'.format(re.escape(c)))
+            pattern.append('.*?{0}'.format(re.escape(c)))
+        pattern = ''.join(pattern)
+        search = re.compile(pattern, re.IGNORECASE).search
+
+        self._search_pattern_cache[query] = search
+        return search
+
+    def run(self, func, text_errors=False):
+        """Call ``func`` to run your workflow.
+
+        :param func: Callable to call with ``self`` (i.e. the :class:`Workflow`
+            instance) as first argument.
+        :param text_errors: Emit error messages in plain text, not in
+            Alfred's XML/JSON feedback format. Use this when you're not
+            running Alfred-Workflow in a Script Filter and would like
+            to pass the error message to, say, a notification.
+        :type text_errors: ``Boolean``
+
+        ``func`` will be called with :class:`Workflow` instance as first
+        argument.
+
+        ``func`` should be the main entry point to your workflow.
+
+        Any exceptions raised will be logged and an error message will be
+        output to Alfred.
+
+        """
+        start = time.time()
+
+        # Call workflow's entry function/method within a try-except block
+        # to catch any errors and display an error message in Alfred
+        try:
+
+            if self.version:
+                self.logger.debug(
+                    'Workflow version : {0}'.format(self.version))
+
+            # Run update check if configured for self-updates.
+            # This call has to go in the `run` try-except block, as it will
+            # initialise `self.settings`, which will raise an exception
+            # if `settings.json` isn't valid.
+
+            if self._update_settings:
+                self.check_update()
+
+            # Run workflow's entry function/method
+            func(self)
+
+            # Set last version run to current version after a successful
+            # run
+            self.set_last_version()
+
+        except Exception as err:
+            self.logger.exception(err)
+            if self.help_url:
+                self.logger.info(
+                    'For assistance, see: {0}'.format(self.help_url))
+
+            if not sys.stdout.isatty():  # Show error in Alfred
+                if text_errors:
+                    print(unicode(err).encode('utf-8'), end='')
+                else:
+                    self._items = []
+                    if self._name:
+                        name = self._name
+                    elif self._bundleid:
+                        name = self._bundleid
+                    else:  # pragma: no cover
+                        name = os.path.dirname(__file__)
+                    self.add_item("Error in workflow '%s'" % name,
+                                  unicode(err),
+                                  icon=ICON_ERROR)
+                    self.send_feedback()
+            return 1
+
+        finally:
+            self.logger.debug('Workflow finished in {0:0.3f} seconds.'.format(
+                time.time() - start))
+
+        return 0
+
+    # Alfred feedback methods ------------------------------------------
+
+    def add_item(self, title, subtitle='', modifier_subtitles=None, arg=None,
+                 autocomplete=None, valid=False, uid=None, icon=None,
+                 icontype=None, type=None, largetext=None, copytext=None,
+                 quicklookurl=None):
+        """Add an item to be output to Alfred.
+
+        :param title: Title shown in Alfred
+        :type title: ``unicode``
+        :param subtitle: Subtitle shown in Alfred
+        :type subtitle: ``unicode``
+        :param modifier_subtitles: Subtitles shown when modifier
+            (CMD, OPT etc.) is pressed. Use a ``dict`` with the lowercase
+            keys ``cmd``, ``ctrl``, ``shift``, ``alt`` and ``fn``
+        :type modifier_subtitles: ``dict``
+        :param arg: Argument passed by Alfred as ``{query}`` when item is
+            actioned
+        :type arg: ``unicode``
+        :param autocomplete: Text expanded in Alfred when item is TABbed
+        :type autocomplete: ``unicode``
+        :param valid: Whether or not item can be actioned
+        :type valid: ``Boolean``
+        :param uid: Used by Alfred to remember/sort items
+        :type uid: ``unicode``
+        :param icon: Filename of icon to use
+        :type icon: ``unicode``
+        :param icontype: Type of icon. Must be one of ``None`` , ``'filetype'``
+           or ``'fileicon'``. Use ``'filetype'`` when ``icon`` is a filetype
+           such as ``'public.folder'``. Use ``'fileicon'`` when you wish to
+           use the icon of the file specified as ``icon``, e.g.
+           ``icon='/Applications/Safari.app', icontype='fileicon'``.
+           Leave as `None` if ``icon`` points to an actual
+           icon file.
+        :type icontype: ``unicode``
+        :param type: Result type. Currently only ``'file'`` is supported
+            (by Alfred). This will tell Alfred to enable file actions for
+            this item.
+        :type type: ``unicode``
+        :param largetext: Text to be displayed in Alfred's large text box
+            if user presses CMD+L on item.
+        :type largetext: ``unicode``
+        :param copytext: Text to be copied to pasteboard if user presses
+            CMD+C on item.
+        :type copytext: ``unicode``
+        :param quicklookurl: URL to be displayed using Alfred's Quick Look
+            feature (tapping ``SHIFT`` or ``⌘+Y`` on a result).
+        :type quicklookurl: ``unicode``
+        :returns: :class:`Item` instance
+
+        See the :ref:`script-filter-results` section of the documentation
+        for a detailed description of what the various parameters do and how
+        they interact with one another.
+
+        See :ref:`icons` for a list of the supported system icons.
+
+        .. note::
+
+            Although this method returns an :class:`Item` instance, you don't
+            need to hold onto it or worry about it. All generated :class:`Item`
+            instances are also collected internally and sent to Alfred when
+            :meth:`send_feedback` is called.
+
+            The generated :class:`Item` is only returned in case you want to
+            edit it or do something with it other than send it to Alfred.
+
+        """
+        item = self.item_class(title, subtitle, modifier_subtitles, arg,
+                               autocomplete, valid, uid, icon, icontype, type,
+                               largetext, copytext, quicklookurl)
+        self._items.append(item)
+        return item
+
+    def send_feedback(self):
+        """Print stored items to console/Alfred as XML."""
+        root = ET.Element('items')
+        for item in self._items:
+            root.append(item.elem)
+        sys.stdout.write('<?xml version="1.0" encoding="utf-8"?>\n')
+        sys.stdout.write(ET.tostring(root).encode('utf-8'))
+        sys.stdout.flush()
+
+    ####################################################################
+    # Updating methods
+    ####################################################################
+
+    @property
+    def first_run(self):
+        """Return ``True`` if it's the first time this version has run.
+
+        .. versionadded:: 1.9.10
+
+        Raises a :class:`ValueError` if :attr:`version` isn't set.
+
+        """
+        if not self.version:
+            raise ValueError('No workflow version set')
+
+        if not self.last_version_run:
+            return True
+
+        return self.version != self.last_version_run
+
+    @property
+    def last_version_run(self):
+        """Return version of last version to run (or ``None``).
+
+        .. versionadded:: 1.9.10
+
+        :returns: :class:`~workflow.update.Version` instance
+            or ``None``
+
+        """
+        if self._last_version_run is UNSET:
+
+            version = self.settings.get('__workflow_last_version')
+            if version:
+                from update import Version
+                version = Version(version)
+
+            self._last_version_run = version
+
+        self.logger.debug('Last run version : {0}'.format(
+                          self._last_version_run))
+
+        return self._last_version_run
+
+    def set_last_version(self, version=None):
+        """Set :attr:`last_version_run` to current version.
+
+        .. versionadded:: 1.9.10
+
+        :param version: version to store (default is current version)
+        :type version: :class:`~workflow.update.Version` instance
+            or ``unicode``
+        :returns: ``True`` if version is saved, else ``False``
+
+        """
+        if not version:
+            if not self.version:
+                self.logger.warning(
+                    "Can't save last version: workflow has no version")
+                return False
+
+            version = self.version
+
+        if isinstance(version, basestring):
+            from update import Version
+            version = Version(version)
+
+        self.settings['__workflow_last_version'] = str(version)
+
+        self.logger.debug('Set last run version : {0}'.format(version))
+
+        return True
+
+    @property
+    def update_available(self):
+        """Whether an update is available.
+
+        .. versionadded:: 1.9
+
+        See :ref:`manual-updates` in the :ref:`user-manual` for detailed
+        information on how to enable your workflow to update itself.
+
+        :returns: ``True`` if an update is available, else ``False``
+
+        """
+        update_data = self.cached_data('__workflow_update_status', max_age=0)
+        self.logger.debug('update_data : {0}'.format(update_data))
+
+        if not update_data or not update_data.get('available'):
+            return False
+
+        return update_data['available']
+
+    @property
+    def prereleases(self):
+        """Whether workflow should update to pre-release versions.
+
+        .. versionadded:: 1.16
+
+        :returns: ``True`` if pre-releases are enabled with the :ref:`magic
+            argument <magic-arguments>` or the ``update_settings`` dict, else
+            ``False``.
+
+        """
+        if self._update_settings.get('prereleases'):
+            return True
+
+        return self.settings.get('__workflow_prereleases') or False
+
+    def check_update(self, force=False):
+        """Call update script if it's time to check for a new release.
+
+        .. versionadded:: 1.9
+
+        The update script will be run in the background, so it won't
+        interfere in the execution of your workflow.
+
+        See :ref:`manual-updates` in the :ref:`user-manual` for detailed
+        information on how to enable your workflow to update itself.
+
+        :param force: Force update check
+        :type force: ``Boolean``
+
+        """
+        frequency = self._update_settings.get('frequency',
+                                              DEFAULT_UPDATE_FREQUENCY)
+
+        if not force and not self.settings.get('__workflow_autoupdate', True):
+            self.logger.debug('Auto update turned off by user')
+            return
+
+        # Check for new version if it's time
+        if (force or not self.cached_data_fresh(
+                '__workflow_update_status', frequency * 86400)):
+
+            github_slug = self._update_settings['github_slug']
+            # version = self._update_settings['version']
+            version = str(self.version)
+
+            from background import run_in_background
+
+            # update.py is adjacent to this file
+            update_script = os.path.join(os.path.dirname(__file__),
+                                         b'update.py')
+
+            cmd = ['/usr/bin/python', update_script, 'check', github_slug,
+                   version]
+
+            if self.prereleases:
+                cmd.append('--prereleases')
+
+            self.logger.info('Checking for update ...')
+
+            run_in_background('__workflow_update_check', cmd)
+
+        else:
+            self.logger.debug('Update check not due')
+
+    def start_update(self):
+        """Check for update and download and install new workflow file.
+
+        .. versionadded:: 1.9
+
+        See :ref:`manual-updates` in the :ref:`user-manual` for detailed
+        information on how to enable your workflow to update itself.
+
+        :returns: ``True`` if an update is available and will be
+            installed, else ``False``
+
+        """
+        import update
+
+        github_slug = self._update_settings['github_slug']
+        # version = self._update_settings['version']
+        version = str(self.version)
+
+        if not update.check_update(github_slug, version, self.prereleases):
+            return False
+
+        from background import run_in_background
+
+        # update.py is adjacent to this file
+        update_script = os.path.join(os.path.dirname(__file__),
+                                     b'update.py')
+
+        cmd = ['/usr/bin/python', update_script, 'install', github_slug,
+               version]
+
+        if self.prereleases:
+            cmd.append('--prereleases')
+
+        self.logger.debug('Downloading update ...')
+        run_in_background('__workflow_update_install', cmd)
+
+        return True
+
+    ####################################################################
+    # Keychain password storage methods
+    ####################################################################
+
+    def save_password(self, account, password, service=None):
+        """Save account credentials.
+
+        If the account exists, the old password will first be deleted
+        (Keychain throws an error otherwise).
+
+        If something goes wrong, a :class:`KeychainError` exception will
+        be raised.
+
+        :param account: name of the account the password is for, e.g.
+            "Pinboard"
+        :type account: ``unicode``
+        :param password: the password to secure
+        :type password: ``unicode``
+        :param service: Name of the service. By default, this is the
+            workflow's bundle ID
+        :type service: ``unicode``
+
+        """
+        if not service:
+            service = self.bundleid
+
+        try:
+            self._call_security('add-generic-password', service, account,
+                                '-w', password)
+            self.logger.debug('Saved password : %s:%s', service, account)
+
+        except PasswordExists:
+            self.logger.debug('Password exists : %s:%s', service, account)
+            current_password = self.get_password(account, service)
+
+            if current_password == password:
+                self.logger.debug('Password unchanged')
+
+            else:
+                self.delete_password(account, service)
+                self._call_security('add-generic-password', service,
+                                    account, '-w', password)
+                self.logger.debug('save_password : %s:%s', service, account)
+
+    def get_password(self, account, service=None):
+        """Retrieve the password saved at ``service/account``.
+
+        Raise :class:`PasswordNotFound` exception if password doesn't exist.
+
+        :param account: name of the account the password is for, e.g.
+            "Pinboard"
+        :type account: ``unicode``
+        :param service: Name of the service. By default, this is the workflow's
+                        bundle ID
+        :type service: ``unicode``
+        :returns: account password
+        :rtype: ``unicode``
+
+        """
+        if not service:
+            service = self.bundleid
+
+        output = self._call_security('find-generic-password', service,
+                                     account, '-g')
+
+        # Parsing of `security` output is adapted from python-keyring
+        # by Jason R. Coombs
+        # https://pypi.python.org/pypi/keyring
+        m = re.search(
+            r'password:\s*(?:0x(?P<hex>[0-9A-F]+)\s*)?(?:"(?P<pw>.*)")?',
+            output)
+
+        if m:
+            groups = m.groupdict()
+            h = groups.get('hex')
+            password = groups.get('pw')
+            if h:
+                password = unicode(binascii.unhexlify(h), 'utf-8')
+
+        self.logger.debug('Got password : %s:%s', service, account)
+
+        return password
+
+    def delete_password(self, account, service=None):
+        """Delete the password stored at ``service/account``.
+
+        Raise :class:`PasswordNotFound` if account is unknown.
+
+        :param account: name of the account the password is for, e.g.
+            "Pinboard"
+        :type account: ``unicode``
+        :param service: Name of the service. By default, this is the workflow's
+                        bundle ID
+        :type service: ``unicode``
+
+        """
+        if not service:
+            service = self.bundleid
+
+        self._call_security('delete-generic-password', service, account)
+
+        self.logger.debug('Deleted password : %s:%s', service, account)
+
+    ####################################################################
+    # Methods for workflow:* magic args
+    ####################################################################
+
+    def _register_default_magic(self):
+        """Register the built-in magic arguments."""
+        # TODO: refactor & simplify
+        # Wrap callback and message with callable
+        def callback(func, msg):
+            def wrapper():
+                func()
+                return msg
+
+            return wrapper
+
+        self.magic_arguments['delcache'] = callback(self.clear_cache,
+                                                    'Deleted workflow cache')
+        self.magic_arguments['deldata'] = callback(self.clear_data,
+                                                   'Deleted workflow data')
+        self.magic_arguments['delsettings'] = callback(
+            self.clear_settings, 'Deleted workflow settings')
+        self.magic_arguments['reset'] = callback(self.reset,
+                                                 'Reset workflow')
+        self.magic_arguments['openlog'] = callback(self.open_log,
+                                                   'Opening workflow log file')
+        self.magic_arguments['opencache'] = callback(
+            self.open_cachedir, 'Opening workflow cache directory')
+        self.magic_arguments['opendata'] = callback(
+            self.open_datadir, 'Opening workflow data directory')
+        self.magic_arguments['openworkflow'] = callback(
+            self.open_workflowdir, 'Opening workflow directory')
+        self.magic_arguments['openterm'] = callback(
+            self.open_terminal, 'Opening workflow root directory in Terminal')
+
+        # Diacritic folding
+        def fold_on():
+            self.settings['__workflow_diacritic_folding'] = True
+            return 'Diacritics will always be folded'
+
+        def fold_off():
+            self.settings['__workflow_diacritic_folding'] = False
+            return 'Diacritics will never be folded'
+
+        def fold_default():
+            if '__workflow_diacritic_folding' in self.settings:
+                del self.settings['__workflow_diacritic_folding']
+            return 'Diacritics folding reset'
+
+        self.magic_arguments['foldingon'] = fold_on
+        self.magic_arguments['foldingoff'] = fold_off
+        self.magic_arguments['foldingdefault'] = fold_default
+
+        # Updates
+        def update_on():
+            self.settings['__workflow_autoupdate'] = True
+            return 'Auto update turned on'
+
+        def update_off():
+            self.settings['__workflow_autoupdate'] = False
+            return 'Auto update turned off'
+
+        def prereleases_on():
+            self.settings['__workflow_prereleases'] = True
+            return 'Prerelease updates turned on'
+
+        def prereleases_off():
+            self.settings['__workflow_prereleases'] = False
+            return 'Prerelease updates turned off'
+
+        def do_update():
+            if self.start_update():
+                return 'Downloading and installing update ...'
+            else:
+                return 'No update available'
+
+        self.magic_arguments['autoupdate'] = update_on
+        self.magic_arguments['noautoupdate'] = update_off
+        self.magic_arguments['prereleases'] = prereleases_on
+        self.magic_arguments['noprereleases'] = prereleases_off
+        self.magic_arguments['update'] = do_update
+
+        # Help
+        def do_help():
+            if self.help_url:
+                self.open_help()
+                return 'Opening workflow help URL in browser'
+            else:
+                return 'Workflow has no help URL'
+
+        def show_version():
+            if self.version:
+                return 'Version: {0}'.format(self.version)
+            else:
+                return 'This workflow has no version number'
+
+        def list_magic():
+            """Display all available magic args in Alfred."""
+            isatty = sys.stderr.isatty()
+            for name in sorted(self.magic_arguments.keys()):
+                if name == 'magic':
+                    continue
+                arg = '{0}{1}'.format(self.magic_prefix, name)
+                self.logger.debug(arg)
+
+                if not isatty:
+                    self.add_item(arg, icon=ICON_INFO)
+
+            if not isatty:
+                self.send_feedback()
+
+        self.magic_arguments['help'] = do_help
+        self.magic_arguments['magic'] = list_magic
+        self.magic_arguments['version'] = show_version
+
+    def clear_cache(self, filter_func=lambda f: True):
+        """Delete all files in workflow's :attr:`cachedir`.
+
+        :param filter_func: Callable to determine whether a file should be
+            deleted or not. ``filter_func`` is called with the filename
+            of each file in the data directory. If it returns ``True``,
+            the file will be deleted.
+            By default, *all* files will be deleted.
+        :type filter_func: ``callable``
+        """
+        self._delete_directory_contents(self.cachedir, filter_func)
+
+    def clear_data(self, filter_func=lambda f: True):
+        """Delete all files in workflow's :attr:`datadir`.
+
+        :param filter_func: Callable to determine whether a file should be
+            deleted or not. ``filter_func`` is called with the filename
+            of each file in the data directory. If it returns ``True``,
+            the file will be deleted.
+            By default, *all* files will be deleted.
+        :type filter_func: ``callable``
+        """
+        self._delete_directory_contents(self.datadir, filter_func)
+
+    def clear_settings(self):
+        """Delete workflow's :attr:`settings_path`."""
+        if os.path.exists(self.settings_path):
+            os.unlink(self.settings_path)
+            self.logger.debug('Deleted : %r', self.settings_path)
+
+    def reset(self):
+        """Delete workflow settings, cache and data.
+
+        File :attr:`settings <settings_path>` and directories
+        :attr:`cache <cachedir>` and :attr:`data <datadir>` are deleted.
+
+        """
+        self.clear_cache()
+        self.clear_data()
+        self.clear_settings()
+
+    def open_log(self):
+        """Open :attr:`logfile` in default app (usually Console.app)."""
+        subprocess.call(['open', self.logfile])
+
+    def open_cachedir(self):
+        """Open the workflow's :attr:`cachedir` in Finder."""
+        subprocess.call(['open', self.cachedir])
+
+    def open_datadir(self):
+        """Open the workflow's :attr:`datadir` in Finder."""
+        subprocess.call(['open', self.datadir])
+
+    def open_workflowdir(self):
+        """Open the workflow's :attr:`workflowdir` in Finder."""
+        subprocess.call(['open', self.workflowdir])
+
+    def open_terminal(self):
+        """Open a Terminal window at workflow's :attr:`workflowdir`."""
+        subprocess.call(['open', '-a', 'Terminal',
+                        self.workflowdir])
+
+    def open_help(self):
+        """Open :attr:`help_url` in default browser."""
+        subprocess.call(['open', self.help_url])
+
+        return 'Opening workflow help URL in browser'
+
+    ####################################################################
+    # Helper methods
+    ####################################################################
+
+    def decode(self, text, encoding=None, normalization=None):
+        """Return ``text`` as normalised unicode.
+
+        If ``encoding`` and/or ``normalization`` is ``None``, the
+        ``input_encoding``and ``normalization`` parameters passed to
+        :class:`Workflow` are used.
+
+        :param text: string
+        :type text: encoded or Unicode string. If ``text`` is already a
+            Unicode string, it will only be normalised.
+        :param encoding: The text encoding to use to decode ``text`` to
+            Unicode.
+        :type encoding: ``unicode`` or ``None``
+        :param normalization: The nomalisation form to apply to ``text``.
+        :type normalization: ``unicode`` or ``None``
+        :returns: decoded and normalised ``unicode``
+
+        :class:`Workflow` uses "NFC" normalisation by default. This is the
+        standard for Python and will work well with data from the web (via
+        :mod:`~workflow.web` or :mod:`json`).
+
+        OS X, on the other hand, uses "NFD" normalisation (nearly), so data
+        coming from the system (e.g. via :mod:`subprocess` or
+        :func:`os.listdir`/:mod:`os.path`) may not match. You should either
+        normalise this data, too, or change the default normalisation used by
+        :class:`Workflow`.
+
+        """
+        encoding = encoding or self._input_encoding
+        normalization = normalization or self._normalizsation
+        if not isinstance(text, unicode):
+            text = unicode(text, encoding)
+        return unicodedata.normalize(normalization, text)
+
+    def fold_to_ascii(self, text):
+        """Convert non-ASCII characters to closest ASCII equivalent.
+
+        .. versionadded:: 1.3
+
+        .. note:: This only works for a subset of European languages.
+
+        :param text: text to convert
+        :type text: ``unicode``
+        :returns: text containing only ASCII characters
+        :rtype: ``unicode``
+
+        """
+        if isascii(text):
+            return text
+        text = ''.join([ASCII_REPLACEMENTS.get(c, c) for c in text])
+        return unicode(unicodedata.normalize('NFKD',
+                       text).encode('ascii', 'ignore'))
+
+    def dumbify_punctuation(self, text):
+        """Convert non-ASCII punctuation to closest ASCII equivalent.
+
+        This method replaces "smart" quotes and n- or m-dashes with their
+        workaday ASCII equivalents. This method is currently not used
+        internally, but exists as a helper method for workflow authors.
+
+        .. versionadded: 1.9.7
+
+        :param text: text to convert
+        :type text: ``unicode``
+        :returns: text with only ASCII punctuation
+        :rtype: ``unicode``
+
+        """
+        if isascii(text):
+            return text
+
+        text = ''.join([DUMB_PUNCTUATION.get(c, c) for c in text])
+        return text
+
+    def _delete_directory_contents(self, dirpath, filter_func):
+        """Delete all files in a directory.
+
+        :param dirpath: path to directory to clear
+        :type dirpath: ``unicode`` or ``str``
+        :param filter_func function to determine whether a file shall be
+            deleted or not.
+        :type filter_func ``callable``
+
+        """
+        if os.path.exists(dirpath):
+            for filename in os.listdir(dirpath):
+                if not filter_func(filename):
+                    continue
+                path = os.path.join(dirpath, filename)
+                if os.path.isdir(path):
+                    shutil.rmtree(path)
+                else:
+                    os.unlink(path)
+                self.logger.debug('Deleted : %r', path)
+
+    def _load_info_plist(self):
+        """Load workflow info from ``info.plist``."""
+        # info.plist should be in the directory above this one
+        self._info = plistlib.readPlist(self.workflowfile('info.plist'))
+        self._info_loaded = True
+
+    def _create(self, dirpath):
+        """Create directory `dirpath` if it doesn't exist.
+
+        :param dirpath: path to directory
+        :type dirpath: ``unicode``
+        :returns: ``dirpath`` argument
+        :rtype: ``unicode``
+
+        """
+        if not os.path.exists(dirpath):
+            os.makedirs(dirpath)
+        return dirpath
+
+    def _call_security(self, action, service, account, *args):
+        """Call ``security`` CLI program that provides access to keychains.
+
+        May raise `PasswordNotFound`, `PasswordExists` or `KeychainError`
+        exceptions (the first two are subclasses of `KeychainError`).
+
+        :param action: The ``security`` action to call, e.g.
+                           ``add-generic-password``
+        :type action: ``unicode``
+        :param service: Name of the service.
+        :type service: ``unicode``
+        :param account: name of the account the password is for, e.g.
+            "Pinboard"
+        :type account: ``unicode``
+        :param password: the password to secure
+        :type password: ``unicode``
+        :param *args: list of command line arguments to be passed to
+                      ``security``
+        :type *args: `list` or `tuple`
+        :returns: ``(retcode, output)``. ``retcode`` is an `int`, ``output`` a
+                  ``unicode`` string.
+        :rtype: `tuple` (`int`, ``unicode``)
+
+        """
+        cmd = ['security', action, '-s', service, '-a', account] + list(args)
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                             stderr=subprocess.STDOUT)
+        stdout, _ = p.communicate()
+        if p.returncode == 44:  # password does not exist
+            raise PasswordNotFound()
+        elif p.returncode == 45:  # password already exists
+            raise PasswordExists()
+        elif p.returncode > 0:
+            err = KeychainError('Unknown Keychain error : %s' % stdout)
+            err.retcode = p.returncode
+            raise err
+        return stdout.strip().decode('utf-8')
diff --git a/src/workflow/workflow3.py b/src/workflow/workflow3.py
new file mode 100644
index 0000000..fcfb65a
--- /dev/null
+++ b/src/workflow/workflow3.py
@@ -0,0 +1,445 @@
+# encoding: utf-8
+#
+# Copyright (c) 2016 Dean Jackson <deanishe@deanishe.net>
+#
+# MIT Licence. See http://opensource.org/licenses/MIT
+#
+# Created on 2016-06-25
+#
+
+"""
+:class:`Workflow3` supports Alfred 3's new features.
+
+It is an Alfred 3-only version of :class:`~workflow.workflow.Workflow`.
+
+It supports setting :ref:`workflow-variables` and
+:class:`the more advanced modifiers <Modifier>` supported by Alfred 3.
+
+In order for the feedback mechanism to work correctly, it's important
+to create :class:`Item3` and :class:`Modifier` objects via the
+:meth:`Workflow3.add_item()` and :meth:`Item3.add_modifier()` methods
+respectively. If you instantiate :class:`Item3` or :class:`Modifier`
+objects directly, the current :class:`~workflow.workflow3.Workflow3`
+object won't be aware of them, and they won't be sent to Alfred when
+you call :meth:`~workflow.workflow3.Workflow3.send_feedback()`.
+"""
+
+from __future__ import print_function, unicode_literals, absolute_import
+
+import json
+import os
+import sys
+
+from .workflow import Workflow
+
+
+class Modifier(object):
+    """Alternative ``Item3`` values used while a modifier key is held.
+
+    Valid modifiers (i.e. values for ``key``) are:
+
+     * cmd
+     * alt
+     * shift
+     * ctrl
+     * fn
+
+    Attributes:
+        arg (unicode): Arg to pass to following action.
+        config (dict): Configuration sent along with the variables.
+        key (unicode): Modifier key (see above).
+        subtitle (unicode): Override item subtitle.
+        valid (bool): Override item validity.
+        variables (dict): Workflow variables set by this modifier.
+    """
+
+    def __init__(self, key, subtitle=None, arg=None, valid=None):
+        """Create a new :class:`Modifier`.
+
+        Prefer :meth:`Item3.add_modifier()` to instantiating this
+        class directly.
+
+        Args:
+            key (unicode): Modifier key, e.g. ``"cmd"``, ``"alt"`` etc.
+            subtitle (unicode, optional): Override default subtitle.
+            arg (unicode, optional): Argument to pass for this modifier.
+            valid (bool, optional): Override item's validity.
+        """
+        self.key = key
+        self.subtitle = subtitle
+        self.arg = arg
+        self.valid = valid
+        # Populated after construction
+        self.variables = {}
+        self.config = {}
+
+    def setvar(self, name, value):
+        """Set a workflow variable for this modifier.
+
+        Args:
+            name (unicode): Name of variable.
+            value (unicode): Value of variable.
+        """
+        self.variables[name] = value
+
+    def getvar(self, name, default=None):
+        """Look up a workflow variable set on this modifier.
+
+        Args:
+            name (unicode): Variable name.
+            default (None, optional): Value to return if variable is unset.
+
+        Returns:
+            unicode or ``default``: Value of variable if set or ``default``.
+        """
+        return self.variables.get(name, default)
+
+    @property
+    def obj(self):
+        """Modifier formatted for JSON serialization for Alfred 3.
+
+        Only values that are not ``None`` are included. If the
+        modifier has variables or config, ``arg`` is replaced with a
+        JSON string wrapping them (and the plain ``arg``, if any).
+
+        Returns:
+            dict: Modifier for serializing to JSON.
+        """
+        overrides = (
+            ('subtitle', self.subtitle),
+            ('arg', self.arg),
+            ('valid', self.valid),
+        )
+        result = {k: v for k, v in overrides if v is not None}
+
+        if not (self.variables or self.config):
+            return result
+
+        # Variables and config travel inside a JSON-encoded ``arg``.
+        wrapped = {}
+        if self.variables:
+            wrapped['variables'] = self.variables
+        if self.config:
+            wrapped['config'] = self.config
+        if self.arg is not None:
+            wrapped['arg'] = self.arg
+
+        result['arg'] = json.dumps({'alfredworkflow': wrapped})
+        return result
+
+
+class Item3(object):
+    """A single result row in Alfred 3's JSON feedback.
+
+    Instances are normally created through :meth:`Workflow3.add_item`
+    rather than directly; see :meth:`~Workflow3.add_item` for details
+    of the arguments.
+
+    Attributes:
+        title: Always included in the feedback as ``title``.
+        subtitle: Always included in the feedback as ``subtitle``.
+        valid: Always included in the feedback as ``valid``.
+        arg: Included as ``arg`` when not ``None``.
+        autocomplete: Included as ``autocomplete`` when not ``None``.
+        uid: Included as ``uid`` when not ``None``.
+        type: Included as ``type`` when not ``None``.
+        quicklookurl: Included as ``quicklookurl`` when not ``None``.
+        icon: Included as ``icon.path`` when not ``None``.
+        icontype: Included as ``icon.type`` when not ``None``.
+        largetext: Included as ``text.largetype`` when not ``None``.
+        copytext: Included as ``text.copy`` when not ``None``.
+        modifiers (dict): :class:`Modifier` objects by modifier key.
+        variables (dict): Workflow variables set by this item.
+        config (dict): Configuration sent along with the variables.
+    """
+
+    def __init__(self, title, subtitle='', arg=None, autocomplete=None,
+                 valid=False, uid=None, icon=None, icontype=None,
+                 type=None, largetext=None, copytext=None, quicklookurl=None):
+        """Store the item's values.
+
+        Takes the same arguments as :meth:`Workflow.add_item`, except
+        that ``modifier_subtitles`` is not supported.
+        """
+        # Values Alfred displays and acts on
+        self.title = title
+        self.subtitle = subtitle
+        self.arg = arg
+        self.valid = valid
+        self.autocomplete = autocomplete
+        self.uid = uid
+        self.type = type
+        self.quicklookurl = quicklookurl
+        self.icon = icon
+        self.icontype = icontype
+        self.largetext = largetext
+        self.copytext = copytext
+        # Populated after construction
+        self.modifiers = {}
+        self.variables = {}
+        self.config = {}
+
+    def setvar(self, name, value):
+        """Set a workflow variable on this item.
+
+        Args:
+            name (unicode): Name of variable.
+            value (unicode): Value of variable.
+        """
+        self.variables[name] = value
+
+    def getvar(self, name, default=None):
+        """Look up a workflow variable set on this item.
+
+        Args:
+            name (unicode): Variable name.
+            default (None, optional): Value to return if variable is unset.
+
+        Returns:
+            unicode or ``default``: Value of variable if set or ``default``.
+        """
+        return self.variables.get(name, default)
+
+    def add_modifier(self, key, subtitle=None, arg=None, valid=None):
+        """Register overrides to use while modifier ``key`` is held.
+
+        The new modifier starts with a copy of the item's current
+        variables and replaces any modifier already stored for ``key``.
+
+        Args:
+            key (unicode): Modifier key, e.g. ``"cmd"`` or ``"alt"``
+            subtitle (unicode, optional): Override item subtitle.
+            arg (unicode, optional): Input for following action.
+            valid (bool, optional): Override item validity.
+
+        Returns:
+            Modifier: Configured :class:`Modifier`.
+        """
+        modifier = Modifier(key, subtitle, arg, valid)
+        for name, value in self.variables.items():
+            modifier.setvar(name, value)
+        self.modifiers[key] = modifier
+        return modifier
+
+    @property
+    def obj(self):
+        """Item formatted for JSON serialization.
+
+        ``title``, ``subtitle`` and ``valid`` are always present. Other
+        keys are only added when the corresponding value has been set.
+        If the item has variables or config, ``arg`` is a JSON string
+        wrapping them (and the plain ``arg``, if any).
+
+        Returns:
+            dict: Data suitable for Alfred 3 feedback.
+        """
+        feedback = {
+            'title': self.title,
+            'subtitle': self.subtitle,
+            'valid': self.valid,
+        }
+
+        # Optional scalar values: only emitted when explicitly set.
+        optional = (
+            ('arg', self.arg),
+            ('autocomplete', self.autocomplete),
+            ('uid', self.uid),
+            ('type', self.type),
+            ('quicklookurl', self.quicklookurl),
+        )
+        for name, value in optional:
+            if value is not None:
+                feedback[name] = value
+
+        # Derived values: only emitted when non-empty. The ``arg``
+        # entry here is the JSON wrapper carrying variables and
+        # config, which supersedes the plain ``arg`` set above.
+        derived = (
+            ('text', self._text()),
+            ('icon', self._icon()),
+            ('arg', self._vars_and_config()),
+            ('mods', self._modifiers()),
+        )
+        for name, value in derived:
+            if value:
+                feedback[name] = value
+
+        return feedback
+
+    def _icon(self):
+        """Build the ``icon`` mapping for this item.
+
+        ``path`` and ``type`` are each included only if set.
+
+        Returns:
+            dict: Mapping for item `icon` (may be empty).
+        """
+        pairs = (('path', self.icon), ('type', self.icontype))
+        return {key: value for key, value in pairs if value is not None}
+
+    def _text(self):
+        """Build the ``text`` mapping (Large Type and copy text).
+
+        ``largetype`` and ``copy`` are each included only if set.
+
+        Returns:
+            dict: `text` mapping (may be empty)
+        """
+        pairs = (('largetype', self.largetext), ('copy', self.copytext))
+        return {key: value for key, value in pairs if value is not None}
+
+    def _vars_and_config(self):
+        """Build `arg` including workflow variables and configuration.
+
+        The result is a JSON object with a single ``alfredworkflow``
+        key holding ``variables``, ``config`` and the plain ``arg``
+        (each only if set).
+
+        Returns:
+            str: JSON string value for `arg`, or `None` if the item has
+                neither variables nor config.
+        """
+        if not (self.variables or self.config):
+            return None
+
+        wrapped = {}
+        if self.variables:
+            wrapped['variables'] = self.variables
+        if self.config:
+            wrapped['config'] = self.config
+        if self.arg is not None:
+            wrapped['arg'] = self.arg
+
+        return json.dumps({'alfredworkflow': wrapped})
+
+    def _modifiers(self):
+        """Build `mods` dictionary for JSON feedback.
+
+        Returns:
+            dict: Modifier mapping or `None`.
+        """
+        if not self.modifiers:
+            return None
+
+        return {key: mod.obj for key, mod in self.modifiers.items()}
+
+
+class Workflow3(Workflow):
+    """:class:`Workflow` subclass that emits Alfred 3's JSON feedback.
+
+    Attributes:
+        item_class (class): Class used to generate feedback items.
+        variables (dict): Top level workflow variables.
+    """
+
+    item_class = Item3
+
+    def __init__(self, **kwargs):
+        """Create a new :class:`Workflow3` object.
+
+        All keyword arguments are passed through to
+        :class:`~workflow.workflow.Workflow`; see its documentation.
+        """
+        Workflow.__init__(self, **kwargs)
+        self._rerun = 0
+        self.variables = {}
+
+    @property
+    def _default_cachedir(self):
+        """Alfred 3's default cache directory."""
+        base = os.path.expanduser(
+            '~/Library/Caches/com.runningwithcrayons.Alfred-3/'
+            'Workflow Data/')
+        return os.path.join(base, self.bundleid)
+
+    @property
+    def _default_datadir(self):
+        """Alfred 3's default data directory."""
+        base = os.path.expanduser(
+            '~/Library/Application Support/Alfred 3/Workflow Data/')
+        return os.path.join(base, self.bundleid)
+
+    @property
+    def rerun(self):
+        """How often (in seconds) Alfred should re-run the Script Filter."""
+        return self._rerun
+
+    @rerun.setter
+    def rerun(self, seconds):
+        """Interval at which Alfred should re-run the Script Filter.
+
+        Args:
+            seconds (int): Interval between runs.
+        """
+        self._rerun = seconds
+
+    def setvar(self, name, value):
+        """Set a "global" workflow variable.
+
+        These variables are always passed to downstream workflow objects.
+
+        If you have set :attr:`rerun`, these variables are also passed
+        back to the script when Alfred runs it again.
+
+        Args:
+            name (unicode): Name of variable.
+            value (unicode): Value of variable.
+        """
+        self.variables[name] = value
+
+    def getvar(self, name, default=None):
+        """Look up a "global" workflow variable set on this workflow.
+
+        Args:
+            name (unicode): Variable name.
+            default (None, optional): Value to return if variable is unset.
+
+        Returns:
+            unicode or ``default``: Value of variable if set or ``default``.
+        """
+        return self.variables.get(name, default)
+
+    def add_item(self, title, subtitle='', arg=None, autocomplete=None,
+                 valid=False, uid=None, icon=None, icontype=None,
+                 type=None, largetext=None, copytext=None, quicklookurl=None):
+        """Add an item to be output to Alfred.
+
+        See :meth:`~workflow.workflow.Workflow.add_item` for the main
+        documentation.
+
+        The key difference is that this method does not support the
+        ``modifier_subtitles`` argument. Call :meth:`~Item3.add_modifier()`
+        on the returned item instead.
+
+        Returns:
+            Item3: Alfred feedback item.
+        """
+        new_item = self.item_class(
+            title, subtitle, arg, autocomplete, valid, uid,
+            icon, icontype, type, largetext, copytext, quicklookurl)
+        self._items.append(new_item)
+        return new_item
+
+    @property
+    def obj(self):
+        """Feedback formatted for JSON serialization.
+
+        ``items`` is always present; ``variables`` and ``rerun`` are
+        only added when set (i.e. truthy).
+
+        Returns:
+            dict: Data suitable for Alfred 3 feedback.
+        """
+        feedback = {'items': [item.obj for item in self._items]}
+        if self.variables:
+            feedback['variables'] = self.variables
+        interval = self.rerun
+        if interval:
+            feedback['rerun'] = interval
+        return feedback
+
+    def send_feedback(self):
+        """Print stored items to console/Alfred as JSON."""
+        out = sys.stdout
+        json.dump(self.obj, out)
+        out.flush()