Source code for wltp.pandel

#! python
#-*- coding: utf-8 -*-
#
# Copyright 2013-2014 European Commission (JRC);
# Licensed under the EUPL (the 'Licence');
# You may not use this work except in compliance with the Licence.
# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl
"""A :dfn:`pandas-model` is a tree of strings, numbers, sequences, dicts, pandas instances and resolvable
URI-references, implemented by :class:`Pandel`. """

from __future__ import division, unicode_literals

import abc
from collections import Mapping, Sequence
from collections import OrderedDict, namedtuple
import contextlib
import numbers
import re

from jsonschema import Draft3Validator, Draft4Validator, ValidationError
import jsonschema
from jsonschema.exceptions import SchemaError
from pandas.core.generic import NDFrame
from six import string_types

import numpy as np
import pandas as pd


try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin



class ModelOperations(namedtuple('ModelOperations', 'inp out conv')):
    """
    Customization functions for traversing, I/O, and converting self-or-descendant branch (sub)model values.

    It is an immutable 3-field tuple (``inp``, ``out``, ``conv``); use :meth:`_replace()`
    to derive a modified copy.
    """
    def __new__(cls, inp=None, out=None, conv=None):
        """

        :param list inp:    the `args-list` to :meth:`Pandel._read_branch()`

        :param out:         The args to :meth:`Pandel._write_branch()`, that may be specified either as:

                            * an `args-list`, that will apply for all model data-types (lists, dicts & pandas),
                            * a map of ``type`` --> ``args-list``, where the ``None`` key is the *catch-all* case,
                            * a function returning the `args-list` for some branch-value,
                              with signature: ``def get_write_branch_args(branch)``.

        :param conv:        The conversion-functions (:dfn:`convertors`) for the various model's data-types.
                            The convertors have signature ``def convert(branch)``, and they may be
                            specified either as:

                            * a map of ``(from_type, to_type)`` --> ``conversion_func()``, where the ``None`` key
                              is the *catch-all* case,
                            * a "master-switch" function returning the appropriate convertor
                              depending on the requested conversion.
                              The master-function's signature is ``def get_convertor(from_branch, to_branch)``.

                            The minimum convertors demanded by :class:`Pandel` are (at least, check the code for more):

                            * DataFrame  <--> dict
                            * Series     <--> dict
                            * ndarray    <--> list
        """

        ## All fields default to ``None``, so ``ModelOperations()`` is a valid "empty" context.
        return super(ModelOperations, cls).__new__(cls, inp, out, conv)

    def choose_out_args(self, branch):
        """
        Placeholder for selecting the write `args-list` for `branch` out of :attr:`out`.

        Not implemented yet: it always returns ``None``.
        """
        pass

    def choose_convertor(self, from_type, to_type):
        """
        Placeholder for selecting the convertor for a ``from_type`` --> ``to_type`` conversion out of :attr:`conv`.

        Not implemented yet: it always returns ``None``.
        """
        pass

class PathMaps(object):
    """
    Cascade prefix-mapping of json-paths to any values (here :class:`ModelOperations`).

    Currently an empty placeholder: it defines no behavior of its own.
    """
    pass



## Base-class for :class:`PandelVisitor`, created from an empty meta-schema and without any rules;
#  the actual Draft3/4 rules and meta-schema are installed per-instance in ``PandelVisitor.__init__()``.
ValidatorBase = jsonschema.validators.create({}) # Workaround https://github.com/Julian/jsonschema/issues/178
class PandelVisitor(ValidatorBase):
    """
    A customized :class:`Draft4Validator` supporting instance-trees with pandas and numpy objects, natively.

    Any pandas or numpy instance (for example ``obj``) is treated like that:

    +----------------------------+-----------------------------------------+
    |        Python Type         |     JSON Equivalence                    |
    +============================+=========================================+
    | :class:`pandas.DataFrame`  | as ``object`` *json-type*, with         |
    |                            | ``obj.columns`` as *keys*, and          |
    |                            | ``obj[col].values`` as *values*         |
    +----------------------------+-----------------------------------------+
    | :class:`pandas.Series`     | as ``object`` *json-type*, with         |
    |                            | ``obj.index`` as *keys*, and            |
    |                            | ``obj.values`` as *values*              |
    +----------------------------+-----------------------------------------+
    | :class:`np.ndarray`,       | as ``array`` *json-type*                |
    | :class:`list`,             |                                         |
    | :class:`tuple`             |                                         |
    +----------------------------+-----------------------------------------+

    Note that the value of each DataFrame column is an ``ndarray`` instance.

    The simplest validations of an object or a pandas-instance is like this:

        >>> import pandas as pd

        >>> schema = {
        ...     'type': 'object',
        ... }
        >>> pv = PandelVisitor(schema)

        >>> pv.validate({'foo': 'bar'})
        >>> pv.validate(pd.Series({'foo': 1}))
        >>> pv.validate([1,2])                                       ## A sequence is invalid here.
        Traceback (most recent call last):
        ...
        jsonschema.exceptions.ValidationError: [1, 2] is not of type 'object'
        <BLANKLINE>
        Failed validating 'type' in schema:
            {'type': 'object'}
        <BLANKLINE>
        On instance:
            [1, 2]


    Or demanding specific properties with ``required`` and no ``additionalProperties``:

        >>> schema = {
        ...     'type':     'object',
        ...     'required': ['foo'],
        ...    'additionalProperties': False,
        ...    'properties': {
        ...        'foo': {}
        ...    }
        ... }
        >>> pv = PandelVisitor(schema)

        >>> pv.validate(pd.Series({'foo': 1}))
        >>> pv.validate(pd.Series({'foo': 1, 'bar': 2}))             ## Additional 'bar' is present!
        Traceback (most recent call last):
        ...
        jsonschema.exceptions.ValidationError: Additional properties are not allowed ('bar' was unexpected)
        <BLANKLINE>
        Failed validating 'additionalProperties' in schema:
            {'additionalProperties': False,
             'properties': {'foo': {}},
             'required': ['foo'],
             'type': 'object'}
        <BLANKLINE>
        On instance:
            bar    2
            foo    1
            dtype: int64

        >>> pv.validate(pd.Series({}))                               ## Required 'foo' missing!
        Traceback (most recent call last):
        ...
        jsonschema.exceptions.ValidationError: 'foo' is a required property
        <BLANKLINE>
        Failed validating 'required' in schema:
            {'additionalProperties': False,
             'properties': {'foo': {}},
             'required': ['foo'],
             'type': 'object'}
        <BLANKLINE>
        On instance:
            Series([], dtype: float64)

    """
    def __init__(self, schema, types=(), resolver=None, format_checker=None, skip_meta_validation=False):
        """
        :param schema:                  the json-schema to validate instances against
        :param types:                   extra json-type overrides, relayed to the base validator
        :param resolver:                relayed to the base validator
        :param format_checker:          relayed to the base validator
        :param skip_meta_validation:    when true, the `schema` itself is not checked against its meta-schema
        :raises SchemaError:            on the first meta-validation error of the `schema`
        """
        super(PandelVisitor, self).__init__(schema, types, resolver, format_checker)

        self._types.update({
            "number":   (numbers.Number, np.number), ## type(np.nan) == builtins.float! FIXME, are numpy-numbers --> json-types OK??
            "integer":  (int, np.integer),
            "boolean":  (bool, np.bool_), #, np.bool8),
            "array":    (list, tuple, np.ndarray),
            "object" :  (dict, pd.DataFrame, pd.Series)
        })

        ## Setup Draft4/3 validation
        #
        # Meta-validate schema
        #    with original validators (and not self)
        #    because this class inherits an empty (schema/rules) validator.
        validator_class = jsonschema.validators.validator_for(schema)  ## Falls back to 'Draft4' if no `$schema` exists.
        ## Work on a copy, so that the rules of the original validator-class stay intact.
        self.VALIDATORS = validator_class.VALIDATORS.copy()
        self.META_SCHEMA = validator_class.META_SCHEMA
        self.VALIDATORS.update({
            'items':                PandelVisitor._rule_items,
            'additionalProperties': PandelVisitor._rule_additionalProperties,
            'additionalItems':      PandelVisitor._rule_additionalItems,
        })
        if validator_class == Draft3Validator:
            self.VALIDATORS.update({
                'properties':           PandelVisitor._rule_properties_draft3,
            })
        else:
            self.VALIDATORS.update({
                'properties':           PandelVisitor._rule_properties_draft4,
                'required':             PandelVisitor._rule_required_draft4,
            })

        ## Stack of resolution-scopes to restore when leaving schemas having an ``id``.
        self.old_scopes = []

        ## Cannot use ``validator_class.check_schema()`` because
        #    need to relay my args to ``validator_class.__init__()``.
        # Even better use myself, that i'm faster (kind of...).
        if not skip_meta_validation:
            for error in self.iter_errors(schema, validator_class.META_SCHEMA):
                raise SchemaError.create_from(error)

    ##################################
    ############ Visiting ###########
    ##################################

    def _get_iprop(self, instance, prop):
        """Return the value of property `prop`, unwrapping pandas-objects into their ``values``."""
        val = instance[prop]
        if isinstance(val, NDFrame):
            val = val.values
        return val

    def _is_iprop_in(self, instance, prop):
        """Tell whether `prop` is among the keys of the `instance`."""
        return prop in instance.keys()

    def _iter_iprop_names(self, instance):
        """Return the keys of the `instance`."""
        return instance.keys()

    def _iter_iprop_pairs(self, instance):
        """
        Return the (key, value) pairs of the `instance`, where DataFrame-columns are given as their ``values``.

        ``items()`` is used instead of ``iteritems()``, since the latter was removed in pandas-2.0.
        """
        if isinstance(instance, pd.DataFrame):
            return ((k, v.values) for k, v in instance.items())
        return instance.items()

    def _iter_iitems(self, instance):
        """Return the elements of an ``array`` instance (the instance itself)."""
        return instance



    def iter_errors(self, instance, _schema=None):
        """
        Lazily yield all validation-errors of `instance` against `_schema` (or the schema given on construction).

        When the schema has a non-empty ``id``, the resolver's resolution-scope is joined with it
        for the duration of the visit, and it is restored afterwards.
        """
        if _schema is None:
            _schema = self.schema

        scope = _schema.get("id")
        if scope:
            old_scope = self.resolver.resolution_scope
            self.old_scopes.append(old_scope)
            self.resolver.resolution_scope = urljoin(old_scope, scope)

        ## The ``finally`` restores the scope even when the consumer abandons this generator
        #  on the first error (ie like the meta-validation in the constructor does).
        try:
            ref = _schema.get("$ref")
            if ref is not None:
                validators = [("$ref", ref)]
            else:
                validators = self._iter_iprop_pairs(_schema)

            for k, v in validators:
                validator = self.VALIDATORS.get(k)
                if validator is None:
                    continue

                errors = validator(self, v, instance, _schema) or ()
                for error in errors:
                    # set details if not already set by the called fn
                    error._set(
                        validator=k,
                        validator_value=v,
                        instance=instance,
                        schema=_schema,
                    )
                    if k != "$ref":
                        error.schema_path.appendleft(k)
                    yield error
        finally:
            if scope:
                ## Popping (instead of using the local) ends-up with the outermost scope
                #  whatever the order that nested generators get closed.
                self.resolver.resolution_scope = self.old_scopes.pop()


    ##################################
    ############# Rules ##############
    ##################################

    def _rule_properties_draft4(self, sprops, instance, schema):
        """Descend into those properties of an ``object`` instance that have a subschema in `sprops`."""
        if not self.is_type(instance, "object"):
            return

        iprops = set(self._iter_iprop_names(instance))
        for prop in iprops & set(sprops.keys()):
            subschema = sprops[prop]
            for error in self.descend(
                self._get_iprop(instance, prop),
                subschema,
                path=prop,
                schema_path=prop,
            ):
                yield error

    def _rule_properties_draft3(self, properties, instance, schema):
        """Like the draft4 rule, but also reporting missing properties whose subschema is ``required``."""
        if not self.is_type(instance, "object"):
            return

        for prop, subschema in self._iter_iprop_pairs(properties):
            if self._is_iprop_in(instance, prop):
                for error in self.descend(
                    self._get_iprop(instance, prop),
                    subschema,
                    path=prop,
                    schema_path=prop,
                ):
                    yield error
            elif subschema.get("required", False):
                error = ValidationError("%r is a required prop" % prop)
                error._set(
                    validator="required",
                    validator_value=subschema["required"],
                    instance=instance,
                    schema=schema,
                )
                error.path.appendleft(prop)
                error.schema_path.extend([prop, "required"])
                yield error

    def _rule_items(self, items, instance, schema):
        """Validate the elements of an ``array`` against a single schema, or positionally against a schema-list."""
        if not self.is_type(instance, "array"):
            return

        if self.is_type(items, "object"):
            for index, item in enumerate(self._iter_iitems(instance)):
                for error in self.descend(item, items, path=index):
                    yield error
        else:
            for (index, item), subschema in zip(enumerate(self._iter_iitems(instance)), items):
                for error in self.descend(
                    item, subschema, path=index, schema_path=index,
                ):
                    yield error

    def _rule_additionalProperties(self, aP, instance, schema):
        """
        Validate (or forbid, when `aP` is false) the properties of an ``object`` instance that are
        neither listed in ``properties`` nor matched by any of the ``patternProperties``.
        """
        if not self.is_type(instance, 'object'):
            return

        sprops = schema.get("properties", {})
        patterns = "|".join(schema.get("patternProperties", {}))
        extras = set()
        for iprop in self._iter_iprop_names(instance):
            ## A property is "extra" only if it is BOTH undeclared AND unmatched by patterns.
            if iprop in sprops:
                continue
            if patterns and re.search(patterns, iprop):
                continue
            extras.add(iprop)

        if extras:
            if self.is_type(aP, "object"):
                for extra in extras:
                    for error in self.descend(self._get_iprop(instance, extra), aP, path=extra):
                        yield error
            elif not aP:
                yield ValidationError(
                    "Additional properties are not allowed (%s %s unexpected)" %
                    jsonschema._utils.extras_msg(extras))

    def _rule_additionalItems(self, aI, instance, schema):
        """
        Validate (or forbid, when `aI` is false) the elements of an ``array`` instance lying beyond
        the positional ``items`` schema-list; not applicable when ``items`` is a single schema.
        """
        if (
            not self.is_type(instance, "array") or
            self.is_type(schema.get("items", {}), "object")
        ):
            return

        len_items = len(schema.get("items", []))
        if self.is_type(aI, "object"):
            for index, item in enumerate(instance[len_items:], start=len_items):
                for error in self.descend(item, aI, path=index):
                    yield error
        elif not aI and len(instance) > len_items:
            yield ValidationError(
                "Additional items are not allowed (%s %s unexpected)" %
                jsonschema._utils.extras_msg(instance[len_items:])
            )



    def _rule_required_draft4(self, required, instance, schema):
        """Report every name in `required` that is missing from an ``object`` instance."""
        if self.is_type(instance, 'object'):
            for sprop in required:
                if not self._is_iprop_in(instance, sprop):
                    yield ValidationError("%r is a required property" % sprop)



[docs]class Pandel(object):
    """
    Builds, validates and stores a *pandas-model*, a mergeable stack of JSON-schema abiding trees of
    strings and numbers, assembled with

    * sequences,
    * dictionaries,
    * :class:`pandas.DataFrame`,
    * :class:`pandas.Series`, and
    * URI-references to other model-trees.



    .. _pandel-overview:

    **Overview**

    The **making of a model** involves, among others, schema-validating, reading :dfn:`subtree-branches`
    from URIs, cloning, converting and merging multiple :dfn:`sub-models` in a single :dfn:`unified-model` tree,
    without side-effecting given input.
    All these happen in 4+1 steps::

                       ....................... Model Construction .................
          ------------ :  _______    ___________                                  :
         / top_model /==>|Resolve|->|PreValidate|-+                               :
         -----------'  : |___0___|  |_____1_____| |                               :
          ------------ :  _______    ___________  |   _____    ________    ______ :   --------
         / base-model/==>|Resolve|->|PreValidate|-+->|Merge|->|Validate|->|Curate|==>/ model /
         -----------'  : |___0___|  |_____1_____|    |_ 2__|  |___3____|  |__4+__|:  -------'
                       ............................................................

    All steps are executed "lazily" using generators (with :keyword:`yield`).
    Before proceeding to the next step, the previous one must have completed successfully.
    That way, any ad-hoc code in building-step-5(*curation*), for instance, will not suffer a horrible death
    due to badly-formed data.

    [TODO] The **storing of a model** simply involves distributing model parts into different files and/or formats,
    again without side-effecting the unified-model.



    .. _pandel-building-model:

    **Building model**

    Here is a detailed description of each building-step:

    1.  :meth:`_resolve` and substitute any `json-references <http://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03>`_
        present in the submodels with content-fragments fetched from the referred URIs.
        The submodels are **cloned** first, to avoid side-effecting them.

        Although by default a combination of *JSON* and *CSV* files is expected, this can be customized,
        either by the content in the json-ref, within the model (see below), or
        as :ref:`explained  <pandel-customization>` below.

        The **extended json-refs syntax** supported provides for passing arguments into :meth:`_read_branch()`
        and :meth:`_write_branch()` methods.  The syntax is easier to explain by showing what
        the default :attr:`_global_cntxt` corresponds to, for a ``DataFrame``::

            {
              "$ref": "http://example.com/example.json#/foo/bar",
              "$inp": ["AUTO"],
              "$out": ["CSV", "encoding=UTF-8"]
            }

        And here what is required to read and (later) store into a HDF5 local file with a predefined name::

            {
              "$ref": "file://./filename.hdf5",
              "$inp": ["AUTO"],
              "$out": ["HDF5"]
            }

        .. Warning:: Step NOT IMPLEMENTED YET!


    2.  Loosely :meth:`_prevalidate` each sub-model separately with :term:`json-schema`,
        where any pandas-instances (DataFrames and Series) are left as is.
        It is the duty of the developer to ensure that the prevalidation-schema is *loose enough* that
        it allows for various submodel-forms, prior to merging, to pass.


    3.  Recursively **clone**  and :meth:`_merge` sub-models in a single unified-model tree.
        Branches from sub-models higher in the stack override the respective ones from the sub-models below,
        recursively.  Different object types need to be **converted** appropriately (ie. merging
        a ``dict`` with a ``DataFrame`` results into a ``DataFrame``, so the dictionary has to convert
        to dataframe).

        The required **conversions** into pandas classes can be customized as :ref:`explained  <pandel-customization>`
        below.  Series and DataFrames cannot merge together, and Sequences do not merge
        with any other object-type (themselves included), they just "overwrite".

        The default convertor-functions defined both for submodels and models are listed in the following table:

        ============ ========== =========================================
            From:       To:                  Method:
        ============ ========== =========================================
         dict        DataFrame  ``pd.DataFrame``  (the constructor)
         DataFrame   dict       ``lambda df: df.to_dict('list')``
         dict        Series     ``pd.Series``     (the constructor)
         Series      dict       :meth:`lambda sr: sr.to_dict()`
        ============ ========== =========================================


    4.  Strictly json-:meth:`_validate` the unified-model (ie enforcing ``required`` schema-rules).

        The required **conversions** from pandas classes can be customized as :ref:`explained  <pandel-customization>`
        below.

        The default convertor-functions are the same as above.


    5.  (Optionally) Apply the :meth:`_curate` functions on the model to enforce dependencies and/or any
        ad-hoc generation-rules among the data.  You can think of bash-like expansion patterns,
        like ``${/some/path:=$HOME}`` or expressions like ``%len(../other/path)``.



    .. _pandel-storing:

    **Storing model**

    When storing model-parts, if unspecified, the filenames to write into will be deduced from the jsonpointer-path
    of the ``$out``'s parent, by substituting "strange" chars with underscores(``_``).

    .. Warning:: Functionality NOT IMPLEMENTED YET!



    .. _pandel-customization:

    **Customization**

    Some operations within steps (namely *conversion* and *IO*) can be customized by the following means
    (from lower to higher precedence):

    a.  The global-default :class:`ModelOperations` instance on the :attr:`_global_cntxt`,
        applied on both submodels and unified-model.

        For example to channel the whole reading/writing of models through
        `HDF5 <http://pandas.pydata.org/pandas-docs/stable/io.html#io-hdf5>`_ data-format, it would suffice
        to modify the :attr:`_global_cntxt` like that::

            pm = FooPandelModel()                        ## some concrete model-maker
            io_args = ["HDF5"]
            pm.mod_global_operations(inp=io_args, out=io_args)

    b.  [TODO] Extra-properties on the json-schema applied on both submodels and unified-model for the specific path defined.
        The supported properties are the non-functional properties of :class:`ModelOperations`.

    d.  Specific-properties regarding *IO* operations within each submodel - see the *resolve* building-step,
        above.

    c.  Context-maps of ``json_paths`` --> :class:`ModelOperations` instances, installed by :meth:`add_submodel()` and
        :attr:`unified_contexts` on the model-maker.  They apply to self-or-descendant subtree of each model.

        The `json_path` is a strings obeying a simplified :term:`json-pointer` syntax (no char-normalizations yet),
        ie ``/some/foo/1/pointer``.  An empty-string(``''``) matches all model.

        When multiple convertors match for a model-value, the selected convertor to be used is the most specific one
        (the one with longest prefix).  For instance, on the model::

            [ { "foo": { "bar": 0 } } ]


        all of the following would match the ``0`` value:

        - the global-default :attr:`_global_cntxt`,
        - ``/``, and
        - ``/0/foo``

        but only the last's context-props will be applied.



    .. _Attributes:

    **Attributes**

    .. Attribute:: model

        The model-tree that will receive the merged submodels after :meth:`build()` has been invoked.
        Depending on the submodels, the top-value can be any of the supported model data-types.


    .. Attribute:: _submodel_tuples

        The stack of (``submodel``, ``path_ops``) tuples. The list's 1st element is the :dfn:`base-model`,
        the last one, the :dfn:`top-model`.  Use the :meth:`add_submodel()` to build this list.


    .. Attribute:: _global_cntxt

        A :class:`ModelOperations` instance acting as the global-default context for the unified-model and all submodels.
        Use :meth:`mod_global_operations()` to modify it.


    .. Attribute:: _curate_funcs

        The sequence of *curate* functions to be executed as the final step by :meth:`_curate()`.
        They are "normal" functions (not generators) with signature::

            def curate_func(model_maker):
                pass      ## ie: modify ``model_maker.model``.

        Better specify this list of functions on construction time.


    .. Attribute:: _errored

            An internal boolean flag that becomes ``True`` if any build-step has failed,
            to halt proceeding to the next one.  It is ``None`` if build has not started yet.


    .. _pandel-examples:

    **Examples**

    The basic usage requires to subclass your own model-maker, just so that a *json-schema*
    is provided for both validation-steps, 2 & 4:

        >>> from collections import OrderedDict as od                           ## Json is better with stable keys-order

        >>> class MyModel(Pandel):
        ...     def _get_json_schema(self, is_prevalidation):
        ...         return {                                                    ## Define the json-schema.
        ...             '$schema': 'http://json-schema.org/draft-04/schema#',
        ...             'required': [] if is_prevalidation else ['a', 'b'],     ## Prevalidation is more loose.
        ...             'properties': {
        ...                 'a': {'type': 'string'},
        ...                 'b': {'type': 'number'},
        ...                 'c': {'type': 'number'},
        ...             }
        ...         }


    Then you can instantiate it and add your submodels:

        >>> mm = MyModel()
        >>> mm.add_submodel(od(a='foo', b=1))                                   ## submodel-1 (base)
        >>> mm.add_submodel(pd.Series(od(a='bar', c=2)))                        ## submodel-2 (top-model)


    You then have to build the final unified-model (any validation errors would be reported at this point):

        >>> mdl = mm.build()

    Note that you can also access the unified-model in the :attr:`model` attribute.
    You can now interrogate it:

        >>> mdl['a'] == 'bar'                       ## Value overridden by top-model
        True
        >>> mdl['b'] == 1                           ## Value left intact from base-model
        True
        >>> mdl['c'] == 2                           ## New value from top-model
        True


    Lets try to build with invalid submodels:

        >>> mm = MyModel()
        >>> mm.add_submodel({'a': 1})               ## According to the schema, this should have been a string,
        >>> mm.add_submodel({'b': 'string'})        ## and this one, a number.

        >>> sorted(mm.build_iter(), key=lambda ex: ex.message)                   ## Fetch a list with all validation errors.
        [<ValidationError: "'string' is not of type 'number'">,
         <ValidationError: "1 is not of type 'string'">,
         <ValidationError: 'Gave-up building model after step 1.prevalidate (out of 4).'>]

        >>> mdl = mm.model
        >>> mdl is None                                     ## No model constructed, failed before merging.
        True


    And lets try to build with valid submodels but invalid merged-one:

        >>> mm = MyModel()
        >>> mm.add_submodel({'a': 'a str'})
        >>> mm.add_submodel({'c': 1})

        >>> sorted(mm.build_iter(), key=lambda ex: ex.message)        ## Missing required('b') prop rom merged-model.
        [<ValidationError: "'b' is a required property">,
         <ValidationError: 'Gave-up building model after step 3.validate (out of 4).'>]

    """

    __metaclass__ = abc.ABCMeta

[docs]    def __init__(self, curate_funcs=()):
        """

        :param sequence curate_funcs:   See :attr:`_curate_funcs`.
        """

        self.model          = None
        self._errored       = None
        self._submodel_tuples   = []
        self._curate_funcs  = curate_funcs
        self._global_cntxt  = []
        self._unified_contexts = None


[docs]    def mod_global_operations(self, operations=None, **cntxt_kwargs):
        """

        Since it is the fall-back operation for *conversions* and *IO* operation, it must exist and have
        all its props well-defined for the class to work correctly.

        :param ModelOperations operations:  Replaces values of the installed context with
                                            non-empty values from this one.
        :param cntxt_kwargs:                Replaces the keyworded-values on the existing `operations`.
                                            See :class:`ModelOperations` for supported keywords.
        """
        if operations:
            assert isinstance(operations, ModelOperations), (type(operations), operations)
            self._global_cntxt = operations
        self._global_cntxt._replace(**cntxt_kwargs)


    @property
    def unified_contexts(self):
        """
        A map of ``json_paths`` --> :class:`ModelOperations` instances acting on the unified-model.
        """
        return self._unified_contexts
    @unified_contexts.setter
    def unified_contexts(self, path_ops):
        assert isinstance(path_ops, Mapping), (type(path_ops), path_ops)
        self._unified_contexts = path_ops

[docs]    def _select_context(self, path, branch):
        """
        Finds which context to use while visiting model-nodes, by enforcing the precedance-rules described
        in the :ref:`Customizations  <pandel-customization>`.

        :param str path:    the branch's jsonpointer-path
        :param str branch:  the actual branch's node
        :return:            the selected :class:`ModelOperations`
        """
        pass


[docs]    def _read_branch(self):
        """
        Reads model-branches during *resolve* step.
        """
        pass # TODO: impl read_branch()
[docs]    def _write_branch(self):
        """
        Writes model-branches during *distribute* step.
        """
        pass # TODO: impl write_branch()

[docs]    def _get_json_schema(self, is_prevalidation):
        """
        :return: a json schema, more loose when `prevalidation` for each case
        :rtype: dictionary
        """
        pass

    def _rule_AdditionalProperties(self, validator, aP, required, instance, schema):
        properties = schema.get("properties", {})
        patterns = "|".join(schema.get("patternProperties", {}))
        extras = set()
        for prop in instance:
            if prop not in properties:
                if patterns and re.search(patterns, prop):
                    continue
                extras.add(prop)

        if validator.is_type(aP, "object"):
            for extra in extras:
                for error in validator.descend(instance[extra], aP, path=extra):
                    yield error
        elif not aP and extras:
            error = "Additional properties are not allowed (%s %s unexpected)"
            yield ValidationError(error % jsonschema._utils.extras_msg(extras))


    def _rule_Required(self, validator, required, instance, schema):
        if (validator.is_type(instance, "object") or
                validator.is_type(instance, "DataFrame") or
                 validator.is_type(instance, "Series")):
            for prop in required:
                if prop not in instance:
                    yield ValidationError("%r is a required property" % prop)


    def _get_model_validator(self, schema):
        """
        Builds a :class:`Draft4Validator` for `schema`, extended with the ``ndarray``, ``DataFrame`` and
        ``Series`` type-names and with :meth:`_rule_Required` registered under the ``DataFrame`` rule-name.

        :param schema:  the json-schema to validate models against
        :return:        the configured validator
        """

        validator = Draft4Validator(schema)
        validator._types.update({"ndarray": np.ndarray, "DataFrame" : pd.DataFrame, 'Series':pd.Series})

        ## Extend a per-instance copy of the rules, because ``VALIDATORS`` is a class-attribute:
        #  mutating it in-place would leak this bound-method into all Draft4 validators.
        # NOTE(review): the rule is registered as 'DataFrame' (not 'required'), so it fires only
        #  for schemas containing a "DataFrame" keyword -- confirm this is the intention.
        rules = dict(validator.VALIDATORS)
        rules['DataFrame'] = self._rule_Required
        validator.VALIDATORS = rules

        return validator


    def _validate_json_model(self, schema, mdl):
        validator = self._get_model_validator(schema)
        for err in validator.iter_errors(mdl):
            self._errored = True
            yield err

[docs]    def _clone_and_merge_submodels(self, a, b, path=''):
        """' Recursively merge b into a, cloning both. """

        if isinstance(a, pd.DataFrame) or isinstance(b, pd.DataFrame):
            a = pd.DataFrame() if a is None else pd.DataFrame(a)
            b = pd.DataFrame() if b is None else pd.DataFrame(b)

            a.update(b) #, 'outer') NOT IMPL YET
            extra_b_items = list(set(b.columns) - set(a.columns))
            a[extra_b_items] = b[extra_b_items]

        elif isinstance(a, pd.Series) or isinstance(b, pd.Series):
            a = pd.Series() if a is None else pd.Series(a)
            b = pd.Series() if b is None else pd.Series(b)
            #a.update(b) # DOES NOT append extra keys!
            a = b.combine_first(a)

        elif isinstance(a, Mapping) or isinstance(b, Mapping):
            a = OrderedDict() if a is None else OrderedDict(a)
            b = OrderedDict() if b is None else OrderedDict(b)

            for key in b:
                b_val = b[key]
                if key in a:
                    val = self._clone_and_merge_submodels(a[key], b_val, '%s/%s'%(path, key))
                else:
                    val = b_val
                a[key] = val

        elif (isinstance(a, Sequence) and not isinstance(a, string_types)) or \
                (isinstance(b, Sequence) and not isinstance(b, string_types)):
            if not b is None:
                val = b
            else:
                val = a

            l = list()
            for (i, item) in enumerate(val):
                l.append(self._clone_and_merge_submodels(item, None, '%s[%i]'%(path, i)))
            a = l

        elif a is None and b is None:
            raise ValidationError("Cannot merge Nones at path(%s)!" % path)

        else:
            if not b is None:
                a = b

        return a

[docs]    def _resolve(self):
        "Step-1"
        if False:
            yield

[docs]    def _prevalidate(self):
        "Step-1"
        for (mdl, path_ops) in self._submodel_tuples:
            schema = self._get_json_schema(is_prevalidation=True)
            for err in self._validate_json_model(schema, mdl):
                yield err

[docs]    def _merge(self):
        "Step-2"
        for (mdl, path_ops) in self._submodel_tuples:
            self.model = self._clone_and_merge_submodels(self.model, mdl)
        if False:
            yield       ## Just mark method as generator.

[docs]    def _validate(self):
        "Step-3"
        schema = self._get_json_schema(is_prevalidation=False)
        for err in self._validate_json_model(schema, self.model):
            yield err

[docs]    def _curate(self):
        "Step-4:  Invokes any curate-functions found in :attr:`_curate_funcs`."
        if False:
            yield       ## To be overriden, just mark method as generator.
        for curfunc in self._curate_funcs:
            curfunc(self)

[docs]    def add_submodel(self, model, path_ops=None):
        """
        Pushes on top a submodel, along with its context-map.

        :param model:               the model-tree (sequence, mapping, pandas-types)
        :param dict path_ops:       A map of ``json_paths`` --> :class:`ModelOperations` instances acting on the
                                    unified-model.  The `path_ops` may often be empty.

        **Examples**

        To change the default DataFrame --> dictionary convertor for a submodel, use the following:

            >>> mdl = {'foo': 'bar'}
            >>> submdl = ModelOperations(mdl, conv={(pd.DataFrame, dict): lambda df: df.to_dict('record')})

        """

        if path_ops:
            assert isinstance(path_ops, Mapping), (type(path_ops), path_ops)

        return self._submodel_tuples.append((model, path_ops))


[docs]    def build_iter(self):
        """
        Iteratively build model, yielding any problems as :class:`ValidationError` instances.

        For debugging, the unified model at :attr:`model` my contain intermediate results at any time,
        even if construction has failed.  Check the :attr:`_errored` flag if neccessary.
        """

        steps = [
            (self._prevalidate, 'prevalidate'),
            (self._merge,       'merge'),
            (self._validate,    'validate'),
            (self._curate,      'curate'),
        ]
        self._errored = False
        self.model = None

        for (i, (step, step_name)) in enumerate(steps, start=1):
            try:
                for err in step():
                    yield err
            except ValidationError as ex:
                self._errored = True
                yield ex

            except Exception as ex:
                self._errored = True

                nex = ValidationError('Model step-%i(%s) failed due to: %s'%(i, step_name, ex))
                nex.cause = ex

                yield nex

            if self._errored:
                yield ValidationError('Gave-up building model after step %i.%s (out of %i).'%(i, step_name, len(steps)))
                break

[docs]    def build(self):
        """
        Attempts to build the model by exhausting :meth:`build_iter()`, or raises its 1st error.

        Use this method when you do not want to waste time getting the full list of errors.
        """

        err = next(self.build_iter(), None)
        if err:
            raise err

        return self.model


class JsonPointerException(Exception):
    """Raised by the json-pointer functions for malformed or unresolvable pointers."""

def jsonpointer_parts(jsonpointer):
    """
    Iterates over the ``jsonpointer`` parts.

    The escape-sequences ``~1`` and ``~0`` of each part are decoded into ``/`` and ``~``.
    An empty (or None) pointer has no parts.  Since this is a generator, a malformed
    pointer is reported only when the iteration begins.

    :param str jsonpointer: a jsonpointer to resolve within document
    :return: a generator over the parts of the json-pointer
    :raises JsonPointerException: if a non-empty pointer does not start with ``/``

    :author: Julian Berman, ankostis
    """

    if not jsonpointer:
        return

    parts = jsonpointer.split(u"/")
    if parts.pop(0) != '':
        raise JsonPointerException('Location must starts with /')

    for part in parts:
        ## Decode ``~1`` before ``~0``, so that an escaped ``~01`` ends up as ``~1``.
        yield part.replace(u"~1", u"/").replace(u"~0", u"~")

## Sentinel for "no default given", since None is a legitimate default-value.
_scream = object()


def resolve_jsonpointer(doc, jsonpointer, default=_scream):
    """
    Resolve a ``jsonpointer`` within the referenced ``doc``.

    :param doc: the referent document
    :param str jsonpointer: a jsonpointer to resolve within document
    :param default: the value to return when the pointer cannot be resolved;
                    if not given, a :class:`JsonPointerException` is raised instead
    :return: the resolved doc-item, or the `default` if given and the pointer is unresolvable
    :raises JsonPointerException: if the pointer is unresolvable and no `default` was given

    :author: Julian Berman, ankostis
    """
    for part in jsonpointer_parts(jsonpointer):
        ## Strings are leaves, not arrays (same as in `set_jsonpointer()`);
        #  otherwise a numeric part would pick a single character out of them.
        if isinstance(doc, Sequence) and not isinstance(doc, (str, type(u""))):
            # Array indexes should be turned into integers
            try:
                part = int(part)
            except ValueError:
                pass
        try:
            doc = doc[part]
        except (TypeError, LookupError):
            if default is _scream:
                raise JsonPointerException("Unresolvable JSON pointer(%r)@(%s)" % (jsonpointer, part))
            return default

    return doc

        
def set_jsonpointer(doc, jsonpointer, value, object_factory=dict):
    """
    Set the ``value`` at the location of a ``jsonpointer`` within the referenced ``doc``, in-place.

    * An existing location gets overwritten.
    * Any missing part of the path is built out of ``object_factory()`` containers.
    * On sequences the part must be an integer, or ``-`` which stands for the sequence's length;
      an index equal to the length appends the value.
    * An intermediate value that cannot be indexed (e.g. a string) is replaced by
      a new ``object_factory()`` container.

    :param doc: the referent document, modified in-place
    :param str jsonpointer: a jsonpointer to the node to modify
    :param value: the value to store at the pointed location
    :param object_factory: a callable without arguments, creating the containers for missing parts
    :raises JsonPointerException: if the jsonpointer is empty, if a part for a sequence is
            not numeric or beyond its length, or if the root `doc` cannot be indexed
    """

    parts = list(jsonpointer_parts(jsonpointer))
    if not parts:
        ## Without parts there is no parent-container to modify.
        raise JsonPointerException("Cannot set the document-root with an empty JSON pointer(%r)" % (jsonpointer,))

    ## The parent-container and the part under which `doc` is stored;
    #  they remain None while still on the root.
    pdoc = None
    ppart = None
    for i, part in enumerate(parts):
        if isinstance(doc, Sequence) and not isinstance(doc, (str, type(u""))):
            ## Array indexes should be turned into integers
            #
            doclen = len(doc)
            if part == '-':
                part = doclen
            else:
                try:
                    part = int(part)
                except ValueError:
                    raise JsonPointerException("Expected numeric index(%s) for sequence at (%r)[%i]" % (part, jsonpointer, i))
                if part > doclen:
                    raise JsonPointerException("Index(%s) out of bounds(%i) of (%r)[%i]" % (part, doclen, jsonpointer, i))
        try:
            ndoc = doc[part]
        except LookupError:
            break  ## Branch-extension needed.
        except TypeError: # Maybe indexing a string...
            if pdoc is None:
                ## The root itself is not indexable, and there is no parent to replace it in.
                raise JsonPointerException("Incompatible content of JSON pointer(%r)@(%s)" % (jsonpointer, part))
            ndoc = object_factory()
            pdoc[ppart] = ndoc
            doc = ndoc
            break  ## Branch-extension needed.

        doc, pdoc, ppart = ndoc, doc, part
    else:
        doc = pdoc # If loop exhausted, cancel last assignment.

    ## Build branch with value-leaf, from the innermost part outwards.
    #
    nbranch = value
    for part2 in reversed(parts[i+1:]):
        ndoc = object_factory()
        ndoc[part2] = nbranch
        nbranch = ndoc

    ## Attach new-branch.
    try:
        doc[part] = nbranch
    except IndexError: # Inserting last sequence-element raises IndexError("list assignment index out of range")
        doc.append(nbranch)
    
#    except (IndexError, TypeError) as ex:
#        #if isinstance(ex, IndexError) or 'list indices must be integers' in str(ex):
#        raise JsonPointerException("Incompatible content of JSON pointer(%r)@(%s)" % (jsonpointer, part))
#        else:
#            doc = {}
#            parent_doc[parent_part] = doc 
#            doc[part] = value 

        
if __name__ == '__main__':
    ## This module is a library; refuse to run as a script with a proper exception
    #  (raising a plain string is itself a TypeError).
    raise SystemExit("Not runnable!")