From 9cc4ec09a7d45480ae126bf6e35f31ab3629815f Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 25 May 2017 00:25:02 -0400 Subject: [PATCH 01/31] Minor correction for badness if packaging folder is missing. --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5a85c7de..6d0f64ba 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ clean: rm -rvf build htmlcov veryclean: clean - rm -rvf *.egg-info .packaging + rm -rvf *.egg-info .packaging/* test: develop @clear @@ -28,4 +28,3 @@ release: ${PROJECT}.egg-info/PKG-INFO: setup.py setup.cfg marrow/mongo/core/release.py @mkdir -p ${VIRTUAL_ENV}/lib/pip-cache pip install --cache-dir "${VIRTUAL_ENV}/lib/pip-cache" -Ue ".[${USE}]" - From 28598e455a0c77cb8cf8adf3573dcfb753ae7a85 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 25 May 2017 00:25:33 -0400 Subject: [PATCH 02/31] Unpacking changes, docstring updates, tweaks to __pk__ usage in Queryable. --- marrow/mongo/core/trait/identified.py | 7 ++----- marrow/mongo/core/trait/queryable.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/marrow/mongo/core/trait/identified.py b/marrow/mongo/core/trait/identified.py index b2aba98a..b9d515ae 100644 --- a/marrow/mongo/core/trait/identified.py +++ b/marrow/mongo/core/trait/identified.py @@ -22,7 +22,7 @@ class Identified(Document): id = ObjectId('_id', assign=True, write=False, repr=False) def __eq__(self, other): - """Equality comparison between the IDs of the respective documents.""" + """Equality comparison between the identifiers of the respective documents.""" if isinstance(other, Document): return self.id == other.id @@ -32,7 +32,4 @@ def __eq__(self, other): def __ne__(self, other): """Inverse equality comparison between the backing store and other value.""" - if isinstance(other, Document): - return self.id != other.id - - return self.id != other + return not self == other diff --git a/marrow/mongo/core/trait/queryable.py b/marrow/mongo/core/trait/queryable.py index da44a573..c1f02e4d 100644 --- a/marrow/mongo/core/trait/queryable.py +++ b/marrow/mongo/core/trait/queryable.py @@ -214,7 +214,7 @@ def find_one(cls, *args, **kw): """ if len(args) == 1 and not isinstance(args[0], Filter): - args = (cls.id == args[0], ) + args = (getattr(cls, cls.__pk__) == args[0], ) Doc, collection, query, options = cls._prepare_find(*args, **kw) result = Doc.from_mongo(collection.find_one(query, **options)) From f8fc45f07342c3e9e624c358169668b707313d12 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 25 May 2017 00:28:03 -0400 Subject: [PATCH 03/31] WIP heirarchical traits; starting with the ABC, HParent, and HPath. --- marrow/mongo/core/trait/heir/__init__.py | 0 marrow/mongo/core/trait/heir/base.py | 189 +++++++++++++++++++++ marrow/mongo/core/trait/heir/hparent.py | 74 +++++++++ marrow/mongo/core/trait/heir/hpath.py | 200 +++++++++++++++++++++++ 4 files changed, 463 insertions(+) create mode 100644 marrow/mongo/core/trait/heir/__init__.py create mode 100644 marrow/mongo/core/trait/heir/base.py create mode 100644 marrow/mongo/core/trait/heir/hparent.py create mode 100644 marrow/mongo/core/trait/heir/hpath.py diff --git a/marrow/mongo/core/trait/heir/__init__.py b/marrow/mongo/core/trait/heir/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/marrow/mongo/core/trait/heir/base.py b/marrow/mongo/core/trait/heir/base.py new file mode 100644 index 00000000..e379f342 --- /dev/null +++ b/marrow/mongo/core/trait/heir/base.py @@ -0,0 +1,189 @@ +# encoding: utf-8 + +from contextlib import contextmanager + +from .... import Document +from ....trait import Queryable + + +log = __import__('logging').getLogger(__name__) + + +class Heirarchical(Queryable): + """Record sufficient information to form a heirarchy of documents. + + Inteded for use via subclassing; not directly usable. This contains API prototypes and mixin methods similar in + function to an abstract base class. For the most part subclasses should be able to "get away with" overriding + appropriate accessor properties with real fields, or by providing alternate properties returning query fragments. + + A subclass should not, except under extreme circumstances, override the actual `find_` and `attach` methods. + """ + + # Accessor Properties + + @property + def parent(self): + """The primary key identity of, or query fragment to select the parent of this document. + + The result of a `ReferenceField('.')` is sufficient to satisfy this, if redefined as one. + + Used by `get_parent` as passed as the first positional argument to `find_one`. + """ + + raise NotImplementedError() + + @property + def ancestors(self): + """A query fragment or iterable of primary key identifiers to select the ancestors of this document. + + The result of an `Array(ReferenceField('.'))` is sufficient to satisfy this, if redefined as one. + + Used by `get_ancestors` with behaviour defined by `_find_heirarchical`. + """ + + raise NotImplementedError() + + @property + def siblings(self): + """A query fragment or iterable of primary key identifiers to select the siblings of this document. + + Used by `get_siblings` with behaviour defined by `_find_heirarchical`. + """ + + raise NotImplementedError() + + @property + def children(self): + """A query fragment or iterable of primary key identifiers to select the children of this document. + + The result of an `Array(ReferenceField('.'))` is sufficient to satisfy this, if redefined as one. + + Used by `get_children` with behaviour defined by `_find_heirarchical`. + """ + + raise NotImplementedError() + + @property + def descendants(self): + """A query fragment or iterable of primary key identifiers to select the descendants of this document. + + Used by `get_descendants` with behaviour defined by `_find_heirarchical`. + """ + + raise NotImplementedError() + + # Internal Use Methods + + def _find_heirarchical(self, accessor, *args, **kw): + try: + query = getattr(self, accessor) + + if not isinstance(query, Filter): + query = list(query) + + except TypeError: + query = None + + if not query: + return None + + if isinstance(query, list): + iterator = self.find_in_sequence(self.__pk__, query, *args, **kw) + + else: + iterator = self.find(query, *args, **kw) + + return (self.from_mongo(record) for record in iterator) + + @contextmanager + def _heir_update(self): + ops = self.get_collection().initialize_ordered_bulk_op() + + yield ops + + try: + ops.execute() + except: + __import__('pprint').pprint(e.details) + raise + + # Active Collection Methods + + def get_parent(self, *args, **kw): + """Retrieve the Document instance representing the immediate parent of this document.""" + + parent = self.parent + + if parent is None: + return None + + return self.find_one(parent, *args, **kw) + + def find_ancestors(self, *args, **kw): + """Retrieve the Document instances representing the ancestors of this document.""" + + return self._find_heirarchical('ancestors', *args, **kw) + + def find_siblings(self, *args, **kw): + """Retrieve the Document instances representing siblings of this document, excluding this document.""" + + return self._find_heirarchical('siblings', *args, id__ne=self.id, **kw) + + def find_children(self, *args, **kw): + """Find the immediate children of this document.""" + + return self._find_heirarchical('children', *args, **kw) + + def find_descendants(self, *args, **kw): + """Prepare a MongoDB QuerySet searching for all descendants of this document.""" + + return self._find_heirarchical('descendants', *args, **kw) + + # Active Document Methods + + def detach(self): + """Detach this document from its tree, forming the root of a new tree containing this document and its children.""" + + return self + + def _attach(self, child, ops, project=None): + if not isinstance(child, Document): + child = Doc.find_one(child, project=project) + + return child + + def attach(self, child, **kw): + """Attach the given child document (with any descendants) to this document.""" + + with self._heir_update(**kw) as ops: + self._attach(child, ops) + + return self + + def attach_to(self, parent): + """Attach this document (with any descendants) to the given parent.""" + + if not isinstance(parent, Document): + parent = self.find_one(parent) + + return parent.attach(self) + + def attach_before(self, sibling): + """Attach this document (with any descendants) to the same parent as the target sibling, prior to that sibling.""" + + if not isinstance(sibling, Document): + sibling = self.find_one(sibling, project=('parent', )) + + self.attach_to(sibling.parent) + + return self.reload() + + def attach_after(self, sibling): + """Attach this document (with any descendants) to the same parent as the target sibling, after that sibling.""" + + if not isinstance(sibling, Document): + sibling = self.find_one(sibling, project=('parent', )) + + self.attach_to(sibling.parent) + + return self.reload() diff --git a/marrow/mongo/core/trait/heir/hparent.py b/marrow/mongo/core/trait/heir/hparent.py new file mode 100644 index 00000000..43ed6285 --- /dev/null +++ b/marrow/mongo/core/trait/heir/hparent.py @@ -0,0 +1,74 @@ +# encoding: utf-8 + +from re import escape +from pymongo.errors import BulkWriteError + +from .base import Heirarchical +from .... import Document, Index, U +from ....field import Path + + +log = __import__('logging').getLogger(__name__) + + +class HParent(Heirarchical): + """Record sufficient information to form an orderless heirarchy of documents using parent association. + + This models the tree with a document per node and nodes containing a concrete list of child references. + + Ref: https://docs.mongodb.com/manual/tutorial/model-tree-structures-with-parent-references/ + """ + + parent = Reference('.', default=None, assign=True) + + _parent = Index('parent') + + def get_parent(self, *args, **kw): + """Retrieve the Document instance representing the immediate parent of this document.""" + + if not self.parent: # We can save time. + return None + + return self.find_one(self.parent, *args, **kw) + + def find_siblings(self, *args, **kw): + """Prepare a MongoDB QuerySet searching the siblings of this document, excluding this document.""" + + Doc, collection, query, options = self._prepare_find(*args, parent=self.parent, id__ne=self.id, **kw) + return collection.find(query, **options) + + def find_children(self, *args, **kw): + """Prepare a MongoDB QuerySet searching for immediate children of this document.""" + + if __debug__: + log.debug("Finding children by parent association: " + str(self.id), extra={'parent': self}) + + Doc, collection, query, options = self._prepare_find(*args, parent=self, **kw) + return collection.find(query, **options) + + def detach(self): + """Detach this document from its tree, forming the root of a new tree containing this document and its children.""" + + self = super(HParent, self).detach() + + if not self.parent: + return self + + Doc, collection, query, options = self._prepare_find(id=self.id) + result = collection.update_one(query, U(Doc, parent=None)) + + self.parent = None # Clean up to save needing to reload the record. + + return self + + def attach(self, child): + """Attach the given child document (with any descendants) to this document.""" + + child = super(HParent, self).attach(child) + + Doc, collection, query, options = self._prepare_find(id=child.id) + result = collection.update_one(query, U(Doc, parent=self.id)) + + child.parent = self.id # Clean up to save needing to reload the record. + + return child diff --git a/marrow/mongo/core/trait/heir/hpath.py b/marrow/mongo/core/trait/heir/hpath.py new file mode 100644 index 00000000..0b19b78f --- /dev/null +++ b/marrow/mongo/core/trait/heir/hpath.py @@ -0,0 +1,200 @@ +# encoding: utf-8 + +from itertools import chain +from re import escape +from pathlib import PurePosixPath + +from .base import Heirarchical +from .... import Index, U +from ....field import Path + + +log = __import__('logging').getLogger(__name__) + + +class HPath(Heirarchical): + """Record sufficient information to form an orderless heirarchy of documents using materialzied paths. + + This models the tree with a document per node and nodes containing a concrete list of child references. + + When mixing Heirarchical traits, use more efficient parent/child mix-ins later in your subclass definition. + + Ref: https://docs.mongodb.com/manual/tutorial/model-tree-structures-with-materialized-paths/ + """ + + __pk__ = 'path' # Pivot short-form queries to query the path, not ID. + + path = Path(required=True, repr=False) # The coalesced path to the document. + + _path = Index('path', unique=True) + + # Accessor Properties + + @property + def parent(self): + """A query fragment to select the parent of this document. + + Used by `get_parent` as passed as the first positional argument to `find_one`. + """ + + if __debug__: + log.debug("Finding parent by materialized path: " + str(self.path), extra={'document': self}) + + path = self.path + + if not path or path == path.parent or str(path.parent) == '.': + return None # No path, already root, or top level relative; no parent. + + return self.__class__.path == path.parent + + @property + def ancestors(self): + """A query fragment selecting for the ancestors of this document, or a generator of parent path references. + + Warning: if you change the `__pk__` from `path`, the results will be unordered unless you explicitly sort on + the `path` field yourself. With a `__pk__` of `path` this will return a generator of references for ordered + selection by `_find_heirarchical`, otherwise it returns a query fragment filtering by possible paths. + + Used by `get_ancestors` with behaviour defined by `_find_heirarchical`. + """ + + if __debug__: + log.debug("Finding ancestors by materialized path: " + str(self.path), extra={'document': self}) + + path = self.path + + if not path or path == path.parent or str(path.parent) == '.': + return None # No path, already root, or top level relative; no parents. + + absolute = self.path.is_absolute() + parents = (parent for parent in self.path.parents if absolute or parent.name) + + if self.__pk__ != 'path': # Warning: requires manual sort on `path` to preserve order! + return self.__class__.path.any(parents) + + return parents # We can just return the generator directly for consumption by `_find_heirarchical`. + + @property + def siblings(self): + """A query fragment selecting for the siblings of this document. + + Used by `get_siblings` with behaviour defined by `_find_heirarchical`. + """ + + if __debug__: + log.debug("Finding siblings by materialized path: " + str(self.path), extra={'document': self}) + + path = self.path + + if not path or path == path.parent or str(path.parent) == '.': + return None # No path, already root, or top level relative; no siblings. + + return self.__class__.path.re(r'^', escape(str(self.path.parent)), r'\/[^\/]+$') + + @property + def children(self): + """A query fragment selecting for the immediate children of this document. + + Used by `get_children` with behaviour defined by `_find_heirarchical`. + """ + + if __debug__: + log.debug("Finding children by materialized path: " + str(self.path), extra={'document': self}) + + if not self.path: + return + + return self.__class__.path.re(r'^', escape(str(self.path)), r'\/[^\/]+$') + + @property + def descendants(self): + """A query fragment selecting for the descendants of this document. + + Used by `get_descendants` with behaviour defined by `_find_heirarchical`. + """ + + return self.__class__.path.re(r'^', escape(str(self.path)), r'\/') + + # Active Collection Methods + + @classmethod + def get_nearest(cls, path, *args, **kw): + """Find and return the deepest Asset matching the given path.""" + + path = PurePosixPath(path) + absolute = path.is_absolute() + parents = (parent for parent in path.parents if absolute or parent.name) + kw.setdefault('sort', ('-path', )) + + return cls.find_one(*args, path__in=chain((path, ), parents), **kw) + + @classmethod + def find_nearest(cls, path, *args, **kw): + """Find all nodes up to the deepest node matched by the given path. + + Conceptually the reverse of `get_nearest`. + """ + + path = PurePosixPath(path) + absolute = path.is_absolute() + parents = (parent for parent in path.parents if absolute or parent.name) + kw.setdefault('sort', ('-path', )) + + for doc in cls.find(*args, path__in=chain((path, ), parents), **kw): + yield cls.from_mongo(doc) + + # Active Document Methods + + def detach(self): + """Detach this document from its tree, forming the root of a new tree containing this document and its children.""" + + self = super(HPath, self).detach() + + if not self.path.root: + return self # Already detached. + + Doc = self.__class__ + collection = self.get_collection() + length = len(str(self.path.parent)) + 1 + bulk = collection.initialize_unordered_bulk_op() + + bulk.find(Doc.id == self.id).update(U(Doc, path=self.slug)) + + for descendant in self.find_descendants(projection=('id', 'path')): + bulk.find(Doc.id == descendant['_id']).update(U(Doc, path=descendant['path'][length:])) + + result = bulk.execute() + + self.path = self.slug # Clean up to save needing to reload the record. + + return self + + def _attach(self, child, ops, project=()): + """Attach the given child document (with any descendants) to this document. + + This is not an atomic operation unless the node has no descendents and no other heirarchical mix-ins. + """ + + assert self.path, "Path required for child attachment." + + project = {'id', 'path'}.union(project) # We need the path field, if not present. + child, invalidated = super(HPath, self)._attach(child, ops, project) + + assert child.path, "Child must have addressable path, even if detached." + assert child.path.name, "Child must not be literal root element." + + # Step one, update the child itself. Speed matters on these, so we avoid using parametric helpers where + # reasonable to do so. Pro tip, re-mapping the back-end name of trait-provided fields is bad news bears. + # Matters more in the tight loop seen in step two, below. + + chop = len(str(child.path)) - len(child.path.name) + ops.find({'_id': child.id}).update_one({'$set': {'path': str(self.path / child.path.name)}}) + child.path = self.path / child.path.name + + # Step two, find descendants of the child and update them. There might not be any, but why waste a round trip? + + for offspring in self.get_collection().find(self.descendents, project={'path': 1}): + path = str(self.path / offspring['path'][chop:]) + ops.find({'_id': offspring['_id']}).update_one({'$set': {'path': path}}) + + return self From 27006d3ff414398c6e2f12b01bb69ea73ef579a3 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 25 May 2017 00:59:21 -0400 Subject: [PATCH 04/31] Docstrings, TODOs. --- marrow/mongo/core/trait/heir/base.py | 29 ++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/marrow/mongo/core/trait/heir/base.py b/marrow/mongo/core/trait/heir/base.py index e379f342..6df48289 100644 --- a/marrow/mongo/core/trait/heir/base.py +++ b/marrow/mongo/core/trait/heir/base.py @@ -75,6 +75,9 @@ def descendants(self): # Internal Use Methods def _find_heirarchical(self, accessor, *args, **kw): + # TODO: Refactor as callable class level accessor object. + # Queryable.objects([pk or sequence or fragment]=None, *fragment, **kw) + try: query = getattr(self, accessor) @@ -97,6 +100,8 @@ def _find_heirarchical(self, accessor, *args, **kw): @contextmanager def _heir_update(self): + # TODO: Refactor as Queryable.objects.bulk(ordered=False) method. + ops = self.get_collection().initialize_ordered_bulk_op() yield ops @@ -105,7 +110,7 @@ def _heir_update(self): ops.execute() except: __import__('pprint').pprint(e.details) - raise + raise # TODO: We can do better than the above. # Active Collection Methods @@ -147,8 +152,16 @@ def detach(self): return self def _attach(self, child, ops, project=None): + """Perform the work of attaching this document, to be overridden in subclasses. + + Subclasses should super() early and utilize the result in order to benefit from automatic casting. Provided + a reference to the child to attach, an ordered bulk operations object, and optional default projection. + """ + + # TODO: Refactor to isolate this as common to all cooperative attachment methods. + if not isinstance(child, Document): - child = Doc.find_one(child, project=project) + child = Doc.find_one(child, project=project) # TODO: Utilize Queryable default projection. return child @@ -164,15 +177,17 @@ def attach_to(self, parent): """Attach this document (with any descendants) to the given parent.""" if not isinstance(parent, Document): - parent = self.find_one(parent) + parent = self.find_one(parent) # TODO: Utilize Queryable default projection. return parent.attach(self) def attach_before(self, sibling): """Attach this document (with any descendants) to the same parent as the target sibling, prior to that sibling.""" + # TODO: Utilize _attach/attach refactor. + if not isinstance(sibling, Document): - sibling = self.find_one(sibling, project=('parent', )) + sibling = self.find_one(sibling) # TODO: Utilize Queryable default projection. self.attach_to(sibling.parent) @@ -181,9 +196,11 @@ def attach_before(self, sibling): def attach_after(self, sibling): """Attach this document (with any descendants) to the same parent as the target sibling, after that sibling.""" + # TODO: Utilize _attach/attach refactor. + if not isinstance(sibling, Document): - sibling = self.find_one(sibling, project=('parent', )) + sibling = self.find_one(sibling) # TODO: Utilize Queryable default projection. self.attach_to(sibling.parent) - return self.reload() + return self.reload() \ No newline at end of file From c33bc34ca7b257d64f746eb692bf71cde0b35057 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Sun, 28 May 2017 23:58:58 -0400 Subject: [PATCH 05/31] Build parallelization and removal of egg legacy. --- setup.cfg | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 857b80db..8d1130ab 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,6 +11,7 @@ bdist-base = .packaging/dist [build] build-base = .packaging/build +parallel = 3 [install] optimize = 1 @@ -19,10 +20,6 @@ optimize = 1 bdist-base = .packaging/dist dist-dir = .packaging/release -[bdist_egg] -bdist-dir = .packaging/dist -dist-dir = .packaging/release - [bdist_wheel] bdist-dir = .packaging/dist dist-dir = .packaging/release From c79cd2363e8fee9dd89842ec84cfafa6b9c7de39 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Mon, 29 May 2017 00:38:36 -0400 Subject: [PATCH 06/31] Dead import removal. --- marrow/mongo/core/field/array.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/marrow/mongo/core/field/array.py b/marrow/mongo/core/field/array.py index f37ebf04..187033ff 100644 --- a/marrow/mongo/core/field/array.py +++ b/marrow/mongo/core/field/array.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals -from ... import Document, Field +from ... import Field from .base import _HasKind, _CastingKind @@ -29,6 +29,9 @@ def to_native(self, obj, name, value): if isinstance(value, self.List): return value + if value is None: + return None + result = self.List(super(Array, self).to_native(obj, name, i) for i in value) obj.__data__[self.__name__] = result @@ -37,4 +40,7 @@ def to_native(self, obj, name, value): def to_foreign(self, obj, name, value): """Transform to a MongoDB-safe value.""" + if value is None: + return None + return self.List(super(Array, self).to_foreign(obj, name, i) for i in value) From a30b5ef2f4cc8646afc31d4fa86e8f1954a10c0c Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 30 May 2017 21:48:13 -0400 Subject: [PATCH 07/31] Updated dependencies to pin tzlocal and add timezone feature. --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 3acff3a9..7b9acae1 100755 --- a/setup.py +++ b/setup.py @@ -90,9 +90,10 @@ extras_require = dict( decimal = ['pymongo>=3.4'], # More modern version required for Decimal128 support. development = tests_require + ['pre-commit'], # Development-time dependencies. - scripting = ['javascripthon<1.0'], # Allow map/reduce functions and "stored functions" to be Python. - logger = ['tzlocal'], # Timezone support to store log times in UTC like a sane person. + logger = ['tzlocal>=1.4'], # Timezone support to store log times in UTC like a sane person. markdown = ['misaka', 'pygments'], # Markdown text storage. + scripting = ['javascripthon<1.0'], # Allow map/reduce functions and "stored functions" to be Python. + timezone = ['pytz', 'tzlocal>=1.4'], # Support for timezones. ), tests_require = tests_require, From 95a97589aed4b9f07a0889bd4f1028f335e25205 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 30 May 2017 23:09:10 -0400 Subject: [PATCH 08/31] Adapt to future reserved word use; use wait, not await. --- marrow/mongo/core/trait/queryable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/marrow/mongo/core/trait/queryable.py b/marrow/mongo/core/trait/queryable.py index c1f02e4d..6b62fabe 100644 --- a/marrow/mongo/core/trait/queryable.py +++ b/marrow/mongo/core/trait/queryable.py @@ -45,6 +45,7 @@ class Queryable(Collection): 'maxTimeMs': 'max_time_ms', # Common typo. 'noCursorTimeout': 'no_cursor_timeout', 'oplogReplay': 'oplog_replay', + 'wait': 'await', # XXX: 'await' will be reserved in 3.7+ } AGGREGATE_OPTIONS = UNIVERSAL_OPTIONS | { From 0733071ba19f6be7d7d8de8b59eda12ff20c366a Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 30 May 2017 23:10:38 -0400 Subject: [PATCH 09/31] Add Bandit exclusions. --- .bandit | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .bandit diff --git a/.bandit b/.bandit new file mode 100644 index 00000000..4493ef25 --- /dev/null +++ b/.bandit @@ -0,0 +1,2 @@ +[bandit] +exclude: /env,.eggs,.packaging,.cache From bd1a09a4127efbd07bd9568f75e12329f2ebf64f Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 30 May 2017 23:16:25 -0400 Subject: [PATCH 10/31] Correction for filtering with cast None values and minor CPython field reference optimization. --- marrow/mongo/query/query.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/marrow/mongo/query/query.py b/marrow/mongo/query/query.py index 4cc0219d..c6dcad9f 100644 --- a/marrow/mongo/query/query.py +++ b/marrow/mongo/query/query.py @@ -154,15 +154,20 @@ def __unicode__(self): def _op(self, operation, other, *allowed): """A basic operation operating on a single value.""" + f = self._field + if self._combining: # We are a field-compound query fragment, e.g. (Foo.bar & Foo.baz). return reduce(self._combining, - (q._op(operation, other, *allowed) for q in self._field)) # pylint:disable=protected-access + (q._op(operation, other, *allowed) for q in f)) # pylint:disable=protected-access # Optimize this away in production; diagnosic aide. - if __debug__ and _complex_safety_check(self._field, {operation} & set(allowed)): # pragma: no cover + if __debug__ and _complex_safety_check(f, {operation} & set(allowed)): # pragma: no cover raise NotImplementedError("{self!r} does not allow {op} comparison.".format(self=self, op=operation)) - return Filter({self._name: {operation: self._field.transformer.foreign(other, (self._field, self._document))}}) + if other is not None: + other = f.transformer.foreign(other, (f, self._document)) + + return Filter({self._name: {operation: other}}) def _iop(self, operation, other, *allowed): """An iterative operation operating on multiple values. @@ -170,17 +175,23 @@ def _iop(self, operation, other, *allowed): Consumes iterators to construct a concrete list at time of execution. """ + f = self._field + if self._combining: # We are a field-compound query fragment, e.g. (Foo.bar & Foo.baz). return reduce(self._combining, - (q._iop(operation, other, *allowed) for q in self._field)) # pylint:disable=protected-access + (q._iop(operation, other, *allowed) for q in f)) # pylint:disable=protected-access # Optimize this away in production; diagnosic aide. - if __debug__ and _complex_safety_check(self._field, {operation} & set(allowed)): # pragma: no cover + if __debug__ and _complex_safety_check(f, {operation} & set(allowed)): # pragma: no cover raise NotImplementedError("{self!r} does not allow {op} comparison.".format( self=self, op=operation)) + def _t(o): + for value in o: + yield None if value is None else f.transformer.foreign(value, (f, self._document)) + other = other if len(other) > 1 else other[0] - values = [self._field.transformer.foreign(value, (self._field, self._document)) for value in other] + values = list(_t(other)) return Filter({self._name: {operation: values}}) @@ -213,14 +224,16 @@ def __eq__(self, other): Documentation: https://docs.mongodb.org/manual/reference/operator/query/eq/#op._S_eq """ + f = self._field + if self._combining: # We are a field-compound query fragment, e.g. (Foo.bar & Foo.baz). - return reduce(self._combining, (q.__eq__(other) for q in self._field)) + return reduce(self._combining, (q.__eq__(other) for q in f)) # Optimize this away in production; diagnosic aide. if __debug__ and _simple_safety_check(self._field, '$eq'): # pragma: no cover raise NotImplementedError("{self!r} does not allow $eq comparison.".format(self=self)) - return Filter({self._name: self._field.transformer.foreign(other, (self._field, self._document))}) + return Filter({self._name: None if other is None else f.transformer.foreign(other, (f, self._document))}) def __gt__(self, other): """Matches values that are greater than a specified value. @@ -374,7 +387,7 @@ def __invert__(self): def re(self, *parts): """Matches string values against a regular expression compiled of individual parts. - Document.field.re(r'^', variable_part, r'\.') + Document.field.re(r'^', variable_part, r'.') Regex operator: {$regex: value} Documentation: https://docs.mongodb.com/manual/reference/operator/query/regex/ From 741e9785170c3fdaec43f200a3c350eb4e7fc7d8 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 30 May 2017 23:56:25 -0400 Subject: [PATCH 11/31] Logical order correction accounting for Date changes. --- marrow/mongo/core/field/ttl.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/marrow/mongo/core/field/ttl.py b/marrow/mongo/core/field/ttl.py index 98ad411b..81aefc3f 100644 --- a/marrow/mongo/core/field/ttl.py +++ b/marrow/mongo/core/field/ttl.py @@ -16,15 +16,13 @@ class TTL(Date): __disallowed_operators__ = {'#array'} def to_foreign(self, obj, name, value): - value = super(TTL, self).to_foreign(obj, name, value) - if isinstance(value, timedelta): - return utcnow() + value - - if isinstance(value, datetime): - return value - - if isinstance(value, Number): - return utcnow() + timedelta(days=value) + value = utcnow() + value + elif isinstance(value, datetime): + value = value + elif isinstance(value, Number): + value = utcnow() + timedelta(days=value) + else: + raise ValueError("Invalid TTL value: " + repr(value)) - raise ValueError("Invalid TTL value: " + repr(value)) + return super(TTL, self).to_foreign(obj, name, value) From eed25b44b3aa186338503f8318b6147228d51413 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 30 May 2017 23:57:26 -0400 Subject: [PATCH 12/31] Utilize pytz.utc if available. --- marrow/mongo/util/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/marrow/mongo/util/__init__.py b/marrow/mongo/util/__init__.py index 5d750dda..c221fce9 100644 --- a/marrow/mongo/util/__init__.py +++ b/marrow/mongo/util/__init__.py @@ -5,11 +5,17 @@ from collections import OrderedDict from datetime import datetime, timedelta from operator import attrgetter -from bson.tz_util import utc from marrow.schema.meta import ElementMeta from ...package.loader import load +# Conditional dependencies. + +try: + from pytz import utc # Richer, more capable implementation. +except ImportError: + from bson.tz_util import utc # Fallback on hard dependency. + SENTINEL = object() # Singleton value to detect unassigned values. From 60b8fdfbad46d9aae148c62a53ba8e0f4060a8d1 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 30 May 2017 23:59:28 -0400 Subject: [PATCH 13/31] Correction for deprecation and future warnings. * Pymongo legacy collection method use of `update`. * Python 3.7 forthcoming reservation of `await`. --- test/trait/test_queryable.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/trait/test_queryable.py b/test/trait/test_queryable.py index eed68f89..eb89c85d 100644 --- a/test/trait/test_queryable.py +++ b/test/trait/test_queryable.py @@ -51,22 +51,22 @@ def test_prepare_find_cursor_conflict(self, Sample): Sample._prepare_find(cursor_type=CursorType.TAILABLE, tail=True) with pytest.raises(TypeError): - Sample._prepare_find(cursor_type=CursorType.TAILABLE, await=True) + Sample._prepare_find(cursor_type=CursorType.TAILABLE, wait=True) with pytest.raises(TypeError): - Sample._prepare_find(cursor_type=CursorType.TAILABLE, tail=True, await=True) + Sample._prepare_find(cursor_type=CursorType.TAILABLE, tail=True, wait=True) def test_prepare_find_cursor_type_tail(self, Sample): cls, collection, query, options = Sample._prepare_find(tail=True) assert options['cursor_type'] == CursorType.TAILABLE_AWAIT def test_prepare_find_cursor_type_tail_not_await(self, Sample): - cls, collection, query, options = Sample._prepare_find(tail=True, await=False) + cls, collection, query, options = Sample._prepare_find(tail=True, wait=False) assert options['cursor_type'] == CursorType.TAILABLE def test_prepare_find_cursor_type_await_conflict(self, Sample): with pytest.raises(TypeError): - Sample._prepare_find(await=False) + Sample._prepare_find(wait=False) def test_prepare_find_max_time_modifier(self, Sample): cls, collection, query, options = Sample._prepare_find(max_time_ms=1000) @@ -114,7 +114,7 @@ def test_find_in_sequence(self, db, Sample): def test_reload_all(self, Sample): doc = Sample.find_one(integer=42) assert doc.string == 'baz' - Sample.get_collection().update(Sample.id == doc, U(Sample, integer=1337, string="hoi")) + Sample.get_collection().update_one(Sample.id == doc, U(Sample, integer=1337, string="hoi")) assert doc.string == 'baz' doc.reload() assert doc.string == 'hoi' @@ -123,7 +123,7 @@ def test_reload_all(self, Sample): def test_reload_specific(self, Sample): doc = Sample.find_one(integer=42) assert doc.string == 'baz' - Sample.get_collection().update(Sample.id == doc, U(Sample, integer=1337, string="hoi")) + Sample.get_collection().update_one(Sample.id == doc, U(Sample, integer=1337, string="hoi")) assert doc.string == 'baz' doc.reload('string') assert doc.string == 'hoi' From 49ebe1985e16a74dd994b4f6f4c1efe2c8182650 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 00:01:54 -0400 Subject: [PATCH 14/31] Silly regexen are silly. --- test/query/test_q.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/query/test_q.py b/test/query/test_q.py index b550c9dd..5fad41a4 100644 --- a/test/query/test_q.py +++ b/test/query/test_q.py @@ -128,8 +128,8 @@ def test_operator_invert(self): assert unicode(Sample.generic) == ~Sample.generic == 'generic' def test_operator_re(self): - result = Sample.field.re(r'^', 'foo', r'\.') - assert result.as_query == {'field_name': {'$regex': r'^foo\.'}} + result = Sample.field.re(r'^', 'foo', r'.') + assert result.as_query == {'field_name': {'$regex': r'^foo.'}} def test_operator_size(self): result = Sample.array.size(10) From 3d4c256e75375d675ea5d19cdeb16e35aca9b6f1 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 00:02:45 -0400 Subject: [PATCH 15/31] Correction for Pymongo deprecation of insert vs. insert_one. --- test/util/test_capped.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/util/test_capped.py b/test/util/test_capped.py index 952aaafd..b8a632e9 100644 --- a/test/util/test_capped.py +++ b/test/util/test_capped.py @@ -55,27 +55,27 @@ def capped(request, connection): def gen_log_entries(collection, count=1000): - collection.insert({'message': 'first'}) # To avoid immediate exit of the tail.ddddd + collection.insert_one({'message': 'first'}) # To avoid immediate exit of the tail.ddddd for i in range(count-2): sleep(3.0/count) # If we go too fast, the test might not be able to keep up. - collection.insert({'message': 'test #' + str(i) + ' of ' + str(count), 'priority': choice(_PRIORITY)}) + collection.insert_one({'message': 'test #' + str(i) + ' of ' + str(count), 'priority': choice(_PRIORITY)}) - collection.insert({'message': 'last'}) + collection.insert_one({'message': 'last'}) class TestCappedQueries(object): def test_single(self, capped): assert capped.count() == 0 - result = capped.insert({'message': 'first'}) + result = capped.insert_one({'message': 'first'}) assert capped.count() == 1 first = next(tail(capped)) assert first['message'] == 'first' - assert first['_id'] == result + assert first['_id'] == result.inserted_id def test_basic_timeout(self, capped): - capped.insert({'message': 'first'}) + capped.insert_one({'message': 'first'}) start = time() @@ -96,7 +96,7 @@ def test_empty_trap(self, capped): def test_patch(self, capped): _patch() - capped.insert({}) + capped.insert_one({}) assert len(list(capped.tail(timeout=0.25))) == 1 From c9da2435750034dbe8e1fd9c7be0c88f9580505f Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 00:03:26 -0400 Subject: [PATCH 16/31] Addition of timezone handling to Date fields. --- marrow/mongo/core/field/date.py | 102 ++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 4 deletions(-) diff --git a/marrow/mongo/core/field/date.py b/marrow/mongo/core/field/date.py index 3626596e..9f1e5f0c 100644 --- a/marrow/mongo/core/field/date.py +++ b/marrow/mongo/core/field/date.py @@ -1,18 +1,112 @@ # encoding: utf-8 +"""Marrow Mongo Date field specialization. + +Commentary on high-level management of timezone casting: + + https://groups.google.com/forum/#!topic/mongodb-user/GOMjTJON4cg +""" + from __future__ import unicode_literals -from bson import ObjectId +from datetime import datetime, timedelta, tzinfo +from bson import ObjectId as OID +from collections import MutableMapping +from datetime import datetime, timedelta from .base import Field +from ...util import utc, utcnow +from ....schema import Attribute + +# Conditional dependencies. + +try: + from pytz import timezone as get_tz +except ImportError: + get_tz = None + +try: + localtz = __import__('tzlocal').get_localzone() +except ImportError: + localtz = None + + +log = __import__('logging').getLogger(__name__) class Date(Field): + """MongoDB date/time storage. + + Accepts the following options in addition to the base Field options: + + `naive`: The timezone to interpret assigned "naive" datetime objects as. + `timezone`: The timezone to cast objects retrieved from the database to. + + Timezone references may be, or may be a callback returning, a `tzinfo`-suitable object, the string name of a + timezone according to `pytz`, the alias 'naive' (strip or ignore the timezone) or 'local' (the local host's) + timezone explicitly. None values imply no conversion. + + All dates are converted to and stored in UTC for storage within MongoDB; the original timezone is lost. As a + result if `naive` is `None` then assignment of naive `datetime` objects will fail. + """ + __foreign__ = 'date' __disallowed_operators__ = {'#array'} + naive = Attribute(default=utc) + tz = Attribute(default=None) # Timezone to cast to when retrieving from the database. + + def _process_tz(self, dt, naive, tz): + """Process timezone casting and conversion.""" + + def _tz(t): + if t is None: + return + + if t == 'local': + if __debug__ and not localtz: + raise ValueError("Requested conversion to local timezone, but `localtz` not installed.") + + t = localtz + + if not isinstance(t, tzinfo): + t = get_tz(t) + + if not hasattr(t, 'normalize'): # Attempt to handle non-pytz tzinfo. + t = get_tz(t.tzname()) + + return t + + naive = _tz(naive) + tz = _tz(tz) + + if not dt.tzinfo and naive: + dt = naive.localize(dt) + + if tz: + dt = tz.normalize(dt.astimezone(tz)) + + return dt + + def to_native(self, obj, name, value): + if not isinstance(value, datetime): + log.warn("Non-date stored in {}.{} field.".format(self.__class__.__name__, ~self), + extra={'document': obj, 'field': ~self, 'value': value}) + return value + + return self._process_tz(value, self.naive, self.tz) + def to_foreign(self, obj, name, value): # pylint:disable=unused-argument - if isinstance(value, ObjectId): - return value.generation_time + if isinstance(value, MutableMapping) and '_id' in value: + value = value['_id'] + + if isinstance(value, OID): + value = value.generation_time + + elif isinstance(value, timedelta): + value = utcnow() + value + + assert isinstance(value, datetime), "Value must be a datetime, ObjectId, or identified document, not: " + \ + repr(value) - return value + return self._process_tz(value, self.naive, utc) From fbf5a55e8909458d0f59af1bc0d3c798df072591 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 16:00:08 -0400 Subject: [PATCH 17/31] Remove build parallelism. wry T_T --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 1bcfe0a8..79a2ccef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,7 +11,6 @@ bdist-base = .packaging/dist [build] build-base = .packaging/build -parallel = 3 [install] optimize = 1 From 145382f8eba7dc5e88c645c5d0da64fb9a1d6082 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 16:13:59 -0400 Subject: [PATCH 18/31] Remove impossible case. --- marrow/mongo/core/field/array.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/marrow/mongo/core/field/array.py b/marrow/mongo/core/field/array.py index 187033ff..2763bb63 100644 --- a/marrow/mongo/core/field/array.py +++ b/marrow/mongo/core/field/array.py @@ -29,9 +29,6 @@ def to_native(self, obj, name, value): if isinstance(value, self.List): return value - if value is None: - return None - result = self.List(super(Array, self).to_native(obj, name, i) for i in value) obj.__data__[self.__name__] = result @@ -40,7 +37,4 @@ def to_native(self, obj, name, value): def to_foreign(self, obj, name, value): """Transform to a MongoDB-safe value.""" - if value is None: - return None - return self.List(super(Array, self).to_foreign(obj, name, i) for i in value) From 10c343554ed7e079bb050ec1ef2c5a3413315272 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 16:25:05 -0400 Subject: [PATCH 19/31] Test on multiple MongoDB versions, with and without timezone support. --- .travis.yml | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2847ab76..4e58f0a8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,15 +16,26 @@ python: - "3.5" - "3.6" +env: + - MONGODB_VERSION=3.4.4 + +matrix: + include: + - python: "3.5" + env: NO_PYTZ=1 + - python: "3.5" + env: MONGODB_VERSION=3.2.13 + before_install: - - wget http://fastdl.mongodb.org/linux/mongodb-linux-x86_64-3.4.1.tgz -O /tmp/mongodb.tgz + - wget http://fastdl.mongodb.org/linux/mongodb-linux-x86_64-${MONGODB_VERSION}.tgz -O /tmp/mongodb.tgz - tar -xvf /tmp/mongodb.tgz - mkdir /tmp/data - - ${PWD}/mongodb-linux-x86_64-3.4.1/bin/mongod --dbpath /tmp/data --bind_ip 127.0.0.1 --noauth &> /dev/null & + - ${PWD}/mongodb-linux-x86_64-${MONGODB_VERSION}/bin/mongod --dbpath /tmp/data --bind_ip 127.0.0.1 --noauth &> /dev/null & install: - - travis_retry pip install --upgrade setuptools pip pytest pytest-cov codecov 'setuptools_scm>=1.9' cffi - - pip install -e '.[development,logger]' + - 'travis_retry pip install --upgrade setuptools pip pytest pytest-cov codecov "setuptools_scm>=1.9" cffi' + - 'pip install -e ".[development,logger]"' + - '[[ -e $NO_PYTZ ]] && pip uninstall --quiet --yes pytz tzlocal' script: python setup.py test From dffe131ed68a041f2a2a20a167f6a36b185e1c2f Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 16:26:01 -0400 Subject: [PATCH 20/31] Correction for missing inheritance of global environment. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4e58f0a8..03ebd461 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ env: matrix: include: - python: "3.5" - env: NO_PYTZ=1 + env: NO_PYTZ=1 MONGODB_VERSION=3.4.4 - python: "3.5" env: MONGODB_VERSION=3.2.13 From 71144991ae3daa888783dbdcb011161bc70bb927 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 16:32:58 -0400 Subject: [PATCH 21/31] Simplification of testing requirement declaration, fix for comparison on Travis. --- .travis.yml | 6 +++--- setup.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 03ebd461..5340c18c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,7 @@ env: matrix: include: - python: "3.5" - env: NO_PYTZ=1 MONGODB_VERSION=3.4.4 + env: MONGODB_VERSION=3.4.4 NO_PYTZ=1 - python: "3.5" env: MONGODB_VERSION=3.2.13 @@ -34,8 +34,8 @@ before_install: install: - 'travis_retry pip install --upgrade setuptools pip pytest pytest-cov codecov "setuptools_scm>=1.9" cffi' - - 'pip install -e ".[development,logger]"' - - '[[ -e $NO_PYTZ ]] && pip uninstall --quiet --yes pytz tzlocal' + - 'pip install -e ".[development]"' + - 'test -n $NO_PYTZ && pip uninstall --quiet --yes pytz tzlocal' script: python setup.py test diff --git a/setup.py b/setup.py index 7b9acae1..d3fb1b9a 100755 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ 'pytest-catchlog', # log capture 'pytest-isort', # import ordering 'misaka', 'pygments', # Markdown field support + 'pytz', 'tzlocal>=1.4', # timezone support, logger support ] From bbaa06dfbc1f98e00bc3f2561427a84cfbcbad69 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 16:41:34 -0400 Subject: [PATCH 22/31] Make uninstallation verbose. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5340c18c..cdf177d3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,7 +35,7 @@ before_install: install: - 'travis_retry pip install --upgrade setuptools pip pytest pytest-cov codecov "setuptools_scm>=1.9" cffi' - 'pip install -e ".[development]"' - - 'test -n $NO_PYTZ && pip uninstall --quiet --yes pytz tzlocal' + - 'test -n "${NO_PYTZ}" && pip uninstall --yes pytz tzlocal || true' script: python setup.py test From 683db05a0950c65796e62d0325bfb1da71418ae5 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 16:45:19 -0400 Subject: [PATCH 23/31] Pivot now optional test requirements to development requirements. --- setup.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index d3fb1b9a..9840bcfa 100755 --- a/setup.py +++ b/setup.py @@ -32,8 +32,6 @@ 'pytest-flakes', # syntax validation 'pytest-catchlog', # log capture 'pytest-isort', # import ordering - 'misaka', 'pygments', # Markdown field support - 'pytz', 'tzlocal>=1.4', # timezone support, logger support ] @@ -90,7 +88,11 @@ extras_require = dict( decimal = ['pymongo>=3.4'], # More modern version required for Decimal128 support. - development = tests_require + ['pre-commit'], # Development-time dependencies. + development = tests_require + [ + 'pre-commit', # Development-time dependencies. + 'misaka', 'pygments', # Markdown field support + 'pytz', 'tzlocal>=1.4', # timezone support, logger support + ], logger = ['tzlocal>=1.4'], # Timezone support to store log times in UTC like a sane person. markdown = ['misaka', 'pygments'], # Markdown text storage. scripting = ['javascripthon<1.0'], # Allow map/reduce functions and "stored functions" to be Python. From 00590d00b014e2d1f6eeb5f02fbd4b9979b8cd07 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Wed, 31 May 2017 17:11:08 -0400 Subject: [PATCH 24/31] Armour against missing optional modules. --- marrow/mongo/core/field/date.py | 19 +++++++++++++++---- test/util/test_logger.py | 12 ++++++++++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/marrow/mongo/core/field/date.py b/marrow/mongo/core/field/date.py index 9f1e5f0c..4ac975ff 100644 --- a/marrow/mongo/core/field/date.py +++ b/marrow/mongo/core/field/date.py @@ -70,10 +70,13 @@ def _tz(t): t = localtz if not isinstance(t, tzinfo): + if __debug__ and not localtz: + raise ValueError("The `pytz` package must be installed to look up timezone: " + repr(t)) + t = get_tz(t) - if not hasattr(t, 'normalize'): # Attempt to handle non-pytz tzinfo. - t = get_tz(t.tzname()) + if not hasattr(t, 'normalize') and get_tz: # Attempt to handle non-pytz tzinfo. + t = get_tz(t.tzname(dt)) return t @@ -81,10 +84,18 @@ def _tz(t): tz = _tz(tz) if not dt.tzinfo and naive: - dt = naive.localize(dt) + if hasattr(naive, 'localize'): + dt = naive.localize(dt) + elif naive == utc: + dt = dt.replace(tzinfo=naive) + else: + raise ValueError("Must install `pytz` package for timezone support.") if tz: - dt = tz.normalize(dt.astimezone(tz)) + if hasattr(tz, 'normalize'): + dt = tz.normalize(dt.astimezone(tz)) + else: + dt = dt.astimezone(tz) # Warning: this might not always be entirely correct! return dt diff --git a/test/util/test_logger.py b/test/util/test_logger.py index 3f2f8a6f..849ec645 100644 --- a/test/util/test_logger.py +++ b/test/util/test_logger.py @@ -1,5 +1,13 @@ # encoding: utf-8 -from marrow.mongo.util.logger import JSONFormatter, MongoFormatter, MongoHandler +import pytest -JSONFormatter, MongoFormatter, MongoHandler +try: + from marrow.mongo.util.logger import JSONFormatter, MongoFormatter, MongoHandler + +except ImportError: + pytestmark = pytest.mark.skip(reason="Local timezone support library tzlocal not installed.") + + +def test_nothing(): + JSONFormatter, MongoFormatter, MongoHandler From a801e15e2bb4a779063c0ac91473ce2ba3baca11 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 1 Jun 2017 10:09:44 -0400 Subject: [PATCH 25/31] Hide URI password from repr, tweak compilation/decompilation. --- marrow/mongo/core/field/link.py | 22 +++++++++++----------- test/field/test_link.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/marrow/mongo/core/field/link.py b/marrow/mongo/core/field/link.py index e173c03e..03a1813a 100644 --- a/marrow/mongo/core/field/link.py +++ b/marrow/mongo/core/field/link.py @@ -40,11 +40,11 @@ class URIString(MutableMapping): def __init__(self, url=None, **parts): self._query = {} - if hasattr(url, '__link__'): + if hasattr(url, '__link__'): # We utilize a custom object protocol to retrieve links to things. url = url.__link__ if isinstance(url, URIString): - url = url.url + url = unicode(url) self.url = url # If None, this will also handle setting defaults. @@ -58,7 +58,7 @@ def __init__(self, url=None, **parts): # String Protocols def __repr__(self): - return 'URI({0!s})'.format(self) + return 'URI({0})'.format(self._compile(True)) def __str__(self): """Return the Unicode text representation of this URL.""" @@ -173,15 +173,14 @@ def query(self, value): @property def url(self): if not self._url: - self._compile() + self._url = self._compile() return self._url @url.setter def url(self, value): if value: - self._url = value - self._decompile() + self._decompile(value) return self._url = None @@ -247,11 +246,11 @@ def relative(self): # Internal Methods - def _compile(self): - self._url = "".join(( + def _compile(self, safe=False): + return "".join(( (self.scheme + "://") if self.scheme else ("//" if self.host else ""), quote_plus(self.user) if self.user else "", - (":" + quote_plus(self.password)) if self.user and self.password else "", + (":" + ("" if safe else quote_plus(self.password))) if self.user and self.password else "", "@" if self.user else "", self.host or "", (":" + unicode(self.port)) if self.host and self.port else "", @@ -260,8 +259,9 @@ def _compile(self): ("#" + quote_plus(self.fragment)) if self.fragment else "", )) - def _decompile(self): - result = urlsplit(self._url) + def _decompile(self, value): + self._url = value + result = urlsplit(value) for part in self.__parts__: if not hasattr(result, part): continue diff --git a/test/field/test_link.py b/test/field/test_link.py index 53edb69f..495343e2 100644 --- a/test/field/test_link.py +++ b/test/field/test_link.py @@ -28,7 +28,7 @@ def test_complete_url(self): assert not link.relative - assert repr(link) == "URI(" + uri + ")" + assert repr(link) == "URI(" + uri.replace('pass', '') + ")" with pytest.raises(ValueError): link['query'] From 72c89aaeb1d3edbdfe1f2d45e1b969c9e723375a Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 1 Jun 2017 10:39:59 -0400 Subject: [PATCH 26/31] Hide URI password from repr, tweak composition, can resolve relative link updates. --- marrow/mongo/core/field/link.py | 21 +++++++++++++++++---- test/field/test_link.py | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/marrow/mongo/core/field/link.py b/marrow/mongo/core/field/link.py index 03a1813a..772cb533 100644 --- a/marrow/mongo/core/field/link.py +++ b/marrow/mongo/core/field/link.py @@ -12,9 +12,9 @@ from cgi import escape try: - from urllib.parse import urlsplit, quote_plus, unquote_plus, parse_qsl + from urllib.parse import urljoin, urlsplit, quote_plus, unquote_plus, parse_qsl except ImportError: # Adapt to locations on legacy versions. - from urlparse import urlsplit, parse_qsl + from urlparse import urljoin, urlsplit, parse_qsl from urllib import quote_plus, unquote_plus try: @@ -24,7 +24,7 @@ class URIString(MutableMapping): - """An object representing a URL (absolute or relative) and its components. + """An object representing a URI or URL (absolute or relative) and its components. Acts as a mutable mapping for manipulation of query string arguments. If the query string is not URL "form encoded" attempts at mapping access or manipulation will fail with a ValueError. No effort is made to @@ -58,7 +58,7 @@ def __init__(self, url=None, **parts): # String Protocols def __repr__(self): - return 'URI({0})'.format(self._compile(True)) + return "URI('{0}')".format(self._compile(True)) def __str__(self): """Return the Unicode text representation of this URL.""" @@ -244,6 +244,19 @@ def relative(self): return not (self.scheme and self.host and self._path and self._path.is_absolute()) + def resolve(self, uri=None, **parts): + """Attempt to resolve a new URI given an updated URI or URIString, partial or complete.""" + + if uri: + result = URIString(urljoin(unicode(self), unicode(uri))) + else: + result = URIString(self) + + for k, v in parts.items(): + setattr(result, k, v) + + return result + # Internal Methods def _compile(self, safe=False): diff --git a/test/field/test_link.py b/test/field/test_link.py index 495343e2..49872e3a 100644 --- a/test/field/test_link.py +++ b/test/field/test_link.py @@ -28,7 +28,7 @@ def test_complete_url(self): assert not link.relative - assert repr(link) == "URI(" + uri.replace('pass', '') + ")" + assert repr(link) == "URI('" + uri.replace('pass', '') + "')" with pytest.raises(ValueError): link['query'] From 82650aa99ecfc1e29565c3e0d2041f8845429dc7 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 1 Jun 2017 11:04:08 -0400 Subject: [PATCH 27/31] Simplification of URIString, consistency cleanup, docstrings. --- marrow/mongo/core/field/link.py | 119 ++++++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 23 deletions(-) diff --git a/marrow/mongo/core/field/link.py b/marrow/mongo/core/field/link.py index 772cb533..562a6d6b 100644 --- a/marrow/mongo/core/field/link.py +++ b/marrow/mongo/core/field/link.py @@ -24,7 +24,7 @@ class URIString(MutableMapping): - """An object representing a URI or URL (absolute or relative) and its components. + """An object representing a URI (absolute or relative) and its components. Acts as a mutable mapping for manipulation of query string arguments. If the query string is not URL "form encoded" attempts at mapping access or manipulation will fail with a ValueError. No effort is made to @@ -32,41 +32,48 @@ class URIString(MutableMapping): """ # Skip allocation of a dictionary per instance by pre-defining available slots. - __slots__ = ('_url', 'scheme', 'user', 'password', 'host', 'port', '_path', '_query', 'fragment') + __slots__ = ('_uri', 'scheme', 'user', 'password', 'host', 'port', '_path', '_query', 'fragment') __parts__ = {'scheme', 'user', 'password', 'host', 'port', 'path', 'query', 'fragment', 'username', 'hostname'} # Basic Protocols - def __init__(self, url=None, **parts): - self._query = {} + def __init__(self, _uri=None, **parts): + """Initialize a new URI from a passed in string or named parts. - if hasattr(url, '__link__'): # We utilize a custom object protocol to retrieve links to things. - url = url.__link__ + If both a base URI and parts are supplied than the parts will override those present in the URI. + """ - if isinstance(url, URIString): - url = unicode(url) + self._query = {} - self.url = url # If None, this will also handle setting defaults. + if hasattr(_uri, '__link__'): # We utilize a custom object protocol to retrieve links to things. + _uri = _uri.__link__ + + # To allow for simpler cases, this attribute does not need to be callable. + if callable(_uri): _uri = _uri() - if parts: # If not given a base URL, defines a new URL, otherwise update the given URL. + self.url = _uri # If None, this will also handle setting defaults. + + if parts: # If not given a base URI, defines a new URI, otherwise update the given URI. for part, value in parts.items(): if part not in self.__parts__: - raise TypeError("Unknown URL component: " + part) + raise TypeError("Unknown URI component: " + part) setattr(self, part, value) # String Protocols def __repr__(self): + """Return a "safe" programmers' representation that omits passwords.""" + return "URI('{0}')".format(self._compile(True)) def __str__(self): - """Return the Unicode text representation of this URL.""" + """Return the Unicode text representation of this URI, including passwords.""" return self.url def __bytes__(self): - """Return the binary string representation of this URL.""" + """Return the binary string representation of this URI, including passwords.""" return self.url.encode('utf-8') @@ -75,6 +82,13 @@ def __bytes__(self): __str__ = __bytes__ def __html__(self): + """Return an HTML representation of this link. + + A link to http://example.com/foo/bar will result in: + + example.com/foo/bar + """ + return '{summary}'.format( address = escape(self.url), summary = escape(self.host + unicode(self.path)), @@ -83,9 +97,12 @@ def __html__(self): # Comparison Protocols def __eq__(self, other): + """Compare this URI against another value.""" + if not isinstance(other, self.__class__): other = self.__class__(other) + # Because things like query string argument order may differ, but still be equivalent... for part in self.__parts__: if not getattr(self, part, None) == getattr(other, part, None): return False @@ -93,37 +110,57 @@ def __eq__(self, other): return True def __ne__(self, other): + """Inverse comparison support.""" + return not self == other # Mapping Protocols def __getitem__(self, name): + """Shortcut for retrieval of a query string argument.""" + if not isinstance(self._query, dict): raise ValueError("Query string is not manipulatable.") return self._query[name] def __setitem__(self, name, value): + """Shortcut for (re)assignment of query string arguments.""" + if not isinstance(self._query, dict): raise ValueError("Query string is not manipulatable.") - self._url = None # Invalidate the cached string. + self._uri = None # Invalidate the cached string. self._query[name] = unicode(value) def __delitem__(self, name): + """Shortcut for removal of a query string argument.""" + if not isinstance(self._query, dict): raise ValueError("Query string is not manipulatable.") - self._url = None # Invalidate the cached string. + self._uri = None # Invalidate the cached string. del self._query[name] def __iter__(self): + """Retrieve the query string argument names.""" + if not isinstance(self._query, dict): raise ValueError("Query string is not manipulatable.") return iter(self._query) + def __bool__(self): + """Truthyness comparison.""" + + return bool(self._uri) + + if py2: + __nonzero__ = __bool__ + def __len__(self): + """Determine the number of query string arguments.""" + if not isinstance(self._query, dict): raise ValueError("Query string is not manipulatable.") @@ -133,26 +170,38 @@ def __len__(self): @property def username(self): + """Alias for compatibility with urlsplit results.""" + return self.user @username.setter def username(self, value): + """Alias for compatibility with urlsplit results.""" + self.user = value @property def hostname(self): + """Alias for compatibility with urlsplit results.""" + return self.host @hostname.setter def hostname(self, value): + """Alias for compatibility with urlsplit results.""" + self.host = value @property def path(self): + """The path referenced by this URI.""" + return self._path @path.setter def path(self, value): + """Cast path assignments to our native path representation.""" + if value: self._path = _Path(value) else: @@ -160,10 +209,14 @@ def path(self, value): @property def query(self): + """Retrieve the "query string arguments" for this URI.""" + return self._query @query.setter def query(self, value): + """Assign query string arguments to this URI.""" + if isinstance(value, unicode): self.qs = value return @@ -172,18 +225,22 @@ def query(self, value): @property def url(self): - if not self._url: - self._url = self._compile() + """Retrieve the "compiled" URL.""" + + if not self._uri: + self._uri = self._compile() - return self._url + return self._uri @url.setter def url(self, value): + """Assign and replace the entire URI with a new URL.""" + if value: - self._decompile(value) + self._decompile(unicode(value)) return - self._url = None + self._uri = None self.scheme = None self.user = None self.password = None @@ -195,6 +252,8 @@ def url(self, value): @property def qs(self): + """Retrieve the query string as a string, not a mapping, while preserving unprocessable values.""" + if not self._query: return "" @@ -215,6 +274,8 @@ def qs(self): @qs.setter def qs(self, value): + """Assign a new query string as a string, not a mapping, while preserving unprocessable values.""" + query = self._query if value: @@ -239,6 +300,11 @@ def qs(self, value): @property def relative(self): + """Identify if this URI is relative to some "current context". + + For example, if the protocol is missing, it's protocol-relative. If the host is missing, it's host-relative, etc. + """ + if self.scheme not in {None, '', 'http', 'https'}: return False @@ -252,14 +318,19 @@ def resolve(self, uri=None, **parts): else: result = URIString(self) - for k, v in parts.items(): - setattr(result, k, v) + for part, value in parts.items(): + if part not in self.__parts__: + raise TypeError("Unknown URI component: " + part) + + setattr(result, part, value) return result # Internal Methods def _compile(self, safe=False): + """Compile the parts of a URI down into a string representation.""" + return "".join(( (self.scheme + "://") if self.scheme else ("//" if self.host else ""), quote_plus(self.user) if self.user else "", @@ -273,7 +344,9 @@ def _compile(self, safe=False): )) def _decompile(self, value): - self._url = value + """Store the original and process parts from a URI string.""" + + self._uri = value result = urlsplit(value) for part in self.__parts__: From 3b4084c3f7ff8e292a2a0014b204a26e5b915efd Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 1 Jun 2017 11:08:36 -0400 Subject: [PATCH 28/31] Correction for import ordering warnings. --- test/field/test_date.py | 1 + test/field/test_decimal.py | 1 + test/util/test_capped.py | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/field/test_date.py b/test/field/test_date.py index e9c161cd..61b55d15 100644 --- a/test/field/test_date.py +++ b/test/field/test_date.py @@ -5,6 +5,7 @@ from datetime import datetime from bson import ObjectId + from common import FieldExam from marrow.mongo.field import Date diff --git a/test/field/test_decimal.py b/test/field/test_decimal.py index ce22f6d3..40cf5fa2 100644 --- a/test/field/test_decimal.py +++ b/test/field/test_decimal.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from decimal import Decimal as dec + from bson.decimal128 import Decimal128 from common import FieldExam diff --git a/test/util/test_capped.py b/test/util/test_capped.py index b8a632e9..6bbdbf40 100644 --- a/test/util/test_capped.py +++ b/test/util/test_capped.py @@ -15,7 +15,6 @@ from marrow.mongo.util.capped import _patch, tail from marrow.schema.compat import pypy - skip = int(os.environ.get('TEST_SKIP_CAPPED', 0)) or pypy pytestmark = pytest.mark.skipif(skip, reason="Slow tests skipped.") From f44b5536ac86a30af2fbd1639f123355960f7bd7 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 1 Jun 2017 11:23:08 -0400 Subject: [PATCH 29/31] Added relative resolution tests. --- test/field/test_link.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/field/test_link.py b/test/field/test_link.py index 49872e3a..0c754d27 100644 --- a/test/field/test_link.py +++ b/test/field/test_link.py @@ -172,6 +172,15 @@ def test_recast(self): uri = "http://user:pass@host:8088/path?query#fragment" link = Link.URI(uri) assert link == Link.URI(link) + + def test_relative_resolution(self): + uri = "https://example.com/about/" + link = Link.URI(uri) + + assert unicode(link.resolve('/')) == 'https://example.com/' + assert unicode(link.resolve('us')) == 'https://example.com/about/us' + assert unicode(link.resolve('../img/banner.jpeg')) == 'https://example.com/img/banner.jpeg' + assert unicode(link.resolve('//cdn.example.com/asset')) == 'https://cdn.example.com/asset' class TestLinkField(FieldExam): From da9f07dd5145798cf9f4faa3b462c7025e617172 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Mon, 19 Jun 2017 23:50:33 -0400 Subject: [PATCH 30/31] Additional venv ignorance. --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3a750ac4..db6db78f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ .packaging # Virtual Environment Pseudo-Chroot +.venv bin include lib @@ -36,4 +37,3 @@ htmlcov # Completion Integration .ropeproject tags - From 5880531585403871d90439d44b21dd9f42d70473 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Tue, 20 Jun 2017 00:32:35 -0400 Subject: [PATCH 31/31] Added docs submodule and tweaked Landscape.io config. --- .gitmodules | 4 ++++ .landscape.yaml | 1 + docs | 1 + 3 files changed, 6 insertions(+) create mode 100644 .gitmodules create mode 160000 docs diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..8915632d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "book"] + path = docs + url = git@github.com:marrow/mongo.git + branch = book diff --git a/.landscape.yaml b/.landscape.yaml index 907abd79..6d088391 100644 --- a/.landscape.yaml +++ b/.landscape.yaml @@ -1,5 +1,6 @@ max-line-length: 120 ignore-paths: + - docs - example python-targets: - 2 diff --git a/docs b/docs new file mode 160000 index 00000000..9bc5222c --- /dev/null +++ b/docs @@ -0,0 +1 @@ +Subproject commit 9bc5222c1059b5ea895a71acc66dc2e1a23653d8