"""
Base classes for provider definitions. They declare the minimum set of methods
and properties that must be implemented or overridden by every concrete provider.
"""
import json
import logging
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from collections.abc import MutableSequence
from typing import List, MutableMapping, Optional, Tuple, Union
from urllib.parse import urlparse
import requests
from geocoder.distance import Distance
# Module-level logger, named after this module; used by the debug/warning/error
# reporting of the classes defined below.
logger = logging.getLogger(__name__)
class OneResult(metaclass=ABCMeta):
    """Container for one (JSON) object returned by provider

    **Class variables:**

    :cvar cls._TO_EXCLUDE: List of properties and attributes to exclude in
        :func:`OneResult._parse_json_with_fieldnames`
    :cvar bool cls._GEOCODER3_READY: Temporary value, representing whether the
        provider is tested and has finished migration to geocoder3. The default
        value will bypass some internal checks.

    **Instance variables:**

    After creation each instance of :class:`OneResult` has the following mandatory
    variables. For some providers this list can be extended by provider
    implementation.

    :ivar self.object_raw_json: Raw json for object, passed by
        :func:`MultipleResultsQuery._parse_results`
    :ivar self.object_json: Result of :func:`OneResult._parse_json_with_fieldnames`
    :ivar self.fieldnames: Fieldnames list generated in
        :func:`OneResult._parse_json_with_fieldnames`

    **Init parameters:**

    For initialization parameters, please check :func:`OneResult.__init__`
    method documentation.
    """

    _GEOCODER3_READY = False
    # Names skipped by _parse_json_with_fieldnames. Kept as a list so that code
    # which extends or copies it keeps working.
    _TO_EXCLUDE = [
        "parse",
        "object_raw_json",
        "object_json",
        "url",
        "fieldnames",
        "help",
        "debug",
        "short_name",
        "api",
        "content",
        "params",
        "street_number",
        "api_key",
        "key",
        "id",
        "x",
        "y",
        "latlng",
        "headers",
        "timeout",
        "wkt",
        "locality",
        "province",
        "rate_limited_get",
        "osm",
        "route",
        "schema",
        "properties",
        "geojson",
        "tree",
        "error",
        "proxies",
        "road",
        "xy",
        "road_long",
        "city_long",
        "state_long",
        "country_long",
        "postal_town_long",
        "province_long",
        "street_long",
        "interpolated",
        "method",
        "geometry",
        "session",
    ]

    def __init__(self, json_content):
        """Initialize :class:`OneResult` object and parse input json

        :param dict json_content: Dictionary with one provider answer, passed by
            :func:`MultipleResultsQuery._parse_results`
        """
        self.object_raw_json = json_content
        # attributes returned in JSON format
        self.fieldnames = []
        self.object_json = {}
        self._parse_json_with_fieldnames()

    def __init_subclass__(cls, **kwargs):
        """Reject subclasses that leave 'lat', 'lng' or 'address' abstract.

        :raises NotImplementedError: When one of the mandatory properties is still
            marked abstract on the new subclass.
        """
        super().__init_subclass__(**kwargs)
        if getattr(cls.lat, "__isabstractmethod__", False):
            raise NotImplementedError("All subclasses should implement 'lat' property")
        if getattr(cls.lng, "__isabstractmethod__", False):
            raise NotImplementedError("All subclasses should implement 'lng' property")
        if getattr(cls.address, "__isabstractmethod__", False):
            raise NotImplementedError(
                "All subclasses should implement 'address' property"
            )

    @property
    @abstractmethod
    def lat(self) -> Optional[float]:
        """Latitude of the object"""
        return None

    @property
    @abstractmethod
    def lng(self) -> Optional[float]:
        """Longitude of the object"""
        return None

    @property
    def west(self) -> Optional[float]:
        """Return optional west coordinate of bbox, if available."""
        return self.bbox[0] if self.bbox else None

    @property
    def south(self) -> Optional[float]:
        """Return optional south coordinate of bbox, if available."""
        return self.bbox[1] if self.bbox else None

    @property
    def east(self) -> Optional[float]:
        """Return optional east coordinate of bbox, if available."""
        return self.bbox[2] if self.bbox else None

    @property
    def north(self) -> Optional[float]:
        """Return optional north coordinate of bbox, if available."""
        return self.bbox[3] if self.bbox else None

    @property
    def northeast(self) -> List[float]:
        """Return north-east list of coordinates for bounds, if available."""
        return [self.north, self.east] if self.bbox else []

    @property
    def southwest(self) -> List[float]:
        """Return south-west list of coordinates for bounds, if available."""
        return [self.south, self.west] if self.bbox else []

    @property
    def bbox(self) -> List[float]:
        """Output answer as GeoJSON bbox if it can be calculated/retrieved.

        The base implementation has no bbox and returns an empty list. The other
        bbox-derived properties index the value as [west, south, east, north].
        """
        return []

    @property
    def bounds(self) -> dict:
        """Output answer as Google Maps API bounds if it can be calculated/retrieved."""
        return (
            {"northeast": self.northeast, "southwest": self.southwest}
            if self.northeast and self.southwest
            else {}
        )

    @property
    @abstractmethod
    def address(self) -> Optional[str]:
        """Object simple string address."""
        return None

    def __repr__(self) -> str:
        """Display [address] if available; [lat, lng] otherwise"""
        return f"[{self.address}]" if self.address else f"[{self.lat}, {self.lng}]"

    def _parse_json_with_fieldnames(self):
        """Parse the instance object with all attributes/methods defined in the class,
        except for the ones defined starting with '_' or flagged in
        :attr:`cls._TO_EXCLUDE`.

        Every accepted name is appended to :attr:`self.fieldnames`; only names with
        a truthy value are copied into :attr:`self.object_json`.
        """
        for key in dir(self):
            if not key.startswith("_") and key not in self._TO_EXCLUDE:
                self.fieldnames.append(key)
                value = getattr(self, key)
                if value:
                    self.object_json[key] = value
        # Add OK attribute even if value is "False"
        self.object_json["ok"] = self.ok

    @property
    def ok(self) -> bool:
        """Status of retrieving location/IP coordinates or reverse geocoding.

        Usually should be replaced in reverse results class.

        .. note:: The check is truthiness based, so a latitude or longitude equal
            to ``0`` is reported as not ok, exactly like a missing value.
        """
        return bool(self.lng and self.lat)

    @property
    def status(self) -> str:
        """Specify current summary status of instance"""
        if self.ok:
            return "OK"
        if not self.address:
            return "ERROR - No results found"
        return "ERROR - No Geometry"

    def debug(self):
        """Display debug information for instance of :class:`OneResult`

        Logs the raw provider json and the cleaned json at DEBUG level.
        """
        logger.debug("From provider")
        logger.debug("-------------")
        logger.debug(json.dumps(self.object_raw_json, indent=4))
        logger.debug("Cleaned json")
        logger.debug("------------")
        logger.debug(json.dumps(self.object_json, indent=4))

    @property
    def confidence(self) -> int:
        """Is as a measure of how confident we are that centre point coordinates
        returned for the result precisely reflect the result.

        :return: ``0`` when no bounds are available, otherwise a score from ``10``
            (smallest bounds) down to ``1`` (largest bounds).
        """
        if not self.bounds:
            # Cannot determine score
            return 0

        # Units are measured in Kilometers
        distance = Distance(self.northeast, self.southwest, units="km")
        for score, maximum in (
            (10, 0.25),
            (9, 0.5),
            (8, 1),
            (7, 5),
            (6, 7.5),
            (5, 10),
            (4, 15),
            (3, 20),
            (2, 25),
        ):
            if distance < maximum:
                return score
        # Nothing matched above, so the distance is not below 25: lowest score.
        # Returning unconditionally keeps the declared ``int`` return type.
        return 1

    @property
    def geometry(self) -> dict:
        """Output answer as GeoJSON Point"""
        return {"type": "Point", "coordinates": [self.x, self.y]} if self.ok else {}

    @property
    def geojson(self) -> dict:
        """Output answer as GeoJSON Feature

        The ``properties`` member is a shallow copy of :attr:`self.object_json`, so
        building or editing the feature never modifies the instance state.
        """
        feature = {
            "type": "Feature",
            # copy: the bbox assignment below must not leak into self.object_json
            "properties": dict(self.object_json),
        }
        if self.bbox:
            feature["bbox"] = self.bbox
            feature["properties"]["bbox"] = self.bbox
        if self.geometry:
            feature["geometry"] = self.geometry
        return feature

    @property
    def wkt(self) -> Optional[str]:
        """Output coordinates in well-known text format, no SRID data."""
        return f"POINT({self.x} {self.y})" if self.ok else None

    @property
    def xy(self) -> Optional[list]:
        """Optional list of longitude and latitude values."""
        return [self.lng, self.lat] if self.ok else None

    @property
    def latlng(self) -> Optional[list]:
        """Optional list of latitude and longitude values."""
        return [self.lat, self.lng] if self.ok else None

    @property
    def y(self) -> Optional[float]:
        """Latitude of the object"""
        return self.lat

    @property
    def x(self) -> Optional[float]:
        """Longitude of the object"""
        return self.lng
class MultipleResultsQuery(MutableSequence):
    """Base results and query manager container

    This class is responsible for checking that new provider classes are declared
    correctly before they are used in the project. The checks are done in
    :func:`__init_subclass__` and fail at class-definition time.

    **Class variables:**

    Some class variables are mandatory for all nested subclasses.

    :cvar str cls._URL: Default URL for provider, can be overwritten with `url` input
        parameter
    :cvar OneResult cls._RESULT_CLASS: Provider's individual result class.
    :cvar str cls._KEY: Provider's default api_key. Usually map to ENV variable
        responsible for key parsing. Can be overwritten with **key** parameter on
        instance creation. Shows actually used key when requested from instance.
    :cvar bool cls._KEY_MANDATORY: Special mark for check of mandatory presence of api
        key, for providers with mandatory key requirement
    :cvar str cls._METHOD: Provider's internal method, that should match with api.py
        :attr:`options` definition.
    :cvar str cls._PROVIDER: Provider's internal name, that should match with api.py
        :attr:`options` definition.
    :cvar float cls._TIMEOUT: Default timeout for :func:`requests.request`
        configuration, can be overwritten on instance creation or instance calling
    :cvar bool cls._GEOCODER3_READY: Temporary value, representing whether the
        provider is tested and has finished migration to geocoder3. The default
        value will generate a warning on any provider call.

    **Instance variables:**

    After creation each instance of :class:`MultipleResultsQuery` has the following
    mandatory variables. For some providers this list can be extended by provider
    implementation.

    :ivar list[OneResult] self.results_list: Hold all answers from provider in parsed
        state
    :ivar str self.url: Final request url that will be/was used during request
    :ivar str self.location: Object to geocode/reverse geocode
    :ivar float self.timeout: Final request timeout that was used during request
    :ivar Optional[dict] self.proxies: Final request proxies that was used during
        request
    :ivar requests.Session self.session: :class:`requests.Session` object, that was used
    :ivar dict self.headers: Final request headers that was used during request
    :ivar dict self.params: Final request query params that was used during request
    :ivar Optional[int] self.status_code: :class:`requests.Response` final HTTP answer
        code or `None` if request is not made yet, or :mod:`requests` failed during
        request
    :ivar requests.Response self.raw_response: Contain raw :class:`requests.Response`
        from provider
    :ivar Union[dict, list] self.raw_json: Contain raw :func:`requests.Response.json`
        from provider
    :ivar str self.error: :mod:`requests` detailed error, if was raised during request
    :ivar bool self.is_called: `False` on instance initialization, become `True` after
        calling of :func:`__call__` method(i.e. instance call)
    :ivar Optional[OneResult] self.current_result: Mapping to result, that is used for
        direct attributes retrieval in :func:`__getattr__`; `None` while there are
        no results

    **Init parameters:**

    For initialization parameters, please check :func:`MultipleResultsQuery.__init__`
    method documentation.
    """

    _URL = None
    _RESULT_CLASS = None
    _KEY = None
    _KEY_MANDATORY = True
    _METHOD = None
    _PROVIDER = None
    _TIMEOUT = 5.0
    _GEOCODER3_READY = False

    @staticmethod
    def _is_valid_url(url: Optional[str]) -> bool:
        """Validate that URL contains a valid protocol and a valid domain

        :param Optional[str] url: Any string to be checked for format validity.
            Does not check for endpoint existence.
        """
        try:
            parsed = urlparse(url)
            mandatory_parts = [parsed.scheme in ["http", "https"], parsed.netloc]
            return all(mandatory_parts)
        except AttributeError:
            return False

    @classmethod
    def _is_valid_result_class(cls) -> bool:
        """Validate cls._RESULT_CLASS has correct subclass nesting"""
        try:
            return issubclass(cls._RESULT_CLASS, OneResult)
        except TypeError:
            # TypeError raised by issubclass if cls._RESULT_CLASS is None
            return False

    @classmethod
    def _get_api_key(cls, key: Optional[str] = None) -> Optional[str]:
        """Retrieves API Key from method argument first, then from :attr:`cls._KEY`

        :param Optional[str] key: Custom API Key data for provider usage, if required.
            Passed from :func:`__init__` method.
        :raises ValueError: If api key was not provided, but mandatory for provider use
        """
        key = key or cls._KEY
        if not key and cls._KEY_MANDATORY:
            raise ValueError("Provide API Key")
        return key

    def __init_subclass__(cls, **kwargs):
        """Responsible for setup check for :class:`MultipleResultsQuery` subclasses.

        :raises ValueError: When subclass not define :attr:`cls._URL` value.
        :raises ValueError: When subclass incorrectly define :attr:`cls._RESULT_CLASS`
            value.
        :raises ValueError: When subclass incorrectly define :attr:`cls._METHOD` value.
        """
        super().__init_subclass__(**kwargs)
        # check validity of class._URL
        if not cls._is_valid_url(cls._URL):
            raise ValueError(f"Subclass must define a valid URL. Got {cls._URL}")
        # check validity of cls._RESULT_CLASS
        if not cls._is_valid_result_class():
            raise ValueError(
                f"Subclass must define _RESULT_CLASS from 'OneResult'. "
                f"Got {cls._RESULT_CLASS}",
            )
        # check validity of cls._METHOD
        if not cls._METHOD or cls._METHOD not in [
            "id",
            "geocode",
            "details",
            "reverse",
            "timezone",
            "elevation",
            "places",
            "batch",
            "batch_reverse",
            "children",
            "hierarchy",
            "parcel",
        ]:
            raise ValueError(
                f"Subclass must define correct _METHOD attribute, not equal to None. "
                f"Got {cls._METHOD}"
            )

    def __init__(
        self,
        location,
        url: Optional[str] = None,
        key: Optional[str] = None,
        timeout: Union[None, float, Tuple[float, float], Tuple[float, None]] = None,
        proxies: Optional[MutableMapping[str, str]] = None,
        session: Optional[requests.Session] = None,
        headers: Optional[MutableMapping[str, str]] = None,
        params: Optional[dict] = None,
        **kwargs,
    ):
        """Initialize a :class:`MultipleResultsQuery` object.

        For class and instance variables description please refer to class docstrings.

        :param location: Query content for geocode or reverse geocoding
        :param Optional[str] url: Overwrite for default provider service url
        :param Optional[str] key: API Key data for provider usage, if required. Passed
            to :func:`_get_api_key`, which result passed to :func:`_build_headers` and
            :func:`_build_params`, and may be passed to other custom provider's
            implementation methods. Check exact provider docs.
        :param Union[None, float, Tuple[float, float], Tuple[float, None]] timeout:
            Max request answer wait time
        :param Optional[MutableMapping[str, str]] proxies:
            Proxies for :func:`requests.request`
        :param Optional[requests.Session] session: Custom :class:`requests.Session` for
            request
        :param Optional[MutableMapping[str, str]] headers: Additional headers for
            :func:`requests.request`
        :param Optional[dict] params: Additional query parameters
        :param kwargs: Any other keyword arguments, that will be passed to internal
            :func:`_build_headers`, :func:`_build_params`, :func:`_before_initialize` or
            other custom provider's implementation methods. Check exact provider docs
        :raises ValueError: When provided custom :attr:`url` is not well-formatted
        :raises ValueError: If api key was not provided, but mandatory for provider use
        """
        super().__init__()
        self.results_list = []

        # Check url if it was changed on instance creation
        if url and not self._is_valid_url(url):
            raise ValueError(f"url not valid. Got {url}")
        self.url = url or self._URL

        # check validity of provider key; the resolved key shadows the class
        # level _KEY on this instance only
        provider_key = self._KEY = self._get_api_key(key=key)

        # point to geocode, as a string or coordinates
        self.location = location

        # set attributes to manage query. Can be overwritten in __call__
        self.timeout = timeout or self._TIMEOUT
        self.proxies = proxies
        self.session = session

        # headers can be overwritten in _build_headers,
        # headers can be extended with headers keyword argument
        self.headers = self._build_headers(provider_key, **kwargs).copy()
        self.headers.update(headers or {})

        # params can be overwritten in _build_params
        # params can be extended with params keyword argument
        # OrderedDict in order to preserve the order of the url query parameters
        self.params = OrderedDict(self._build_params(location, provider_key, **kwargs))
        self.params.update(params or {})

        # results of query (set by __call__ and _connect)
        self.status_code = None
        self.raw_response = None
        self.raw_json = None
        self.error = None
        self.is_called = False

        # pointer to result where to delegate calls
        self.current_result = None

        self._before_initialize(location, **kwargs)

    def __getitem__(self, key):
        """Special method implementation for custom :class:`MutableSequence` subclass

        Not expected to be nested or changed in subclasses.
        """
        return self.results_list[key]

    def __setitem__(self, key, value):
        """Special method implementation for custom :class:`MutableSequence` subclass

        Not expected to be nested or changed in subclasses.
        """
        self.results_list[key] = value

    def __delitem__(self, key):
        """Special method implementation for custom :class:`MutableSequence` subclass

        Not expected to be nested or changed in subclasses.
        """
        del self.results_list[key]

    def __len__(self):
        """Special method implementation for custom :class:`MutableSequence` subclass

        Not expected to be nested or changed in subclasses.
        """
        return len(self.results_list)

    def insert(self, index, value):
        """Special method implementation for custom :class:`MutableSequence` subclass

        Not expected to be nested or changed in subclasses.
        """
        self.results_list.insert(index, value)

    def add(self, value):
        """Append ``value`` to the end of :attr:`results_list`

        Not expected to be nested or changed in subclasses.
        """
        self.results_list.append(value)

    def __repr__(self) -> str:
        """Display :class:`MultipleResultsQuery` debug console representation"""
        count = len(self)
        if count == 0:
            summary = "[empty]"
        elif count == 1:
            summary = repr(self[0])
        else:
            summary = f"#{count} results"
        # Built in a single pass: the status may hold an arbitrary error text, and
        # braces inside it would break a second str.format() round.
        provider = (self._PROVIDER or "unknown").title()
        method = (self._METHOD or "unknown").title()
        return f"<[{self.status}] {provider} - {method} {summary}>"

    def _build_headers(self, provider_key, **kwargs) -> dict:
        """Generate default request headers mapping for provider

        The base implementation adds no headers; providers override it when needed.

        :param provider_key: Finalized api_key, from :func:`_get_api_key` method
        :param kwargs: All kwargs from :func:`__init__` method
        """
        return {}

    def _build_params(self, location, provider_key, **kwargs) -> dict:
        """Generate default query parameters mapping for provider

        :param location: Query content for geocode or reverse geocoding
        :param provider_key: Finalized api_key, from :func:`_get_api_key` method
        :param kwargs: All kwargs from :func:`__init__` method
        """
        return {}

    def _before_initialize(self, location, **kwargs):
        """Hook for children class to finalize their setup before the query

        :param location: Query content for geocode or reverse geocoding
        :param kwargs: All kwargs from :func:`__init__` method
        """
        pass

    def __call__(
        self,
        timeout: Union[None, float, Tuple[float, float], Tuple[float, None]] = None,
        proxies: Optional[MutableMapping[str, str]] = None,
        session: Optional[requests.Session] = None,
    ):
        """Query remote server and parse results

        Any keyword argument of :func:`__call__` will have precedence over same argument
        in :func:`__init__` method.

        :param Union[None, float, Tuple[float, float], Tuple[float, None]] timeout:
            Max request answer wait time
        :param Optional[MutableMapping[str, str]] proxies:
            Proxies for :func:`requests.request`
        :param Optional[requests.Session] session: Custom :class:`requests.Session` for
            request
        :return: The instance itself, to allow chained usage
        """
        self.is_called = True
        if self._GEOCODER3_READY is False:
            logger.warning(
                "This provider behaviour not tested in geocoder3, results may be "
                "incorrect, or not all features available."
            )

        # Allow in call overwrite of connection settings
        self.timeout = timeout or self.timeout
        self.proxies = proxies or self.proxies
        self.session = session or self.session or requests.Session()

        # query URL and get valid JSON (also stored in self.raw_json)
        json_response = self._connect()

        # catch errors and debug warnings
        has_error = (
            self._catch_errors(json_response) if json_response is not None else True
        )

        # raw_response stays None when the request failed before any response was
        # received (connection error, timeout...), so it must be checked first
        if self.raw_response is not None and self.url not in self.raw_response.url:
            logger.warning(
                "Expected request url (%s) and final request url (%s) do not match. "
                "Probably redirects was made.",
                self.url,
                self.raw_response.url,
            )

        # creates instance for results
        if not has_error:
            self._parse_results(json_response)

        return self

    def _connect(self) -> Union[list, dict, None]:
        """Responsible for handling external request and connection errors

        :return: Parsed json of the answer, or `None` when :mod:`requests` raised;
            in that case the error text is stored in :attr:`error`
        """
        try:
            # make request and get response
            self.raw_response = self.rate_limited_get(
                self.url,
                params=self.params,
                headers=self.headers,
                timeout=self.timeout,
                proxies=self.proxies,
            )
            logger.info("Requested %s", self.raw_response.url)

            # check that response is ok
            self.status_code = self.raw_response.status_code
            self.raw_response.raise_for_status()

            # rely on json method to get non-empty well formatted JSON
            self.raw_json = self.raw_response.json()
        except requests.exceptions.RequestException as err:
            # store real status code and error
            self.error = f"ERROR - {str(err)}"
            logger.error(
                "Status code %s from %s: %s", self.status_code, self.url, self.error
            )
            return None

        # return response within its JSON format
        return self.raw_json

    def rate_limited_get(self, url, **kwargs):
        """By default, simply wraps the ``get`` call of :attr:`session`"""
        return self.session.get(url, **kwargs)

    def _adapt_results(self, json_response) -> Union[dict, List[dict]]:
        """Allow children classes to format json_response into
        :func:`_parse_results` expected format

        This required for correct iteration in :func:`_parse_results`

        :param json_response: Raw json from provider, usually same as in
            :attr:`raw_json`, by default invoked inside :func:`_parse_results`
        """
        return json_response

    def _parse_results(self, json_response: Union[dict, List[dict]]):
        """Responsible for parsing original json and separating it to
        :class:`OneResult` objects

        :param json_response: Raw json from provider; it is passed through
            :func:`_adapt_results` and every yielded item becomes one result
        """
        for json_dict in self._adapt_results(json_response):
            self.add(self._RESULT_CLASS(json_dict))

        # set default result to use for delegation (None while nothing was parsed)
        self.current_result = self[0] if len(self) > 0 else None

    def _catch_errors(self, json_response):
        """Checks the JSON returned from the provider and flag errors if necessary

        The base implementation ignores ``json_response`` and returns :attr:`error`.
        """
        return self.error

    @property
    def has_data(self) -> bool:
        """Status of geocoding if request was made

        :raises RuntimeError: When external request was not made before property call
        """
        if not self.is_called:
            raise RuntimeError(
                "Cannot detect data presence. External request was not made. "
                "Use instance __call__() method to retrieve data."
            )

        return len(self) > 0

    @property
    def status(self) -> str:
        """Specify current summary status of instance

        **Possible statuses:**

        - "External request was not made"
        - "OK" - when request was made, and any result retrieved
        - :mod:`requests` error text representation, if request faced error
        - "ERROR - No results found"
        - "ERROR - Unhandled Exception"
        """
        if not self.is_called:
            return "External request was not made"
        elif self.has_data:
            return "OK"
        elif self.error:
            return self.error
        elif len(self) == 0:
            return "ERROR - No results found"
        else:
            return "ERROR - Unhandled Exception"

    @property
    def geojson(self) -> dict:
        """Output all answers as GeoJSON FeatureCollection"""
        geojson_results = [result.geojson for result in self]
        return {"type": "FeatureCollection", "features": geojson_results}

    def debug(self) -> list:
        """Display debug information for instance of :class:`MultipleResultsQuery`

        :return: List with the return value of ``debug()`` of every result; empty
            when no request was made or no data was retrieved
        """
        logger.debug(repr(self))
        logger.debug("results: %s", len(self))
        logger.debug("code: %s", self.status_code)
        logger.debug("url: %s", self.url)

        stats = []

        if self.is_called and self.has_data:
            for index, result in enumerate(self):
                logger.debug("Details for result #%s", index + 1)
                logger.debug("---")
                stats.append(result.debug())
        else:
            logger.debug(self.status)

        return stats

    def __getattr__(self, name: str):  # sourcery skip: swap-if-expression
        """Allow direct access to :attr:`MultipleResultsQuery.current_result`
        attributes from direct calling of :class:`MultipleResultsQuery`

        Called when an attribute lookup has not found the attribute in the usual
        places (i.e. it is not an instance attribute nor is it found in the class tree
        for self).

        .. note:: If the attribute is found through the normal mechanism,
            :func:`__getattr__` is not called.

        :param name: Attribute name for lookup
        :raises AttributeError: For special ``__dunder__`` names, and for any name
            while the instance is not initialized yet
        :raises RuntimeError: If provider query was not made and
            :attr:`current_result` is still empty. (From :func:`has_data`)
        """
        missing = AttributeError(
            f"{type(self).__name__!r} object has no attribute {name!r}"
        )
        # Special names are probed by the interpreter and the stdlib (copy,
        # pickle...). They are never delegated to a result.
        if name.startswith("__") and name.endswith("__"):
            raise missing
        # has_data and the delegation below read these instance attributes. When
        # they are absent (__init__ did not finish, or the object is being rebuilt
        # by copy/pickle), going on would re-enter __getattr__ endlessly.
        state = self.__dict__
        for required in ("is_called", "results_list", "current_result"):
            if required not in state:
                raise missing
        return None if not self.has_data else getattr(self.current_result, name)