Source code for label

"""
The Label module provides classes for DNS labels.

The Label class
===============

A label is initialized from bytes::

    from dike.label import Label
    monty = Label(b'flying-circus')

We can also create a label from a string using the
:py:meth:`Label.fromstr()` static method, which converts the string to
`Punycode <https://tools.ietf.org/html/rfc3492>`_::

    biter = Label.fromstr('møøsë')
    print(biter)              # møøsë (via implicit string conversion)
    print(bytes(biter))       # b'xn--ms-ija4ca'
    print(repr(biter))        # Label.fromstr('møøsë')

A utility function which will create a label from bytes, strings, or
other labels is also available::

    from dike.label import make_label
    average_airspeed0 = make_label(b'African')
    average_airspeed1 = make_label('European')
    average_airspeed2 = make_label(average_airspeed1)

Labels are immutable - once initialized the value never changes. This
has the advantage of allowing labels to be used as keys in
:py:class:`dict` and various other places, but does mean that updating
a label is not possible. To make changes we can convert to either a
:py:class:`str` or :py:class:`bytes` value and use slicing or
concatenation to produce the value we want, then initialize a new
label::

    country = Label.fromstr('holland')
    same_country = Label.fromstr('nether' + str(country)[3:] + 's')

    print(country)        # holland
    print(same_country)   # netherlands

Label comparisons work as expected, and are case-insensitive (as per
DNS specification)::

    print(Label(b'flying') == Label(b'circus'))   # False
    print(Label(b'flying') > Label(b'circus'))    # True
    print(Label(b'CIRCUS') == Label(b'circus'))   # True

If you have a label you can also use bytes or strings in comparisons,
and these work as if you had built a Label object for the comparison::

    print(Label.fromstr('Norwegian') != 'blue')  # True
    print(Label(b'short') < b'shortness')        # True

The module also includes a number of utility functions::

    i_am_a_host_label = Label.fromstr('mailserver')
    i_am_not_a_host_label = Label.fromstr('WITCH!!!')

    print(i_am_a_host_label.ishost())           # True
    print(i_am_a_host_label.canonical())        # b'mailserver'

    print(i_am_not_a_host_label.ishost())       # False
    print(i_am_not_a_host_label.canonical())    # b'witch!!!'

Finally, be careful when converting arbitrary labels to strings, for
example when receiving labels in DNS packets from the Internet. This
can result in a :py:class:`UnicodeError` being raised, if the bytes
cannot be represented in Punycode. You can use the
:py:meth:`Label.to_presentation()` method to convert the label to a
string in this case, for example when logging. This uses
escape sequences for any characters that might be interpreted as
special in a zone file, as defined in
`RFC 1035 <https://tools.ietf.org/html/rfc1035>`_::

    bad_punycode = Label(b'scary-' + bytes([0x80]))
    print(bad_punycode.to_presentation())   # scary-\\128


The LabelFactory class
======================

Since we often use the same label many times when dealing with DNS, it
can be more efficient to use the same instance for all occurrences of
a given label. For example, the label ``com`` is likely to appear in
many DNS names, and it can be more efficient to reuse the same label.
This is safe, because labels are immutable.

The :py:class:`LabelFactory` class exists for this purpose::

    from dike import LabelFactory, Label
    zone_label_factory = LabelFactory()
    foo = zone_label_factory.fromstr('foo')
    bar = zone_label_factory.fromstr('foo')
    baz = Label.fromstr('foo')

    print(foo == bar)                          # True
    print(foo is bar)                          # True
    print(foo == baz)                          # True
    print(foo is baz)                          # False

Notice that we can also create labels by normal object creation.
Comparisons (``==``, ``<``, and so on) work as expected, but the
labels have different identities in this case (so the ``is``
comparison is ``False``).

The :py:meth:`LabelFactory.frombytes()` and
:py:meth:`LabelFactory.fromlabel()` methods are also available for
creating from bytes or other Label instances.

The labels in a LabelFactory are stored in a
:py:class:`weakref.WeakValueDictionary`, so when a label is no longer
used, the memory used by the object will be released.
"""

from typing import Union

import copy
import weakref

import encodings.idna

from dike.errors import EmptyLabel, LabelTooLong


class Label:
    """
    A single, immutable DNS label.

    The :py:class:`Label` constructor requires a single value, which is
    :py:class:`bytes`. If the optional `canonicalize` argument is used
    then the label will be converted to the canonical version (that
    is, ASCII lower-case).

    If a label is more than 63 bytes long, a :py:class:`LabelTooLong`
    exception will be raised.

    Attempting to create an empty label (with ``b''``) will raise a
    :py:class:`EmptyLabel` exception.

    :param label_val: Value to use when creating the label.
    :type label_val: bytes
    :param canonicalize: Store the lower-case version of the label.
    :type canonicalize: bool
    :raises: :py:class:`EmptyLabel`
    :raises: :py:class:`LabelTooLong`
    """
    __slots__ = (
        '_label_bytes',
        '_canonical_bytes',
        '_ishost_flag',
        '__weakref__',
    )
    # The bytes of the label exactly as stored.
    _label_bytes: bytes
    # Lower-case version of the label; None until first needed.
    _canonical_bytes: Union[None, bytes]
    # Cached result of ishost(); the slot is unset until first use.
    _ishost_flag: bool

    def __init__(self, label_val: bytes, *, canonicalize: bool = False):
        # The _ishost_flag attribute is set when the ishost() method is
        # first invoked.

        # Check the length.
        if len(label_val) == 0:
            raise EmptyLabel()
        if len(label_val) > 63:
            raise LabelTooLong(label_val)

        # If we want a canonical version, set that now.
        if canonicalize:
            self._canonical_bytes = label_val.lower()
            self._label_bytes = self._canonical_bytes
        # Otherwise leave our canonical version unset.
        else:
            self._canonical_bytes = None
            self._label_bytes = label_val

    @staticmethod
    def fromstr(label_val: str, *, canonicalize: bool = False) -> 'Label':
        """
        Create a label from the IDNA version of a string.

        Certain Unicode values are not allowed in Punycode. A
        :py:class:`UnicodeError` exception will be raised in that case.

        :param label_val: string to convert into a label
        :type label_val: str
        :param canonicalize: Store the lower-case version of the label.
        :type canonicalize: bool
        :return: a label
        :rtype: Label
        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        :raises UnicodeError: string cannot be converted to Punycode.
        """
        try:
            digested_val = encodings.idna.ToASCII(label_val)
        except UnicodeError as err:
            # encodings.idna.ToASCII reports both an empty label and a
            # too long label as UnicodeError, so differentiate here.
            if label_val == "":
                raise EmptyLabel() from err
            # Match on a substring rather than the whole message, the
            # same way LabelFactory.fromstr() does, so that variations
            # in the wording of the message are still recognized.
            if "too long" in str(err):
                raise LabelTooLong(label_val) from err
            raise
        return Label(digested_val, canonicalize=canonicalize)

    def _invariant(self) -> None:
        """
        This method documents what must always be true about a label
        instance.
        """
        assert isinstance(self._label_bytes, bytes)
        assert 1 <= len(self._label_bytes) <= 63
        assert ((self._canonical_bytes is None) or
                (isinstance(self._canonical_bytes, bytes) and
                 (self._label_bytes.lower() == self._canonical_bytes)))
        if getattr(self, '_ishost_flag', None) is not None:
            assert isinstance(self._ishost_flag, bool)

    def canonical(self) -> bytes:
        """
        Return the :py:class:`bytes` representing the canonical version
        of a label. This is the label converted to lowercase ASCII.

        The value is computed on first use and then cached.

        :return: canonical version of the label
        :rtype: bytes
        """
        if self._canonical_bytes is None:
            self._canonical_bytes = self._label_bytes.lower()
        return self._canonical_bytes

    def ishost(self) -> bool:
        """
        Test if the label is valid in a host name.

        The first and last octets must be an ASCII letter or digit, and
        any octets in between must be an ASCII letter, digit, or hyphen.
        The result is computed on first use and then cached.

        The rules for host names are defined in:

        * `RFC 1034 Section 3.5
          <https://tools.ietf.org/html/rfc1034#section-3.5>`_
        * `RFC 1123 Section 2
          <https://tools.ietf.org/html/rfc1123#section-2>`_

        :rtype: bool
        """
        if getattr(self, '_ishost_flag', None) is None:
            label = self._label_bytes
            # Use bytes.isalnum(), which only accepts ASCII letters and
            # digits. Converting each octet with chr() and using
            # str.isalnum() would also accept octets above 0x7F that
            # happen to be Latin-1 letters, which are not legal in a
            # host name.
            interior = label[1:-1].replace(b'-', b'')
            self._ishost_flag = (
                label[:1].isalnum()
                and label[-1:].isalnum()
                # isalnum() is False for empty bytes, so an interior
                # that is empty or only hyphens needs the explicit test.
                and (not interior or interior.isalnum())
            )
        return self._ishost_flag

    @staticmethod
    def _prepare_label_compare(label_val: Union['Label', str,
                                                bytes]) -> bytes:
        """
        Convert the other operand of a comparison into canonical
        (lower-case) bytes.

        Empty strings and empty bytes convert to ``b''`` rather than
        raising, so they simply compare as different from any label.

        :raises: :py:class:`LabelTooLong`
        :raises UnicodeError: string cannot be converted to Punycode.
        """
        if isinstance(label_val, Label):
            return label_val.canonical()
        if isinstance(label_val, str):
            try:
                ascii_label = encodings.idna.ToASCII(label_val)
                return ascii_label.lower()     # type: ignore
            except UnicodeError as err:
                # encodings.idna.ToASCII reports both an empty label
                # and a too long label as UnicodeError, so
                # differentiate here.
                if label_val == "":
                    return b''
                # Substring match, as in LabelFactory.fromstr().
                if "too long" in str(err):
                    raise LabelTooLong(label_val) from err
                raise
        if len(label_val) > 63:
            raise LabelTooLong(label_val)
        return label_val.lower()

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() == Label._prepare_label_compare(other)

    def __ne__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() != Label._prepare_label_compare(other)

    def __ge__(self, other: Union['Label', str, bytes]) -> bool:
        return self.canonical() >= Label._prepare_label_compare(other)

    def __gt__(self, other: Union['Label', str, bytes]) -> bool:
        return self.canonical() > Label._prepare_label_compare(other)

    def __le__(self, other: Union['Label', str, bytes]) -> bool:
        return self.canonical() <= Label._prepare_label_compare(other)

    def __lt__(self, other: Union['Label', str, bytes]) -> bool:
        return self.canonical() < Label._prepare_label_compare(other)

    def __hash__(self) -> int:
        # To allow hashing of labels we need to ensure that the hash
        # values follow the same rule as equality. That means hashing
        # based on the canonical version of the label.
        #
        # https://hynek.me/articles/hashes-and-equality/
        return hash(self.canonical())

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        try:
            return cls_name + ".fromstr('" + str(self) + "')"
        except UnicodeError:
            # The bytes are not valid IDNA, so show them as bytes.
            # repr() gives the same text as str() for bytes, without
            # triggering a BytesWarning when Python is run with -b.
            return cls_name + "(" + repr(self._label_bytes) + ")"

    def __str__(self) -> str:
        return encodings.idna.ToUnicode(self._label_bytes)  # type: ignore

    def __bytes__(self) -> bytes:
        return self._label_bytes

    # This is a table used to translate bytes in a label to the master
    # zone file presentation format, as documented in RFC 1035.
    #
    # Non-printable characters get translated into the decimal-escaped
    # version, so chr(4) becomes '\004'.
    #
    # We also escape a few other characters:
    #
    # * <space> becomes '\032', to avoid treating it as whitespace.
    #   Note that this is not strictly necessary, and could possibly
    #   be presented as '\ '. This may be visually confusing, so we
    #   opt for the decimal-encoded version.
    #
    # * The double-quote, ", becomes '\"', to avoid starting or ending
    #   a quoted string.
    #
    # * The dollar sign, $, becomes '\$', to avoid anyone confusing
    #   with a control entry like $INCLUDE or $ORIGIN. (You _can_ have
    #   a label named '$ORIGIN', after all.) Note that this is not
    #   strictly necessary at all times; it could be used only for
    #   dollar signs that appear as the first character as a label.
    #   However using it in all cases is not an error.
    #
    # * Open and close parenthesis become '\(' and '\)', respectively.
    #   These are used for grouping otherwise.
    #
    # * The dot, ., becomes '\.', to avoid being used as the label
    #   separator.
    #
    # * The semicolon, ;, becomes '\;', to avoid being used as the
    #   start of a comment.
    #
    # * The at sign, @, becomes '\@', to avoid being substituted for
    #   the origin.
    #
    # * The backslash, \, becomes '\092', since it otherwise indicates
    #   an escaped character.
    #
    _presentation_translation = [
        '\\000', '\\001', '\\002', '\\003', '\\004', '\\005', '\\006', '\\007',
        '\\008', '\\009', '\\010', '\\011', '\\012', '\\013', '\\014', '\\015',
        '\\016', '\\017', '\\018', '\\019', '\\020', '\\021', '\\022', '\\023',
        '\\024', '\\025', '\\026', '\\027', '\\028', '\\029', '\\030', '\\031',
        '\\032', '!', '\\"', '#', '\\$', '%', '&', "'",
        '\\(', '\\)', '*', '+', ',', '-', '\\.', '/',
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', ':', '\\;', '<', '=', '>', '?',
        '\\@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
        'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
        'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
        'X', 'Y', 'Z', '[', '\\092', ']', '^', '_',
        '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
        'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
        'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
        'x', 'y', 'z', '{', '|', '}', '~', '\\127',
        '\\128', '\\129', '\\130', '\\131', '\\132', '\\133', '\\134', '\\135',
        '\\136', '\\137', '\\138', '\\139', '\\140', '\\141', '\\142', '\\143',
        '\\144', '\\145', '\\146', '\\147', '\\148', '\\149', '\\150', '\\151',
        '\\152', '\\153', '\\154', '\\155', '\\156', '\\157', '\\158', '\\159',
        '\\160', '\\161', '\\162', '\\163', '\\164', '\\165', '\\166', '\\167',
        '\\168', '\\169', '\\170', '\\171', '\\172', '\\173', '\\174', '\\175',
        '\\176', '\\177', '\\178', '\\179', '\\180', '\\181', '\\182', '\\183',
        '\\184', '\\185', '\\186', '\\187', '\\188', '\\189', '\\190', '\\191',
        '\\192', '\\193', '\\194', '\\195', '\\196', '\\197', '\\198', '\\199',
        '\\200', '\\201', '\\202', '\\203', '\\204', '\\205', '\\206', '\\207',
        '\\208', '\\209', '\\210', '\\211', '\\212', '\\213', '\\214', '\\215',
        '\\216', '\\217', '\\218', '\\219', '\\220', '\\221', '\\222', '\\223',
        '\\224', '\\225', '\\226', '\\227', '\\228', '\\229', '\\230', '\\231',
        '\\232', '\\233', '\\234', '\\235', '\\236', '\\237', '\\238', '\\239',
        '\\240', '\\241', '\\242', '\\243', '\\244', '\\245', '\\246', '\\247',
        '\\248', '\\249', '\\250', '\\251', '\\252', '\\253', '\\254', '\\255',
    ]

    def to_presentation(self) -> str:
        """
        Return a string of the label converted to the master zone file
        presentation format described in
        `RFC 1035 <https://tools.ietf.org/html/rfc1035#section-5.1>`_.

        Unlike conversion with :py:class:`str`, this works for any
        label, since every possible octet has an entry in the
        translation table.

        :return: presentation format of the label
        :rtype: str
        """
        presentation = [self._presentation_translation[octet]
                        for octet in self._label_bytes]
        return "".join(presentation)
def make_label(label_val: Union[Label, str, bytes], *,
               canonicalize: bool = False) -> Label:
    """
    Initialize a label from a string, bytes, or another label.

    A :py:class:`Label` argument is returned as-is whenever possible,
    since labels are immutable. A new label is only made when
    `canonicalize` is set and the bytes of the label passed are not
    already in canonical (lower-case) form.

    :param label_val: value to make a label from
    :type label_val: Label, str, or bytes
    :param canonicalize: Return a label holding the canonical bytes.
    :type canonicalize: bool
    :return: an initialized label
    :rtype: Label
    :raises: :py:class:`EmptyLabel`
    :raises: :py:class:`LabelTooLong`
    :raises UnicodeError: string cannot be converted to Punycode.
    """
    # pylint: disable=protected-access

    if isinstance(label_val, Label):
        # Compare against canonical() rather than the cached
        # _canonical_bytes attribute. The cache is None until it is
        # first needed, and comparing bytes with None is always
        # unequal, which would force a copy of labels that are already
        # lower-case.
        if canonicalize and label_val._label_bytes != label_val.canonical():
            result = copy.copy(label_val)
            result._label_bytes = label_val.canonical()
            return result
        return label_val

    if isinstance(label_val, str):
        return Label.fromstr(label_val, canonicalize=canonicalize)

    return Label(label_val, canonicalize=canonicalize)
class LabelFactory:
    """
    Hand out shared :py:class:`Label` instances, so that equal labels
    obtained from the same factory are the same object.

    Labels are indexed by their canonical (lower-case) bytes and are
    held by weak reference only, so the factory does not keep a label
    alive once nothing else uses it.

    The :py:class:`LabelFactory` constructor takes no arguments.
    """
    __slots__ = ('_labels',)
    # Canonical bytes -> the shared label for those bytes.
    _labels: 'weakref.WeakValueDictionary[bytes, Label]'

    def __init__(self) -> None:
        self._labels = weakref.WeakValueDictionary()

    def _invariant(self) -> None:
        """
        This method documents what must always be true about a label
        factory instance.
        """
        assert isinstance(self._labels, weakref.WeakValueDictionary)

    def _fetch_or_create(self, canonical_bytes: bytes) -> 'Label':
        """
        Return the stored label for the canonical bytes, creating and
        storing a new one if there is none.

        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        """
        # Use a single get() rather than "in" followed by indexing. An
        # entry in a WeakValueDictionary can go away at any time, so a
        # separate membership test can be stale by the time the value
        # is fetched.
        known_label = self._labels.get(canonical_bytes)
        if known_label is not None:
            return known_label
        new_label = Label(canonical_bytes)
        self._labels[canonical_bytes] = new_label
        return new_label

    def fromlabel(self, label: 'Label') -> 'Label':
        """
        Get a :py:class:`Label` instance the same as the label passed.

        If the factory has no label for this value yet, the label
        passed is itself stored and returned.

        While you could use a simple assignment to also use the same
        label::

            ipso = Label(b'facto')
            quid = ipso
            print(quid is ipso)                     # True

        Using the :py:class:`LabelFactory` for this will store the
        reference in the :py:class:`LabelFactory` instance, which might
        be useful when mixing creation from both :py:class:`Label` and
        :py:class:`str`/:py:class:`bytes`::

            factory = LabelFactory()
            ipso = Label(b'facto')
            quid = factory.fromlabel(ipso)
            pro = factory.fromstr('facto')
            print(quid is pro)                      # True

        :param label: Label that we want to return an instance of
        :type label: :py:class:`Label`
        :rtype: :py:class:`Label`
        """
        canonical_bytes = label.canonical()
        return self._labels.setdefault(canonical_bytes, label)

    def frombytes(self, label_bytes: bytes) -> 'Label':
        """
        Get a :py:class:`Label` instance the same as one created from
        the :py:class:`bytes` passed.

        The bytes are converted to lower-case before they are looked
        up.

        :param bytes label_bytes: bytes that we want a label of
        :rtype: :py:class:`Label`
        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        """
        canonical_bytes = label_bytes.lower()
        return self._fetch_or_create(canonical_bytes)

    def fromstr(self, label_str: str) -> 'Label':
        """
        Get a :py:class:`Label` instance the same as one created from
        the :py:class:`str` passed.

        The string is converted to its IDNA form and then to lower-case
        before it is looked up.

        :param str label_str: string that we want a label of
        :rtype: :py:class:`Label`
        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        :raises UnicodeError: string cannot be converted to Punycode.
        """
        try:
            canonical_bytes = encodings.idna.ToASCII(label_str).lower()
        except UnicodeError as err:
            # encodings.idna.ToASCII reports both an empty label and a
            # too long label as UnicodeError, so differentiate here.
            if label_str == "":
                raise EmptyLabel() from err
            if "too long" in str(err):
                raise LabelTooLong(label_str) from err
            raise
        return self._fetch_or_create(canonical_bytes)