"""
The Label module provides classes for DNS labels.
The Label class
===============
A label is initialized from bytes::
from dike.label import Label
monty = Label(b'flying-circus')
We can also create a label from a string using the
:py:meth:`Label.fromstr()` static method, which converts the string to
`Punycode <https://tools.ietf.org/html/rfc3492>`_::
biter = Label.fromstr('møøsë')
print(biter) # møøsë (via implicit string conversion)
print(bytes(biter)) # b'xn--ms-ija4ca'
print(repr(biter)) # Label.fromstr('møøsë')
A utility function which will create a label from bytes, strings, or
other labels is also available::
from dike.label import make_label
average_airspeed0 = make_label(b'African')
average_airspeed1 = make_label('European')
average_airspeed2 = make_label(average_airspeed1)
Labels are immutable - once initialized the value never changes. This
has the advantage of allowing labels to be used as keys in
:py:class:`dict` and various other places, but does mean that updating
a label is not possible. To make changes, convert the label to either a
:py:class:`str` or :py:class:`bytes` value, use slicing or
concatenation to produce the value you want, then initialize a new
label::
country = Label.fromstr('holland')
same_country = Label.fromstr('nether' + str(country)[3:] + 's')
print(country) # holland
print(same_country) # netherlands
Label comparisons work as expected, and are case-insensitive (as per
DNS specification)::
print(Label(b'flying') == Label(b'circus')) # False
print(Label(b'flying') > Label(b'circus')) # True
print(Label(b'CIRCUS') == Label(b'circus')) # True
If you have a label you can also use bytes or strings in comparisons,
and these work as if you had built a Label object for the comparison::
print(Label.fromstr('Norwegian') != 'blue') # True
print(Label(b'short') < b'shortness') # True
The module also includes a number of utility functions::
i_am_a_host_label = Label.fromstr('mailserver')
i_am_not_a_host_label = Label.fromstr('WITCH!!!')
print(i_am_a_host_label.ishost()) # True
print(i_am_a_host_label.canonical()) # b'mailserver'
print(i_am_not_a_host_label.ishost()) # False
print(i_am_not_a_host_label.canonical()) # b'witch!!!'
Finally, be careful when converting arbitrary labels to strings, for
example when receiving labels in DNS packets from the Internet. This
can result in a :py:class:`UnicodeError` being raised, if the bytes
cannot be represented in Punycode. You can use the
:py:meth:`Label.to_presentation()` method to convert the label to a
string in this case, for example when logging. This uses
escape sequences for any characters that might be interpreted as
special in a zone file, as defined in
`RFC 1035 <https://tools.ietf.org/html/rfc1035>`_::
bad_punycode = Label(b'scary-' + bytes([0x80]))
print(bad_punycode.to_presentation()) # scary-\\128
The LabelFactory class
======================
Since we often use the same label many times when dealing with DNS, it
can be more efficient to use the same instance for all occurrences of
a given label. For example, the label ``com`` is likely to appear in
many DNS names, and it can be more efficient to reuse the same label.
This is safe, because labels are immutable.
The :py:class:`LabelFactory` class exists for this purpose::
from dike import LabelFactory, Label
zone_label_factory = LabelFactory()
foo = zone_label_factory.fromstr('foo')
bar = zone_label_factory.fromstr('foo')
baz = Label.fromstr('foo')
print(foo == bar) # True
print(foo is bar) # True
print(foo == baz) # True
print(foo is baz) # False
Notice that we can also create labels by normal object creation.
Comparisons (``==``, ``<``, and so on) work as expected, but the
labels have different identities in this case (so the ``is``
comparison is ``False``).
The :py:meth:`LabelFactory.frombytes()` and
:py:meth:`LabelFactory.fromlabel()` methods are also available for
creating from bytes or other Label instances.
The labels in a LabelFactory are stored in a
:py:class:`weakref.WeakValueDictionary`, so when a label is no longer
used memory used by the object will be released.
"""
from typing import Union
import copy
import weakref
import encodings.idna
from dike.errors import EmptyLabel, LabelTooLong
class Label:
    """
    A single DNS label, stored as :py:class:`bytes`.

    The :py:class:`Label` constructor requires a single value, which
    is :py:class:`bytes`.

    If the optional `canonicalize` argument is used then the label
    will be converted to the canonical version (that is, ASCII
    lower-case).

    If a label is more than 63 octets long, a
    :py:class:`LabelTooLong` exception will be raised. Attempting to
    create an empty label (with ``b''``) will raise a
    :py:class:`EmptyLabel` exception.

    :param label_val: Value to use when creating the label.
    :type label_val: bytes
    :param canonicalize: Store the lower-cased bytes instead of the
                         bytes as given.
    :type canonicalize: bool
    :raises: :py:class:`EmptyLabel`
    :raises: :py:class:`LabelTooLong`
    """
    __slots__ = (
        '_label_bytes',
        '_canonical_bytes',
        '_ishost_flag',
        '__weakref__',
    )

    _label_bytes: bytes
    _canonical_bytes: Union[None, bytes]
    _ishost_flag: bool

    # Octets allowed at the start and end of a host name label: ASCII
    # letters and digits only. Membership is tested on the raw octet
    # value so that bytes >= 0x80 can never be mistaken for letters.
    _HOST_EDGE_OCTETS = frozenset(
        b'abcdefghijklmnopqrstuvwxyz'
        b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        b'0123456789'
    )
    # Octets allowed in the interior of a host name label: the above
    # plus the hyphen.
    _HOST_INNER_OCTETS = _HOST_EDGE_OCTETS | frozenset(b'-')

    def __init__(self, label_val: bytes, *, canonicalize: bool = False):
        # The _ishost_flag attribute is deliberately left unset here;
        # it is filled in the first time ishost() is invoked.

        # Check the length.
        if len(label_val) == 0:
            raise EmptyLabel()
        if len(label_val) > 63:
            raise LabelTooLong(label_val)

        if canonicalize:
            # The stored label *is* the canonical version, so both
            # attributes share the same bytes object.
            self._canonical_bytes = label_val.lower()
            self._label_bytes = self._canonical_bytes
        else:
            # Leave the canonical version unset; canonical() computes
            # it on demand.
            self._canonical_bytes = None
            self._label_bytes = label_val

    @staticmethod
    def fromstr(label_val: str, *, canonicalize: bool = False) -> 'Label':
        """
        Create an IDNA version of a string.

        Certain Unicode values are not allowed in Punycode. A
        :py:class:`UnicodeError` exception will be raised in that
        case.

        :param str label_val: text to convert with IDNA ``ToASCII``
        :param bool canonicalize: passed through to the constructor
        :return: a label
        :rtype: Label
        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        :raises UnicodeError: string cannot be converted to Punycode.
        """
        try:
            digested_val = encodings.idna.ToASCII(label_val)
        except UnicodeError as err:
            # encodings.idna.ToASCII reports both empty and over-long
            # labels as UnicodeError, so differentiate here. The
            # message is matched by substring (as LabelFactory.fromstr
            # does) rather than by its exact wording.
            if label_val == "":
                raise EmptyLabel() from err
            if "too long" in str(err):
                raise LabelTooLong(label_val) from err
            raise
        return Label(digested_val, canonicalize=canonicalize)

    def _invariant(self) -> None:
        """
        This method documents what must always be true about a label
        instance.
        """
        assert isinstance(self._label_bytes, bytes)
        assert 1 <= len(self._label_bytes) <= 63
        assert ((self._canonical_bytes is None) or
                (isinstance(self._canonical_bytes, bytes) and
                 (self._label_bytes.lower() == self._canonical_bytes)))
        if getattr(self, '_ishost_flag', None) is not None:
            assert isinstance(self._ishost_flag, bool)

    def canonical(self) -> bytes:
        """
        Return the :py:class:`bytes` representing the canonical
        version of a label. This is the label converted to lowercase
        ASCII.

        The value is computed on first use and cached.

        :return: canonical version of the label
        :rtype: bytes
        """
        if self._canonical_bytes is None:
            self._canonical_bytes = self._label_bytes.lower()
        return self._canonical_bytes

    def ishost(self) -> bool:
        """
        Test if the label is valid in a host name. The rules for host
        names are defined in:

        * `RFC 1034 Section 3.5
          <https://tools.ietf.org/html/rfc1034#section-3.5>`_
        * `RFC 1123 Section 2
          <https://tools.ietf.org/html/rfc1123#section-2>`_

        The first and last octets must be ASCII letters or digits;
        octets in between may also be hyphens. Octets outside the
        ASCII range are never valid. The result is cached after the
        first call.

        :rtype: bool
        """
        # The slot may be unset, hence getattr() with a default.
        if getattr(self, '_ishost_flag', None) is None:
            octets = self._label_bytes
            self._ishost_flag = (
                octets[0] in self._HOST_EDGE_OCTETS
                and octets[-1] in self._HOST_EDGE_OCTETS
                and self._HOST_INNER_OCTETS.issuperset(octets[1:-1])
            )
        return self._ishost_flag

    @staticmethod
    def _prepare_label_compare(label_val: Union['Label', str, bytes]) -> bytes:
        """
        Return the lower-cased bytes to compare a label against.

        Labels contribute their canonical bytes, strings are passed
        through IDNA ``ToASCII`` and lower-cased, and bytes are
        lower-cased directly. An empty string yields ``b''`` instead
        of raising, so it simply compares as different from any label.

        :raises: :py:class:`LabelTooLong`
        :raises UnicodeError: string cannot be converted to Punycode.
        """
        if isinstance(label_val, Label):
            return label_val.canonical()
        if isinstance(label_val, str):
            try:
                ascii_label = encodings.idna.ToASCII(label_val)
                return ascii_label.lower()  # type: ignore
            except UnicodeError as err:
                # encodings.idna.ToASCII reports both empty and
                # over-long labels as UnicodeError, so differentiate
                # here (substring match, as in LabelFactory.fromstr).
                if label_val == "":
                    return b''
                if "too long" in str(err):
                    raise LabelTooLong(label_val) from err
                raise
        if len(label_val) > 63:
            raise LabelTooLong(label_val)
        return label_val.lower()

    # All comparisons are made on the canonical (lower-case) bytes, so
    # they are case-insensitive. Operands that are not a Label, str or
    # bytes yield NotImplemented so that Python can try the reflected
    # operation or raise its usual TypeError.

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() == Label._prepare_label_compare(other)

    def __ne__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() != Label._prepare_label_compare(other)

    def __ge__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() >= Label._prepare_label_compare(other)

    def __gt__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() > Label._prepare_label_compare(other)

    def __le__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() <= Label._prepare_label_compare(other)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, (Label, str, bytes)):
            return NotImplemented
        return self.canonical() < Label._prepare_label_compare(other)

    def __hash__(self) -> int:
        # To allow hashing of labels we need to ensure that the hash
        # values follow the same rule as equality. That means hashing
        # based on the canonical version of the label.
        #
        # https://hynek.me/articles/hashes-and-equality/
        return hash(self.canonical())

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        try:
            # repr() of the text takes care of quoting, so labels that
            # contain quotes or backslashes still give a valid literal.
            return f"{cls_name}.fromstr({str(self)!r})"
        except UnicodeError:
            # The bytes have no text form; show them as a bytes literal.
            return f"{cls_name}({self._label_bytes!r})"

    def __str__(self) -> str:
        # May raise UnicodeError for bytes that IDNA cannot decode; use
        # to_presentation() for a conversion that does not depend on
        # IDNA decoding.
        return encodings.idna.ToUnicode(self._label_bytes)  # type: ignore

    def __bytes__(self) -> bytes:
        return self._label_bytes

    # This is a table used to translate bytes in a label to the master
    # zone file presentation format, as documented in RFC 1035.
    #
    # Non-printable characters get translated into the decimal-escaped
    # version, so chr(4) becomes '\004'.
    #
    # We also escape a few other characters:
    #
    # * <space> becomes '\032', to avoid treating it as whitespace.
    #   Note that this is not strictly necessary, and could possibly
    #   be presented as '\ '. This may be visually confusing, so we
    #   opt for the decimal-encoded version.
    #
    # * The double-quote, ", becomes '\"', to avoid starting or ending
    #   a quoted string.
    #
    # * The dollar sign, $, becomes '\$', to avoid anyone confusing
    #   with a control entry like $INCLUDE or $ORIGIN. (You _can_ have
    #   a label named '$ORIGIN', after all.) Note that this is not
    #   strictly necessary at all times; it could be used only for
    #   dollar signs that appear as the first character as a label.
    #   However using it in all cases is not an error.
    #
    # * Open and close parenthesis become '\(' and '\)', respectively.
    #   These are used for grouping otherwise.
    #
    # * The dot, ., becomes '\.', to avoid being used as the label
    #   separator.
    #
    # * The semicolon, ;, becomes '\;', to avoid being used as the
    #   start of a comment.
    #
    # * The at sign, @, becomes '\@', to avoid being substituted for
    #   the origin.
    #
    # * The backslash, \, becomes '\092', since it otherwise indicates
    #   an escaped character.
    #
    _presentation_translation = [
        '\\000', '\\001', '\\002', '\\003', '\\004', '\\005', '\\006', '\\007',
        '\\008', '\\009', '\\010', '\\011', '\\012', '\\013', '\\014', '\\015',
        '\\016', '\\017', '\\018', '\\019', '\\020', '\\021', '\\022', '\\023',
        '\\024', '\\025', '\\026', '\\027', '\\028', '\\029', '\\030', '\\031',
        '\\032', '!', '\\"', '#', '\\$', '%', '&', "'",
        '\\(', '\\)', '*', '+', ',', '-', '\\.', '/',
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', ':', '\\;', '<', '=', '>', '?',
        '\\@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
        'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
        'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
        'X', 'Y', 'Z', '[', '\\092', ']', '^', '_',
        '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
        'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
        'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
        'x', 'y', 'z', '{', '|', '}', '~', '\\127',
        '\\128', '\\129', '\\130', '\\131', '\\132', '\\133', '\\134', '\\135',
        '\\136', '\\137', '\\138', '\\139', '\\140', '\\141', '\\142', '\\143',
        '\\144', '\\145', '\\146', '\\147', '\\148', '\\149', '\\150', '\\151',
        '\\152', '\\153', '\\154', '\\155', '\\156', '\\157', '\\158', '\\159',
        '\\160', '\\161', '\\162', '\\163', '\\164', '\\165', '\\166', '\\167',
        '\\168', '\\169', '\\170', '\\171', '\\172', '\\173', '\\174', '\\175',
        '\\176', '\\177', '\\178', '\\179', '\\180', '\\181', '\\182', '\\183',
        '\\184', '\\185', '\\186', '\\187', '\\188', '\\189', '\\190', '\\191',
        '\\192', '\\193', '\\194', '\\195', '\\196', '\\197', '\\198', '\\199',
        '\\200', '\\201', '\\202', '\\203', '\\204', '\\205', '\\206', '\\207',
        '\\208', '\\209', '\\210', '\\211', '\\212', '\\213', '\\214', '\\215',
        '\\216', '\\217', '\\218', '\\219', '\\220', '\\221', '\\222', '\\223',
        '\\224', '\\225', '\\226', '\\227', '\\228', '\\229', '\\230', '\\231',
        '\\232', '\\233', '\\234', '\\235', '\\236', '\\237', '\\238', '\\239',
        '\\240', '\\241', '\\242', '\\243', '\\244', '\\245', '\\246', '\\247',
        '\\248', '\\249', '\\250', '\\251', '\\252', '\\253', '\\254', '\\255',
    ]

    def to_presentation(self) -> str:
        """
        Return a string of the label converted to the master zone file
        presentation format described in
        `RFC 1035 <https://tools.ietf.org/html/rfc1035#section-5.1>`_.

        Every octet is looked up in the translation table, so this
        does not depend on IDNA decoding of the label.

        :return: presentation format of the label
        :rtype: str
        """
        table = self._presentation_translation
        return "".join(table[octet] for octet in self._label_bytes)
def make_label(label_val: Union[Label, str, bytes], *,
               canonicalize: bool = False) -> Label:
    """
    Initialize a label from a string, bytes, or another label.

    Strings go through :py:meth:`Label.fromstr()` and bytes through
    the :py:class:`Label` constructor. An existing label is returned
    as-is unless `canonicalize` is set and its bytes differ from its
    canonical (lower-case) bytes; in that case a copy holding the
    canonical bytes is returned.

    :param label_val: value to build the label from
    :param bool canonicalize: request the canonical (lower-case) label
    :return: an initialized label
    :rtype: Label
    :raises: :py:class:`EmptyLabel`
    :raises: :py:class:`LabelTooLong`
    :raises UnicodeError: string cannot be converted to Punycode.
    """
    # pylint: disable=protected-access
    if isinstance(label_val, str):
        return Label.fromstr(label_val, canonicalize=canonicalize)
    if not isinstance(label_val, Label):
        return Label(label_val, canonicalize=canonicalize)

    # If we are initializing from another label, we can just re-use
    # that label, since labels are immutable.
    if not canonicalize:
        return label_val

    # Compare against canonical() rather than the cached attribute:
    # the cache is None until first use, which would otherwise force
    # a copy of a label that is already in canonical form.
    canonical_bytes = label_val.canonical()
    if label_val._label_bytes == canonical_bytes:
        return label_val

    # A canonical version was requested and the label is not already
    # canonical, so work on a copy and leave the original untouched.
    result = copy.copy(label_val)
    result._label_bytes = canonical_bytes
    return result
class LabelFactory:
    """
    Hands out shared :py:class:`Label` instances, keyed by the
    canonical (lower-case) bytes of the label.

    The :py:class:`LabelFactory` constructor takes no arguments.

    Labels are held in a :py:class:`weakref.WeakValueDictionary`, so
    the factory itself does not keep a label alive.
    """
    __slots__ = ('_labels',)

    _labels: 'weakref.WeakValueDictionary[bytes, Label]'

    def __init__(self) -> None:
        self._labels = weakref.WeakValueDictionary()

    def _invariant(self) -> None:
        """
        This method documents what must always be true about a
        factory instance.
        """
        assert isinstance(self._labels, weakref.WeakValueDictionary)

    def _fetch_or_create(self, canonical_bytes: bytes) -> Label:
        """
        Return the label stored for `canonical_bytes`, creating and
        storing a new one if there is none.

        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        """
        # Use a single get() instead of "in" followed by indexing:
        # entries of a WeakValueDictionary can disappear at any time,
        # so the key could vanish between the two steps.
        label = self._labels.get(canonical_bytes)
        if label is None:
            label = Label(canonical_bytes)
            self._labels[canonical_bytes] = label
        return label

    def fromlabel(self, label: Label) -> Label:
        """
        Get a :py:class:`Label` instance the same as the label passed.

        While you could use a simple assignment to also use the same
        label::

            ipso = Label(b'facto')
            quid = ipso
            print(quid is ipso) # True

        Using the :py:class:`LabelFactory` for this will store the
        reference in the :py:class:`LabelFactory` instance, which
        might be useful when mixing creation from both
        :py:class:`Label` and :py:class:`str`/:py:class:`bytes`::

            factory = LabelFactory()
            ipso = Label(b'facto')
            quid = factory.fromlabel(ipso)
            pro = factory.fromstr('facto')
            print(quid is pro) # True

        If the factory already holds a label with the same canonical
        bytes, that stored label is returned instead of `label`.

        :param label: Label that we want to return an instance of
        :type label: :py:class:`Label`
        :rtype: :py:class:`Label`
        """
        canonical_bytes = label.canonical()
        return self._labels.setdefault(canonical_bytes, label)

    def frombytes(self, label_bytes: bytes) -> Label:
        """
        Get a :py:class:`Label` instance the same as one created from
        the :py:class:`bytes` passed.

        The bytes are lower-cased before lookup, and a newly created
        label holds the lower-cased bytes.

        :param bytes label_bytes: bytes that we want a label of
        :rtype: :py:class:`Label`
        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        """
        canonical_bytes = label_bytes.lower()
        return self._fetch_or_create(canonical_bytes)

    def fromstr(self, label_str: str) -> Label:
        """
        Get a :py:class:`Label` instance the same as one created from
        the :py:class:`str` passed.

        The string is converted with IDNA ``ToASCII`` and lower-cased
        before lookup.

        :param str label_str: string that we want a label of
        :rtype: :py:class:`Label`
        :raises: :py:class:`EmptyLabel`
        :raises: :py:class:`LabelTooLong`
        :raises UnicodeError: string cannot be converted to Punycode.
        """
        try:
            canonical_bytes = encodings.idna.ToASCII(label_str).lower()
        except UnicodeError as err:
            # encodings.idna.ToASCII reports both empty and over-long
            # labels as UnicodeError, so differentiate here.
            if label_str == "":
                raise EmptyLabel() from err
            if "too long" in str(err):
                raise LabelTooLong(label_str) from err
            raise
        return self._fetch_or_create(canonical_bytes)