# Copyright 2012-2019, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for Tor extra-info descriptors. These are published by relays whenever
their server descriptor is published and have a similar format. However, unlike
server descriptors these don't contain information that Tor clients require to
function and as such aren't fetched by default.

Defined in section 2.1.2 of the `dir-spec
<https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_,
extra-info descriptors contain interesting but non-vital information such as
usage statistics. Tor clients cannot request these documents for bridges.

Extra-info descriptors are available from a few sources...

* If you have 'DownloadExtraInfo 1' in your torrc...

 * control port via 'GETINFO extra-info/digest/\\*' queries
 * the 'cached-extrainfo' file in tor's data directory

* Archived descriptors provided by `CollecTor <https://metrics.torproject.org/collector.html>`_.

* Directory authorities and mirrors via their DirPort.

**Module Overview:**

::

  ExtraInfoDescriptor - Tor extra-info descriptor.
    |- RelayExtraInfoDescriptor - Extra-info descriptor for a relay.
    |- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge.
    |
    +- digest - calculates the upper-case hex digest value for our content

.. data:: DirResponse (enum)

  Enumeration for known statuses for ExtraInfoDescriptor's dir_*_responses.

  =================== ===========
  DirResponse         Description
  =================== ===========
  **OK**              network status requests that were answered
  **NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities
  **UNAVAILABLE**     requested network status was unavailable
  **NOT_FOUND**       requested network status was not found
  **NOT_MODIFIED**    network status unmodified since If-Modified-Since time
  **BUSY**            directory was busy
  =================== ===========

.. data:: DirStat (enum)

  Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and
  dir_*_tunneled_dl.

  ===================== ===========
  DirStat               Description
  ===================== ===========
  **COMPLETE**          requests that completed successfully
  **TIMEOUT**           requests that didn't complete within a ten minute timeout
  **RUNNING**           requests still in process when measurement's taken
  **MIN**               smallest rate at which a descriptor was downloaded in B/s
  **MAX**               largest rate at which a descriptor was downloaded in B/s
  **D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s
  **Q1** and **Q3**     rate of the slowest and fastest quarter download rates in B/s
  **MD**                median download rate in B/s
  ===================== ===========
"""

import functools
import hashlib
import re

import stem.prereq
import stem.util.connection
import stem.util.enum
import stem.util.str_tools

from stem.descriptor import (
  PGP_BLOCK_END,
  Descriptor,
  DigestHash,
  DigestEncoding,
  create_signing_key,
  _descriptor_content,
  _read_until_keywords,
  _descriptor_components,
  _value,
  _values,
  _parse_simple_line,
  _parse_int_line,
  _parse_timestamp_line,
  _parse_forty_character_hex,
  _parse_key_block,
  _mappings_for,
  _append_router_signature,
  _random_nickname,
  _random_fingerprint,
  _random_date,
  _random_crypto_blob,
)

if stem.prereq._is_lru_cache_available():
  from functools import lru_cache
else:
  from stem.util.lru_cache import lru_cache

# Known statuses for dirreq-v2-resp and dirreq-v3-resp lines. Each entry is an
# (enum key, status string) pair. _parse_dirreq_line() checks the statuses it
# reads against this enum to separate recognized from unrecognized ones.
DirResponse = stem.util.enum.Enum(
  ('OK', 'ok'),
  ('NOT_ENOUGH_SIGS', 'not-enough-sigs'),
  ('UNAVAILABLE', 'unavailable'),
  ('NOT_FOUND', 'not-found'),
  ('NOT_MODIFIED', 'not-modified'),
  ('BUSY', 'busy'),
)

# Known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl lines. The
# deciles d1-d4 and d6-d9 are appended after the named stats (there is no d5
# entry, the median is 'md'). Enum keys are the upper-cased stat names.
dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
dir_stats.extend(['d%i' % i for i in list(range(1, 5)) + list(range(6, 10))])
DirStat = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats])

# Keywords that relay descriptors must have exactly once. The code enforcing
# this isn't part of this section of the file.
REQUIRED_FIELDS = (
  'extra-info',
  'published',
  'router-signature',
)

# Optional keywords that can appear at most once in a descriptor. As with
# REQUIRED_FIELDS, these are only the line keywords; the check itself lives
# elsewhere.
SINGLE_FIELDS = (
  'read-history',
  'write-history',
  'geoip-db-digest',
  'geoip6-db-digest',
  'bridge-stats-end',
  'bridge-ips',
  'dirreq-stats-end',
  'dirreq-v2-ips',
  'dirreq-v3-ips',
  'dirreq-v2-reqs',
  'dirreq-v3-reqs',
  'dirreq-v2-share',
  'dirreq-v3-share',
  'dirreq-v2-resp',
  'dirreq-v3-resp',
  'dirreq-v2-direct-dl',
  'dirreq-v3-direct-dl',
  'dirreq-v2-tunneled-dl',
  'dirreq-v3-tunneled-dl',
  'dirreq-read-history',
  'dirreq-write-history',
  'entry-stats-end',
  'entry-ips',
  'cell-stats-end',
  'cell-processed-cells',
  'cell-queued-cells',
  'cell-time-in-queue',
  'cell-circuits-per-decile',
  'conn-bi-direct',
  'exit-stats-end',
  'exit-kibibytes-written',
  'exit-kibibytes-read',
  'exit-streams-opened',
)

# Matches '<timestamp> (<digits> s)' optionally followed by a space and further
# content. Groups are the timestamp text, the interval digits, and the trailing
# content including its leading space (None when absent). Used by
# _parse_timestamp_and_interval().
_timestamp_re = re.compile('^(.*) \\(([0-9]+) s\\)( .*)?$')

# Matches a locale code of exactly two characters, each a letter, a digit or
# '?'. Used by _parse_geoip_to_count_line().
_locale_re = re.compile('^[a-zA-Z0-9\\?]{2}$')


def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
  """
  Provides the extra-info descriptors contained in a file, one at a time.

  :param file descriptor_file: file with descriptor content
  :param bool is_bridge: reads the content as bridge descriptors (which end
    with a 'router-digest' line) rather than relay descriptors (which end with
    a 'router-signature' and its PGP style block)
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param dict kwargs: additional arguments for the descriptor constructor

  :returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
    instances in the file

  :raises:
    * **ValueError** if the contents is malformed and validate is **True**
    * **IOError** if the file can't be read
  """

  descriptor_class = BridgeExtraInfoDescriptor if is_bridge else RelayExtraInfoDescriptor

  # first word of the PGP footer, this is what ends a relay's signature block
  signature_end_keyword = PGP_BLOCK_END.split(' ', 1)[0]

  while True:
    if is_bridge:
      lines = _read_until_keywords('router-digest', descriptor_file, True)
    else:
      lines = _read_until_keywords('router-signature', descriptor_file)

      # we've reached the 'router-signature', now include the pgp style block
      lines += _read_until_keywords(signature_end_keyword, descriptor_file, True)

    if not lines:
      return  # done parsing file

    # drop a leading '@type' annotation so it isn't part of the descriptor
    if lines[0].startswith(b'@type'):
      lines = lines[1:]

    yield descriptor_class(b''.join(lines), validate, **kwargs)


def _parse_timestamp_and_interval(keyword, content):
  """
  Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.

  :param str keyword: line's keyword, this is only used for error messages
  :param str content: line content to be parsed

  :returns: **tuple** of the form (timestamp (**datetime**), interval
    (**int**), remaining content (**str**)), the remaining content is **None**
    if nothing follows the interval

  :raises: **ValueError** if the content is malformed
  """

  line = '%s %s' % (keyword, content)
  content_match = _timestamp_re.match(content)

  if not content_match:
    raise ValueError('Malformed %s line: %s' % (keyword, line))

  timestamp_str, interval, remainder = content_match.groups()

  if remainder:
    remainder = remainder[1:]  # remove leading space

  # _timestamp_re only captures [0-9]+ for the interval so its int()
  # conversion can't fail, leaving the timestamp as the only thing to guard.

  try:
    timestamp = stem.util.str_tools._parse_timestamp(timestamp_str)
  except ValueError:
    raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))

  return timestamp, int(interval), remainder


def _parse_extra_info_line(descriptor, entries):
  # "extra-info" Nickname Fingerprint
  #
  # Sets the descriptor's nickname and fingerprint from the first two
  # whitespace separated values, anything after them is ignored.

  value = _value('extra-info', entries)
  components = value.split()

  if len(components) < 2:
    raise ValueError('Extra-info line must have two values: extra-info %s' % value)

  nickname, fingerprint = components[0], components[1]

  if not stem.util.tor_tools.is_valid_nickname(nickname):
    raise ValueError("Extra-info line entry isn't a valid nickname: %s" % nickname)

  if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
    raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % fingerprint)

  descriptor.nickname = nickname
  descriptor.fingerprint = fingerprint


def _parse_transport_line(descriptor, entries):
  """
  Parses all 'transport' lines, setting the descriptor's **transport**
  attribute to a dict mapping each transport name to its (address, port, args)
  tuple. Scrubbed entries (those with only a name) map to (None, None, None).

  :param descriptor: descriptor the **transport** attribute is set on
  :param entries: descriptor entries, read through _values()

  :raises: **ValueError** if an unscrubbed line lacks its address:port, or its
    address or port are invalid
  """

  # "transport" transportname address:port [arglist]
  # Everything after the transportname is scrubbed in published bridge
  # descriptors, so we'll never see it in practice.
  #
  # These entries really only make sense for bridges, but have been seen
  # on non-bridges in the wild when the relay operator configured it this
  # way.

  transports = {}

  for value in _values('transport', entries):
    if ' ' not in value:
      # scrubbed, all we have is the transport's name
      transports[value] = (None, None, None)
      continue

    # not scrubbed
    value_comp = value.split()

    if len(value_comp) < 1:
      raise ValueError('Transport line is missing its transport name: transport %s' % value)
    elif len(value_comp) < 2:
      raise ValueError('Transport line is missing its address:port value: transport %s' % value)
    elif ':' not in value_comp[1]:
      raise ValueError("Transport line's address:port entry is missing a colon: transport %s" % value)

    name = value_comp[0]

    # split on the last colon since IPv6 addresses contain colons themselves
    address, port_str = value_comp[1].rsplit(':', 1)

    # The address is acceptable if it's *either* a valid IPv4 or a valid
    # (optionally bracketed) IPv6 address. Without the parentheses the 'not'
    # only applied to the IPv4 check, rejecting every IPv6 address.

    is_valid_address = stem.util.connection.is_valid_ipv4_address(address) or \
      stem.util.connection.is_valid_ipv6_address(address, allow_brackets = True)

    if not is_valid_address:
      raise ValueError('Transport line has a malformed address: transport %s' % value)
    elif not stem.util.connection.is_valid_port(port_str):
      raise ValueError('Transport line has a malformed port: transport %s' % value)

    # Strip the brackets of IPv6 addresses. Strings are immutable, so the
    # result has to be assigned back for this to have any effect.

    address = address.lstrip('[').rstrip(']')

    transports[name] = (address, int(port_str), value_comp[2:])

  descriptor.transport = transports


def _parse_padding_counts_line(descriptor, entries):
  # "padding-counts" YYYY-MM-DD HH:MM:SS (NSEC s) key=val key=val...
  #
  # Sets padding_counts_end, padding_counts_interval and padding_counts on the
  # descriptor. Values consisting purely of digits are stored as ints, all
  # others are kept as the string we were given.

  line = _value('padding-counts', entries)
  end_time, interval_seconds, mappings = _parse_timestamp_and_interval('padding-counts', line)

  padding_counts = dict(
    (key, int(val) if val.isdigit() else val)
    for key, val in _mappings_for('padding-counts', mappings, require_value = True)
  )

  descriptor.padding_counts_end = end_time
  descriptor.padding_counts_interval = interval_seconds
  descriptor.padding_counts = padding_counts


def _parse_dirreq_line(keyword, recognized_counts_attr, unrecognized_counts_attr, descriptor, entries):
  # "<keyword>" KEY=COUNT,KEY=COUNT,...
  #
  # Sorts the counts into two dicts: one for keys in the enum that applies to
  # this keyword (DirResponse for the dirreq-v2/3-resp lines, DirStat for any
  # other keyword) and one for keys we don't recognize. These are set on the
  # descriptor under the two given attribute names.

  value = _value(keyword, entries)

  if keyword in ('dirreq-v2-resp', 'dirreq-v3-resp'):
    known_keys, key_type = DirResponse, 'STATUS'
  else:
    known_keys, key_type = DirStat, 'STAT'

  recognized, unrecognized = {}, {}

  for key, count in _mappings_for(keyword, value, divider = ','):
    if not count.isdigit():
      raise ValueError('%s lines should contain %s=COUNT mappings: %s %s' % (keyword, key_type, keyword, value))

    bucket = recognized if key in known_keys else unrecognized
    bucket[key] = int(count)

  setattr(descriptor, recognized_counts_attr, recognized)
  setattr(descriptor, unrecognized_counts_attr, unrecognized)


def _parse_dirreq_share_line(keyword, attribute, descriptor, entries):
  # "<keyword>" num%
  #
  # Stores the percentage as a fraction, for instance '0.37%' becomes 0.0037.

  value = _value(keyword, entries)

  if not value.endswith('%'):
    raise ValueError('%s lines should be a percentage: %s %s' % (keyword, keyword, value))

  percentage = float(value[:-1])  # ValueError from this propagates as-is

  if percentage < 0:
    raise ValueError('Negative percentage value: %s %s' % (keyword, value))

  # bug means it might be above 100%: https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html

  setattr(descriptor, attribute, percentage / 100)


def _parse_cell_line(keyword, attribute, descriptor, entries):
  # "<keyword>" num,...,num
  #
  # Every entry that can be read as a float is stored on the descriptor, even
  # when other entries are malformed. The ValueError for malformed entries is
  # only raised after the attribute has been set.

  value = _value(keyword, entries)
  measurements, has_malformed_entry = [], False

  for entry in (value.split(',') if value else ()):
    try:
      # Values should be positive but as discussed in ticket #5849
      # there was a bug around this. It was fixed in tor 0.2.2.1.

      measurements.append(float(entry))
    except ValueError:
      has_malformed_entry = True

  setattr(descriptor, attribute, measurements)

  if has_malformed_entry:
    raise ValueError('Non-numeric entry in %s listing: %s %s' % (keyword, keyword, value))


def _parse_timestamp_and_interval_line(keyword, end_attribute, interval_attribute, descriptor, entries):
  # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
  #
  # Anything following the interval is ignored.

  parsed = _parse_timestamp_and_interval(keyword, _value(keyword, entries))
  end_time, interval_seconds = parsed[0], parsed[1]

  setattr(descriptor, end_attribute, end_time)
  setattr(descriptor, interval_attribute, interval_seconds)


def _parse_conn_bi_direct_line(descriptor, entries):
  """
  Parses the 'conn-bi-direct' line, setting the descriptor's
  conn_bi_direct_end, conn_bi_direct_interval, conn_bi_direct_below,
  conn_bi_direct_read, conn_bi_direct_write and conn_bi_direct_both
  attributes.

  :param descriptor: descriptor the attributes are set on
  :param entries: descriptor entries, read through _value()

  :raises: **ValueError** if the line is malformed or doesn't end with four
    comma separated numeric values
  """

  # "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH

  value = _value('conn-bi-direct', entries)
  timestamp, interval, remainder = _parse_timestamp_and_interval('conn-bi-direct', value)

  # The remainder is None when nothing follows the interval. Treat that as
  # having no stats so we raise a ValueError like any other malformed line,
  # rather than an AttributeError from calling split() on None.

  stats = remainder.split(',') if remainder else []

  if len(stats) != 4 or not all(stat.isdigit() for stat in stats):
    raise ValueError('conn-bi-direct line should end with four numeric values: conn-bi-direct %s' % value)

  below, read, write, both = [int(stat) for stat in stats]

  descriptor.conn_bi_direct_end = timestamp
  descriptor.conn_bi_direct_interval = interval
  descriptor.conn_bi_direct_below = below
  descriptor.conn_bi_direct_read = read
  descriptor.conn_bi_direct_write = write
  descriptor.conn_bi_direct_both = both


def _parse_history_line(keyword, end_attribute, interval_attribute, values_attribute, descriptor, entries):
  # "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
  #
  # The listing of values is optional, when it's missing or empty the values
  # attribute is set to an empty list.

  value = _value(keyword, entries)
  end_time, interval_seconds, listing = _parse_timestamp_and_interval(keyword, value)

  if listing:
    try:
      history = list(map(int, listing.split(',')))
    except ValueError:
      raise ValueError('%s line has non-numeric values: %s %s' % (keyword, keyword, value))
  else:
    history = []

  setattr(descriptor, end_attribute, end_time)
  setattr(descriptor, interval_attribute, interval_seconds)
  setattr(descriptor, values_attribute, history)


def _parse_port_count_line(keyword, attribute, descriptor, entries):
  # "<keyword>" port=N,port=N,...
  #
  # Keys of the resulting dict are ints for numeric ports, and the string
  # 'other' for the catch-all entry.

  value = _value(keyword, entries)
  counts = {}

  for port, stat in _mappings_for(keyword, value, divider = ','):
    is_port_ok = port == 'other' or stem.util.connection.is_valid_port(port)

    if not (is_port_ok and stat.isdigit()):
      raise ValueError('Entries in %s line should only be PORT=N entries: %s %s' % (keyword, keyword, value))

    if port.isdigit():
      port = int(port)

    counts[port] = int(stat)

  setattr(descriptor, attribute, counts)


def _parse_geoip_to_count_line(keyword, attribute, descriptor, entries):
  # "<keyword>" CC=N,CC=N,...
  #
  # Locale codes aren't limited to letters. The maxmind geoip
  # (https://www.maxmind.com/app/iso3166) has codes with digits or question
  # marks for some special values, for instance...
  #   A1,"Anonymous Proxy"
  #   A2,"Satellite Provider"
  #   ??,"Unknown"

  value = _value(keyword, entries)
  counts_by_locale = {}

  for locale, count in _mappings_for(keyword, value, divider = ','):
    if not (_locale_re.match(locale) and count.isdigit()):
      raise ValueError('Entries in %s line should only be CC=N entries: %s %s' % (keyword, keyword, value))

    counts_by_locale[locale] = int(count)

  setattr(descriptor, attribute, counts_by_locale)


def _parse_bridge_ip_versions_line(descriptor, entries):
  # "bridge-ip-versions" protocol=N,protocol=N,...
  #
  # Sets the descriptor's ip_versions to a dict of protocol => count. Unlike
  # most parsers in this module a non-numeric count raises stem.ProtocolError
  # rather than ValueError.

  value = _value('bridge-ip-versions', entries)
  user_counts = {}

  for ip_version, count in _mappings_for('bridge-ip-versions', value, divider = ','):
    if count.isdigit():
      user_counts[ip_version] = int(count)
    else:
      raise stem.ProtocolError('IP protocol count was non-numeric (%s): bridge-ip-versions %s' % (count, value))

  descriptor.ip_versions = user_counts


def _parse_bridge_ip_transports_line(descriptor, entries):
  # "bridge-ip-transports" transport=N,transport=N,...
  #
  # Sets the descriptor's ip_transports to a dict of transport => count.
  # Unlike most parsers in this module a non-numeric count raises
  # stem.ProtocolError rather than ValueError.

  value = _value('bridge-ip-transports', entries)
  user_counts = {}

  for transport, count in _mappings_for('bridge-ip-transports', value, divider = ','):
    if count.isdigit():
      user_counts[transport] = int(count)
    else:
      raise stem.ProtocolError('Transport count was non-numeric (%s): bridge-ip-transports %s' % (count, value))

  descriptor.ip_transports = user_counts


def _parse_hs_stats(keyword, stat_attribute, extra_attribute, descriptor, entries):
  # "<keyword>" num key=val key=val...
  #
  # The leading number is stored under stat_attribute and any key=val
  # mappings after it under extra_attribute. When the line isn't in the
  # descriptor these are set to None and an empty dict respectively.

  value = _value(keyword, entries)
  stat, extra = None, {}

  if value == '':
    raise ValueError("'%s' line was blank" % keyword)

  if value is not None:
    stat_value, separator, remainder = value.partition(' ')

    if not separator:
      remainder = None  # nothing follows the stat

    try:
      stat = int(stat_value)
    except ValueError:
      raise ValueError("'%s' stat was non-numeric (%s): %s %s" % (keyword, stat_value, keyword, value))

    extra = dict((key, val) for key, val in _mappings_for(keyword, remainder))

  setattr(descriptor, stat_attribute, stat)
  setattr(descriptor, extra_attribute, extra)


# Parser functions for individual descriptor lines. Each is either built by a
# factory imported from stem.descriptor (_parse_key_block, _parse_simple_line,
# _parse_forty_character_hex, _parse_int_line, _parse_timestamp_line) or is a
# functools.partial that binds the line keyword and the attribute name(s) to
# one of the generic parsers defined above. The partials are left taking the
# remaining (descriptor, entries) arguments. ExtraInfoDescriptor.ATTRIBUTES
# refers to these by name.
#
# Note that _parse_cell_circuits_per_decline_line is spelled 'decline' even
# though it handles the 'cell-circuits-per-decile' keyword. The name is
# referenced by ATTRIBUTES, so it's left as it is.
_parse_identity_ed25519_line = _parse_key_block('identity-ed25519', 'ed25519_certificate', 'ED25519 CERT')
_parse_master_key_ed25519_line = _parse_simple_line('master-key-ed25519', 'ed25519_certificate_hash')
_parse_geoip_db_digest_line = _parse_forty_character_hex('geoip-db-digest', 'geoip_db_digest')
_parse_geoip6_db_digest_line = _parse_forty_character_hex('geoip6-db-digest', 'geoip6_db_digest')
_parse_dirreq_v2_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-resp', 'dir_v2_responses', 'dir_v2_responses_unknown')
_parse_dirreq_v3_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-resp', 'dir_v3_responses', 'dir_v3_responses_unknown')
_parse_dirreq_v2_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-direct-dl', 'dir_v2_direct_dl', 'dir_v2_direct_dl_unknown')
_parse_dirreq_v3_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-direct-dl', 'dir_v3_direct_dl', 'dir_v3_direct_dl_unknown')
_parse_dirreq_v2_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-tunneled-dl', 'dir_v2_tunneled_dl', 'dir_v2_tunneled_dl_unknown')
_parse_dirreq_v3_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-tunneled-dl', 'dir_v3_tunneled_dl', 'dir_v3_tunneled_dl_unknown')
_parse_dirreq_v2_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v2-share', 'dir_v2_share')
_parse_dirreq_v3_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v3-share', 'dir_v3_share')
_parse_cell_processed_cells_line = functools.partial(_parse_cell_line, 'cell-processed-cells', 'cell_processed_cells')
_parse_cell_queued_cells_line = functools.partial(_parse_cell_line, 'cell-queued-cells', 'cell_queued_cells')
_parse_cell_time_in_queue_line = functools.partial(_parse_cell_line, 'cell-time-in-queue', 'cell_time_in_queue')
_parse_cell_circuits_per_decline_line = _parse_int_line('cell-circuits-per-decile', 'cell_circuits_per_decile', allow_negative = False)
_parse_published_line = _parse_timestamp_line('published', 'published')
_parse_geoip_start_time_line = _parse_timestamp_line('geoip-start-time', 'geoip_start_time')
_parse_cell_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'cell-stats-end', 'cell_stats_end', 'cell_stats_interval')
_parse_entry_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'entry-stats-end', 'entry_stats_end', 'entry_stats_interval')
_parse_exit_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'exit-stats-end', 'exit_stats_end', 'exit_stats_interval')
_parse_bridge_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'bridge-stats-end', 'bridge_stats_end', 'bridge_stats_interval')
_parse_dirreq_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'dirreq-stats-end', 'dir_stats_end', 'dir_stats_interval')
_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values')
_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values')
_parse_dirreq_read_history_line = functools.partial(_parse_history_line, 'dirreq-read-history', 'dir_read_history_end', 'dir_read_history_interval', 'dir_read_history_values')
_parse_dirreq_write_history_line = functools.partial(_parse_history_line, 'dirreq-write-history', 'dir_write_history_end', 'dir_write_history_interval', 'dir_write_history_values')
_parse_exit_kibibytes_written_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-written', 'exit_kibibytes_written')
_parse_exit_kibibytes_read_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-read', 'exit_kibibytes_read')
_parse_exit_streams_opened_line = functools.partial(_parse_port_count_line, 'exit-streams-opened', 'exit_streams_opened')
_parse_hidden_service_stats_end_line = _parse_timestamp_line('hidserv-stats-end', 'hs_stats_end')
_parse_hidden_service_rend_relayed_cells_line = functools.partial(_parse_hs_stats, 'hidserv-rend-relayed-cells', 'hs_rend_cells', 'hs_rend_cells_attr')
_parse_hidden_service_dir_onions_seen_line = functools.partial(_parse_hs_stats, 'hidserv-dir-onions-seen', 'hs_dir_onions_seen', 'hs_dir_onions_seen_attr')
_parse_dirreq_v2_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-ips', 'dir_v2_ips')
_parse_dirreq_v3_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-ips', 'dir_v3_ips')
_parse_dirreq_v2_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-reqs', 'dir_v2_requests')
_parse_dirreq_v3_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-reqs', 'dir_v3_requests')
_parse_geoip_client_origins_line = functools.partial(_parse_geoip_to_count_line, 'geoip-client-origins', 'geoip_client_origins')
_parse_entry_ips_line = functools.partial(_parse_geoip_to_count_line, 'entry-ips', 'entry_ips')
_parse_bridge_ips_line = functools.partial(_parse_geoip_to_count_line, 'bridge-ips', 'bridge_ips')
_parse_router_sig_ed25519_line = _parse_simple_line('router-sig-ed25519', 'ed25519_signature')
_parse_router_digest_sha256_line = _parse_simple_line('router-digest-sha256', 'router_digest_sha256')
_parse_router_digest_line = _parse_forty_character_hex('router-digest', '_digest')
_parse_router_signature_line = _parse_key_block('router-signature', 'signature', 'SIGNATURE')


class ExtraInfoDescriptor(Descriptor):
  """
  Extra-info descriptor document.

  :var str nickname: **\\*** relay's nickname
  :var str fingerprint: **\\*** identity key fingerprint
  :var datetime published: **\\*** time in UTC when this descriptor was made
  :var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses
  :var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses
  :var dict transport: **\\*** mapping of transport methods to their (address,
    port, args) tuple, these usually appear on bridges in which case all of
    those are **None**

  **Bi-directional connection usage:**

  :var datetime conn_bi_direct_end: end of the sampling interval
  :var int conn_bi_direct_interval: seconds per interval
  :var int conn_bi_direct_below: connections that read/wrote less than 20 KiB
  :var int conn_bi_direct_read: connections that read at least 10x more than wrote
  :var int conn_bi_direct_write: connections that wrote at least 10x more than read
  :var int conn_bi_direct_both: remaining connections

  **Bytes read/written for relayed traffic:**

  :var datetime read_history_end: end of the sampling interval
  :var int read_history_interval: seconds per interval
  :var list read_history_values: bytes read during each interval

  :var datetime write_history_end: end of the sampling interval
  :var int write_history_interval: seconds per interval
  :var list write_history_values: bytes written during each interval

  **Cell relaying statistics:**

  :var datetime cell_stats_end: end of the period when stats were gathered
  :var int cell_stats_interval: length in seconds of the interval
  :var list cell_processed_cells: measurement of processed cells per circuit
  :var list cell_queued_cells: measurement of queued cells per circuit
  :var list cell_time_in_queue: mean enqueued time in milliseconds for cells
  :var int cell_circuits_per_decile: mean number of circuits in a decile

  **Directory Mirror Attributes:**

  :var datetime dir_stats_end: end of the period when stats were gathered
  :var int dir_stats_interval: length in seconds of the interval
  :var dict dir_v2_ips: mapping of locales to rounded count of requester ips
  :var dict dir_v3_ips: mapping of locales to rounded count of requester ips
  :var float dir_v2_share: percent of total directory traffic it expects to serve
  :var float dir_v3_share: percent of total directory traffic it expects to serve
  :var dict dir_v2_requests: mapping of locales to rounded count of requests
  :var dict dir_v3_requests: mapping of locales to rounded count of requests

  :var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
  :var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count
  :var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count

  :var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
  :var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement

  :var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
  :var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
  :var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement

  **Bytes read/written for directory mirroring:**

  :var datetime dir_read_history_end: end of the sampling interval
  :var int dir_read_history_interval: seconds per interval
  :var list dir_read_history_values: bytes read during each interval

  :var datetime dir_write_history_end: end of the sampling interval
  :var int dir_write_history_interval: seconds per interval
  :var list dir_write_history_values: bytes written during each interval

  **Guard Attributes:**

  :var datetime entry_stats_end: end of the period when stats were gathered
  :var int entry_stats_interval: length in seconds of the interval
  :var dict entry_ips: mapping of locales to rounded count of unique user ips

  **Exit Attributes:**

  :var datetime exit_stats_end: end of the period when stats were gathered
  :var int exit_stats_interval: length in seconds of the interval
  :var dict exit_kibibytes_written: traffic per port (keys are ints or 'other')
  :var dict exit_kibibytes_read: traffic per port (keys are ints or 'other')
  :var dict exit_streams_opened: streams per port (keys are ints or 'other')

  **Hidden Service Attributes:**

  :var datetime hs_stats_end: end of the sampling interval
  :var int hs_rend_cells: rounded count of the RENDEZVOUS1 cells seen
  :var dict hs_rend_cells_attr: **\\*** attributes provided for the hs_rend_cells
  :var int hs_dir_onions_seen: rounded count of the identities seen
  :var dict hs_dir_onions_seen_attr: **\\*** attributes provided for the hs_dir_onions_seen

  **Padding Count Attributes:**

  :var dict padding_counts: **\\*** padding parameters
  :var datetime padding_counts_end: end of the period when padding data is being collected
  :var int padding_counts_interval: length in seconds of the interval

  **Bridge Attributes:**

  :var datetime bridge_stats_end: end of the period when stats were gathered
  :var int bridge_stats_interval: length in seconds of the interval
  :var dict bridge_ips: mapping of locales to rounded count of unique user ips
  :var datetime geoip_start_time: replaced by bridge_stats_end (deprecated)
  :var dict geoip_client_origins: replaced by bridge_ips (deprecated)
  :var dict ip_versions: mapping of ip protocols to a rounded count for the number of users
  :var dict ip_transports: mapping of ip transports to a count for the number of users

  **\\*** attribute is either required when we're parsed with validation or has
  a default value, others are left as **None** if undefined

  .. versionchanged:: 1.4.0
     Added the hs_stats_end, hs_rend_cells, hs_rend_cells_attr,
     hs_dir_onions_seen, and hs_dir_onions_seen_attr attributes.

  .. versionchanged:: 1.6.0
     Added the padding_counts, padding_counts_end, and padding_counts_interval
     attributes.
  """

  ATTRIBUTES = {
    'nickname': (None, _parse_extra_info_line),
    'fingerprint': (None, _parse_extra_info_line),
    'published': (None, _parse_published_line),
    'geoip_db_digest': (None, _parse_geoip_db_digest_line),
    'geoip6_db_digest': (None, _parse_geoip6_db_digest_line),
    'transport': ({}, _parse_transport_line),

    'conn_bi_direct_end': (None, _parse_conn_bi_direct_line),
    'conn_bi_direct_interval': (None, _parse_conn_bi_direct_line),
    'conn_bi_direct_below': (None, _parse_conn_bi_direct_line),
    'conn_bi_direct_read': (None, _parse_conn_bi_direct_line),
    'conn_bi_direct_write': (None, _parse_conn_bi_direct_line),
    'conn_bi_direct_both': (None, _parse_conn_bi_direct_line),

    'read_history_end': (None, _parse_read_history_line),
    'read_history_interval': (None, _parse_read_history_line),
    'read_history_values': (None, _parse_read_history_line),

    'write_history_end': (None, _parse_write_history_line),
    'write_history_interval': (None, _parse_write_history_line),
    'write_history_values': (None, _parse_write_history_line),

    'cell_stats_end': (None, _parse_cell_stats_end_line),
    'cell_stats_interval': (None, _parse_cell_stats_end_line),
    'cell_processed_cells': (None, _parse_cell_processed_cells_line),
    'cell_queued_cells': (None, _parse_cell_queued_cells_line),
    'cell_time_in_queue': (None, _parse_cell_time_in_queue_line),
    'cell_circuits_per_decile': (None, _parse_cell_circuits_per_decline_line),

    'dir_stats_end': (None, _parse_dirreq_stats_end_line),
    'dir_stats_interval': (None, _parse_dirreq_stats_end_line),
    'dir_v2_ips': (None, _parse_dirreq_v2_ips_line),
    'dir_v3_ips': (None, _parse_dirreq_v3_ips_line),
    'dir_v2_share': (None, _parse_dirreq_v2_share_line),
    'dir_v3_share': (None, _parse_dirreq_v3_share_line),
    'dir_v2_requests': (None, _parse_dirreq_v2_reqs_line),
    'dir_v3_requests': (None, _parse_dirreq_v3_reqs_line),
    'dir_v2_responses': (None, _parse_dirreq_v2_resp_line),
    'dir_v3_responses': (None, _parse_dirreq_v3_resp_line),
    'dir_v2_responses_unknown': (None, _parse_dirreq_v2_resp_line),
    'dir_v3_responses_unknown': (None, _parse_dirreq_v3_resp_line),
    'dir_v2_direct_dl': (None, _parse_dirreq_v2_direct_dl_line),
    'dir_v3_direct_dl': (None, _parse_dirreq_v3_direct_dl_line),
    'dir_v2_direct_dl_unknown': (None, _parse_dirreq_v2_direct_dl_line),
    'dir_v3_direct_dl_unknown': (None, _parse_dirreq_v3_direct_dl_line),
    'dir_v2_tunneled_dl': (None, _parse_dirreq_v2_tunneled_dl_line),
    'dir_v3_tunneled_dl': (None, _parse_dirreq_v3_tunneled_dl_line),
    'dir_v2_tunneled_dl_unknown': (None, _parse_dirreq_v2_tunneled_dl_line),
    'dir_v3_tunneled_dl_unknown': (None, _parse_dirreq_v3_tunneled_dl_line),

    'dir_read_history_end': (None, _parse_dirreq_read_history_line),
    'dir_read_history_interval': (None, _parse_dirreq_read_history_line),
    'dir_read_history_values': (None, _parse_dirreq_read_history_line),

    'dir_write_history_end': (None, _parse_dirreq_write_history_line),
    'dir_write_history_interval': (None, _parse_dirreq_write_history_line),
    'dir_write_history_values': (None, _parse_dirreq_write_history_line),

    'entry_stats_end': (None, _parse_entry_stats_end_line),
    'entry_stats_interval': (None, _parse_entry_stats_end_line),
    'entry_ips': (None, _parse_entry_ips_line),

    'exit_stats_end': (None, _parse_exit_stats_end_line),
    'exit_stats_interval': (None, _parse_exit_stats_end_line),
    'exit_kibibytes_written': (None, _parse_exit_kibibytes_written_line),
    'exit_kibibytes_read': (None, _parse_exit_kibibytes_read_line),
    'exit_streams_opened': (None, _parse_exit_streams_opened_line),

    'hs_stats_end': (None, _parse_hidden_service_stats_end_line),
    'hs_rend_cells': (None, _parse_hidden_service_rend_relayed_cells_line),
    'hs_rend_cells_attr': ({}, _parse_hidden_service_rend_relayed_cells_line),
    'hs_dir_onions_seen': (None, _parse_hidden_service_dir_onions_seen_line),
    'hs_dir_onions_seen_attr': ({}, _parse_hidden_service_dir_onions_seen_line),

    'padding_counts': ({}, _parse_padding_counts_line),
    'padding_counts_end': (None, _parse_padding_counts_line),
    'padding_counts_interval': (None, _parse_padding_counts_line),

    'bridge_stats_end': (None, _parse_bridge_stats_end_line),
    'bridge_stats_interval': (None, _parse_bridge_stats_end_line),
    'bridge_ips': (None, _parse_bridge_ips_line),
    'geoip_start_time': (None, _parse_geoip_start_time_line),
    'geoip_client_origins': (None, _parse_geoip_client_origins_line),

    'ip_versions': (None, _parse_bridge_ip_versions_line),
    'ip_transports': (None, _parse_bridge_ip_transports_line),
  }

  # Maps the keyword a descriptor line starts with to the module-level function
  # that parses that line. Keywords here are the hyphenated names as they
  # appear in the document. The relay and bridge subclasses below copy this
  # table and add their own keywords to it.

  PARSER_FOR_LINE = {
    'extra-info': _parse_extra_info_line,
    'geoip-db-digest': _parse_geoip_db_digest_line,
    'geoip6-db-digest': _parse_geoip6_db_digest_line,
    'transport': _parse_transport_line,
    'cell-circuits-per-decile': _parse_cell_circuits_per_decline_line,
    'dirreq-v2-resp': _parse_dirreq_v2_resp_line,
    'dirreq-v3-resp': _parse_dirreq_v3_resp_line,
    'dirreq-v2-direct-dl': _parse_dirreq_v2_direct_dl_line,
    'dirreq-v3-direct-dl': _parse_dirreq_v3_direct_dl_line,
    'dirreq-v2-tunneled-dl': _parse_dirreq_v2_tunneled_dl_line,
    'dirreq-v3-tunneled-dl': _parse_dirreq_v3_tunneled_dl_line,
    'dirreq-v2-share': _parse_dirreq_v2_share_line,
    'dirreq-v3-share': _parse_dirreq_v3_share_line,
    'cell-processed-cells': _parse_cell_processed_cells_line,
    'cell-queued-cells': _parse_cell_queued_cells_line,
    'cell-time-in-queue': _parse_cell_time_in_queue_line,
    'published': _parse_published_line,
    'geoip-start-time': _parse_geoip_start_time_line,
    'cell-stats-end': _parse_cell_stats_end_line,
    'entry-stats-end': _parse_entry_stats_end_line,
    'exit-stats-end': _parse_exit_stats_end_line,
    'bridge-stats-end': _parse_bridge_stats_end_line,
    'dirreq-stats-end': _parse_dirreq_stats_end_line,
    'conn-bi-direct': _parse_conn_bi_direct_line,
    'read-history': _parse_read_history_line,
    'write-history': _parse_write_history_line,
    'dirreq-read-history': _parse_dirreq_read_history_line,
    'dirreq-write-history': _parse_dirreq_write_history_line,
    'exit-kibibytes-written': _parse_exit_kibibytes_written_line,
    'exit-kibibytes-read': _parse_exit_kibibytes_read_line,
    'exit-streams-opened': _parse_exit_streams_opened_line,
    'hidserv-stats-end': _parse_hidden_service_stats_end_line,
    'hidserv-rend-relayed-cells': _parse_hidden_service_rend_relayed_cells_line,
    'hidserv-dir-onions-seen': _parse_hidden_service_dir_onions_seen_line,
    'padding-counts': _parse_padding_counts_line,
    'dirreq-v2-ips': _parse_dirreq_v2_ips_line,
    'dirreq-v3-ips': _parse_dirreq_v3_ips_line,
    'dirreq-v2-reqs': _parse_dirreq_v2_reqs_line,
    'dirreq-v3-reqs': _parse_dirreq_v3_reqs_line,
    'geoip-client-origins': _parse_geoip_client_origins_line,
    'entry-ips': _parse_entry_ips_line,
    'bridge-ips': _parse_bridge_ips_line,
    'bridge-ip-versions': _parse_bridge_ip_versions_line,
    'bridge-ip-transports': _parse_bridge_ip_transports_line,
  }

  def __init__(self, raw_contents, validate = False):
    """
    Extra-info descriptor constructor. Validation is **off** by default, in
    which case the split up entries are simply stashed on the instance and the
    parent is constructed with lazy loading enabled. When validation is
    requested the document's structure is checked and its entries are parsed
    right away.

    :param str raw_contents: extra-info content provided by the relay
    :param bool validate: checks the validity of the extra-info descriptor if
      **True**, skips these checks otherwise

    :raises: **ValueError** if the contents is malformed and validate is True
    """

    super(ExtraInfoDescriptor, self).__init__(raw_contents, lazy_load = not validate)
    entries = _descriptor_components(raw_contents, validate)

    if validate:
      # subclasses override this hook, so resolve it once for both checks

      required_fields = self._required_fields()

      for keyword in required_fields:
        if keyword not in entries:
          raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword)

      for keyword in required_fields + SINGLE_FIELDS:
        if keyword in entries and len(entries[keyword]) > 1:
          raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword)

      # Keywords in the order the entries mapping yields them. An empty
      # document fails the positional checks below with a ValueError rather
      # than an IndexError from indexing into an empty list.

      keywords = list(entries.keys())

      expected_first_keyword = self._first_keyword()
      if expected_first_keyword and (not keywords or expected_first_keyword != keywords[0]):
        raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword)

      expected_last_keyword = self._last_keyword()
      if expected_last_keyword and (not keywords or expected_last_keyword != keywords[-1]):
        raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)

      self._parse(entries, validate)
    else:
      self._entries = entries

  def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
    """
    Digest of this descriptor's content. These are referenced by...

      * **Server Descriptors** (sha1)

        * Referer: :class:`~stem.descriptor.server_descriptor.ServerDescriptor` **extra_info_digest** attribute
        * Format: **SHA1/HEX**

      * **Server Descriptors** (sha256)

        * Referer: :class:`~stem.descriptor.server_descriptor.ServerDescriptor` **extra_info_sha256_digest** attribute
        * Format: **SHA256/BASE64**

    This base implementation is only a placeholder that always raises, the
    relay and bridge subclasses provide the actual digests.

    .. versionchanged:: 1.8.0
       Added the hash_type and encoding arguments.

    :param stem.descriptor.DigestHash hash_type: digest hashing algorithm
    :param stem.descriptor.DigestEncoding encoding: digest encoding

    :returns: **hashlib.HASH** or **str** based on our encoding argument

    :raises: **NotImplementedError** if called on this base class rather than
      a subclass that overrides it
    """

    raise NotImplementedError('Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass')

  def _required_fields(self):
    """
    Keywords that must be present when we're constructed with validation. The
    constructor also rejects documents where any of these appear more than
    once. Subclasses can override this to adjust the set.

    :returns: the module's **REQUIRED_FIELDS**
    """

    return REQUIRED_FIELDS

  def _first_keyword(self):
    """
    Keyword that a validated descriptor must begin with. The constructor skips
    this check if we return a false value.

    :returns: **str** with the keyword of the expected first entry
    """

    return 'extra-info'

  def _last_keyword(self):
    """
    Keyword that a validated descriptor must end with. The constructor skips
    this check if we return a false value.

    :returns: **str** with the keyword of the expected final entry
    """

    return 'router-signature'


class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
  """
  Extra-info descriptor published by a relay, as found in
  'GETINFO extra-info/digest/\\*' responses, cached descriptors, and metrics
  (`specification <https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_).

  :var str ed25519_certificate: base64 encoded ed25519 certificate
  :var str ed25519_signature: signature of this document using ed25519
  :var str signature: **\\*** signature for this extrainfo descriptor

  **\\*** attribute is required when we're parsed with validation

  .. versionchanged:: 1.5.0
     Added the ed25519_certificate and ed25519_signature attributes.
  """

  TYPE_ANNOTATION_NAME = 'extra-info'

  # start from a copy of our parent's tables, then add the relay-only fields

  ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES)
  ATTRIBUTES.update({
    'ed25519_certificate': (None, _parse_identity_ed25519_line),
    'ed25519_signature': (None, _parse_router_sig_ed25519_line),
    'signature': (None, _parse_router_signature_line),
  })

  PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE)
  PARSER_FOR_LINE.update({
    'identity-ed25519': _parse_identity_ed25519_line,
    'router-sig-ed25519': _parse_router_sig_ed25519_line,
    'router-signature': _parse_router_signature_line,
  })

  @classmethod
  def content(cls, attr = None, exclude = (), sign = False, signing_key = None):
    """
    Generates content for a relay extra-info descriptor with a random
    nickname, fingerprint, and publication date. Unless signing is requested
    the 'router-signature' is a random placeholder blob.

    :param attr: passed through to the descriptor content helper, and checked
      for a 'router-signature' entry when signing
    :param exclude: passed through to the descriptor content helper
    :param bool sign: sign the content if **True**
    :param signing_key: key to sign with, providing one implies **sign**; a
      new key is created if we're signing and this is **None**

    :returns: the descriptor content

    :raises: **ValueError** if asked to sign while **attr** already supplies a
      'router-signature'
    """

    header = (
      ('extra-info', '%s %s' % (_random_nickname(), _random_fingerprint())),
      ('published', _random_date()),
    )

    # supplying a key counts as a request to sign, so we only take the
    # unsigned path when neither was given

    if not (signing_key or sign):
      footer = (
        ('router-signature', _random_crypto_blob('SIGNATURE')),
      )

      return _descriptor_content(attr, exclude, header, footer)

    if attr and 'router-signature' in attr:
      raise ValueError('Cannot sign the descriptor if a router-signature has been provided')

    if signing_key is None:
      signing_key = create_signing_key()

    # the signature gets appended after a bare 'router-signature' line

    unsigned_content = _descriptor_content(attr, exclude, header) + b'\nrouter-signature\n'
    return _append_router_signature(unsigned_content, signing_key.private)

  @classmethod
  def create(cls, attr = None, exclude = (), validate = True, sign = False, signing_key = None):
    """
    Constructs a descriptor from what :func:`content` generates for the same
    arguments.

    :param bool validate: forwarded to our constructor, other arguments are
      forwarded to :func:`content`

    :returns: new instance of this class
    """

    generated_content = cls.content(attr, exclude, sign, signing_key)
    return cls(generated_content, validate = validate)

  @lru_cache()
  def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
    """
    Digest of this descriptor's content, cached per set of arguments.

    :param stem.descriptor.DigestHash hash_type: digest hashing algorithm
    :param stem.descriptor.DigestEncoding encoding: digest encoding

    :returns: digest as encoded by stem.descriptor._encode_digest

    :raises: **NotImplementedError** if the hash type is neither sha1 nor sha256
    """

    if hash_type == DigestHash.SHA1:
      # sha1 covers everything up through the 'router-signature' line, leaving
      # out the signature itself

      signed_portion = self._content_range(end = '\nrouter-signature\n')
      sha1_hash = hashlib.sha1(signed_portion)
      return stem.descriptor._encode_digest(sha1_hash, encoding)

    if hash_type == DigestHash.SHA256:
      # Due to a tor bug sha256 digests are calculated over the whole
      # descriptor rather than omitting the signature...
      #
      #   https://trac.torproject.org/projects/tor/ticket/28415

      sha256_hash = hashlib.sha256(self.get_bytes())
      return stem.descriptor._encode_digest(sha256_hash, encoding)

    raise NotImplementedError('Extrainfo descriptor digests are only available in sha1 and sha256, not %s' % hash_type)


class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
  """
  Extra-info descriptor for a bridge (`bridge descriptor specification
  <https://metrics.torproject.org/collector.html#bridge-descriptors>`_)

  :var str ed25519_certificate_hash: sha256 hash of the original identity-ed25519
  :var str router_digest_sha256: sha256 digest of this document

  .. versionchanged:: 1.5.0
     Added the ed25519_certificate_hash and router_digest_sha256 attributes.
  """

  TYPE_ANNOTATION_NAME = 'bridge-extra-info'

  # start from a copy of our parent's tables, then add the bridge-only fields

  ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES)
  ATTRIBUTES.update({
    'ed25519_certificate_hash': (None, _parse_master_key_ed25519_line),
    'router_digest_sha256': (None, _parse_router_digest_sha256_line),
    '_digest': (None, _parse_router_digest_line),
  })

  PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE)
  PARSER_FOR_LINE.update({
    'master-key-ed25519': _parse_master_key_ed25519_line,
    'router-digest-sha256': _parse_router_digest_sha256_line,
    'router-digest': _parse_router_digest_line,
  })

  @classmethod
  def content(cls, attr = None, exclude = (), sign = False):
    """
    Generates content for a bridge extra-info descriptor with a fixed
    nickname, random fingerprint and publication date, and a random
    'router-digest'.

    :param attr: passed through to the descriptor content helper
    :param exclude: passed through to the descriptor content helper
    :param bool sign: unsupported, must be **False**

    :returns: the descriptor content

    :raises: **NotImplementedError** if **sign** is set
    """

    if sign:
      raise NotImplementedError('Signing of %s not implemented' % cls.__name__)

    header = (
      ('extra-info', 'ec2bridgereaac65a3 %s' % _random_fingerprint()),
      ('published', _random_date()),
    )

    footer = (
      ('router-digest', _random_fingerprint()),
    )

    return _descriptor_content(attr, exclude, header, footer)

  def digest(self, hash_type = DigestHash.SHA1, encoding = DigestEncoding.HEX):
    """
    Digest of this descriptor. These are read from attributes of the
    descriptor rather than being calculated, so only the two stored
    combinations are available.

    :param stem.descriptor.DigestHash hash_type: digest hashing algorithm
    :param stem.descriptor.DigestEncoding encoding: digest encoding

    :returns: our **_digest** attribute for sha1/hex, **router_digest_sha256**
      for sha256/base64

    :raises: **NotImplementedError** for any other combination
    """

    if hash_type == DigestHash.SHA1 and encoding == DigestEncoding.HEX:
      return self._digest

    if hash_type == DigestHash.SHA256 and encoding == DigestEncoding.BASE64:
      return self.router_digest_sha256

    raise NotImplementedError('Bridge extrainfo digests are only available as sha1/hex and sha256/base64, not %s/%s' % (hash_type, encoding))

  def _required_fields(self):
    """
    Required keywords for bridges: 'router-digest' followed by the module's
    **REQUIRED_FIELDS** with 'router-signature' left out.

    :returns: **tuple** of the required keywords
    """

    required = ['router-digest']

    for field in REQUIRED_FIELDS:
      if field not in ('router-signature',):
        required.append(field)

    return tuple(required)

  def _last_keyword(self):
    """
    Bridges have no expected final keyword, so the constructor skips that check.

    :returns: **None**
    """

    return None
