"""
Descriptor archives are available from `CollecTor
<https://metrics.torproject.org/collector.html>`_. If you need Tor's topology
at a prior point in time this is the place to go!

With CollecTor you can either read descriptors directly...

.. literalinclude:: /_static/example/collector_reading.py
   :language: python

... or download the descriptors to disk and read them later.

.. literalinclude:: /_static/example/collector_caching.py
   :language: python

::

  get_instance - Provides a singleton CollecTor used for...
    |- get_server_descriptors - published server descriptors
    |- get_extrainfo_descriptors - published extrainfo descriptors
    |- get_microdescriptors - published microdescriptors
    |- get_consensus - published router status entries
    |
    |- get_key_certificates - authority key certificates
    |- get_bandwidth_files - bandwidth authority heuristics
    +- get_exit_lists - TorDNSEL exit list

  File - Individual file residing within CollecTor
    |- read - provides descriptors from this file
    +- download - download this file to disk

  CollecTor - Downloader for descriptors from CollecTor
    |- get_server_descriptors - published server descriptors
    |- get_extrainfo_descriptors - published extrainfo descriptors
    |- get_microdescriptors - published microdescriptors
    |- get_consensus - published router status entries
    |
    |- get_key_certificates - authority key certificates
    |- get_bandwidth_files - bandwidth authority heuristics
    |- get_exit_lists - TorDNSEL exit list
    |
    |- index - metadata for content available from CollecTor
    +- files - files available from CollecTor
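
For example, a minimal sketch that prints yesterday's server descriptors
(the one-day window is illustrative and archive availability depends on
CollecTor)::

  import datetime
  import stem.descriptor.collector

  yesterday = datetime.datetime.utcnow() - datetime.timedelta(days = 1)

  for desc in stem.descriptor.collector.get_server_descriptors(start = yesterday):
    print('%s (%s)' % (desc.nickname, desc.fingerprint))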

.. versionadded:: 1.8.0
    N)CompressionDocumentHandlerz!https://collector.torproject.org/  z-(\d{4})-(\d{2})\.z%(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})i'     c                   C   s   t du rt a t S )z
  Provides the singleton :class:`~stem.descriptor.collector.CollecTor`
  used for this module's shorthand functions.

  :returns: singleton :class:`~stem.descriptor.collector.CollecTor` instance
  N)SINGLETON_COLLECTOR	CollecTor r   r   ;/usr/lib/python3/dist-packages/stem/descriptor/collector.pyget_instanceP   s   
r
   F   c                 c   (    t  | |||||D ]}|V  qdS )zv
  Shorthand for
  :func:`~stem.descriptor.collector.CollecTor.get_server_descriptors`
  on our singleton instance.
  N)r
   get_server_descriptorsstartendcache_tobridgetimeoutretriesdescr   r   r	   r   `      r   c                 c   r   )zy
  Shorthand for
  :func:`~stem.descriptor.collector.CollecTor.get_extrainfo_descriptors`
  on our singleton instance.
  N)r
   get_extrainfo_descriptorsr   r   r   r	   r   k   r   r   c                 c   &    t  | ||||D ]}|V  qdS )zt
  Shorthand for
  :func:`~stem.descriptor.collector.CollecTor.get_microdescriptors`
  on our singleton instance.
  N)r
   get_microdescriptorsr   r   r   r   r   r   r   r   r	   r   v      r   c	           
      c   s.    t  | ||||||||	D ]}	|	V  qdS )zm
  Shorthand for
  :func:`~stem.descriptor.collector.CollecTor.get_consensus`
  on our singleton instance.
  N)r
   get_consensus)
r   r   r   document_handlerversionmicrodescriptorr   r   r   r   r   r   r	   r      s    r   c                 c   r   )zt
  Shorthand for
  :func:`~stem.descriptor.collector.CollecTor.get_key_certificates`
  on our singleton instance.
  N)r
   get_key_certificatesr   r   r   r	   r       r   r    c                 c   r   )zs
  Shorthand for
  :func:`~stem.descriptor.collector.CollecTor.get_bandwidth_files`
  on our singleton instance.
  N)r
   get_bandwidth_filesr   r   r   r	   r!      r   r!   c                 c   r   )zn
  Shorthand for
  :func:`~stem.descriptor.collector.CollecTor.get_exit_lists`
  on our singleton instance.
  N)r
   get_exit_listsr   r   r   r	   r"      r   r"   c                   @   sT   e Zd ZdZdd ZddddejddfddZdd
dZe	dd Z
e	dd ZdS )FileaF  
  File within CollecTor.

  :var str path: file path within collector
  :var tuple types: descriptor types contained within this file
  :var stem.descriptor.Compression compression: file compression, **None** if
    this cannot be determined
  :var int size: size of the file
  :var str sha256: file's sha256 checksum

  :var datetime start: first publication within the file, **None** if this
    cannot be determined
  :var datetime end: last publication within the file, **None** if this cannot
    be determined
  :var datetime last_modified: when the file was last modified
  """

  def __init__(self, path, types, size, sha256, first_published, last_published, last_modified):
    self.path = path
    self.types = tuple(types) if types else ()
    self.compression = File._guess_compression(path)
    self.size = size
    self.sha256 = sha256
    self.last_modified = datetime.datetime.strptime(last_modified, '%Y-%m-%d %H:%M')
    self._downloaded_to = None  # location we last downloaded this to

    # Use the publication times from the index when they're available, and
    # fall back to guessing them from the filename otherwise.

    if first_published and last_published:
      self.start = datetime.datetime.strptime(first_published, '%Y-%m-%d %H:%M')
      self.end = datetime.datetime.strptime(last_published, '%Y-%m-%d %H:%M')
    else:
      self.start, self.end = File._guess_time_range(path)

  def read(self, directory = None, descriptor_type = None, start = None, end = None, document_handler = DocumentHandler.ENTRIES, timeout = None, retries = 3):
    """
    Provides descriptors from this archive. Descriptors are downloaded or read
    from disk as follows...

    * If this file has already been downloaded through
      :func:`~stem.descriptor.collector.File.download` these descriptors
      are read from disk.

    * If a **directory** argument is provided and the file is already present
      these descriptors are read from disk.

    * If a **directory** argument is provided and the file is not present the
      file is downloaded to this location and then read.

    * If the file has not been downloaded and no **directory** argument is
      provided then the file is downloaded to a temporary directory that's
      deleted after it is read (see the example below).
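
    For example, reading the oldest server descriptor archive through a local
    cache (a minimal sketch, the cache directory is hypothetical)::

      import stem.descriptor.collector

      collector = stem.descriptor.collector.get_instance()
      archive = collector.files('server-descriptor')[0]

      for desc in archive.read(directory = '~/descriptor_cache'):
        print(desc.fingerprint)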

    :param str directory: destination to download into
    :param str descriptor_type: `descriptor type
      <https://metrics.torproject.org/collector.html#data-formats>`_, this is
      guessed if not provided
    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param stem.descriptor.__init__.DocumentHandler document_handler: method in
      which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
    :param int timeout: timeout when connection becomes idle, no timeout
      applied if **None**
    :param int retries: maximum attempts to impose

    :returns: iterator for :class:`~stem.descriptor.__init__.Descriptor`
      instances in the file

    :raises:
      * **ValueError** if unable to determine the descriptor type
      * **TypeError** if we cannot parse this descriptor type
      * :class:`~stem.DownloadFailed` if the download fails
    Nc                 S   s   g | ]	}| d d qS ) r   )split).0tr   r   r	   
<listcomp>   s    zFile.read.<locals>.<listcomp>z/Unable to determine this file's descriptor typer   z;Unable to disambiguate file's descriptor type from among %sz, r   T)r   	published)setr&   
ValueErrorlenjoinr.   osr$   existsdirnametempfileZmkdtempreadshutilrmtreedownloadstemZ
descriptorZ
parse_file
startswithZtype_annotationnamegetattr)r0   	directorydescriptor_typer   r   r   r   r   Z
base_typesZtmp_directoryr   r$   r9   r   r   r	   rB      s8   (

z	File.readTFc                 C   sN  | j dd }| jtjkr|r|ddd }tj |}tj ||}tj 	|s0t
| tj 	|rtt|2}tt| j}	t|  }
|	|
krZ|W  d   S |setd||	|
f W d   n1 sow   Y  tjjt| j  ||}|r| j|}t|d}|| W d   n1 sw   Y  || _|S )	a  
    Downloads this file to the given location. If a file already exists this is
    a no-op.
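
    For example, fetching the oldest bandwidth file archive (a sketch, the
    destination directory is hypothetical)::

      from stem.descriptor.collector import CollecTor

      archive = CollecTor().files('bandwidth-file')[0]
      path = archive.download('~/collector_cache')

      print('downloaded to %s' % path)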

    :param str directory: destination to download into
    :param bool decompress: decompress written file
    :param int timeout: timeout when connection becomes idle, no timeout
      applied if **None**
    :param int retries: maximum attempts to impose
    :param bool overwrite: if this file exists but mismatches CollecTor's
      checksum then overwrites if **True**, otherwise raises an exception

    :returns: **str** with the path we downloaded to

    :raises:
      * :class:`~stem.DownloadFailed` if the download fails
      * **IOError** if a mismatching file exists and **overwrite** is **False**
    """

    filename = self.path.split('/')[-1]

    if self.compression != Compression.PLAINTEXT and decompress:
      filename = filename.rsplit('.', 1)[0]

    directory = os.path.expanduser(directory)
    path = os.path.join(directory, filename)

    if not os.path.exists(directory):
      os.makedirs(directory)

    # If this file already exists check its checksum: a matching file is a
    # no-op, a mismatching one is an error unless we're overwriting.

    if os.path.exists(path):
      with open(path, 'rb') as prior_file:
        expected_hash = binascii.hexlify(base64.b64decode(self.sha256)).decode('utf-8')
        actual_hash = hashlib.sha256(prior_file.read()).hexdigest()

      if expected_hash == actual_hash:
        return path  # we already have the file
      elif not overwrite:
        raise IOError("%s already exists but mismatches CollecTor's checksum (expected: %s, actual: %s)" % (path, expected_hash, actual_hash))

    response = stem.util.connection.download(COLLECTOR_URL + self.path, timeout, retries)

    if decompress:
      response = self.compression.decompress(response)

    with open(path, 'wb') as output_file:
      output_file.write(response)

    self._downloaded_to = path
    return path

  @staticmethod
  def _guess_compression(path):
    """
    Determine file compression from CollecTor's filename.
    """

    for compression in (Compression.LZMA, Compression.BZ2, Compression.GZIP):
      if path.endswith(compression.extension):
        return compression

    return Compression.PLAINTEXT

  @staticmethod
  def _guess_time_range(path):
    """
    Attempt to determine the (start, end) time range from CollecTor's filename.
    This provides (None, None) if this cannot be determined.
    """

    year_match = YEAR_DATE.search(path)

    if year_match:
      year, month = map(int, year_match.groups())
      start = datetime.datetime(year, month, 1)

      if month < 12:
        return (start, datetime.datetime(year, month + 1, 1))
      else:
        return (start, datetime.datetime(year + 1, 1, 1))

    sec_match = SEC_DATE.search(path)

    if sec_match:
      # the filename's timestamp is the start of a roughly five minute
      # publication window

      start = datetime.datetime.strptime(sec_match.group(1), '%Y-%m-%d-%H-%M-%S')
      return (start, start + datetime.timedelta(seconds = 300))

    return (None, None)


class CollecTor(object):
  """
  Downloader for descriptors from CollecTor. The contents of CollecTor are
  provided in `an index <https://collector.torproject.org/index/index.json>`_
  that's fetched as required.

  :var int retries: number of times to attempt the request if downloading it
    fails
  :var float timeout: duration before we'll time out our request
  """

  def __init__(self, retries = 2, timeout = None):
    self.retries = retries
    self.timeout = timeout

    self._cached_index = None
    self._cached_files = None
    self._cached_index_at = 0

  def get_server_descriptors(self, start = None, end = None, cache_to = None, bridge = False, timeout = None, retries = 3):
    """
    Provides server descriptors published during the given time range, sorted
    oldest to newest.

    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param str cache_to: directory to cache archives into, if an archive is
      available here it is not downloaded
    :param bool bridge: standard descriptors if **False**, bridge if **True**
    :param int timeout: timeout for downloading each individual archive when
      the connection becomes idle, no timeout applied if **None**
    :param int retries: maximum attempts to impose on a per-archive basis

    :returns: **iterator** of
      :class:`~stem.descriptor.server_descriptor.ServerDescriptor` for the
      given time range

    :raises: :class:`~stem.DownloadFailed` if the download fails
    """

    desc_type = 'server-descriptor' if not bridge else 'bridge-server-descriptor'

    for f in self.files(desc_type, start, end):
      for desc in f.read(cache_to, desc_type, start, end, timeout = timeout, retries = retries):
        yield desc

  def get_extrainfo_descriptors(self, start = None, end = None, cache_to = None, bridge = False, timeout = None, retries = 3):
    """
    Provides extrainfo descriptors published during the given time range,
    sorted oldest to newest.

    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param str cache_to: directory to cache archives into, if an archive is
      available here it is not downloaded
    :param bool bridge: standard descriptors if **False**, bridge if **True**
    :param int timeout: timeout for downloading each individual archive when
      the connection becomes idle, no timeout applied if **None**
    :param int retries: maximum attempts to impose on a per-archive basis

    :returns: **iterator** of
      :class:`~stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor`
      for the given time range

    :raises: :class:`~stem.DownloadFailed` if the download fails
    """

    desc_type = 'extra-info' if not bridge else 'bridge-extra-info'

    for f in self.files(desc_type, start, end):
      for desc in f.read(cache_to, desc_type, start, end, timeout = timeout, retries = retries):
        yield desc

  def get_microdescriptors(self, start = None, end = None, cache_to = None, timeout = None, retries = 3):
    """
    Provides microdescriptors estimated to be published during the given time
    range, sorted oldest to newest. Unlike server/extrainfo descriptors,
    microdescriptors change very infrequently...

    ::

      "Microdescriptors are expected to be relatively static and only change
      about once per week." -dir-spec section 3.3

    CollecTor archives only contain microdescriptors that *change*, so hourly
    tarballs often contain very few. Microdescriptors also do not contain
    their publication timestamp, so this is estimated.

    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param str cache_to: directory to cache archives into, if an archive is
      available here it is not downloaded
    :param int timeout: timeout for downloading each individual archive when
      the connection becomes idle, no timeout applied if **None**
    :param int retries: maximum attempts to impose on a per-archive basis

    :returns: **iterator** of
      :class:`~stem.descriptor.microdescriptor.Microdescriptor`
      for the given time range

    :raises: :class:`~stem.DownloadFailed` if the download fails
    """

    for f in self.files('microdescriptor', start, end):
      for desc in f.read(cache_to, 'microdescriptor', start, end, timeout = timeout, retries = retries):
        yield desc

  def get_consensus(self, start = None, end = None, cache_to = None, document_handler = DocumentHandler.ENTRIES, version = 3, microdescriptor = False, bridge = False, timeout = None, retries = 3):
    """
    Provides consensus router status entries published during the given time
    range, sorted oldest to newest.
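
    For example, reading microdescriptor flavored consensus entries rather
    than the standard flavor (a sketch, the dates are illustrative)::

      import datetime

      from stem.descriptor.collector import CollecTor

      collector = CollecTor()

      for entry in collector.get_consensus(start = datetime.datetime(2020, 1, 1), end = datetime.datetime(2020, 1, 2), microdescriptor = True):
        print(entry.fingerprint)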

    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param str cache_to: directory to cache archives into, if an archive is
      available here it is not downloaded
    :param stem.descriptor.__init__.DocumentHandler document_handler: method in
      which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
    :param int version: consensus variant to retrieve (versions 2 or 3)
    :param bool microdescriptor: provides the microdescriptor consensus if
      **True**, standard consensus otherwise
    :param bool bridge: standard descriptors if **False**, bridge if **True**
    :param int timeout: timeout for downloading each individual archive when
      the connection becomes idle, no timeout applied if **None**
    :param int retries: maximum attempts to impose on a per-archive basis

    :returns: **iterator** of
      :class:`~stem.descriptor.router_status_entry.RouterStatusEntry`
      for the given time range

    :raises: :class:`~stem.DownloadFailed` if the download fails
    """

    if version == 3 and not microdescriptor and not bridge:
      desc_type = 'network-status-consensus-3'
    elif version == 3 and microdescriptor and not bridge:
      desc_type = 'network-status-microdesc-consensus-3'
    elif version == 2 and not microdescriptor and not bridge:
      desc_type = 'network-status-2'
    elif bridge:
      desc_type = 'bridge-network-status'
    elif microdescriptor and version != 3:
      raise ValueError('Only v3 microdescriptors are available (not version %s)' % version)
    else:
      raise ValueError('Only v2 and v3 router status entries are available (not version %s)' % version)

    for f in self.files(desc_type, start, end):
      for desc in f.read(cache_to, desc_type, start, end, document_handler, timeout = timeout, retries = retries):
        yield desc

  def get_key_certificates(self, start = None, end = None, cache_to = None, timeout = None, retries = 3):
    """
    Directory authority key certificates for the given time range,
    sorted oldest to newest.

    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param str cache_to: directory to cache archives into, if an archive is
      available here it is not downloaded
    :param int timeout: timeout for downloading each individual archive when
      the connection becomes idle, no timeout applied if **None**
    :param int retries: maximum attempts to impose on a per-archive basis

    :returns: **iterator** of
      :class:`~stem.descriptor.networkstatus.KeyCertificate`
      for the given time range

    :raises: :class:`~stem.DownloadFailed` if the download fails
    """

    for f in self.files('dir-key-certificate-3', start, end):
      for desc in f.read(cache_to, 'dir-key-certificate-3', start, end, timeout = timeout, retries = retries):
        yield desc

  def get_bandwidth_files(self, start = None, end = None, cache_to = None, timeout = None, retries = 3):
    """
    Bandwidth authority heuristics for the given time range, sorted oldest to
    newest.

    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param str cache_to: directory to cache archives into, if an archive is
      available here it is not downloaded
    :param int timeout: timeout for downloading each individual archive when
      the connection becomes idle, no timeout applied if **None**
    :param int retries: maximum attempts to impose on a per-archive basis

    :returns: **iterator** of
      :class:`~stem.descriptor.bandwidth_file.BandwidthFile`
      for the given time range

    :raises: :class:`~stem.DownloadFailed` if the download fails
    """

    for f in self.files('bandwidth-file', start, end):
      for desc in f.read(cache_to, 'bandwidth-file', start, end, timeout = timeout, retries = retries):
        yield desc

  def get_exit_lists(self, start = None, end = None, cache_to = None, timeout = None, retries = 3):
    """
    `TorDNSEL exit lists <https://www.torproject.org/projects/tordnsel.html.en>`_
    for the given time range, sorted oldest to newest.

    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with
    :param str cache_to: directory to cache archives into, if an archive is
      available here it is not downloaded
    :param int timeout: timeout for downloading each individual archive when
      the connection becomes idle, no timeout applied if **None**
    :param int retries: maximum attempts to impose on a per-archive basis

    :returns: **iterator** of
      :class:`~stem.descriptor.tordnsel.TorDNSEL`
      for the given time range

    :raises: :class:`~stem.DownloadFailed` if the download fails
    """

    for f in self.files('tordnsel', start, end):
      for desc in f.read(cache_to, 'tordnsel', start, end, timeout = timeout, retries = retries):
        yield desc

  def index(self, compression = 'best'):
    """
    Provides the archives available in CollecTor.

    :param descriptor.Compression compression: compression type to
      download from, if undefined we'll use the best decompression available

    :returns: **dict** with the archive contents

    :raises:
      If unable to retrieve the index this provides...

        * **ValueError** if json is malformed
        * **IOError** if unable to decompress
        * :class:`~stem.DownloadFailed` if the download fails
    """

    if not self._cached_index or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
      if compression == 'best':
        for option in (Compression.LZMA, Compression.BZ2, Compression.GZIP, Compression.PLAINTEXT):
          if option.available:
            compression = option
            break
      elif compression is None:
        compression = Compression.PLAINTEXT

      extension = compression.extension if compression != Compression.PLAINTEXT else ''
      url = COLLECTOR_URL + 'index/index.json' + extension
      response = compression.decompress(stem.util.connection.download(url, self.timeout, self.retries))

      self._cached_index = json.loads(stem.util.str_tools._to_unicode(response))
      self._cached_index_at = time.time()

    return self._cached_index

  def files(self, descriptor_type = None, start = None, end = None):
    """
    Provides files CollecTor presently has, sorted oldest to newest.
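
    For example, listing the archives that cover a day of server descriptors
    (a sketch, the dates are illustrative)::

      import datetime

      from stem.descriptor.collector import CollecTor

      for f in CollecTor().files('server-descriptor', start = datetime.datetime(2020, 1, 1), end = datetime.datetime(2020, 1, 2)):
        print('%s (%s bytes)' % (f.path, f.size))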

    :param str descriptor_type: descriptor type or prefix to retrieve
    :param datetime.datetime start: publication time to begin with
    :param datetime.datetime end: publication time to end with

    :returns: **list** of :class:`~stem.descriptor.collector.File`

    :raises:
      If unable to retrieve the index this provides...

        * **ValueError** if json is malformed
        * **IOError** if unable to decompress
        * :class:`~stem.DownloadFailed` if the download fails
    """

    if not self._cached_files or time.time() - self._cached_index_at >= REFRESH_INDEX_RATE:
      self._cached_files = sorted(CollecTor._files(self.index(), []), key = lambda x: x.start if x.start else FUTURE)

    matches = []

    for f in self._cached_files:
      if start and (f.start is None or f.start < start):
        continue  # skip files that start before the requested range
      elif end and (f.end is None or f.end > end):
        continue  # skip files that end after the requested range

      if descriptor_type is None or any([desc_type.startswith(descriptor_type) for desc_type in f.types]):
        matches.append(f)

    return matches

  @staticmethod
  def _files(val, path):
    """
    Recursively provides files within the index.

    :param dict val: index hash
    :param list path: path we've traversed into

    :returns: **list** of :class:`~stem.descriptor.collector.File`
    """

    if not isinstance(val, dict):
      return []  # leaf of the index, no files beneath it

    files = []

    for k, v in val.items():
      if k == 'files':
        for attr in v:
          file_path = '/'.join(path + [attr.get('path')])
          files.append(File(file_path, attr.get('types'), attr.get('size'), attr.get('sha256'), attr.get('first_published'), attr.get('last_published'), attr.get('last_modified')))
      elif k == 'directories':
        for attr in v:
          files.extend(CollecTor._files(attr, path + [attr.get('path')]))

    return files