o
    u]J                     @   s   d Z ddlZddlZddlZddlZzddlZW n ey%   ddlZY nw ddlZ	ddl
Z	ddlZ	ddlZ	ddlZ	dZG dd deZG dd deZG dd	 d	eZG d
d deZG dd deZG dd deZdd Zdd ZG dd deZdS )a  
Utilities for reading descriptors from local directories and archives. This is
mostly done through the :class:`~stem.descriptor.reader.DescriptorReader`
class, which is an iterator for the descriptor data in a series of
destinations. For example...

::

  my_descriptors = [
    '/tmp/server-descriptors-2012-03.tar.bz2',
    '/tmp/archived_descriptors/',
  ]

  # prints the contents of all the descriptor files
  with DescriptorReader(my_descriptors) as reader:
    for descriptor in reader:
      print descriptor

This ignores files that cannot be processed due to read errors or unparsable
content. To be notified of skipped files you can register a listener with
:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`.

The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last
modified timestamps for descriptor files that it has read so it can skip
unchanged files if run again. This listing of processed files can also be
persisted and applied to other
:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the
following prints descriptors as they're changed over the course of a minute,
and picks up where it left off if run again...

::

  reader = DescriptorReader(['/tmp/descriptor_data'])

  try:
    processed_files = load_processed_files('/tmp/used_descriptors')
    reader.set_processed_files(processed_files)
  except: pass # could not load, maybe this is the first run

  start_time = time.time()

  while (time.time() - start_time) < 60:
    # prints any descriptors that have changed since last checked
    with reader:
      for descriptor in reader:
        print descriptor

    time.sleep(1)

  save_processed_files('/tmp/used_descriptors', reader.get_processed_files())

**Module Overview:**

::

  load_processed_files - Loads a listing of processed files
  save_processed_files - Saves a listing of processed files

  DescriptorReader - Iterator for descriptor data on the local file system
    |- get_processed_files - provides the listing of files that we've processed
    |- set_processed_files - sets our tracking of the files we have processed
    |- register_read_listener - adds a listener for when files are read
    |- register_skip_listener - adds a listener that's notified of skipped files
    |- start - begins reading descriptor data
    |- stop - stops reading descriptor data
    |- __enter__ / __exit__ - manages the descriptor reader thread in the context
    +- __iter__ - iterates over descriptor data in unread files

  FileSkipped - Base exception for a file that was skipped
    |- AlreadyRead - We've already read a file with this last modified timestamp
    |- ParsingFailure - Contents can't be parsed as descriptor data
    |- UnrecognizedType - File extension indicates non-descriptor data
    +- ReadFailed - Wraps an error that was raised while reading the file
       +- FileMissing - File does not exist

.. deprecated:: 1.8.0

   This module will likely be removed in Stem 2.0 due to lack of usage. If you
   use this modle please `let me know <https://www.atagar.com/contact/>`_.
    NZDONEc                   @   s   e Zd ZdZdS )FileSkippedz=Base error when we can't provide descriptor data from a file.N)__name__
__module____qualname____doc__ r   r   8/usr/lib/python3/dist-packages/stem/descriptor/reader.pyr   i   s    r   c                           e Zd ZdZ fddZ  ZS )AlreadyReada  
  Already read a file with this 'last modified' timestamp or later.

  :param int last_modified: unix timestamp for when the file was last modified
  :param int last_modified_when_read: unix timestamp for the modification time
    when we last read this file
  c                    s(   t t| d||f  || _|| _d S )Nz[File has already been read since it was last modified. modification time: %s, last read: %s)superr
   __init__last_modifiedlast_modified_when_read)selfr   r   	__class__r   r   r   v   s   
zAlreadyRead.__init__r   r   r   r   r   __classcell__r   r   r   r   r
   m   s    r
   c                       r	   )ParsingFailurezy
  File contents could not be parsed as descriptor data.

  :param ValueError exception: issue that arose when parsing
  c                       t t| | || _d S N)r   r   r   	exception)r   Zparsing_exceptionr   r   r   r         
zParsingFailure.__init__r   r   r   r   r   r   |   s    r   c                       r	   )UnrecognizedTypez
  File doesn't contain descriptor data. This could either be due to its file
  type or because it doesn't conform to a recognizable descriptor type.

  :param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type()
  c                    s   t t| d|  || _d S )NzUnrecognized mime type: %s (%s))r   r   r   	mime_type)r   r   r   r   r   r      s   
zUnrecognizedType.__init__r   r   r   r   r   r          r   c                       r	   )
ReadFailedz
  An IOError occurred while trying to read the file.

  :param IOError exception: issue that arose when reading the file, **None** if
    this arose due to the file not being present
  c                    r   r   )r   r   r   r   )r   Zread_exceptionr   r   r   r      r   zReadFailed.__init__r   r   r   r   r   r      r   r   c                       r	   )FileMissingzFile does not exist.c                    s   t t| d d S )NzFile does not exist)r   r   r   r   r   r   r   r      s   zFileMissing.__init__r   r   r   r   r   r      s    r   c                 C   s   i }t | dJ}| D ]<}tjj| }|sqd|vr$td| |dd\} }t	j
| s8td|  | sBtd| t||| < qW d   |S 1 sTw   Y  |S )a  
  Loads a dictionary of 'path => last modified timestamp' mappings, as
  persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a
  file.

  :param str path: location to load the processed files dictionary from

  :returns: **dict** of 'path (**str**) => last modified unix timestamp
    (**int**)' mappings

  :raises:
    * **IOError** if unable to read the file
    * **TypeError** if unable to parse the file's contents
  rb zMalformed line: %s   z'%s' is not an absolute pathz '%s' is not an integer timestampN)open	readlinesstemutilZ	str_toolsZ_to_unicodestrip	TypeErrorrsplitospathisabsisdigitint)r*   processed_filesZ
input_fileline	timestampr   r   r   load_processed_files   s&   
r1   c              
   C   s   zt j| }t j|st | W n ty# } zt|d}~ww t| d)}t|	 D ]\} }t j
| s@td|  |d| |f  q0W d   dS 1 sUw   Y  dS )a0  
  Persists a dictionary of 'path => last modified timestamp' mappings (as
  provided by the DescriptorReader's
  :func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
  so that they can be loaded later and applied to another
  :class:`~stem.descriptor.reader.DescriptorReader`.

  :param str path: location to save the processed files dictionary to
  :param dict processed_files: 'path => last modified' mappings

  :raises:
    * **IOError** if unable to write to the file
    * **TypeError** if processed_files is of the wrong type
  Nwz&Only absolute paths are acceptable: %sz%s %i
)r)   r*   dirnameexistsmakedirsOSErrorIOErrorr"   listitemsr+   r'   write)r*   r.   Zpath_direxcZoutput_filer0   r   r   r   save_processed_files   s    
"r<   c                   @   s   e Zd ZdZddddejjjfddZdd Z	d	d
 Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( ZdS ))DescriptorReadera  
  Iterator for the descriptor data on the local file system. This can process
  text files, tarball archives (gzip or bzip2), or recurse directories.

  By default this limits the number of descriptors that we'll read ahead before
  waiting for our caller to fetch some of them. This is included to avoid
  unbounded memory usage.

  Our persistence_path argument is a convenient method to persist the listing
  of files we have processed between runs, however it doesn't allow for error
  handling. If you want that then use the
  :func:`~stem.descriptor.reader.load_processed_files` and
  :func:`~stem.descriptor.reader.save_processed_files` functions instead.

  :param str,list target: path or list of paths for files or directories to be read from
  :param bool validate: checks the validity of the descriptor's content if
    **True**, skips these checks otherwise
  :param bool follow_links: determines if we'll follow symlinks when traversing
    directories (requires python 2.6)
  :param int buffer_size: descriptors we'll buffer before waiting for some to
    be read, this is unbounded if zero
  :param str persistence_path: if set we will load and save processed file
    listings from this path, errors are ignored
  :param stem.descriptor.__init__.DocumentHandler document_handler: method in
    which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
  :param dict kwargs: additional arguments for the descriptor constructor
  Fd   Nc           	      K   s   t j|r	|gn|| _tttjj| j| _|| _	|| _
|| _|| _|| _g | _g | _i | _d | _t | _t | _t | _t | _| j  t|| _| jrhzt| j}| | W d S    Y d S d S r   )r$   r%   Z_is_str_targetsr8   mapr)   r*   abspath	_validate_follow_links_persistence_path_document_handler_kwargs_read_listeners_skip_listeners_processed_files_reader_thread	threadingRLock_reader_thread_lock
_iter_lockZEvent_iter_notice_is_stoppedsetqueueQueue_unreturned_descriptorsr1   set_processed_files)	r   targetvalidateZfollow_linksZbuffer_sizeZpersistence_pathdocument_handlerkwargsr.   r   r   r   r     s0   





zDescriptorReader.__init__c                 C   s   t dd t| j D S )a  
    For each file that we have read descriptor data from this provides a
    mapping of the form...

    ::

      absolute path (str) => last modified unix timestamp (int)

    This includes entries set through the
    :func:`~stem.descriptor.reader.DescriptorReader.set_processed_files`
    method. Each run resets this to only the files that were present during
    that run.

    :returns: **dict** with the absolute paths and unix timestamp for the last
      modified times of the files we have processed
    c                 s   s$    | ]\}}t j||fV  qd S r   )r)   r*   rA   ).0kvr   r   r   	<genexpr>H  s   " z7DescriptorReader.get_processed_files.<locals>.<genexpr>)dictr8   rI   r9   r   r   r   r   get_processed_files5  s   z$DescriptorReader.get_processed_filesc                 C   s   t || _dS )ah  
    Sets the listing of the files we have processed. Most often this is used
    with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to
    pre-populate the listing of descriptor files that we have seen.

    :param dict processed_files: mapping of absolute paths (**str**) to unix
      timestamps for the last modified time (**int**)
    N)r^   rI   r   r.   r   r   r   rU   J  s   
z$DescriptorReader.set_processed_filesc                 C      | j | dS )z
    Registers a listener for when files are read. This is executed prior to
    processing files. Listeners are expected to be of the form...

    ::

      my_listener(path)

    :param functor listener: functor to be notified when files are read
    N)rG   appendr   listenerr   r   r   register_read_listenerV  s   z'DescriptorReader.register_read_listenerc                 C   ra   )aC  
    Registers a listener for files that are skipped. This listener is expected
    to be a functor of the form...

    ::

      my_listener(path, exception)

    :param functor listener: functor to be notified of files that are skipped
      to read errors or because they couldn't be parsed as valid descriptor data
    N)rH   rb   rc   r   r   r   register_skip_listenerd  s   z'DescriptorReader.register_skip_listenerc                 C   s
   | j  S )a  
    Provides the number of descriptors that are waiting to be iterated over.
    This is limited to the buffer_size that we were constructed with.

    :returns: **int** for the estimated number of currently enqueued
      descriptors, this is not entirely reliable
    )rT   Zqsizer   r   r   r   get_buffered_descriptor_counts  s   
	z.DescriptorReader.get_buffered_descriptor_countc                 C   sl   | j ) | jrtd| j  tj| jdd| _| jd | j	  W d   dS 1 s/w   Y  dS )zy
    Starts reading our descriptor files.

    :raises: **ValueError** if we're already reading the descriptor files
    z.Already running, you need to call stop() firstzDescriptor reader)rV   nameTN)
rM   rJ   
ValueErrorrP   clearrK   ZThread_read_descriptor_filesZ	setDaemonstartr   r   r   r   rl   ~  s   
"zDescriptorReader.startc              	   C   s   | j Q | j  | j  z	 | j  q tjy   Y nw | j	  d| _| j
rDz|  }t| j
| W n   Y W d   dS W d   dS W d   dS 1 sWw   Y  dS )z4
    Stops further reading of descriptor files.
    TN)rM   rP   rQ   rO   rT   
get_nowaitrR   EmptyrJ   joinrD   r_   r<   r`   r   r   r   stop  s.   



"zDescriptorReader.stopc                 C   s   i }t | j}|rC| j sC|d}tj|s!| |t	  qtj
|r6tj|| jd}| || n| || |rC| j r|| _| j sQ| jt | j  d S )Nr   )followlinks)r8   r?   rP   is_setpopr)   r*   r4   _notify_skip_listenersr   isdirwalkrC   _handle_walker_handle_filerI   rT   putFINISHEDrO   rQ   )r   new_processed_filesZremaining_filesrV   walkerr   r   r   rk     s    


z'DescriptorReader._read_descriptor_filesc              	   c   s    | j H | j s<z| j }|tkrW n.|V  W n tjy.   | j	  | j
  Y nw | j r
W d    d S W d    d S W d    d S 1 sOw   Y  d S r   )rN   rP   rr   rT   rm   rz   rR   rn   rO   waitrj   )r   
descriptorr   r   r   __iter__  s$   



"zDescriptorReader.__iter__c                 C   sF   |D ]\}}}|D ]}|  tj||| | j r  d S q	qd S r   )rx   r)   r*   ro   rP   rr   )r   r|   r{   root_filesfilenamer   r   r   rw     s   
zDescriptorReader._handle_walkerc              
   C   s   zt t|j}| j|}|||< W n ty/ } z| |t| W Y d }~d S d }~ww |rA||krA| |t	|| d S tj
|sId S t|}|d dv r\| || d S tjj|rj| | d S | |t| d S )Nr   )Nz
text/plain)r-   r)   statst_mtimerI   getr6   rt   r   r
   r*   isfile	mimetypesZ
guess_type_handle_descriptor_filer$   r%   systemZ
is_tarfile_handle_archiver   )r   rV   r{   r   Z	last_usedr;   Ztarget_typer   r   r   rx     s(   
zDescriptorReader._handle_filec              
   C   s&  zL|  | t|d7}tjj|f| j| jd| jD ]}| j	 r- W d    W d S | j
| | j  qW d    W d S 1 sEw   Y  W d S  ty^   | |t| Y d S  tyx } z| |t| W Y d }~d S d }~w ty } z| |t| W Y d }~d S d }~ww )Nr   rW   rX   )_notify_read_listenersr"   r$   r~   
parse_filerB   rE   rF   rP   rr   rT   ry   rO   rQ   r'   rt   r   ri   r   r7   r   )r   rV   r   Ztarget_filedescr;   r   r   r   r     s(   
$
&z(DescriptorReader._handle_descriptor_filec                 C   s  d }zz|  | t|}|D ]}| r||}zzHtjj|f| j| j	d| j
D ]5}| j rH W W |   W W |rF|  d S d S |tj| ||j | j| | j  q-W n3 ty~ } z| |t| W Y d }~nd }~w ty } z| |t| W Y d }~nd }~ww W |  q|  w qW n ty } z| |t| W Y d }~nd }~ww W |r|  d S d S |r|  w w )Nr   )r   tarfiler"   r   Zextractfiler$   r~   r   rB   rE   rF   rP   rr   closeZ	_set_pathr)   r*   rA   Z_set_archive_pathrh   rT   ry   rO   rQ   r'   rt   r   ri   r7   r   )r   rV   Ztar_fileZ	tar_entryentryr   r;   r   r   r   r     sR   


$

z DescriptorReader._handle_archivec                 C   s   | j D ]}|| qd S r   )rG   )r   r*   rd   r   r   r   r   4  s   

z'DescriptorReader._notify_read_listenersc                 C   s   | j D ]}||| qd S r   )rH   )r   r*   r   rd   r   r   r   rt   8  s   
z'DescriptorReader._notify_skip_listenersc                 C   s   |    | S r   )rl   r   r   r   r   	__enter__<  s   zDescriptorReader.__enter__c                 C   s   |    d S r   )rp   )r   Z	exit_typevalue	tracebackr   r   r   __exit__@  s   zDescriptorReader.__exit__)r   r   r   r   r$   r~   ZDocumentHandlerZENTRIESr   r_   rU   re   rf   rg   rl   rp   rk   r   rw   rx   r   r   r   rt   r   r   r   r   r   r   r=      s(    %	%%r=   )r   r   r)   r   rK   rR   ImportErrorrS   Zstem.descriptorr$   Zstem.prereqZ	stem.utilZstem.util.str_toolsZstem.util.systemrz   	Exceptionr   r
   r   r   r   r   r1   r<   objectr=   r   r   r   r   <module>   s2   Q("