o
    |aoA                     @   s  d dl Z d dlZd dlmZ d dlZd dlmZmZ d dlmZ zd dl	m
Z
mZmZmZmZmZmZ W n eyC   dZdd ZY nw erLd d	lmZ e d
Ze de jZe de jZe de jZG dd dZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZ G dd deZ!G dd deZ"G d d! d!eZ#G d"d# d#eZ$G d$d% d%e#Z%G d&d' d'e#Z&G d(d) d)eZ'G d*d+ d+eZ(G d,d- d-eZ)d.eee*e+f  d/ee fd0d1Z,d2d3 Z-e-d4d5 Z.e-d6d7 Z/dS )8    N)ReferenceType)resolve_ref_strI)BufferingIterator)OptionalcastTYPE_CHECKINGIterableUnionDictCallableFc                 C   s   |S N )tvr   r   =/usr/lib/python3/dist-packages/debian/_deb822_repro/tokens.py<lambda>   s    r   )Deb822Elementz^\s+$a  
    (?P<space_before>\s*)                # Consume any whitespace before the word
                                         # The space only occurs in practise if the line starts
                                         # with space.

                                         # Optionally consume a word (needed to handle the case
                                         # when there are no words left and someone applies this
                                         # pattern to the remaining text). This is mostly here as
                                         # a fail-safe.

    (?P<word>\S+)                        # Consume the word (if present)
    (?P<trailing_whitespace>\s*)         # Consume trailing whitespace
a  
    # This regex is slightly complicated by the fact that it should work with
    # finditer and comsume the entire value.
    #
    # To do this, we structure the regex so it always starts on a comma (except
    # for the first iteration, where we permit the absence of a comma)

    (?:                                      # Optional space followed by a mandatory comma unless
                                             # it is the start of the "line" (in which case, we
                                             # allow the comma to be omitted)
        ^
        |
        (?:
            (?P<space_before_comma>\s*)      # This space only occurs in practise if the line
                                             # starts with space + comma.
            (?P<comma> ,)
        )
    )

    # From here it is "optional space, maybe a word and then optional space" again.  One reason why
    # all of it is optional is to gracefully cope with trailing commas.
    (?P<space_before_word>\s*)
    (?P<word> [^,\s] (?: [^,]*[^,\s])? )?    # "Words" can contain spaces for comma separated list.
                                             # But surrounding whitespace is ignored
    (?P<space_after_word>\s*)
a  
    ^                                          # Start of line
    (?P<field_name>                            # Capture group for the field name
        [\x21\x22\x24-\x2C\x2F-\x39\x3B-\x7F]  # First character
        [\x21-\x39\x3B-\x7F]*                  # Subsequent characters (if any)
    )
    (?P<separator> : )
    (?P<space_before_value> \s* )
    (?:                                        # Field values are not mandatory on the same line
                                               # as the field name.

      (?P<value>  \S(?:.*\S)?  )               # Values must start and end on a "non-space"
      (?P<space_after_value> \s* )             # We can have optional space after the value
    )?
c                   @   sz   e Zd ZdZdZdd Zdd Zdd Zed	d
 Z	edd Z
edd Zdd Zedd Zejdd Zdd ZdS )Deb822Tokena>  A token is an atomic syntactical element from a deb822 file

    A file is parsed into a series of tokens.  If these tokens are converted to
    text in exactly the same order, you get exactly the same file - bit-for-bit.
    Accordingly ever bit of text in a file must be assigned to exactly one
    Deb822Token.
    )_text_hash_parent_element__weakref__c                 C   s.   |dkrt d|| _d | _d | _|   d S )N zTokens must have content)
ValueErrorr   r   r   _verify_token_textselftextr   r   r   __init__b   s   zDeb822Token.__init__c                 C   s.   | j dkrdj| jj| j dddS | jjS )Nr   z{clsname}('{text}')
z\n)clsnamer   )r   format	__class____name__replacer   r   r   r   __repr__k   s
   

zDeb822Token.__repr__c                 C   sr   d| j v r3d}| jst| trd}|s| jstd| jds$td|r5d| jd d v r7tdd S d S d S )Nr    FTz>Only whitespace, error and comment tokens may contain newlinesz2Tokens containing whitespace must end on a newlinezNComments and error tokens must not contain embedded newlines (only end on one))r   
is_comment
isinstanceDeb822ErrorTokenis_whitespacer   r   endswith)r   is_single_line_tokenr   r   r   r   s   s   

zDeb822Token._verify_token_textc                 C      dS NFr   r&   r   r   r   r,         zDeb822Token.is_whitespacec                 C   r/   r0   r   r&   r   r   r   r)      r1   zDeb822Token.is_commentc                 C      | j S r   r   r&   r   r   r   r      s   zDeb822Token.textc                 C   r2   r   r3   r&   r   r   r   convert_to_text   s   zDeb822Token.convert_to_textc                 C   s
   t | jS r   )r   r   r&   r   r   r   parent_element   s   
zDeb822Token.parent_elementc                 C   s"   |d urt || _d S d | _d S r   )weakrefrefr   )r   
new_parentr   r   r   r5      s   "c                 C   s   || j u r
d | _d S d S r   )r5   r   )r   parentr   r   r   clear_parent_if_parent   s   

z"Deb822Token.clear_parent_if_parentN)r$   
__module____qualname____doc__	__slots__r   r'   r   propertyr,   r)   r   r4   r5   setterr:   r   r   r   r   r   W   s$    	




r   c                   @   s    e Zd ZdZdZedd ZdS )Deb822WhitespaceTokenaV  The token is a kind of whitespace.

    Some whitespace tokens are critical for the format (such as the Deb822ValueContinuationToken,
    spaces that separate words in list separated by spaces or newlines), while other whitespace
    tokens are truly insignificant (space before a newline, space after a comma in a comma
    list, etc.).
    r   c                 C   r/   NTr   r&   r   r   r   r,      r1   z#Deb822WhitespaceToken.is_whitespaceN)r$   r;   r<   r=   r>   r?   r,   r   r   r   r   rA      s
    rA   c                   @      e Zd ZdZdZdS )'Deb822SemanticallySignificantWhiteSpacezZWhitespace that (if removed) would change the meaning of the file (or cause syntax errors)r   Nr$   r;   r<   r=   r>   r   r   r   r   rD          rD   c                       s$   e Zd ZdZdZ fddZ  ZS )Deb822NewlineAfterValueTokenzyThe newline after a value token.

    If not followed by a continuation token, this also marks the end of the field.
    r   c                       t  d d S Nr    superr   r&   r#   r   r   r      s   z%Deb822NewlineAfterValueToken.__init__r$   r;   r<   r=   r>   r   __classcell__r   r   rL   r   rG      s    rG   c                   @   rC   )Deb822ValueContinuationTokenzWThe whitespace denoting a value spanning an additional line (the first space on a line)r   NrE   r   r   r   r   rO      rF   rO   c                   @   rC   )Deb822SpaceSeparatorTokenz@Whitespace between values in a space list (e.g. "Architectures")r   NrE   r   r   r   r   rP      rF   rP   c                   @   rC   )r+   z)Token that represents a syntactical errorr   NrE   r   r   r   r   r+      rF   r+   c                   @   s   e Zd ZdZedd ZdS )Deb822CommentTokenr   c                 C   r/   rB   r   r&   r   r   r   r)      r1   zDeb822CommentToken.is_commentN)r$   r;   r<   r>   r?   r)   r   r   r   r   rQ      s    rQ   c                       s,   e Zd ZdZ fddZedd Z  ZS )Deb822FieldNameTokenr   c                    s(   t |tstt|}t | d S r   )r*   r   sysinternrK   r   r   rL   r   r   r      s   
zDeb822FieldNameToken.__init__c                 C   s   t d| jS )Nr   )r   r   r&   r   r   r   r      s   zDeb822FieldNameToken.text)r$   r;   r<   r>   r   r?   r   rN   r   r   rL   r   rR      s
    rR   c                   @      e Zd ZdZdS )Deb822SeparatorTokenr   Nr$   r;   r<   r>   r   r   r   r   rV          rV   c                       s"   e Zd ZdZd fddZ  ZS )Deb822FieldSeparatorTokenr   returnNc                    rH   )N:rJ   r&   rL   r   r   r         z"Deb822FieldSeparatorToken.__init__rZ   N)r$   r;   r<   r>   r   rN   r   r   rL   r   rY      s    rY   c                       &   e Zd ZdZdZd fddZ  ZS )Deb822CommaTokenzZUsed by the comma-separated list value parsers to denote a comma between two value tokens.r   rZ   Nc                    rH   )N,rJ   r&   rL   r   r   r     r\   zDeb822CommaToken.__init__r]   rM   r   r   rL   r   r_         r_   c                       r^   )Deb822PipeTokenz-Used in some dependency fields as OR relationr   rZ   Nc                    rH   )N|rJ   r&   rL   r   r   r     r\   zDeb822PipeToken.__init__r]   rM   r   r   rL   r   rb     ra   rb   c                   @   rC   )Deb822ValueTokenzWA field value can be split into multi "Deb822ValueToken"s (as well as separator tokens)r   NrE   r   r   r   r   rd     rF   rd   c                   @   rC   )Deb822ValueDependencyTokenzZPackage name, architecture name, a version number, or a profile name in a dependency fieldr   NrE   r   r   r   r   re     rF   re   c                   @   rU   )1Deb822ValueDependencyVersionRelationOperatorTokenr   NrW   r   r   r   r   rf   !  rX   rf   sequencerZ   c                 c   sv   d}i }dt tttf  dt t fdd}t|| }t|ddD ]\}}|dsJ| dur<td	t| d
 |dkrJtdt| d t	
|rn|rSd}t|dd }|re|d|7 }tt|V  q"|d dkrzt|V  q"|d dv r|durt|d }|dr|dd }d}	n|dd }d}	t|V  t|V  |	rt V  nt|V  q"t
|}
|
r3|
 \}}}}}||}d}	|du s|dkr|r|| n|}d}|r|d}	|	r|dd }|du rt|}t|}|||< ~t|V  t V  |rtt|V  |r t|V  |r+tt|V  |	r2t V  q"t|V  q"dS )zhTokenize a deb822 file

    :param sequence: An iterable of lines (a file open for reading will do)
    NsrZ   c                 s   s*    | D ]}t |tr|d}|V  qd S )Nzutf-8)r*   bytesdecode)rh   xr   r   r   _as_str/  s   

z%tokenize_deb822_file.<locals>._as_str   )startr    zInvalid line iterator: Line zD did not end on a newline and it is not the last line in the stream!r   zLine z[ was completely empty.  The tokenizer expects whitespace (including newlines) to be presentc                 S   s   t | d uS r   )_RE_WHITESPACE_LINEmatch)rk   r   r   r   r   H  s    z&tokenize_deb822_file.<locals>.<lambda>r   #) 	r(   TF)r	   r
   strri   r   	enumerater-   peekr   ro   rp   list	takewhilejoinrA   rS   rT   rQ   rO   rd   rG   r+   _RE_FIELD_LINEgroupsgetr   rR   rY   )rg   current_field_namefield_name_cacherl   text_streamnolinerleadingemit_newline_tokenfield_line_match
field_name_space_beforevaluespace_afterr   r   r   tokenize_deb822_file&  s   "













r   c                    s    fdd}|S )Nc                 3   s    d}| j ddD ]L}t| rJ |drt|V  q	d}d }|s-|d }|dd  }d}|dr<d}|d d }|d urHtt|V   |E d H  |rUt	 V  q	d S )	NT)keependsrq   Fr   rm   r    r(   )

splitlinesro   rp   
startswithrQ   r-   rO   rS   rT   rG   )r   
first_liner   has_newlinecontinuation_line_markerfuncr   r   impl  s.   


z#_value_line_tokenizer.<locals>.implr   )r   r   r   r   r   _value_line_tokenizer  s   r   c                 c   sb    d| vsJ t | D ]"}| \}}}|rtt|V  t|V  |r.tt|V  qd S rI   )"_RE_WHITESPACE_SEPARATED_WORD_LISTfinditerr{   rP   rS   rT   rd   )r   rp   r   wordr   r   r   r   whitespace_split_tokenizer  s   
r   c                 c   s    d| vsJ t | D ]6}| \}}}}}|r!tt|V  |r't V  |r1tt|V  |r8t|V  |rBtt|V  qd S rI   )_RE_COMMA_SEPARATED_WORD_LISTr   r{   rA   rS   rT   r_   rd   )r   rp   space_before_commacommaspace_before_wordr   space_after_wordr   r   r   comma_split_tokenizer  s    
r   )0rerS   r6   r   debian._utilr   r   debian._deb822_repro._utilr   typingr   r   r   r	   r
   r   r   ImportErrordebian._deb822_repro.parsingr   compilero   VERBOSEr   r   rz   r   rA   rD   rG   rO   rP   r+   rQ   rR   rV   rY   r_   rb   rd   re   rf   rt   ri   r   r   r   r   r   r   r   r   <module>   s\    (
$N
		"r
