o
    V=^,                    @   s   d dl mZmZmZ d dlmZ d dlmZm	Z	 d dl
mZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ eeZedkraeZne	ZG dd deZdS )    )absolute_importdivisionunicode_literals)unichr)dequeOrderedDict)version_info   )spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypestagTokenTypes)replacementCharacters)HTMLInputStream)Trie)      c                       sd  e Zd ZdZd fdd	Zdd Zdd Zdd
dZdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Zd<d= Z d>d? Z!d@dA Z"dBdC Z#dDdE Z$dFdG Z%dHdI Z&dJdK Z'dLdM Z(dNdO Z)dPdQ Z*dRdS Z+dTdU Z,dVdW Z-dXdY Z.dZd[ Z/d\d] Z0d^d_ Z1d`da Z2dbdc Z3ddde Z4dfdg Z5dhdi Z6djdk Z7dldm Z8dndo Z9dpdq Z:drds Z;dtdu Z<dvdw Z=dxdy Z>dzd{ Z?d|d} Z@d~d ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZL  ZMS )HTMLTokenizera	   This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    Nc                    sJ   t |fi || _|| _d| _g | _| j| _d| _d | _t	t
|   d S NF)r   streamparserZ
escapeFlagZlastFourChars	dataStatestateescapecurrentTokensuperr   __init__)selfr   r   kwargs	__class__ 5/usr/lib/python3/dist-packages/html5lib/_tokenizer.pyr!   (   s   zHTMLTokenizer.__init__c                 c   sf    t g | _|  r1| jjrtd | jjddV  | jjs| jr+| j V  | js"|  s
dS dS )z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrorr   typedataN)r   
tokenQueuer   r   errorsr   poppopleftr"   r&   r&   r'   __iter__7   s   
zHTMLTokenizer.__iter__c           	      C   s  t }d}|r
t}d}g }| j }||v r+|tur+|| | j }||v r+|tustd||}|tv rJt| }| j	t
d dd|id nd|  krTd	ksYn |d
krjd}| j	t
d dd|id nfd|  krtdksn d|  krdksn d|  krdksn d|  krdksn |tg dv r| j	t
d dd|id zt|}W n ty   |d }td|d? B td|d@ B  }Y nw |dkr| j	t
d dd | j| |S )zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
       r(   z$illegal-codepoint-for-numeric-entity	charAsIntr*   r+   Zdatavarsi   i      �r	                  i  i  )#   i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i r7   i   i   i  ;z numeric-entity-without-semicolonr)   )r   r   r   charr   appendintjoinr   r,   r   	frozensetchr
ValueErrorunget)	r"   ZisHexallowedradix	charStackcr5   r@   vr&   r&   r'   consumeNumberEntityG   sr   




$z!HTMLTokenizer.consumeNumberEntityFc           	      C   s  d}| j  g}|d tv s!|d tddfv s!|d ur+||d kr+| j |d  n|d dkrd}|| j   |d dv rKd}|| j   |rS|d tv s[|si|d tv ri| j |d  | |}n| j	t
d	 d
d | j |  dd| }n|d turtd|sn|| j   |d tusztd|d d }t|}W n ty   d }Y nw |d ur|d dkr| j	t
d	 dd |d dkr|r|| tv s|| tv s|| dkr| j |  dd| }n2t| }| j |  |d||d  7 }n| j	t
d	 dd | j |  dd| }|rC| jd d d  |7  < d S |tv rKd}nd}| j	t
| |d d S )N&r   <#F)xXTr(   zexpected-numeric-entityr)   r4   r?   znamed-entity-without-semicolon=zexpected-named-entityr+   r	   SpaceCharacters
Characters)r   r@   r
   r   rG   rA   r   r   rM   r,   r   r.   rC   entitiesTrieZhas_keys_with_prefixZlongest_prefixlenKeyErrorr   r   r   )	r"   allowedCharfromAttributeoutputrJ   hexZ
entityNameZentityLengthZ	tokenTyper&   r&   r'   consumeEntity   s   

	




zHTMLTokenizer.consumeEntityc                 C   s   | j |dd dS )zIThis method replaces the need for "entityInAttributeValueState".
        T)rZ   r[   N)r^   )r"   rZ   r&   r&   r'   processEntityInAttribute   s   z&HTMLTokenizer.processEntityInAttributec                 C   s   | j }|d tv r^|d t|d< |d td kr8|d }t|}t|t|kr4||ddd  ||d< |d td kr^|d rO| j	td d	d
 |d r^| j	td dd
 | j	| | j
| _dS )zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r*   nameStartTagr+   NrQ   EndTagr(   zattributes-in-end-tagr)   selfClosingzself-closing-flag-on-end-tag)r   r   	translater   r   attributeMaprX   updater,   rA   r   r   )r"   tokenrawr+   r&   r&   r'   emitCurrentToken   s(   zHTMLTokenizer.emitCurrentTokenc                 C   s   | j  }|dkr| j| _d
S |dkr| j| _d
S |dkr5| jtd dd | jtd dd d
S |tu r;dS |t	v rS| jtd	 || j 
t	d
 d d
S | j 
d}| jtd || d d
S )NrN   rO    r(   invalid-codepointr)   rV   FrU   TrN   rO   rj   )r   r@   entityDataStater   tagOpenStater,   rA   r   r   r
   
charsUntilr"   r+   charsr&   r&   r'   r      s6   
	zHTMLTokenizer.dataStatec                 C      |    | j| _dS NT)r^   r   r   r0   r&   r&   r'   rm        zHTMLTokenizer.entityDataStatec                 C   s   | j  }|dkr| j| _dS |dkr| j| _dS |tkrdS |dkr;| jtd dd | jtd d	d dS |t	v rS| jtd
 || j 
t	d d dS | j 
d}| jtd || d dS )NrN   rO   Frj   r(   rk   r)   rV   r8   rU   Trl   )r   r@   characterReferenceInRcdatar   rcdataLessThanSignStater   r,   rA   r   r
   ro   rp   r&   r&   r'   rcdataState"  s6   
	zHTMLTokenizer.rcdataStatec                 C   rr   rs   )r^   rw   r   r0   r&   r&   r'   ru   ?  rt   z(HTMLTokenizer.characterReferenceInRcdatac                 C      | j  }|dkr| j| _d
S |dkr+| jtd dd | jtd dd d
S |tkr1dS | j d	}| jtd || d d
S NrO   rj   r(   rk   r)   rV   r8   F)rO   rj   T)	r   r@   rawtextLessThanSignStater   r,   rA   r   r   ro   rp   r&   r&   r'   rawtextStateD  &   
	zHTMLTokenizer.rawtextStatec                 C   rx   ry   )	r   r@   scriptDataLessThanSignStater   r,   rA   r   r   ro   rp   r&   r&   r'   scriptDataStateV  r|   zHTMLTokenizer.scriptDataStatec                 C   st   | j  }|tkrdS |dkr'| jtd dd | jtd dd dS | jtd || j d d dS )	NFrj   r(   rk   r)   rV   r8   T)r   r@   r   r,   rA   r   ro   r"   r+   r&   r&   r'   plaintextStateh  s   
zHTMLTokenizer.plaintextStatec                 C   s  | j  }|dkr| j| _dS |dkr| j| _dS |tv r.td |g ddd| _| j| _dS |dkrN| j	
td dd	 | j	
td
 dd	 | j| _dS |dkri| j	
td dd	 | j | | j| _dS | j	
td dd	 | j	
td
 dd	 | j | | j| _dS )N!/ra   F)r*   r`   r+   rc   ZselfClosingAcknowledged>r(   z'expected-tag-name-but-got-right-bracketr)   rV   z<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerO   T)r   r@   markupDeclarationOpenStater   closeTagOpenStater   r   r   tagNameStater,   rA   r   rG   bogusCommentStater   r&   r&   r'   rn   w  sH   
zHTMLTokenizer.tagOpenStatec                 C   s   | j  }|tv rtd |g dd| _| j| _dS |dkr.| jtd dd | j	| _dS |t
u rN| jtd dd | jtd	 d
d | j	| _dS | jtd dd|id | j | | j| _dS )Nrb   Fr*   r`   r+   rc   r   r(   z*expected-closing-tag-but-got-right-bracketr)   z expected-closing-tag-but-got-eofrV   </z!expected-closing-tag-but-got-charr+   r6   T)r   r@   r   r   r   r   r   r,   rA   r   r   rG   r   r   r&   r&   r'   r     s6   
zHTMLTokenizer.closeTagOpenStatec                 C   s   | j  }|tv r| j| _d
S |dkr|   d
S |tu r.| jt	d dd | j
| _d
S |dkr8| j| _d
S |dkrR| jt	d dd | jd  d	7  < d
S | jd  |7  < d
S )Nr   r(   zeof-in-tag-namer)   r   rj   rk   r`   r8   T)r   r@   r
   beforeAttributeNameStater   ri   r   r,   rA   r   r   selfClosingStartTagStater   r   r&   r&   r'   r     s0   
	zHTMLTokenizer.tagNameStatec                 C   R   | j  }|dkrd| _| j| _dS | jtd dd | j | | j	| _dS Nr   r4   rV   rO   r)   T)
r   r@   temporaryBufferrcdataEndTagOpenStater   r,   rA   r   rG   rw   r   r&   r&   r'   rv        
z%HTMLTokenizer.rcdataLessThanSignStatec                 C   Z   | j  }|tv r|  j|7  _| j| _dS | jtd dd | j 	| | j
| _dS NrV   r   r)   T)r   r@   r   r   rcdataEndTagNameStater   r,   rA   r   rG   rw   r   r&   r&   r'   r        
z#HTMLTokenizer.rcdataEndTagOpenStatec                 C     | j o| j d  | j k}| j }|tv r+|r+td | jg dd| _ | j| _d
S |dkrB|rBtd | jg dd| _ | j	| _d
S |dkr]|r]td | jg dd| _ | 
  | j| _d
S |tv rj|  j|7  _d
S | jtd d| j d	 | j| | j| _d
S Nr`   rb   Fr   r   r   rV   r   r)   T)r   lowerr   r   r@   r
   r   r   r   r   ri   r   r   r,   rA   rG   rw   r"   Zappropriater+   r&   r&   r'   r     B   
z#HTMLTokenizer.rcdataEndTagNameStatec                 C   r   r   )
r   r@   r   rawtextEndTagOpenStater   r,   rA   r   rG   r{   r   r&   r&   r'   rz     r   z&HTMLTokenizer.rawtextLessThanSignStatec                 C   r   r   )r   r@   r   r   rawtextEndTagNameStater   r,   rA   r   rG   r{   r   r&   r&   r'   r     r   z$HTMLTokenizer.rawtextEndTagOpenStatec                 C   r   r   )r   r   r   r   r@   r
   r   r   r   r   ri   r   r   r,   rA   rG   r{   r   r&   r&   r'   r     r   z$HTMLTokenizer.rawtextEndTagNameStatec                 C   s|   | j  }|dkrd| _| j| _dS |dkr'| jtd dd | j| _dS | jtd dd | j 	| | j
| _dS )	Nr   r4   r   rV   z<!r)   rO   T)r   r@   r   scriptDataEndTagOpenStater   r,   rA   r   scriptDataEscapeStartStaterG   r~   r   r&   r&   r'   r}   ,  s   
z)HTMLTokenizer.scriptDataLessThanSignStatec                 C   r   r   )r   r@   r   r   scriptDataEndTagNameStater   r,   rA   r   rG   r~   r   r&   r&   r'   r   :  r   z'HTMLTokenizer.scriptDataEndTagOpenStatec                 C   r   r   )r   r   r   r   r@   r
   r   r   r   r   ri   r   r   r,   rA   rG   r~   r   r&   r&   r'   r   E  r   z'HTMLTokenizer.scriptDataEndTagNameStatec                 C   L   | j  }|dkr| jtd dd | j| _dS | j | | j| _dS N-rV   r)   T)	r   r@   r,   rA   r   scriptDataEscapeStartDashStater   rG   r~   r   r&   r&   r'   r   a     
z(HTMLTokenizer.scriptDataEscapeStartStatec                 C   r   r   )	r   r@   r,   rA   r   scriptDataEscapedDashDashStater   rG   r~   r   r&   r&   r'   r   k  r   z,HTMLTokenizer.scriptDataEscapeStartDashStatec                 C   s   | j  }|dkr| jtd dd | j| _d
S |dkr$| j| _d
S |dkr@| jtd dd | jtd dd d
S |tkrJ| j	| _d
S | j 
d	}| jtd || d d
S )Nr   rV   r)   rO   rj   r(   rk   r8   )rO   r   rj   T)r   r@   r,   rA   r   scriptDataEscapedDashStater   "scriptDataEscapedLessThanSignStater   r   ro   rp   r&   r&   r'   scriptDataEscapedStateu  s0   
z$HTMLTokenizer.scriptDataEscapedStatec                 C   s   | j  }|dkr| jtd dd | j| _d	S |dkr$| j| _d	S |dkrD| jtd dd | jtd dd | j| _d	S |t	krN| j
| _d	S | jtd |d | j| _d	S )
Nr   rV   r)   rO   rj   r(   rk   r8   T)r   r@   r,   rA   r   r   r   r   r   r   r   r   r&   r&   r'   r     s.   
z(HTMLTokenizer.scriptDataEscapedDashStatec                 C   s   | j  }|dkr| jtd dd d
S |dkr | j| _d
S |dkr5| jtd dd | j| _d
S |dkrU| jtd dd | jtd d	d | j| _d
S |t	kr_| j
| _d
S | jtd |d | j| _d
S )Nr   rV   r)   rO   r   rj   r(   rk   r8   T)r   r@   r,   rA   r   r   r   r~   r   r   r   r   r&   r&   r'   r     s4   
z,HTMLTokenizer.scriptDataEscapedDashDashStatec                 C   s   | j  }|dkrd| _| j| _dS |tv r,| jtd d| d || _| j	| _dS | jtd dd | j 
| | j| _dS r   )r   r@   r    scriptDataEscapedEndTagOpenStater   r   r,   rA   r    scriptDataDoubleEscapeStartStaterG   r   r   r&   r&   r'   r     s   
	z0HTMLTokenizer.scriptDataEscapedLessThanSignStatec                 C   sR   | j  }|tv r|| _| j| _dS | jtd dd | j 	| | j
| _dS r   )r   r@   r   r    scriptDataEscapedEndTagNameStater   r,   rA   r   rG   r   r   r&   r&   r'   r     r   z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatec                 C   r   r   )r   r   r   r   r@   r
   r   r   r   r   ri   r   r   r,   rA   rG   r   r   r&   r&   r'   r     r   z.HTMLTokenizer.scriptDataEscapedEndTagNameStatec                 C      | j  }|ttdB v r+| jtd |d | j dkr%| j	| _
dS | j| _
dS |tv rC| jtd |d |  j|7  _dS | j | | j| _
dS N)r   r   rV   r)   ZscriptT)r   r@   r
   rD   r,   rA   r   r   r   scriptDataDoubleEscapedStater   r   r   rG   r   r&   r&   r'   r        
	z.HTMLTokenizer.scriptDataDoubleEscapeStartStatec                 C   s   | j  }|dkr| jtd dd | j| _d
S |dkr/| jtd dd | j| _d
S |dkrK| jtd dd | jtd dd d
S |tkr`| jtd d	d | j	| _d
S | jtd |d d
S Nr   rV   r)   rO   rj   r(   rk   r8   eof-in-script-in-scriptT)
r   r@   r,   rA   r    scriptDataDoubleEscapedDashStater   (scriptDataDoubleEscapedLessThanSignStater   r   r   r&   r&   r'   r     s2   
z*HTMLTokenizer.scriptDataDoubleEscapedStatec                 C   s   | j  }|dkr| jtd dd | j| _d
S |dkr/| jtd dd | j| _d
S |dkrO| jtd dd | jtd dd | j| _d
S |t	krd| jtd d	d | j
| _d
S | jtd |d | j| _d
S r   )r   r@   r,   rA   r   $scriptDataDoubleEscapedDashDashStater   r   r   r   r   r   r&   r&   r'   r     s6   
z.HTMLTokenizer.scriptDataDoubleEscapedDashStatec                 C   s  | j  }|dkr| jtd dd dS |dkr+| jtd dd | j| _dS |dkr@| jtd dd | j| _dS |dkr`| jtd dd | jtd d	d | j| _dS |t	kru| jtd d
d | j
| _dS | jtd |d | j| _dS )Nr   rV   r)   rO   r   rj   r(   rk   r8   r   T)r   r@   r,   rA   r   r   r   r~   r   r   r   r   r&   r&   r'   r   %  s<   
z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatec                 C   sR   | j  }|dkr| jtd dd d| _| j| _dS | j | | j	| _dS )Nr   rV   r)   r4   T)
r   r@   r,   rA   r   r   scriptDataDoubleEscapeEndStater   rG   r   r   r&   r&   r'   r   >  s   
z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatec                 C   r   r   )r   r@   r
   rD   r,   rA   r   r   r   r   r   r   r   rG   r   r&   r&   r'   r   I  r   z,HTMLTokenizer.scriptDataDoubleEscapeEndStatec                 C   s:  | j  }|tv r| j td dS |tv r&| jd |dg | j| _dS |dkr0| 	  dS |dkr:| j
| _dS |dv rY| jtd dd	 | jd |dg | j| _dS |d
krx| jtd dd	 | jd ddg | j| _dS |tu r| jtd dd	 | j| _dS | jd |dg | j| _dS )NTr+   r4   r   r   )'"rT   rO   r(   #invalid-character-in-attribute-namer)   rj   rk   r8   z#expected-attribute-name-but-got-eof)r   r@   r
   ro   r   r   rA   attributeNameStater   ri   r   r,   r   r   r   r   r&   r&   r'   r   Y  sJ   
z&HTMLTokenizer.beforeAttributeNameStatec                 C   s  | j  }d}d}|dkr| j| _n|tv r-| jd d d  || j td 7  < d}nz|dkr4d}ns|tv r=| j| _nj|dkrF| j	| _na|d	kre| j
td
 dd | jd d d  d7  < d}nB|dv r| j
td
 dd | jd d d  |7  < d}n#|tu r| j
td
 dd | j| _n| jd d d  |7  < d}|r| jd d d t| jd d d< | jd d d D ]\}}| jd d d |kr| j
td
 dd  nq|r|   dS )NTFrT   r+   rQ   r   r   r   rj   r(   rk   r)   r8   r   r   rO   r   zeof-in-attribute-namezduplicate-attribute)r   r@   beforeAttributeValueStater   r   r   ro   r
   afterAttributeNameStater   r,   rA   r   r   r   rd   r   ri   )r"   r+   ZleavingThisStateZ	emitTokenr`   _r&   r&   r'   r   w  s`   




z HTMLTokenizer.attributeNameStatec                 C   sN  | j  }|tv r| j td dS |dkr| j| _dS |dkr&|   dS |tv r:| jd 	|dg | j
| _dS |dkrD| j| _dS |dkrc| j	td d	d
 | jd 	ddg | j
| _dS |dv r| j	td dd
 | jd 	|dg | j
| _dS |tu r| j	td dd
 | j| _dS | jd 	|dg | j
| _dS )NTrT   r   r+   r4   r   rj   r(   rk   r)   r8   r   z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r   r@   r
   ro   r   r   ri   r   r   rA   r   r   r,   r   r   r   r   r&   r&   r'   r     sP   
z%HTMLTokenizer.afterAttributeNameStatec                 C   sn  | j  }|tv r| j td dS |dkr| j| _dS |dkr,| j| _| j | dS |dkr6| j| _dS |dkrK| j	
td dd |   dS |d	krm| j	
td d
d | jd d d  d7  < | j| _dS |dv r| j	
td dd | jd d d  |7  < | j| _dS |tu r| j	
td dd | j| _dS | jd d d  |7  < | j| _dS )NTr   rN   r   r   r(   z.expected-attribute-value-but-got-right-bracketr)   rj   rk   r+   rQ   r	   r8   )rT   rO   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)r   r@   r
   ro   attributeValueDoubleQuotedStater   attributeValueUnQuotedStaterG   attributeValueSingleQuotedStater,   rA   r   ri   r   r   r   r   r&   r&   r'   r     sV   
z'HTMLTokenizer.beforeAttributeValueStatec                 C      | j  }|dkr| j| _dS |dkr| d dS |dkr8| jtd dd | jd d d	  d
7  < dS |t	u rM| jtd dd | j
| _dS | jd d d	  || j d 7  < dS )Nr   rN   rj   r(   rk   r)   r+   rQ   r	   r8   z#eof-in-attribute-value-double-quote)r   rN   rj   Tr   r@   afterAttributeValueStater   r_   r,   rA   r   r   r   r   ro   r   r&   r&   r'   r     .   


z-HTMLTokenizer.attributeValueDoubleQuotedStatec                 C   r   )Nr   rN   rj   r(   rk   r)   r+   rQ   r	   r8   z#eof-in-attribute-value-single-quote)r   rN   rj   Tr   r   r&   r&   r'   r     r   z-HTMLTokenizer.attributeValueSingleQuotedStatec                 C   s  | j  }|tv r| j| _dS |dkr| d dS |dkr$|   dS |dv rB| jt	d dd | j
d d d	  |7  < dS |d
kr`| jt	d dd | j
d d d	  d7  < dS |tu ru| jt	d dd | j| _dS | j
d d d	  || j tdtB  7  < dS )NrN   r   )r   r   rT   rO   r   r(   z0unexpected-character-in-unquoted-attribute-valuer)   r+   rQ   r	   rj   rk   r8   z eof-in-attribute-value-no-quotes)rN   r   r   r   rT   rO   r   rj   T)r   r@   r
   r   r   r_   ri   r,   rA   r   r   r   r   ro   rD   r   r&   r&   r'   r     s@   



z)HTMLTokenizer.attributeValueUnQuotedStatec                 C   s   | j  }|tv r| j| _dS |dkr|   dS |dkr#| j| _dS |tu r>| j	t
d dd | j | | j| _dS | j	t
d dd | j | | j| _dS )Nr   r   r(   z$unexpected-EOF-after-attribute-valuer)   z*unexpected-character-after-attribute-valueT)r   r@   r
   r   r   ri   r   r   r,   rA   r   rG   r   r   r&   r&   r'   r   .  s.   
z&HTMLTokenizer.afterAttributeValueStatec                 C   s   | j  }|dkrd| jd< |   dS |tu r/| jtd dd | j | | j	| _
dS | jtd dd | j | | j| _
dS )Nr   Trc   r(   z#unexpected-EOF-after-solidus-in-tagr)   z)unexpected-character-after-solidus-in-tag)r   r@   r   ri   r   r,   rA   r   rG   r   r   r   r   r&   r&   r'   r   B  s$   

z&HTMLTokenizer.selfClosingStartTagStatec                 C   sD   | j d}|dd}| jtd |d | j   | j| _dS )Nr   rj   r8   Commentr)   T)	r   ro   replacer,   rA   r   r@   r   r   r   r&   r&   r'   r   T  s   
zHTMLTokenizer.bogusCommentStatec                 C   sn  | j  g}|d dkr)|| j   |d dkr(td dd| _| j| _dS nq|d dv r\d}dD ]}|| j   |d |vrGd	} nq3|r[td
 dd d dd| _| j| _dS n>|d dkr| jd ur| jj	j
r| jj	j
d j| jj	jkrd}dD ]}|| j   |d |krd	} nq}|r| j| _dS | jtd dd |r| j |  |s| j| _dS )NrQ   r   r   r4   r)   T)dD))oOrK   CtTyYpPeEFZDoctype)r*   r`   publicIdsystemIdcorrect[)r   r   Ar   r   r   r(   zexpected-dashes-or-doctype)r   r@   rA   r   r   commentStartStater   doctypeStater   ZtreeZopenElements	namespaceZdefaultNamespacecdataSectionStater,   rG   r.   r   )r"   rJ   matchedexpectedr&   r&   r'   r   c  sd   z(HTMLTokenizer.markupDeclarationOpenStatec                 C   s   | j  }|dkr| j| _dS |dkr)| jtd dd | jd  d7  < dS |dkrE| jtd d	d | j| j | j| _dS |t	u ra| jtd d
d | j| j | j| _dS | jd  |7  < | j
| _dS )Nr   rj   r(   rk   r)   r+   r8   r   incorrect-commenteof-in-commentT)r   r@   commentStartDashStater   r,   rA   r   r   r   r   commentStater   r&   r&   r'   r     s6   
	zHTMLTokenizer.commentStartStatec                 C   s   | j  }|dkr| j| _dS |dkr)| jtd dd | jd  d7  < dS |dkrE| jtd d	d | j| j | j| _dS |t	u ra| jtd d
d | j| j | j| _dS | jd  d| 7  < | j
| _dS )Nr   rj   r(   rk   r)   r+      -�r   r   r   T)r   r@   commentEndStater   r,   rA   r   r   r   r   r   r   r&   r&   r'   r     s6   
	z#HTMLTokenizer.commentStartDashStatec                 C   s   | j  }|dkr| j| _d
S |dkr)| jtd dd | jd  d7  < d
S |tu rE| jtd dd | j| j | j	| _d
S | jd  || j 
d	 7  < d
S )Nr   rj   r(   rk   r)   r+   r8   r   )r   rj   T)r   r@   commentEndDashStater   r,   rA   r   r   r   r   ro   r   r&   r&   r'   r     s*   
	
zHTMLTokenizer.commentStatec                 C   s   | j  }|dkr| j| _d	S |dkr-| jtd dd | jd  d7  < | j| _d	S |t	u rI| jtd dd | j| j | j
| _d	S | jd  d| 7  < | j| _d	S )
Nr   rj   r(   rk   r)   r+   r   zeof-in-comment-end-dashT)r   r@   r   r   r,   rA   r   r   r   r   r   r   r&   r&   r'   r     s*   
	z!HTMLTokenizer.commentEndDashStatec                 C   s6  | j  }|dkr| j| j | j| _dS |dkr4| jtd dd | jd  d7  < | j| _dS |dkrI| jtd d	d | j	| _dS |d
krc| jtd dd | jd  |7  < dS |t
u r| jtd dd | j| j | j| _dS | jtd dd | jd  d| 7  < | j| _dS )Nr   rj   r(   rk   r)   r+   u   --�r   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r   r@   r,   rA   r   r   r   r   r   commentEndBangStater   r   r&   r&   r'   r     sJ   
zHTMLTokenizer.commentEndStatec                 C   s   | j  }|dkr| j| j | j| _dS |dkr)| jd  d7  < | j| _dS |dkrG| jtd dd | jd  d	7  < | j	| _dS |t
u rc| jtd d
d | j| j | j| _dS | jd  d| 7  < | j	| _dS )Nr   r   r+   z--!rj   r(   rk   r)   u   --!�zeof-in-comment-end-bang-stateT)r   r@   r,   rA   r   r   r   r   r   r   r   r   r&   r&   r'   r     s4   
	z!HTMLTokenizer.commentEndBangStatec                 C   s   | j  }|tv r| j| _dS |tu r0| jtd dd d| j	d< | j| j	 | j
| _dS | jtd dd | j | | j| _dS )Nr(   !expected-doctype-name-but-got-eofr)   Fr   zneed-space-after-doctypeT)r   r@   r
   beforeDoctypeNameStater   r   r,   rA   r   r   r   rG   r   r&   r&   r'   r     s$   

zHTMLTokenizer.doctypeStatec                 C   s   | j  }|tv r	 dS |dkr-| jtd dd d| jd< | j| j | j| _dS |dkrG| jtd dd d	| jd
< | j	| _dS |t
u rh| jtd dd d| jd< | j| j | j| _dS || jd
< | j	| _dS )Nr   r(   z+expected-doctype-name-but-got-right-bracketr)   Fr   rj   rk   r8   r`   r   T)r   r@   r
   r,   rA   r   r   r   r   doctypeNameStater   r   r&   r&   r'   r   *  s<   





z$HTMLTokenizer.beforeDoctypeNameStatec                 C   s  | j  }|tv r| jd t| jd< | j| _dS |dkr6| jd t| jd< | j	| j | j
| _dS |dkrT| j	td dd | jd  d7  < | j| _dS |tu r| j	td dd d	| jd
< | jd t| jd< | j	| j | j
| _dS | jd  |7  < dS )Nr`   r   rj   r(   rk   r)   r8   zeof-in-doctype-nameFr   T)r   r@   r
   r   rd   r   afterDoctypeNameStater   r,   rA   r   r   r   r   r   r&   r&   r'   r   D  s8   


zHTMLTokenizer.doctypeNameStatec                 C   sH  | j  }|tv r	 dS |dkr| j| j | j| _dS |tu rDd| jd< | j 	| | jt
d dd | j| j | j| _dS |dv red}d	D ]}| j  }||vr[d} nqL|rd| j| _dS n |d
v rd}dD ]}| j  }||vr|d} nqm|r| j| _dS | j 	| | jt
d dd|id d| jd< | j| _dS )Nr   Fr   r(   eof-in-doctyper)   r   T))uU)bB)lL)iIr   sS)r   r   r   r   )mMz*expected-space-or-right-bracket-in-doctyper+   r6   )r   r@   r
   r,   rA   r   r   r   r   rG   r   afterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r"   r+   r   r   r&   r&   r'   r   ]  s`   
.+
$


z#HTMLTokenizer.afterDoctypeNameStatec                 C      | j  }|tv r| j| _dS |dv r*| jtd dd | j | | j| _dS |t	u rK| jtd dd d| j
d< | j| j
 | j| _dS | j | | j| _dS 	N)r   r   r(   unexpected-char-in-doctyper)   r   Fr   T)r   r@   r
   "beforeDoctypePublicIdentifierStater   r,   rA   r   rG   r   r   r   r   r&   r&   r'   r     ,   


z,HTMLTokenizer.afterDoctypePublicKeywordStatec                 C   s  | j  }|tv r	 dS |dkrd| jd< | j| _dS |dkr*d| jd< | j| _dS |dkrK| jt	d dd d	| jd
< | j| j | j
| _dS |tu rl| jt	d dd d	| jd
< | j| j | j
| _dS | jt	d dd d	| jd
< | j| _dS )Nr   r4   r   r   r   r(   unexpected-end-of-doctyper)   Fr   r   r  T)r   r@   r
   r   (doctypePublicIdentifierDoubleQuotedStater   (doctypePublicIdentifierSingleQuotedStater,   rA   r   r   r   r  r   r&   r&   r'   r    D   





z0HTMLTokenizer.beforeDoctypePublicIdentifierStatec                 C      | j  }|dkr| j| _dS |dkr)| jtd dd | jd  d7  < dS |dkrJ| jtd d	d d
| jd< | j| j | j| _dS |t	u rk| jtd dd d
| jd< | j| j | j| _dS | jd  |7  < dS )Nr   rj   r(   rk   r)   r   r8   r   r  Fr   r   T
r   r@   !afterDoctypePublicIdentifierStater   r,   rA   r   r   r   r   r   r&   r&   r'   r    8   

	
z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatec                 C   r  )Nr   rj   r(   rk   r)   r   r8   r   r  Fr   r   Tr  r   r&   r&   r'   r	    r  z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatec                 C   s  | j  }|tv r| j| _dS |dkr | j| j | j| _dS |dkr:| jt	d dd d| jd< | j
| _dS |dkrT| jt	d dd d| jd< | j| _dS |tu ru| jt	d d	d d
| jd< | j| j | j| _dS | jt	d dd d
| jd< | j| _dS )Nr   r   r(   r  r)   r4   r   r   r   Fr   T)r   r@   r
   -betweenDoctypePublicAndSystemIdentifiersStater   r,   rA   r   r   r   (doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStater   r  r   r&   r&   r'   r    sH   




z/HTMLTokenizer.afterDoctypePublicIdentifierStatec                 C   s   | j  }|tv r	 dS |dkr| j| j | j| _dS |dkr,d| jd< | j| _dS |dkr;d| jd< | j	| _dS |t
kr\| jtd dd d	| jd
< | j| j | j| _dS | jtd dd d	| jd
< | j| _dS )Nr   r   r4   r   r   r(   r   r)   Fr   r  T)r   r@   r
   r,   rA   r   r   r   r  r  r   r   r  r   r&   r&   r'   r    s<   




z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatec                 C   r  r  )r   r@   r
   "beforeDoctypeSystemIdentifierStater   r,   rA   r   rG   r   r   r   r   r&   r&   r'   r   )  r  z,HTMLTokenizer.afterDoctypeSystemKeywordStatec                 C   s  | j  }|tv r	 dS |dkrd| jd< | j| _dS |dkr*d| jd< | j| _dS |dkrK| jt	d dd d	| jd
< | j| j | j
| _dS |tu rl| jt	d dd d	| jd
< | j| j | j
| _dS | jt	d dd d	| jd
< | j| _dS )Nr   r4   r   r   r   r(   r  r)   Fr   r   T)r   r@   r
   r   r  r   r  r,   rA   r   r   r   r  r   r&   r&   r'   r  =  r
  z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatec                 C   r  )Nr   rj   r(   rk   r)   r   r8   r   r  Fr   r   T
r   r@   !afterDoctypeSystemIdentifierStater   r,   rA   r   r   r   r   r   r&   r&   r'   r  Z  r  z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatec                 C   r  )Nr   rj   r(   rk   r)   r   r8   r   r  Fr   r   Tr  r   r&   r&   r'   r  r  r  z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatec                 C   s   | j  }|tv r	 dS |dkr| j| j | j| _dS |tu r>| jt	d dd d| jd< | j| j | j| _dS | jt	d dd | j
| _dS )	Nr   r(   r   r)   Fr   r  T)r   r@   r
   r,   rA   r   r   r   r   r   r  r   r&   r&   r'   r    s*   

z/HTMLTokenizer.afterDoctypeSystemIdentifierStatec                 C   s`   | j  }|dkr| j| j | j| _dS |tu r-| j | | j| j | j| _dS 	 dS )Nr   T)	r   r@   r,   rA   r   r   r   r   rG   r   r&   r&   r'   r    s   
zHTMLTokenizer.bogusDoctypeStatec                 C   s   g }	 | | jd | | jd | j }|tkrn!|dks%J |d dd  dkr:|d d d |d< n| | qd|}|d}|d	krft|D ]}| j t	d
 dd qR|
dd}|rs| j t	d |d | j| _dS )NT]r   rQ   z]]r4   rj   r   r(   rk   r)   r8   rV   )rA   r   ro   r@   r   rC   countranger,   r   r   r   r   )r"   r+   r@   Z	nullCountr   r&   r&   r'   r     s6   




zHTMLTokenizer.cdataSectionState)Nr   )N__name__
__module____qualname____doc__r!   r1   rM   r^   r_   ri   r   rm   rw   ru   r{   r~   r   rn   r   r   rv   r   r   rz   r   r   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r	  r  r  r   r  r  r  r  r  r   __classcell__r&   r&   r$   r'   r      s    
HP#

6 "-3r   N) Z
__future__r   r   r   Zsixr   rE   collectionsr   r   sysr   Z	constantsr
   r   r   r   r   r   r   r   r   r   Z_inputstreamr   Z_trier   rW   dictre   objectr   r&   r&   r&   r'   <module>   s"    