
    RhB                        d Z ddlZddlZddlmZ dgZ ej                  d      Z ej                  d      Z ej                  d      Z	 ej                  d      Z
 ej                  d	      Z ej                  d
      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  dej                         Z ej                  d
      Z ej                  d      Z G d dej(                        Zy)zA parser for HTML and XHTML.    N)unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                        e Zd ZdZdZdd fd
Z fdZd Zd Zd	Z	d
 Z
d Zd Zd Zd ZddZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z xZS )r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )scriptstyleT)convert_charrefsc                P    t         |           || _        | j                          y)zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)super__init__r	   reset)selfr	   	__class__s     "/usr/lib/python3.12/html/parser.pyr   zHTMLParser.__init__V   s!     	 0

    c                 b    d| _         d| _        t        | _        d| _        t
        |           y)z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elemr   r   )r   r   s    r   r   zHTMLParser.reset`   s)    -r   c                 N    | j                   |z   | _         | j                  d       y)zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr   datas     r   feedzHTMLParser.feedh   s     ||d*Qr   c                 &    | j                  d       y)zHandle any buffered data.   N)r   r   s    r   closezHTMLParser.closeq   s    Qr   Nc                     | j                   S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr    s    r   get_starttag_textzHTMLParser.get_starttag_textw   s    ###r   c                     |j                         | _        t        j                  d| j                  z  t        j                        | _        y )Nz</\s*%s\s*>)lowerr   recompileIr   )r   elems     r   set_cdata_modezHTMLParser.set_cdata_mode{   s/    **,::nt&FMr   c                 (    t         | _        d | _        y N)r   r   r   r    s    r   clear_cdata_modezHTMLParser.clear_cdata_mode   s    -r   c                 	   | j                   }d}t        |      }||k  rW| j                  rq| j                  se|j	                  d|      }|dk  r|j                  dt        ||dz
              }|dk\  r't        j                  d      j                  ||      sn|}n?| j                  j                  ||      }|r|j                         }n| j                  rn|}||k  rJ| j                  r*| j                  s| j                  t        |||              n| j                  |||        | j                  ||      }||k(  rn3|j                  } |d|      r[t         j#                  ||      r| j%                  |      }	n |d|      r| j'                  |      }	nr |d|      r| j)                  |      }	nW |d|      r| j+                  |      }	n< |d	|      r| j-                  |      }	n!|d
z   |k  r| j                  d       |d
z   }	nnh|	dk  r|sn_|j	                  d|d
z         }	|	dk  r |j	                  d|d
z         }	|	dk  r|d
z   }	n|	d
z  }	| j                  r*| j                  s| j                  t        |||	              n| j                  |||	        | j                  ||	      }n |d|      rt.        j#                  ||      }|rY|j1                         dd }
| j3                  |
       |j5                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }d||d  v r,| j                  |||dz           | j                  ||dz         }n |d|      rt6        j#                  ||      }|rW|j1                  d
      }
| j9                  |
       |j5                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }t:        j#                  ||      }|rE|rB|j1                         ||d  k(  r,|j5                         }	|	|k  r|}	| j                  ||d
z         }n>|d
z   |k  r'| j                  d       | j                  ||d
z         }nnJ d       ||k  rW|rm||k  rh| j                  s\| j                  r*| j                  s| j                  t        |||              n| j                  |||        | j                  ||      }||d  | _         y )Nr   <&"   z[\s;]</<!--<?<!r   r   z&#   ;zinteresting.search() lied)r   lenr	   r   findrfindmaxr'   r(   searchr   starthandle_datar   	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declarationcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r   rM   r   injampposrD   rB   knames              r   r   zHTMLParser.goahead   s   ,,L!e$$T__LLa(q5 %]]3Aqt=F!JJx077HA((//;AA1u(($$Xgal%;<$$WQq\2q!$AAvu ++J#q!%%gq1++A.Aa())!,A***1-Aa(a(Aa(33A6A!eq[$$S)AAq5S!a%0A1u#LLa!e4q5 !AAQ,,T__(('!A,)?@((16NN1a(D!$gq1 ;;=2.D''-		A%c1Q3/Eq!,Agabk)((1Q38 NN1ac2C#!3 ;;q>D))$/		A%c1Q3/Eq!,A"((!4u{{};!IIK6 !A NN1a!e4!eq[ $$S)q!a%0A555qS !eV 1q5$$T__  '!A,!78  1.q!$Aqr{r   c                 p   | j                   }|||dz    dk(  sJ d       |||dz    dk(  r| j                  |      S |||dz    dk(  r| j                  |      S |||dz    j                         d	k(  r7|j	                  d
|dz         }|dk(  ry| j                  ||dz   |        |dz   S | j                  |      S )Nr7   r6   z+unexpected call to parse_html_declaration()   r4      z<![	   z	<!doctyper   r8   r   )r   rG   parse_marked_sectionr&   r;   handle_declparse_bogus_comment)r   rQ   r   gtposs       r   rI   z!HTMLParser.parse_html_declaration   s    ,,q1~% 	D )C 	D%1QqS>V#%%a((Qqs^u$,,Q//Qqs^!!#{2LLac*E{WQqS/07N++A..r   c                     | j                   }|||dz    dv sJ d       |j                  d|dz         }|dk(  ry|r| j                  ||dz   |        |dz   S )Nr7   )r6   r3   z"unexpected call to parse_comment()r   r8   r   )r   r;   handle_comment)r   rQ   reportr   poss        r   r]   zHTMLParser.parse_bogus_comment  su    ,,q1~- 	C 1B 	C-ll3!$"9!C 01Qwr   c                     | j                   }|||dz    dk(  sJ d       t        j                  ||dz         }|sy|j                         }| j	                  ||dz   |        |j                         }|S )Nr7   r5   zunexpected call to parse_pi()r8   )r   picloser>   r?   	handle_pirM   )r   rQ   r   rD   rS   s        r   rH   zHTMLParser.parse_pi!  st    ,,q1~%F'FF%w!,KKMwqsA'IIKr   c                 ~   d | _         | j                  |      }|dk  r|S | j                  }||| | _         g }t        j	                  ||dz         }|sJ d       |j                         }|j                  d      j                         x| _        }||k  rt        j	                  ||      }|sn|j                  ddd      \  }	}
}|
sd }n,|d d dcxk(  r|dd  k(  sn |d d dcxk(  r|dd  k(  rn n|dd }|rt        |      }|j                  |	j                         |f       |j                         }||k  r||| j                         }|d	vr| j                  |||        |S |j                  d
      r| j                  ||       |S | j!                  ||       || j"                  v r| j%                  |       |S )Nr   r   z#unexpected call to parse_starttag()r7   rY   'r8   ")r   />ri   )r#   check_for_whole_start_tagr   tagfind_tolerantrD   rM   rK   r&   r   attrfind_tolerantr   appendstripr@   endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr+   )r   rQ   endposr   attrsrD   rU   tagmattrnamerest	attrvaluerM   s                r   rE   zHTMLParser.parse_starttag-  s   #//2A:M,,&q0  &&w!4;;;uIIK"[[^1133s&j!''3A()1a(8%HdI 	2A$8)BC.82A#7237%aO	$Y/	LL(..*I67A &j a%%'k!WQv./M<<##C/
    e,d111##C(r   c                 H   | j                   }t        j                  ||      }|rt|j                         }|||dz    }|dk(  r|dz   S |dk(  r6|j	                  d|      r|dz   S |j	                  d|      ry||kD  r|S |dz   S |dk(  ry|dv ry||kD  r|S |dz   S t        d	      )
Nr   r   /ri   r7   r8   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r   locatestarttagend_tolerantrD   rM   rB   AssertionError)r   rQ   r   rv   rS   nexts         r   rj   z$HTMLParser.check_for_whole_start_tagY  s    ,,&,,Wa8A1QqS>Ds{1us{%%dA.q5L%%c1-q5Hq5Lrz 5 6 1u1u677r   c                    | j                   }|||dz    dk(  sJ d       t        j                  ||dz         }|sy|j                         }t        j                  ||      }|s| j                  | j                  |||        |S t        j                  ||dz         }|s!|||dz    dk(  r|dz   S | j                  |      S |j                  d      j                         }|j                  d|j                               }| j                  |       |dz   S |j                  d      j                         }| j                  %|| j                  k7  r| j                  |||        |S | j                  |       | j                          |S )	Nr7   r3   zunexpected call to parse_endtagr   r8   rY   z</>r   )r   	endendtagr>   rM   
endtagfindrD   r   r@   rk   r]   rK   r&   r;   handle_endtagr.   )r   rQ   r   rD   r^   	namematchtagnamer*   s           r   rF   zHTMLParser.parse_endtag{  sy   ,,q1~%H'HH%  !A#.		  !,*  5!12(..w!<I1QqS>U*Q3J33A66ooa(..0G
 LLimmo6Ew'7N{{1~##%??&t&  5!124 r   c                 J    | j                  ||       | j                  |       y r-   )rq   r   r   ru   rt   s      r   rp   zHTMLParser.handle_startendtag  s     S%(3r   c                      y r-    r   s      r   rq   zHTMLParser.handle_starttag      r   c                      y r-   r   )r   ru   s     r   r   zHTMLParser.handle_endtag  r   r   c                      y r-   r   r   rV   s     r   rL   zHTMLParser.handle_charref  r   r   c                      y r-   r   r   s     r   rO   zHTMLParser.handle_entityref  r   r   c                      y r-   r   r   s     r   r@   zHTMLParser.handle_data  r   r   c                      y r-   r   r   s     r   r`   zHTMLParser.handle_comment  r   r   c                      y r-   r   )r   decls     r   r\   zHTMLParser.handle_decl  r   r   c                      y r-   r   r   s     r   re   zHTMLParser.handle_pi  r   r   c                      y r-   r   r   s     r   unknown_declzHTMLParser.unknown_decl  r   r   )r   )__name__
__module____qualname____doc__rr   r   r   r   r!   r#   r$   r+   r.   r   rI   r]   rH   rE   rj   rF   rp   rq   r   rL   rO   r@   r`   r\   re   r   __classcell__)r   s   @r   r   r   >   s    * 1+/  O$Nu#t/*		(X8D%P 
r   )r   r'   _markupbasehtmlr   __all__r(   r   rP   rN   rJ   rC   rd   commentcloserk   rl   VERBOSEr|   r   r   
ParserBaser   r   r   r   <module>r      s   " 
   .  RZZ' RZZ%
BJJ>?	
"**@
Arzz+&
"**S/rzz)$ 2::LM BJJ=>  (RZZ ) ZZ  BJJsO	 RZZ>?
J'' Jr   