o
    7a<I                  
   @   s  d Z dgZddlZddlZddlmZmZmZmZ ddl	m
Z
mZ ddlZddlmZmZ ddl	mZmZmZmZ z
ddlmZ d	ZW n ey] Z zdd
lmZ dZW Y dZ[ndZ[ww G dd deZG dd dejZG dd deZG dd dej Z!G dd de!Z"dS )ZMITHTML5TreeBuilder    N)
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributenonwhitespace_re)
namespacesprefixes)CommentDoctypeNavigableStringTag)_baseF)baseTc                   @   sH   e Zd ZdZdZeeeegZdZ		dddZ
dd Zd	d
 Zdd ZdS )r   a  Use html5lib to build a tree.

    Note that this TreeBuilder does not support some features common
    to HTML TreeBuilders. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    * This TreeBuilder doesn't use different subclasses of NavigableString
      based on the name of the tag in which the string was found.

    * You can't use a SoupStrainer to parse only part of a document.
    html5libTNc                 c   s(    || _ |rtd |d d dfV  d S )NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.F)user_specified_encodingwarningswarn)selfmarkupr   Zdocument_declared_encodingZexclude_encodings r   7/usr/lib/python3/dist-packages/bs4/builder/_html5lib.pyprepare_markup?   s
   
zHTML5TreeBuilder.prepare_markupc                 C   s   | j jd urtd tj| jd}|| j_t	 }t
|ts+tr&| j|d< n| j|d< |j|fi |}t
|tr=d |_n|jjjd }t
|tsL|j}||_d | j_d S )NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.)ZtreeZoverride_encodingencodingr   )soupZ
parse_onlyr   r   r   Z
HTMLParsercreate_treebuilderunderlying_builderparserdict
isinstancestrnew_html5libr   parseoriginal_encoding	tokenizerstreamZcharEncodingname)r   r   r   Zextra_kwargsdocr$   r   r   r   feedL   s"   




zHTML5TreeBuilder.feedc                 C   s   t || j| jd| _| jS )N)store_line_numbers)TreeBuilderForHtml5libr   r*   r   )r   namespaceHTMLElementsr   r   r   r   h   s
   z#HTML5TreeBuilder.create_treebuilderc                 C   s   d| S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html>r   )r   Zfragmentr   r   r   test_fragment_to_documento   s   z*HTML5TreeBuilder.test_fragment_to_document)NN)__name__
__module____qualname____doc__NAMEr   r   r   ZfeaturesZTRACKS_LINE_NUMBERSr   r)   r   r-   r   r   r   r   r   )   s    
c                       sj   e Zd Z		d fdd	Zdd Zdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Z  ZS )r+   NTc                    sN   |r|| _ nddlm} |	dd|i|| _ tt| | d | _|| _d S )Nr   BeautifulSoup html.parserr*   )r5   r6   )r   bs4r4   superr+   __init__r   r*   )r   r,   r   r*   kwargsr4   	__class__r   r   r9   v   s   
zTreeBuilderForHtml5lib.__init__c                 C   s   | j   t| j | j d S N)r   resetElementr   r   r   r   documentClass   s   
z$TreeBuilderForHtml5lib.documentClassc                 C   s6   |d }|d }|d }t |||}| j| d S )Nr'   publicIdsystemId)r   Zfor_name_and_idsr   object_was_parsed)r   tokenr'   rB   rC   Zdoctyper   r   r   insertDoctype   s
   z$TreeBuilderForHtml5lib.insertDoctypec                 C   sZ   i }| j r| jr| j jj \}}||d< |d |d< | jj||fi |}t|| j|S )N
sourceline   	sourcepos)r   r*   r%   r&   Zpositionr   new_tagr?   )r   r'   	namespacer:   rG   rI   tagr   r   r   elementClass   s   z#TreeBuilderForHtml5lib.elementClassc                 C   s   t t|| jS r=   )TextNoder   r   )r   datar   r   r   commentClass   s   z#TreeBuilderForHtml5lib.commentClassc                 C   s0   ddl m} |dd| _d| j_t| j| jd S )Nr   r3   r5   r6   z[document_fragment])r7   r4   r   r'   r?   )r   r4   r   r   r   fragmentClass   s   z$TreeBuilderForHtml5lib.fragmentClassc                 C   s   | j |j d S r=   )r   appendelementr   noder   r   r   appendChild   s   z"TreeBuilderForHtml5lib.appendChildc                 C   s   | j S r=   )r   r@   r   r   r   getDocument   s   z"TreeBuilderForHtml5lib.getDocumentc                 C   s   t j| jS r=   )treebuilder_baseTreeBuildergetFragmentrS   r@   r   r   r   rZ         z"TreeBuilderForHtml5lib.getFragmentc                    sB   ddl m  g tdd fdd	|d dS )Nr   r3   z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c           	         s  t |  r	 t | trW| }|rK|d}|jdkr>|dp"d}|dp.|dp.d}dd| |||f  d S dd| |f  d S d	d| f  d S t | trid
d| | f  d S t | tr{dd| | f  d S | jrdt	| j | j
f }n| j
}dd| |f  | jrg }t| j D ]$\}}t |trdt	|j |j
f }t |trd|}|||f qt|D ]\}}dd|d  ||f  q|d7 }| jD ]}|| qd S )NrH      r5         z|%s<!DOCTYPE %s "%s" "%s"> z|%s<!DOCTYPE %s>z|%s<!DOCTYPE >z|%s<!-- %s -->z|%s"%s"z%s %sz|%s<%s>z
|%s%s="%s")r    r   matchgroup	lastindexrR   r   r   rK   r
   r'   attrslistitemsr   joinsortedchildren)	rS   indentmr'   rB   rC   
attributesvaluechildr4   Z
doctype_reZrvserializeElementr   r   ro      sL   












z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElement
)r   )r7   r4   recompilerf   r   rS   r   rn   r   testSerializer   s   

)
z%TreeBuilderForHtml5lib.testSerializer)NT)r.   r/   r0   r9   rA   rF   rM   rP   rQ   rV   rW   rZ   rt   __classcell__r   r   r;   r   r+   t   s    r+   c                   @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )AttrListc                 C   s   || _ t| j j| _d S r=   )rS   r   rc   rs   r   r   r   r9      s   zAttrList.__init__c                 C   s   t | j  S r=   )rd   rc   re   __iter__r@   r   r   r   rw         zAttrList.__iter__c                 C   sR   | j j}||d v s| j j|v r"||| j j v r"t|ts"t|}|| j |< d S )N*)rS   Zcdata_list_attributesr'   r    rd   r   findall)r   r'   rl   Z	list_attrr   r   r   __setitem__   s   


zAttrList.__setitem__c                 C      t | j S r=   )rd   rc   re   r@   r   r   r   re      r[   zAttrList.itemsc                 C   r|   r=   rd   rc   keysr@   r   r   r   r~      r[   zAttrList.keysc                 C   s
   t | jS r=   )lenrc   r@   r   r   r   __len__     
zAttrList.__len__c                 C   s
   | j | S r=   )rc   r   r'   r   r   r   __getitem__  r   zAttrList.__getitem__c                 C   s   |t | j v S r=   r}   r   r   r   r   __contains__  rx   zAttrList.__contains__N)r.   r/   r0   r9   rw   r{   re   r~   r   r   r   r   r   r   r   rv      s    rv   c                   @   sx   e Zd Zdd Zdd Zdd Zdd ZeeeZdd
dZ	dd Z
dd Zdd Zdd Zdd Zdd ZeeZd	S )r?   c                 C   s&   t j| |j || _|| _|| _d S r=   )rX   Noder9   r'   rS   r   rK   )r   rS   r   rK   r   r   r   r9     s   
zElement.__init__c                 C   s*  d  }}t |tr| }}nt |tr|}n|jjtkr%|j }}| |_n|j}| |_t |ts:|jd ur:|j  |d urd| jjrd| jjd jtkrd| jjd }| j	
|| }|| || j	_d S t |tro| j	
|}| jjrz| jd}n| jjd ur| j	 }n| j}| j	j|| j|d d S )NF)parentmost_recent_element)r    r!   r   rS   r<   r   r   extractcontentsr   
new_stringreplace_withZ_most_recent_element_last_descendantnext_elementrD   )r   rU   Zstring_childrm   Zold_elementZnew_elementr   r   r   r   rV     s<   







zElement.appendChildc                 C   s   t | jtri S t| jS r=   )r    rS   r   rv   r@   r   r   r   getAttributesG  s   
zElement.getAttributesc                 C   s   |d urJt |dkrLg }t| D ]\}}t|tr&t| }||= |||< q| jj| j	| t| D ]	\}}|| j
|< q6| jj| j
 d S d S d S )Nr   )r   rd   re   r    tupler   r   ZbuilderZ$_replace_cdata_list_attribute_valuesr'   rS   Zset_up_substitutions)r   rk   Zconverted_attributesr'   rl   new_namer   r   r   setAttributesL  s   
zElement.setAttributesNc                 C   s6   t | j|| j}|r| || d S | | d S r=   )rN   r   r   insertBeforerV   )r   rO   r   textr   r   r   
insertTextb  s   zElement.insertTextc                 C   s   | j |j }|j jtkr4| j jr4| j j|d  jtkr4| j j|d  }| j||j  }|| d S | j ||j  | |_	d S )NrH   )
rS   indexr<   r   r   r   r   r   insertr   )r   rU   ZrefNoder   Zold_nodeZnew_strr   r   r   r   i  s   
zElement.insertBeforec                 C   s   |j   d S r=   )rS   r   rT   r   r   r   removeChildu  r[   zElement.removeChildc                 C   s   | j }|j }|j}|dd}t|jdkr|jd }|j}nd}|j}|j}t|dkrf|d }	|dur9||	_n||	_||	_|durG|	|_n|	|_|durQ|	|_|d dd}
||
_|durc|
|_d|
_|D ]}||_|j	| qhg |_||_dS )z1Move all of this tag's children into another tag.Fr   r   NT)
rS   Znext_siblingr   r   r   r   Zprevious_elementZprevious_siblingr   rR   )r   Z
new_parentrS   Znew_parent_elementZfinal_next_elementZnew_parents_last_descendantZnew_parents_last_childZ(new_parents_last_descendant_next_elementZ	to_appendZfirst_childZlast_childs_last_descendantrm   r   r   r   reparentChildrenx  s>   

zElement.reparentChildrenc                 C   sB   | j | jj| j}t|| j | j}| jD ]	\}}||j|< q|S r=   )r   rJ   rS   r'   rK   r?   rk   )r   rL   rU   keyrl   r   r   r   	cloneNode  s
   zElement.cloneNodec                 C   s   | j jS r=   )rS   r   r@   r   r   r   
hasContent  s   zElement.hasContentc                 C   s$   | j d krtd | jfS | j | jfS )NZhtml)rK   r	   r'   r@   r   r   r   getNameTuple  s   
zElement.getNameTupler=   )r.   r/   r0   r9   rV   r   r   propertyrk   r   r   r   r   r   r   r   Z	nameTupler   r   r   r   r?   
  s    6

Br?   c                   @   s   e Zd Zdd Zdd ZdS )rN   c                 C   s   t j| d  || _|| _d S r=   )rX   r   r9   rS   r   )r   rS   r   r   r   r   r9     s   
zTextNode.__init__c                 C   s   t r=   )NotImplementedErrorr@   r   r   r   r     s   zTextNode.cloneNodeN)r.   r/   r0   r9   r   r   r   r   r   rN     s    rN   )#Z__license____all__r   rq   Zbs4.builderr   r   r   r   Zbs4.elementr   r   r   Zhtml5lib.constantsr	   r
   r   r   r   r   Zhtml5lib.treebuildersr   rX   r"   ImportErrorer   r   rY   r+   objectrv   r   r?   rN   r   r   r   r   <module>   s0   Kx C