o
    7aK                     @   s  d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZdd	 Zd#ddZG dd deZdd ZdZdZd$ddZd%ddZd&ddZd'ddZd(d d!Zed"kreej   dS dS ))z=Diagnostic functions, mainly for use when doing tech support.ZMIT    N)StringIO)
HTMLParser)BeautifulSoup__version__)builder_registryc                 C   sR  t dt  t dtj  g d}|D ]}tjD ]	}||jv r! nq|| t d|  qd|v rc|d zddl	m
} t d	d
tt|j  W n tyb } z
t d W Y d}~nd}~ww d|v rzddl}t d|j  W n ty } z
t d W Y d}~nd}~ww t| dr|  } nJ| ds| drt d|   t d dS z&tj| rt d|   t| }| } W d   n1 sw   Y  W n	 ty   Y nw t d |D ]E}t d|  d}	z
t| |d}
d}	W n ty } zt d|  t  W Y d}~nd}~ww |	r"t d|  t |
  t d qdS )zDiagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %s)html.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.r	   zlxml-xmlr   etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.Nr   zFound html5lib version %sz2html5lib is not installed or couldn't be imported.readzhttp:zhttps:z<"%s" looks like a URL. Beautiful Soup is not an HTTP client.zpYou need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.z7"%s" looks like a filename. Reading data from the file. z#Trying to parse your markup with %sF)featuresT%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)printr   sysversionr   Zbuildersr   removeappendr	   r   joinmapstrZLXML_VERSIONImportErrorr   hasattrr   
startswithospathexistsopen
ValueErrorr   	Exception	traceback	print_excZprettify)dataZbasic_parsersnameZbuilderr   er   fpparsersuccesssoup r+   ./usr/lib/python3/dist-packages/bs4/diagnose.pydiagnose   s   








r-   Tc                 K   sJ   ddl m} |jt| fd|i|D ]\}}td||j|jf  qdS )a  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r
   htmlz%s, %4s, %sN)r	   r   Z	iterparser   r   tagtext)r$   r.   kwargsr   Zeventelementr+   r+   r,   
lxml_trace]   s   "r3   c                   @   s`   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd ZdS )AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    c                 C   s   t | d S )N)r   )selfsr+   r+   r,   _pu   s   zAnnouncingParser._pc                 C      |  d|  d S )Nz%s STARTr7   )r5   r%   Zattrsr+   r+   r,   handle_starttagx      z AnnouncingParser.handle_starttagc                 C   r8   )Nz%s ENDr9   r5   r%   r+   r+   r,   handle_endtag{   r;   zAnnouncingParser.handle_endtagc                 C   r8   )Nz%s DATAr9   r5   r$   r+   r+   r,   handle_data~   r;   zAnnouncingParser.handle_datac                 C   r8   )Nz
%s CHARREFr9   r<   r+   r+   r,   handle_charref   r;   zAnnouncingParser.handle_charrefc                 C   r8   )Nz%s ENTITYREFr9   r<   r+   r+   r,   handle_entityref   r;   z!AnnouncingParser.handle_entityrefc                 C   r8   )Nz
%s COMMENTr9   r>   r+   r+   r,   handle_comment   r;   zAnnouncingParser.handle_commentc                 C   r8   )Nz%s DECLr9   r>   r+   r+   r,   handle_decl   r;   zAnnouncingParser.handle_declc                 C   r8   )Nz%s UNKNOWN-DECLr9   r>   r+   r+   r,   unknown_decl   r;   zAnnouncingParser.unknown_declc                 C   r8   )Nz%s PIr9   r>   r+   r+   r,   	handle_pi   r;   zAnnouncingParser.handle_piN)__name__
__module____qualname____doc__r7   r:   r=   r?   r@   rA   rB   rC   rD   rE   r+   r+   r+   r,   r4   m   s    r4   c                 C   s   t  }||  dS )zPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)r4   Zfeed)r$   r(   r+   r+   r,   htmlparser_trace   s   rJ   ZaeiouZbcdfghjklmnpqrstvwxyz   c                 C   s:   d}t | D ]}|d dkrt}nt}|t|7 }q|S )z#Generate a random word-like string.r      r   )range_consonants_vowelsrandomchoice)lengthr6   itr+   r+   r,   rword   s   rU      c                 C   s   d dd t| D S )z'Generate a random sentence-like string. c                 s   s     | ]}t td dV  qdS )rV   	   N)rU   rP   randint).0rS   r+   r+   r,   	<genexpr>   s    zrsentence.<locals>.<genexpr>)r   rM   )rR   r+   r+   r,   	rsentence   s   r\     c                 C   s   g d}g }t | D ]9}tdd}|dkr#t|}|d|  q
|dkr3|ttdd q
|dkrCt|}|d|  q
d	d
| d S )z+Randomly generate an invalid HTML document.)pZdivspanrS   bZscripttabler      z<%s>   rV   rL   z</%s>z<html>
z</html>)rM   rP   rY   rQ   r   r\   r   )num_elementsZ	tag_nameselementsrS   rQ   Ztag_namer+   r+   r,   rdoc   s   

rg   順 c           
      C   s&  t dt  t| }t dt|  dddgddfD ]>}d}zt }t||}t }d}W n tyK } zt d	|  t  W Y d
}~nd
}~ww |rXt d||| f  qddl	m
} t }|| t }t d||   dd
l}	|	 }t }|| t }t d||   d
S )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r	   r.   r   r   FTr   Nz"BS4+%s parsed the markup in %.2fs.r   r
   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   rg   lentimer   r!   r"   r#   r	   r   ZHTMLr   r   parse)
re   r$   r(   r)   ar*   r`   r&   r   r   r+   r+   r,   benchmark_parsers   s:   


rm   r	   c                 C   sX   t  }|j}t| }tt||d}td||| t	|}|
d |dd dS )z7Use Python's profiler on a randomly generated document.)bs4r$   r(   zbs4.BeautifulSoup(data, parser)Z
cumulativez_html5lib|bs42   N)tempfileZNamedTemporaryFiler%   rg   dictrn   cProfileZrunctxpstatsZStatsZ
sort_statsZprint_stats)re   r(   Z
filehandlefilenamer$   varsZstatsr+   r+   r,   profile   s   

rv   __main__)T)rK   )rV   )r]   )rh   )rh   r	   )!rI   Z__license__rr   ior   Zhtml.parserr   rn   r   r   Zbs4.builderr   r   rs   rP   rp   rj   r"   r   r-   r3   r4   rJ   rO   rN   rU   r\   rg   rm   rv   rF   stdinr   r+   r+   r+   r,   <module>   s<    
G&




 