
    +DCf8E              
         d Z ddlmZ ddlZddlZddlZddlZddlmZ dZ	dZ
dZ ej                  d	      Z ej                  d
j                  ddddd            Z ej                  dj                  ddd            Z ej                  d       ej                  d       ej                  d       ej                  d       ej                  d       ej                  d       ej                  d       ej                  d      dZ ej                  d      Z ej                  d      Z G d dej(                        Zd%d&dZd'dZd(dZd)dZd*dZd+d Zd,d-d!Zd,d.d"Zd,d.d#Zd/d$Zy)0a  
An implementation of `urlparse` that provides URL validation and normalization
as described by RFC3986.

We rely on this implementation rather than the one in Python's stdlib, because:

* It provides more complete URL validation.
* It properly differentiates between an empty querystring and an absent querystring,
  to distinguish URLs with a trailing '?'.
* It handles scheme, hostname, port, and path normalization.
* It supports IDNA hostnames, normalizing them to their encoded form.
* The API supports passing individual components, as well as the complete URL string.

Previously we relied on the excellent `rfc3986` package to handle URL parsing and
validation, but this module provides a simpler alternative, with less indirection
required.
    )annotationsN   )
InvalidURLi   zBABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~z!$&'()*+,;=z%[A-Fa-f0-9]{2}z(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?(?P<path>{path})(?:\?(?P<query>{query}))?(?:#(?P<fragment>{fragment}))?z([a-zA-Z][a-zA-Z0-9+.-]*)?z[^/?#]*z[^?#]*z[^#]*z.*scheme	authoritypathqueryfragmentzA(?:(?P<userinfo>{userinfo})@)?(?P<host>{host}):?(?P<port>{port})?z(\[.*\]|[^:@]*))userinfohostportz[^@]*z(\[.*\]|[^:]*))r   r   r	   r
   r   r   r   r   z ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$z^\[.*\]$c                      e Zd ZU ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   edd       Zedd       ZddZddZy)ParseResultstrr   r   r   
int | Noner   r	   
str | Noner
   r   c                    dj                  | j                  r| j                   dndd| j                  v rd| j                   dn| j                  | j                  d| j                   g      S dg      S )N @:[])joinr   r   r   selfs    O/var/www/highfloat_scraper/venv/lib/python3.12/site-packages/httpx/_urlparse.pyr   zParseResult.authorityi   sz    ww'+}}4==/#"$'499$4!DII;a $))#'99#8!DII;
 	
 ?A
 	
    c                    dj                  d| j                  v rd| j                   dn| j                  | j                  d| j                   g      S dg      S )Nr   r   r   r   )r   r   r   r   s    r   netloczParseResult.netlocs   sd    ww$'499$4!DII;a $))#'99#8!DII;
 	
 ?A
 	
r   c                    |s| S | j                   | j                  | j                  | j                  | j                  d}|j                  |       t        di |S )Nr   r   )r   r   r	   r
   r   updateurlparse)r   kwargsdefaultss      r   	copy_withzParseResult.copy_with|   sR    K kkIIZZ
 	'h''r   c                   | j                   }dj                  | j                  r| j                   dnd|rd| nd| j                  | j                  d| j                   nd| j
                  d| j
                   g      S dg      S )Nr   r   //?#)r   r   r   r	   r
   r   )r   r   s     r   __str__zParseResult.__str__   s    NN	ww%)[[4;;-q!b$-"YK 2		$(JJ$:!DJJ< '+}}'@!DMM?#
 	
 GI
 	
r   N)returnr   )r%   r   r-   r   )	__name__
__module____qualname____annotations__propertyr   r    r'   r,    r   r   r   r   `   sX    KM
I

I
 
 
 
(

r   r   c           	        t        |       t        kD  rt        d      t        d | D              rt        d      d|v r%|d   }t	        |t
              rt        |      n||d<   d|v r0|j                  d      xs d}|j                  d      \  |d<   }|d<   d	|v sd
|v rLt        |j                  d	d      xs d      }t        |j                  d
d      xs d      }|r| d| n||d<   d|v r7|j                  d      xs d}|j                  d      \  |d<   }|d<   |sd |d<   d|v rD|j                  d      xs d}	d|	v r+|	j                  d      r|	j                  d      s	d|	 d|d<   |j                         D ]q  \  }
}|	t        |      t        kD  rt        d|
 d      t        d |D              rt        d|
 d      t        |
   j                  |      rdt        d|
 d       t         j#                  |       }|J |j%                         }|j                  d|d         xs d}|j                  d|d         xs d}|j                  d|d         xs d}|j                  d|d         }|j                  d|d         }t&        j#                  |      }|J |j%                         }|j                  d|d         xs d}|j                  d|d         xs d}	|j                  d|d         }|j)                         }t        |t*        dz         }t-        |	      }t/        ||      }|dk7  }|dk7  xs |dk7  xs |d u}t1        |||       |rt3        |      }t        |t*        dz         }|d nt        |t*        dz         }|d nt        |t*        dz         }t5        |||||||      S ) NzURL too longc              3  b   K   | ]'  }|j                         xr |j                           ) y wNisasciiisprintable.0chars     r   	<genexpr>zurlparse.<locals>.<genexpr>   s*     
E4<<>4$"2"2"444
E   -/z,Invalid non-printable ASCII character in URLr   r    r   r   r   usernamepasswordr   raw_pathr*   r	   r
   r   r   zURL component 'z
' too longc              3  b   K   | ]'  }|j                         xr |j                           ) y wr6   r7   r:   s     r   r=   zurlparse.<locals>.<genexpr>   s*     O4<<><$*:*:*<&<<Or>   z8Invalid non-printable ASCII character in URL component ''zInvalid URL component 'r   r   r   safe)
has_schemehas_authorityz:/[]@z:/?[]@z:/?#[]@)lenMAX_URL_LENGTHr   any
isinstanceintr   pop	partitionquoteget
startswithendswithitemsCOMPONENT_REGEX	fullmatch	URL_REGEXmatch	groupdictAUTHORITY_REGEXlower
SUB_DELIMSencode_hostnormalize_portvalidate_pathnormalize_pathr   )urlr%   r   r    _r?   r@   rA   	seperatorr   keyvalue	url_matchurl_dictr   r   r	   r
   r   authority_matchauthority_dictr   parsed_schemeparsed_userinfoparsed_hostparsed_portrF   rG   parsed_pathparsed_queryparsed_fragments                                  r   r$   r$      s+   
 3x. (( 
E
EEGHH f~&0s&;Tv 6H%+,2,<,<S,A)v6&> VzV3J39r:J39r:9Az8*5xz V::j)/R5=5G5G5L2v	6'?"F7O zz&!'R$; 4s9K a[F6N
 lln C
U5zN* ?3%z!BCC OOO NseSTU 
 #3'11%8 #:3%q!ABBC" $I   ""$H ZZ(8"45;F

;(=>D"I::fhv./52DJJw 12Ezz*hz&:;H &++I6O&&&$..0N zz*nZ&@AGRH::fnV45;D::fnV45D
  M 
S0@AO"4(K,T6:K"$J2M!2Mk6M  $:]Kd# T
W(<=K 5Z(5J#K 
  eH:	;Q&R   r   c                j   | syt         j                  |       r	 t        j                  |        | S t        j                  |       r	 t        j                  | dd        | dd S | j                         rt        | j                         t              S 	 t        j                  | j                               j                  d      S # t        j                  $ r t        d|       w xY w# t        j                  $ r t        d|       w xY w# t        j                  $ r t        d|       w xY w)	Nr   zInvalid IPv4 address: r   zInvalid IPv6 address: rD   asciizInvalid IDNA hostname: )IPv4_STYLE_HOSTNAMErW   	ipaddressIPv4AddressAddressValueErrorr   IPv6_STYLE_HOSTNAMEIPv6Addressr8   rO   rZ   r[   idnaencodedecode	IDNAError)r   s    r   r\   r\     s(   		"	"4	(	@!!$' 		"	"4	(	@!!$q*- Abz	 TZZ\
33={{4::<(//88; ** 	@5dX>??	@ ** 	@5dX>??	@ >> =24(;<<=s#   C C+ 1D "C(+"D"D2c                    | | dk(  ry 	 t        |       }ddddddj                  |      }||k(  ry |S # t        $ r t        d|       w xY w)Nr   zInvalid port:    P   i  )ftphttphttpswswss)rL   
ValueErrorr   rP   )r   r   port_as_intdefault_ports       r   r]   r]   L  sv     |trz4$i
 rCr#NRRL l"  4>$2334s	   4 Ac                    |r | r| j                  d      st        d      yy| j                  d      rt        d      | j                  d      r|st        d      yy)z
    Path validation rules that depend on if the URL contains
    a scheme or authority component.

    See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3
    /z7For absolute URLs, path must be empty or begin with '/'r)   zFURLs with no authority component cannot have a path starting with '//'r   zBURLs with no scheme component cannot have a path starting with ':'N)rQ   r   )r	   rF   rG   s      r   r^   r^   g  sp      ,VWW -4
 ??4 X 
 ??3
T  )3r   c                    | j                  d      }g }|D ]9  }|dk(  r	|dk(  r|s|dgk7  s|j                          )|j                  |       ; dj                  |      S )z
    Drop "." and ".." segments from a URL path.

    For example:

        normalize_path("/path/./to/somewhere/..") == "/path/to"
    r   .z..r   )splitrM   appendr   )r	   
componentsoutput	components       r   r_   r_     si     CJF %	$&RD.

MM)$% 88Fr   c                    dj                  | j                  d      D cg c]  }d|d
 c}      j                         S c c}w )a  
    Replace a single character with the percent-encoded representation.

    Characters outside the ASCII range are represented with their a percent-encoded
    representation of their UTF-8 byte sequence.

    For example:

        percent_encode(" ") == "%20"
    r   zutf-8%02x)r   rz   upper)r<   bytes     r   percent_encoder     s;     77t{{7/CDtaSzNDEKKMMDs   Ac                6    t         |z   dz   }| D ]  }||vs y y)z<
    Determine if a given string is already quote-safe.
    r   FT)UNRESERVED_CHARACTERSstringrE   NON_ESCAPED_CHARSr<   s       r   is_safer     s6     .4s:  (( r   c                    t        | |      r| S t        |z   }dj                  | D cg c]  }||v r|n
t        |       c}      S c c}w )z1
    Use percent-encoding to quote a string.
    rD   r   )r   r   r   r   r   s       r   percent_encodedr     sR     vD!-477QWX**t0D	DX Xs   Ac                   g }d}t        j                  t        |       D ]l  }|j                         |j	                         }}|j                  d      }||k7  r!| || }|j                  t        ||             |j                  |       |}n |t        |       k7  r!| |d }	|j                  t        |	|             dj                  |      S )a  
    Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.

    See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1

    * `string`: The string to be percent-escaped.
    * `safe`: A string containing characters that may be treated as safe, and do not
        need to be escaped. Unreserved characters are always treated as safe.
        See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3
    r   rD   Nr   )
refinditerPERCENT_ENCODED_REGEXstartendgroupr   r   rH   r   )
r   rE   partscurrent_positionrW   start_positionend_positionmatched_textleading_texttrailing_texts
             r   rO   rO     s     E2F; 
(',{{}eiik{{1~--!"2>BLLLDAB 	\"'
( 3v;&/01_]>?775>r   c                    dj                  | D cg c]#  \  }}t        |d      dz   t        |d      z   % c}}      S c c}}w )am  
    We can use a much simpler version of the stdlib urlencode here because
    we don't need to handle a bunch of different typing cases, such as bytes vs str.

    https://github.com/python/cpython/blob/b2f7b2ef0b5421e01efb8c7bee2ef95d3bab77eb/Lib/urllib/parse.py#L926

    Note that we use '%20' encoding for spaces. and '%2F  for '/'.
    This is slightly different than `requests`, but is the behaviour that browsers use.

    See
    - https://github.com/encode/httpx/issues/2536
    - https://github.com/encode/httpx/issues/2721
    - https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
    &r   rD   =)r   r   )rS   kvs      r   	urlencoder     sN     88 	
1 AB'#-0KK	
 	
s   (A
r"   )r`   r   r%   r   r-   r   )r   r   r-   r   )r   zstr | int | Noner   r   r-   r   )r	   r   rF   boolrG   r   r-   None)r	   r   r-   r   )r<   r   r-   r   )r   )r   r   rE   r   r-   r   )r   r   rE   r   r-   r   )rS   zlist[tuple[str, str]]r-   r   ) __doc__
__future__r   rt   r   typingry   _exceptionsr   rI   r   r[   compiler   formatrV   rY   rT   rs   rw   
NamedTupler   r$   r\   r]   r^   r_   r   r   r   rO   r   r3   r   r   <module>r      s  " #  	   # I  
"

#45  BJJ	*
 f+  	& "**Tf 	  	  bjj56I&BJJx RZZ 

4 

7#BJJ)*BJJt	 !bjj!DE  bjj- 4
&## 4
nDN+=\66,N
Br   