
    vKg0                         S SK Jr  S SKJr  S SKJr  S SKJr  S SKJ	r	J
r
JrJrJrJrJr  SSKJrJr  SSKJrJrJr   " S	 S
5      r " S S5      r\\\4   r\\   r " S S5      rg)    )aliases)sha256)dumps)sub)AnyDictIteratorListOptionalTupleUnion   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                   d   \ rS rSr  S'S\S\S\S\SSS	\\   S
\\   4S jjr	S\
S\4S jrS\
S\4S jr\S\4S j5       rS\4S jrS\4S jrS(S jr\S\4S j5       r\S\\   4S j5       r\S\4S j5       r\S\4S j5       r\S\\   4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\S    4S j5       r\S\4S  j5       r\S\\   4S! j5       r \S\\   4S" j5       r!S)S#\S\4S$ jjr"\S\4S% j5       r#S&r$g)*CharsetMatch   Npayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadpreemptive_declarationc                     Xl         X l        X0l        XPl        X@l        S U l        / U l        SU l        S U l        S U l	        X`l
        Xpl        g )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfr   r   r   r   r   r   r   s           Y/var/www/highfloat_scraper/venv/lib/python3.13/site-packages/charset_normalizer/models.py__init__CharsetMatch.__init__   sQ      '.'6,5%348+-,/"04/3&56L$    otherreturnc                     [        U[        5      (       d.  [        U[        5      (       a  [        U5      U R                  :H  $ gU R                  UR                  :H  =(       a    U R
                  UR
                  :H  $ )NF)
isinstancer   strr   encodingfingerprintr-   r2   s     r.   __eq__CharsetMatch.__eq__(   s\    %..%%% '4==88}}.X43C3CuGXGX3XXr1   c                    [        U[        5      (       d  [        e[        U R                  UR                  -
  5      n[        U R
                  UR
                  -
  5      nUS:  a  US:  a  U R
                  UR
                  :  $ US:  aU  US::  aO  [        U R                  5      [        :  a  U R                  UR                  :  $ U R                  UR                  :  $ U R                  UR                  :  $ )zA
Implemented to make sorted available upon CharsetMatches items.
g{Gz?g{Gz?)
r5   r   
ValueErrorabschaos	coherencelenr!   r   multi_byte_usage)r-   r2   chaos_differencecoherence_differences       r.   __lt__CharsetMatch.__lt__/   s     %.."%djj5;;&>"?&)$..5??*J&K d"';d'B>>EOO33$)=)E 4==!%55zzEKK//((5+A+AAAzzEKK''r1   c                 \    S[        [        U 5      5      [        U R                  5      -  -
  $ )Ng      ?)rA   r6   rawr-   s    r.   rB   CharsetMatch.multi_byte_usageE   s"    c#d)ns488}455r1   c                     U R                   c&  [        U R                  U R                  S5      U l         U R                   $ )Nstrict)r+   r6   r!   r"   rI   s    r.   __str__CharsetMatch.__str__I   s.    <<t}}dnnhGDL||r1   c                 N    SR                  U R                  U R                  5      $ )Nz<CharsetMatch '{}' bytes({})>)formatr7   r8   rI   s    r.   __repr__CharsetMatch.__repr__O   s    .55dmmTEUEUVVr1   c                     [        U[        5      (       a  X:X  a$  [        SR                  UR                  5      5      eS Ul        U R                  R                  U5        g )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r5   r   r=   rP   	__class__r+   r'   appendr9   s     r.   add_submatchCharsetMatch.add_submatchR   sP    %..%-MTTOO  E"r1   c                     U R                   $ N)r"   rI   s    r.   r7   CharsetMatch.encoding]   s    ~~r1   c                     / n[         R                  " 5        HK  u  p#U R                  U:X  a  UR                  U5        M(  U R                  U:X  d  M:  UR                  U5        MM     U$ )zr
Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
)r   itemsr7   rU   )r-   also_known_asups       r.   encoding_aliasesCharsetMatch.encoding_aliasesa   sW    
 $&MMODA}}!$$Q'!#$$Q'	 $
 r1   c                     U R                   $ rY   r%   rI   s    r.   bomCharsetMatch.bomn       ###r1   c                     U R                   $ rY   rc   rI   s    r.   byte_order_markCharsetMatch.byte_order_markr   rf   r1   c                 H    U R                    Vs/ sH  oS   PM	     sn$ s  snf )z
Return the complete list of possible languages found in decoded sequence.
Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
r   r$   )r-   es     r.   r   CharsetMatch.languagesv   s"     #oo.o!o...s   c                 ,   U R                   (       dr  SU R                  ;   a  gSSKJnJn  [        U R                  5      (       a  U" U R                  5      OU" U R                  5      n[        U5      S:X  d  SU;   a  gUS   $ U R                   S   S   $ )zz
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
"Unknown".
asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r$   could_be_from_charsetcharset_normalizer.cdrq   rr   r   r7   rA   )r-   rq   rr   r   s       r.   languageCharsetMatch.language~   s      $444  X *$--88 &dmm4'6  9~"my&@ Q<q!!$$r1   c                     U R                   $ rY   )r#   rI   s    r.   r?   CharsetMatch.chaos   s    $$$r1   c                 J    U R                   (       d  gU R                   S   S   $ )Nr    r   r   rk   rI   s    r.   r@   CharsetMatch.coherence   s     q!!$$r1   c                 0    [        U R                  S-  SS9$ Nd      )ndigits)roundr?   rI   s    r.   percent_chaosCharsetMatch.percent_chaos   s    TZZ#%q11r1   c                 0    [        U R                  S-  SS9$ r}   )r   r@   rI   s    r.   percent_coherenceCharsetMatch.percent_coherence   s    T^^c)155r1   c                     U R                   $ )z
Original untouched bytes.
)r!   rI   s    r.   rH   CharsetMatch.raw   s    
 }}r1   c                     U R                   $ rY   )r'   rI   s    r.   submatchCharsetMatch.submatch   s    ||r1   c                 2    [        U R                  5      S:  $ Nr   )rA   r'   rI   s    r.   has_submatchCharsetMatch.has_submatch   s    4<< 1$$r1   c                    U R                   b  U R                   $ [        U 5       Vs/ sH  n[        U5      PM     nn[        [	        U Vs1 sH  o3(       d  M  UiM     sn5      5      U l         U R                   $ s  snf s  snf rY   )r&   r6   r   sortedlist)r-   chardetected_rangesrs       r.   	alphabetsCharsetMatch.alphabets   s}    +''' -0I0
,5DM$I 	 0
  &d+L!!A+L&MN###0
 ,Ms   A:	A?A?c                 r    U R                   /U R                   Vs/ sH  oR                  PM     sn-   $ s  snf )z
The complete list of encoding that output the exact SAME str result and therefore could be the originating
encoding.
This list does include the encoding available in property 'encoding'.
)r"   r'   r7   )r-   ms     r.   rt   "CharsetMatch.could_be_from_charset   s.     t||"D|!::|"DDD"Ds   4r7   c                 >  ^  T R                   b  T R                   U:w  at  UT l         [        T 5      nT R                  b>  T R                  R                  5       S;  a   [	        [
        U 4S jUSS S5      nX2SS -   nUR                  US5      T l        T R                  $ )z
Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
Any errors will be simply ignored by the encoder NOT replaced.
N)zutf-8utf8utf_8c                    > U R                   U R                  5       S   U R                  5       S    R                  U R                  5       S   [	        TR
                  5      5      $ )Nr   r   )stringspanreplacegroupsr   r*   )r   r-   s    r.   <lambda>%CharsetMatch.output.<locals>.<lambda>   sI    ahhqvvx{QVVXa[AII
1y1F1F'Gr1   i    r   r   )r*   r6   r,   lowerr   r   encoder)   )r-   r7   decoded_stringpatched_headers   `   r.   outputCharsetMatch.output   s    
   (D,A,AX,M$,D! YN,,80066812 "%3 #5D)" "02G!G#1#8#89#MD ###r1   c                 P    [        U R                  5       5      R                  5       $ )zg
Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
)r   r   	hexdigestrI   s    r.   r8   CharsetMatch.fingerprint   s    
 dkkm$..00r1   )r"   r%   r$   r'   r(   r#   r*   r)   r!   r,   r+   r&   )NN)r2   r   r3   N)r   )%__name__
__module____qualname____firstlineno__bytesr6   floatboolr   r/   objectr:   rE   propertyrB   rM   rQ   rV   r7   r
   r`   rd   rh   r   rv   r?   r@   r   r   rH   r   r   r   rt   r   r8   __static_attributes__ r1   r.   r   r      s    *.04MM M 	M
 M &M "#M !)M8YF Yt Y(F (t (, 6% 6 6 W# W	# #   
$s) 
 
 $T $ $ $ $ $ /49 / / %# % %6 %u % % %5 % %
 2u 2 2 65 6 6 U   $~.   %d % % 	$49 	$ 	$ EtCy E E$s $ $8 1S 1 1r1   r   c                       \ rS rSrSrSS\\\      4S jjrS\	\   4S jr
S\\\4   S\4S	 jrS\4S
 jrS\4S jrS\SS4S jrS\S   4S jrS\S   4S jrSrg)CharsetMatches   z
Container with every CharsetMatch items ordered by default from most probable to the less one.
Act like a list(iterable) but does not implements all related methods.
Nresultsc                 B    U(       a  [        U5      U l        g / U l        g rY   )r   _results)r-   r   s     r.   r/   CharsetMatches.__init__   s    ?FF7OBr1   r3   c              #   8   #    U R                    S h  vN   g  N7frY   r   rI   s    r.   __iter__CharsetMatches.__iter__   s     ==  s   itemc                     [        U[        5      (       a  U R                  U   $ [        U[        5      (       a2  [	        US5      nU R                   H  nXR
                  ;   d  M  Us  $    [        e)z
Retrieve a single item either by its position or encoding name (alias may be used here).
Raise KeyError upon invalid index or encoding not present in results.
F)r5   intr   r6   r   rt   KeyError)r-   r   results      r.   __getitem__CharsetMatches.__getitem__   s_    
 dC  ==&&dC  T5)D--777!M ( r1   c                 ,    [        U R                  5      $ rY   rA   r   rI   s    r.   __len__CharsetMatches.__len__  s    4==!!r1   c                 2    [        U R                  5      S:  $ r   r   rI   s    r.   __bool__CharsetMatches.__bool__  s    4==!A%%r1   c                    [        U[        5      (       d-  [        SR                  [	        UR
                  5      5      5      e[        UR                  5      [        :  a\  U R                   HL  nUR                  UR                  :X  d  M  UR                  UR                  :X  d  M;  UR                  U5          g   U R                  R                  U5        [        U R                  5      U l	        g)zf
Insert a single match. Will be inserted accordingly to preserve sort.
Can be inserted as a submatch.
z-Cannot append instance '{}' to CharsetMatchesN)r5   r   r=   rP   r6   rT   rA   rH   r   r   r8   r?   rV   rU   r   )r-   r   matchs      r.   rU   CharsetMatches.append  s    
 $--?FF'  txx=++$$(8(88U[[DJJ=V&&t, ' 	T"t}}-r1   r   c                 D    U R                   (       d  gU R                   S   $ )zA
Simply return the first match. Strict equivalent to matches[0].
Nr   r   rI   s    r.   bestCharsetMatches.best(  s     }}}}Qr1   c                 "    U R                  5       $ )z@
Redundant method, call the method best(). Kept for BC reasons.
)r   rI   s    r.   firstCharsetMatches.first0  s     yy{r1   r   rY   )r   r   r   r   __doc__r   r
   r   r/   r	   r   r   r   r6   r   r   r   r   rU   r   r   r   r   r1   r.   r   r      s    
Ol); < O!(<0 !c3h L " "&$ &.< .D .( h~.  x/ r1   r   c                       \ rS rSrS\S\\   S\\   S\\   S\S\\   S\S	\S
\S\\   S\4S jr	\
S\\\4   4S j5       rS\4S jrSrg)CliDetectionResulti;  pathr7   r`   alternative_encodingsrv   r   r   r?   r@   unicode_pathis_preferredc                     Xl         Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl	        Xl
        g rY   )r   r   r7   r`   r   rv   r   r   r?   r@   r   )r-   r   r7   r`   r   rv   r   r   r?   r@   r   r   s               r.   r/   CliDetectionResult.__init__<  s@     	+7'/+;0E"%$-$2!
 )".r1   r3   c                     U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  S.$ )Nr   r7   r`   r   rv   r   r   r?   r@   r   r   r   rI   s    r.   __dict__CliDetectionResult.__dict__V  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r1   c                 ,    [        U R                  SSS9$ )NT   )ensure_asciiindent)r   r   rI   s    r.   to_jsonCliDetectionResult.to_jsonf  s    T]]a@@r1   )r   r   r?   r@   r7   r`   r   r   rv   r   r   N)r   r   r   r   r6   r   r
   r   r   r/   r   r   r   r   r   r   r   r1   r.   r   r   ;  s    // 3-/ s)	/
  $Cy/ / 9/ / / / sm/ /4 
$sCx. 
 
A Ar1   r   N)encodings.aliasesr   hashlibr   jsonr   rer   typingr   r   r	   r
   r   r   r   constantr   r   utilsr   r   r   r   r   r6   r   CoherenceMatchr   r   r   r1   r.   <module>r      sd    %    D D D G C Cf1 f1R@ @F sEz"' ,A ,Ar1   