o
    :g0                     @   s   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZmZmZ G d	d
 d
ZG dd dZeeef Zee ZG dd dZdS )    )aliases)sha256)dumps)sub)AnyDictIteratorListOptionalTupleUnion   )RE_POSSIBLE_ENCODING_INDICATIONTOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                   @   s  e Zd Z		d>dededededddee d	ee fd
dZde	defddZ
de	defddZedefddZdefddZdefddZd?ddZedefddZedee fddZedefddZedefd d!Zedee fd"d#Zedefd$d%Zedefd&d'Zedefd(d)Zedefd*d+Zedefd,d-Zedefd.d/Zeded  fd0d1Zedefd2d3Zedee fd4d5Zedee fd6d7Z d@d9edefd:d;Z!edefd<d=Z"dS )ACharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadpreemptive_declarationc                 C   sL   || _ || _|| _|| _|| _d | _g | _d| _d | _d | _	|| _
|| _d S )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string_preemptive_declaration)selfr   r   r   r   r   r   r    r*   Z/var/www/html/django-vendor/venv/lib/python3.10/site-packages/charset_normalizer/models.py__init__   s   

zCharsetMatch.__init__otherreturnc                 C   s>   t |tst |trt|| jkS dS | j|jko| j|jkS )NF)
isinstancer   strr   encodingfingerprintr)   r-   r*   r*   r+   __eq__(   s
   

zCharsetMatch.__eq__c                 C   s   t |tstt| j|j }t| j|j }|dk r%|dkr%| j|jkS |dk r@|dkr@t| jtkr:| j|jk S | j	|j	kS | j|jk S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r/   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r)   r-   chaos_differencecoherence_differencer*   r*   r+   __lt__/   s   
zCharsetMatch.__lt__c                 C   s   dt t| t | j  S )Ng      ?)r9   r0   rawr)   r*   r*   r+   r:   E   s   zCharsetMatch.multi_byte_usagec                 C   s"   | j d u rt| j| jd| _ | j S )Nstrict)r'   r0   r   r   r?   r*   r*   r+   __str__I   s   
zCharsetMatch.__str__c                 C   s   d | j| jS )Nz<CharsetMatch '{}' bytes({})>)formatr1   r2   r?   r*   r*   r+   __repr__O      zCharsetMatch.__repr__c                 C   s8   t |tr	|| krtd|jd |_| j| d S )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r/   r   r5   rB   	__class__r'   r#   appendr3   r*   r*   r+   add_submatchR   s   zCharsetMatch.add_submatchc                 C      | j S N)r   r?   r*   r*   r+   r1   ]      zCharsetMatch.encodingc                 C   sD   g }t  D ]\}}| j|kr|| q| j|kr|| q|S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr1   rF   )r)   also_known_asupr*   r*   r+   encoding_aliasesa   s   


zCharsetMatch.encoding_aliasesc                 C   rH   rI   r!   r?   r*   r*   r+   bomn   rJ   zCharsetMatch.bomc                 C   rH   rI   rP   r?   r*   r*   r+   byte_order_markr   rJ   zCharsetMatch.byte_order_markc                 C   s   dd | j D S )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        c                 S   s   g | ]}|d  qS )r   r*   ).0er*   r*   r+   
<listcomp>|       z*CharsetMatch.languages.<locals>.<listcomp>r    r?   r*   r*   r+   r   v   s   zCharsetMatch.languagesc                 C   sp   | j s1d| jv r
dS ddlm}m} t| jr|| jn|| j}t|dks+d|v r-dS |d S | j d d S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r    could_be_from_charsetcharset_normalizer.cdrZ   r[   r   r1   r9   )r)   rZ   r[   r   r*   r*   r+   language~   s   
zCharsetMatch.languagec                 C   rH   rI   )r   r?   r*   r*   r+   r7      rJ   zCharsetMatch.chaosc                 C   s   | j sdS | j d d S )Nr   r   r   rW   r?   r*   r*   r+   r8      s   zCharsetMatch.coherencec                 C      t | jd ddS Nd      )ndigits)roundr7   r?   r*   r*   r+   percent_chaos      zCharsetMatch.percent_chaosc                 C   r`   ra   )re   r8   r?   r*   r*   r+   percent_coherence   rg   zCharsetMatch.percent_coherencec                 C   rH   )z+
        Original untouched bytes.
        )r   r?   r*   r*   r+   r>      s   zCharsetMatch.rawc                 C   rH   rI   )r#   r?   r*   r*   r+   submatch   rJ   zCharsetMatch.submatchc                 C      t | jdkS Nr   )r9   r#   r?   r*   r*   r+   has_submatch   s   zCharsetMatch.has_submatchc                 C   s@   | j d ur| j S dd t| D }ttdd |D | _ | j S )Nc                 S   s   g | ]}t |qS r*   )r   )rS   charr*   r*   r+   rU      s    z*CharsetMatch.alphabets.<locals>.<listcomp>c                 S   s   h | ]}|r|qS r*   r*   )rS   rr*   r*   r+   	<setcomp>   rV   z)CharsetMatch.alphabets.<locals>.<setcomp>)r"   r0   sortedlist)r)   detected_rangesr*   r*   r+   	alphabets   s   
zCharsetMatch.alphabetsc                 C   s   | j gdd | jD  S )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        c                 S   s   g | ]}|j qS r*   )r1   )rS   mr*   r*   r+   rU      s    z6CharsetMatch.could_be_from_charset.<locals>.<listcomp>)r   r#   r?   r*   r*   r+   r]      s   z"CharsetMatch.could_be_from_charsetutf_8r1   c                    s|    j du s
 j |kr;| _ t } jdur4 j dvr4tt fdd|dd d}||dd  }||d _ jS )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        N)zutf-8utf8ru   c                    s4   | j |  d |  d  |  d t jS )Nr   r   )stringspanreplacegroupsr   r&   )rt   r?   r*   r+   <lambda>   s    z%CharsetMatch.output.<locals>.<lambda>i    r   ry   )r&   r0   r(   lowerr   r   encoder%   )r)   r1   decoded_stringpatched_headerr*   r?   r+   output   s    


	zCharsetMatch.outputc                 C   s   t |   S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestr?   r*   r*   r+   r2      s   zCharsetMatch.fingerprint)NN)r-   r   r.   N)ru   )#__name__
__module____qualname__bytesr0   floatboolr
   r,   objectr4   r=   propertyr:   rA   rC   rG   r1   r	   rO   rQ   rR   r   r_   r7   r8   rf   rh   r>   ri   rl   rs   r]   r   r2   r*   r*   r*   r+   r      st    

r   c                   @   s   e Zd ZdZddeee  fddZdee fddZ	d	e
eef defd
dZdefddZdefddZd	eddfddZded fddZded fddZdS )CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 C   s   |r	t || _d S g | _d S rI   )rp   _results)r)   r   r*   r*   r+   r,      s   zCharsetMatches.__init__r.   c                 c   s    | j E d H  d S rI   r   r?   r*   r*   r+   __iter__   s   zCharsetMatches.__iter__itemc                 C   sJ   t |tr
| j| S t |tr#t|d}| jD ]}||jv r"|  S qt)z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r/   intr   r0   r   r]   KeyError)r)   r   resultr*   r*   r+   __getitem__   s   





zCharsetMatches.__getitem__c                 C   s
   t | jS rI   r9   r   r?   r*   r*   r+   __len__  s   
zCharsetMatches.__len__c                 C   rj   rk   r   r?   r*   r*   r+   __bool__  s   zCharsetMatches.__bool__c                 C   s|   t |tstdt|jt|jtk r0| j	D ]}|j
|j
kr/|j|jkr/||  dS q| j	| t| j	| _	dS )z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r/   r   r5   rB   r0   rE   r9   r>   r   r   r2   r7   rG   rF   rp   )r)   r   matchr*   r*   r+   rF     s   


zCharsetMatches.appendr   c                 C   s   | j sdS | j d S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   r?   r*   r*   r+   best(  s   
zCharsetMatches.bestc                 C   s   |   S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   r?   r*   r*   r+   first0  s   zCharsetMatches.firstrI   )r   r   r   __doc__r
   r	   r   r,   r   r   r   r   r0   r   r   r   r   rF   r   r   r*   r*   r*   r+   r      s    r   c                   @   s~   e Zd Zdedee dee dee dedee deded	ed
ee defddZe	de
eef fddZdefddZdS )CliDetectionResultpathr1   rO   alternative_encodingsr_   rs   r   r7   r8   unicode_pathis_preferredc                 C   sF   || _ |
| _|| _|| _|| _|| _|| _|| _|| _|	| _	|| _
d S rI   )r   r   r1   rO   r   r_   rs   r   r7   r8   r   )r)   r   r1   rO   r   r_   rs   r   r7   r8   r   r   r*   r*   r+   r,   <  s   
zCliDetectionResult.__init__r.   c                 C   s2   | j | j| j| j| j| j| j| j| j| j	| j
dS )Nr   r1   rO   r   r_   rs   r   r7   r8   r   r   r   r?   r*   r*   r+   __dict__V  s   zCliDetectionResult.__dict__c                 C   s   t | jdddS )NT   )ensure_asciiindent)r   r   r?   r*   r*   r+   to_jsonf  rD   zCliDetectionResult.to_jsonN)r   r   r   r0   r
   r	   r   r   r,   r   r   r   r   r   r*   r*   r*   r+   r   ;  s6    	

r   N)encodings.aliasesr   hashlibr   jsonr   rer   typingr   r   r   r	   r
   r   r   constantr   r   utilsr   r   r   r   r   r0   r   CoherenceMatchr   r   r*   r*   r*   r+   <module>   s    $ jC