
    ;Si                     0   d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZmZmZmZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZmZmZmZ  G d
 de      Z de!de	e!e
e   f   fdZ"ededededededed ed ed ed ed ed iZ#de!dee   de!fdZ$y)z(
Simple HTML -> Telegram entity parser.
    )deque)escape)
HTMLParser)IterableTupleList   )add_surrogatedel_surrogatewithin_surrogate
strip_text)TLObject)MessageEntityBoldMessageEntityItalicMessageEntityCodeMessageEntityPreMessageEntityEmailMessageEntityUrlMessageEntityTextUrlMessageEntityMentionNameMessageEntityUnderlineMessageEntityStrikeMessageEntityBlockquoteMessageEntityCustomEmojiTypeMessageEntityc                   0     e Zd Z fdZd Zd Zd Z xZS )HTMLToTelegramParserc                     t         |           d| _        g | _        i | _        t               | _        t               | _        y )N )super__init__textentities_building_entitiesr   
_open_tags_open_tags_meta)self	__class__s    ]/var/www/OnlineNewsSite/project/venv/lib/python3.12/site-packages/telethon/extensions/html.pyr!   zHTMLToTelegramParser.__init__   s7    	"$'$w    c                     | j                   j                  |       | j                  j                  d        t        |      }d }i }|dk(  s|dk(  rt        }n1|dk(  s|dk(  rt
        }n|dk(  rt        }n|dk(  s|dk(  rt        }n |dk(  rt        }n|d	k(  r(	 | j                  d
   }	 |d   t        d      d  |_        n|d
k(  rt        }d|d<   n|dk(  r	 |d   }|j                  d      r|t        d      d  }t         }n0| j#                         |k(  rt$        }nt&        }t)        |      |d<   d }| j                  j+                          | j                  j                  |       n|dk(  r	 t-        |d         }t0        }||d<   |r;|| j                  vr, |dt        | j2                        dd|| j                  |<   y y y # t        $ r Y w xY w# t        $ r	 t        }Y _w xY w# t        $ r Y y w xY w# t        t.        f$ r Y y w xY w)Nstrongbemiudels
blockquotecodepreclassz	language-r   languageahrefzmailto:urlztg-emojizemoji-iddocument_idr   )offsetlength )r%   
appendleftr&   dictr   r   r   r   r   r$   lenr7   KeyErrorr   r   
startswithr   get_starttag_textr   r   r   popleftint
ValueErrorr   r"   )r'   tagattrs
EntityTypeargsr5   r:   emoji_ids           r)   handle_starttagz$HTMLToTelegramParser.handle_starttag   s3   ""3'''-U
(?cSj*JD[C3J,JCZ/JE\SCZ,JL 0JF]/ --e4#(>#k2B2C#DCL
 E\)J!DCZFm ~~i(#i./*/
))+s2!1J!5J"/"4DKC  ((*  ++C0JuZ01 2J"*D#T%<%<<+5 ,499~, 	,D##C( =:C    /.
/  " j) sT   G +G G, 0G; 	GG GG G)(G),	G87G8;HHc                 0   t        | j                        dkD  r| j                  d   nd}|dk(  r| j                  d   }|r|}| j                  j	                         D ]#  \  }}|xj
                  t        |      z  c_        % | xj                  |z  c_        y )Nr   r   r8   )rA   r%   r&   r$   itemsr=   r"   )r'   r"   previous_tagr:   rH   entitys         r)   handle_dataz HTMLToTelegramParser.handle_dataa   s    -0-AA-Etq)23&&q)C2288: 	'KCMMSY&M	' 			T	r*   c                     	 | j                   j                          | j                  j                          | j                  j                  |d       }|r| j                  j                  |       y y # t        $ r Y Fw xY wN)r%   rE   r&   
IndexErrorr$   popr#   append)r'   rH   rQ   s      r)   handle_endtagz"HTMLToTelegramParser.handle_endtagm   sq    	OO##%  ((* ((,,S$7MM  (   		s   4A1 1	A=<A=)__name__
__module____qualname__r!   rM   rR   rX   __classcell__)r(   s   @r)   r   r      s    'BH
)r*   r   htmlreturnc                 2   | s| g fS t               }|j                  t        |              t        |j                  |j
                        }|j
                  j                          |j
                  j                  d        t        |      |j
                  fS )a  
    Parses the given HTML message and returns its stripped representation
    plus a list of the MessageEntity's that were found.

    :param html: the message with HTML to be parsed.
    :return: a tuple consisting of (clean message, [message entities]).
    c                     | j                   S rT   )r<   )rQ   s    r)   <lambda>zparse.<locals>.<lambda>   s
    FMM r*   key)	r   feedr
   r   r"   r#   reversesortr   )r]   parserr"   s      r)   parserh   x   sw     Rx!#F
KKd#$fkk6??3D
OO
OO9://r*   )z<strong>z	</strong>)z<em>z</em>)z<code>z</code>)z<u>z</u>)z<del>z</del>)z<blockquote>z</blockquote>c                 <    dj                  | j                        dfS )Nz-<pre>
    <code class='language-{}'>
        z{}
    </code>
</pre>)formatr7   e_s     r)   ra   ra      s!    	6!**% ($ r*   c                 (    dj                  |      dfS )Nz<a href="mailto:{}"></a>rj   rm   ts     r)   ra   ra      s    &<&C&CA&F%O r*   c                 (    dj                  |      dfS Nz<a href="{}">ro   rp   rq   s     r)   ra   ra      s    O$:$:1$=v#F r*   c                 N    dj                  t        | j                              dfS rt   )rj   r   r:   rk   s     r)   ra   ra      s    (>(>vaee}(Mv'V r*   c                 <    dj                  | j                        dfS )Nz<a href="tg://user?id={}">ro   )rj   user_idrk   s     r)   ra   ra      s    ,H,O,OPQPYPY,Z\b+c r*   c                 <    dj                  | j                        dfS )Nz<tg-emoji emoji-id="{}">z</tg-emoji>)rj   r;   rk   s     r)   ra   ra      s    ,F,M,Mamm,\^k+l r*   r"   r#   c                    | s| S |st        |       S t        |t              r|f}t        |       } g }t	        |      D ]  \  }}|j
                  }|j
                  |j                  z   }t        j                  t        |      d      }|sMt        |      r ||| ||       }|j                  |||d   f       |j                  || |d   f        |j                  d        t        |       }|rT|j                         \  }	}
}t        | |	      r|	dz  }	t        | |	      r| d|	 |z   t        | |	|       z   | |d z   } |	}|rTt        | d|       | |d z   } t!        |       S )a=  
    Performs the reverse operation to .parse(), effectively returning HTML
    given a normal text and its MessageEntity's.

    :param text: the text to be reconverted into HTML.
    :param entities: the MessageEntity's applied to the text.
    :return: a HTML representation of the combination of both inputs.
    Nr      c                     | d   | d   fS )Nr   rz   r>   )rr   s    r)   ra   zunparse.<locals>.<lambda>   s    !A$! r*   rb   )r   
isinstancer   r
   	enumerater<   r=   ENTITY_TO_FORMATTERgettypecallablerW   rf   rA   rV   r   r   )r"   r#   	insert_atr/   rQ   r2   rl   	delimiternext_escape_boundatrm   whats               r)   unparser      s    d|(H%;DIx( 4	6MMMMFMM)'++DL$?		"%fd1Qi8	aIaL12a!Yq\234 NN-N.D	
mmoAttR(!GB tR( CRy4&b1B)C"DDtL]L^G__  $))*+d3D3E.FFDr*   N)%__doc__collectionsr   r]   r   html.parserr   typingr   r   r   helpersr
   r   r   r   tlr   tl.typesr   r   r   r   r   r   r   r   r   r   r   r   r   r   strrh   r~   r   r>   r*   r)   <module>r      s      " ( ( P P    a): a)H0 0c4(9#::; 0( 0*,O,>  OFVcl% ,*# **;!< * *r*   