o
    Dh                     @   sR   d dl mZ d dl mZmZmZ d dlZddlmZ edZ	G dd de
ZdS )	    )absolute_import)divisionprint_functionunicode_literalsN   )normalize_whitespacez\bh\d\bc                   @   sx   e Zd ZdZdd Zedd Zedd Zedd	 Zd
d Z	edd Z
dd Zdd Zdd Zdd Zdd ZdS )	Paragraphz.Object representing one block of text in HTML.c                 C   s,   |j | _|j| _g | _d| _d| _d| _d S )Nr    )domdom_pathxpath
text_nodeschars_count_in_links
tags_count
class_type)selfpath r   R/home/air/segue/gemini/back/venv/lib/python3.10/site-packages/justext/paragraph.py__init__   s   
zParagraph.__init__c                 C   s   t t| jS N)boolHEADINGS_PATTERNsearchr   r   r   r   r   
is_heading   s   zParagraph.is_headingc                 C   s
   | j dkS )Ngood)r   r   r   r   r   is_boilerplate   s   
zParagraph.is_boilerplatec                 C   s   d | j}t| S )Nr	   )joinr   r   stripr   textr   r   r   r!       s   zParagraph.textc                 C   
   t | jS r   )lenr!   r   r   r   r   __len__%      
zParagraph.__len__c                 C   s   t | j S r   )r#   r!   splitr   r   r   r   words_count(   s   zParagraph.words_countc                 C   r"   r   )r   r   r   r   r   r   contains_text,   r%   zParagraph.contains_textc                 C   s   t |}| j| |S r   )r   r   appendr    r   r   r   append_text/   s   zParagraph.append_textc                    s   t  fdd| j D S )Nc                 3   s    | ]	}|   v V  qd S r   )lower).0word	stopwordsr   r   	<genexpr>5   s    z,Paragraph.stopwords_count.<locals>.<genexpr>)sumr!   r&   r   r/   r   r.   r   stopwords_count4   s   zParagraph.stopwords_countc                 C   s   | j dkrdS | || j  S Nr   )r'   r3   r2   r   r   r   stopwords_density7   s   
zParagraph.stopwords_densityc                 C   s    t | j}|dkrdS | j| S r4   )r#   r!   r   )r   text_lengthr   r   r   links_density=   s   

zParagraph.links_densityN)__name__
__module____qualname____doc__r   propertyr   r   r!   r$   r'   r(   r*   r3   r5   r7   r   r   r   r   r      s"    



r   )
__future__r   r   r   r   reutilsr   compiler   objectr   r   r   r   r   <module>   s   
