o
    Dh                     @   sz   d dl mZ d dl mZmZmZ d dlZd dlZd dlZd dlZe	dej
Zdd Zdd Zd	d
 Zdd Zdd ZdS )    )absolute_import)divisionprint_functionunicode_literalsNz\s+c                 C   s   t t| S )z
    Translates multiple whitespace into single space character.
    If there is at least one new line character chunk is replaced
    by single LF (Unix new line) character.
    )MULTIPLE_WHITESPACE_PATTERNsub_replace_whitespace)text r
   N/home/air/segue/gemini/back/venv/lib/python3.10/site-packages/justext/utils.pynormalize_whitespace   s   r   c                 C   s    |   }d|v sd|v rdS dS )zBNormalize all spacing characters that aren't a newline to a space.
 )group)matchr	   r
   r
   r   r      s   r   c                 C   s   |  p|   S )zw
    Returns `True` if string contains only white-space characters
    or is empty. Otherwise `False` is returned.
    )isspace)stringr
   r
   r   is_blank   s   r   c                  C   s`   t jtjd j} t j| d} g }t | D ]}t j|\}}|dkr+|	| qt
|S )z,Returns a collection of built-in stop-lists.justext	stoplistsz.txt)ospathdirnamesysmodules__file__joinlistdirsplitextappend	frozenset)path_to_stoplistsstoplist_namesfilenamename	extensionr
   r
   r   get_stoplists%   s   
r'   c                 C   sT   t jdd|  }ztd|}W n ty   td|  w tdd | D S )zAReturns an built-in stop-list for the language as a set of words.r   z%s.txtr   zStoplist for language '%s' is missing. Please use function 'get_stoplists' for complete list of stoplists and feel free to contribute by your own stoplist.c                 s   s    | ]
}| d  V  qdS )utf8N)decodelower).0wr
   r
   r   	<genexpr>?   s    zget_stoplist.<locals>.<genexpr>)	r   r   r   pkgutilget_dataIOError
ValueErrorr!   
splitlines)language	file_path	stopwordsr
   r
   r   get_stoplist3   s   r6   )
__future__r   r   r   r   rer   r   r.   compileUNICODEr   r   r   r   r'   r6   r
   r
   r
   r   <module>   s   	