o
    Dhc                     @   s0  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZ dd	lmZmZ dd
lmZ eeZede eeddeeeef  dedededef
ddZdeeeef  dedededee f
ddZedddedefddZdddedee dee dedededee fdd Zd!ed"ed#edefd$d%Zeedd&e
e dee d'ed(eded)ed*edee fd+d,Z d-ed.ededefd/d0Z!d!ed#edee fd1d2Z"d3eeeef  d4edefd5d6Z#d)eeeef  defd7d8Z$d*eeeef  defd9d:Z%dS );z/
Filters for date parsing and date validators.
    N)Counter)datetime)	lru_cache)mktime)MatchOptionalPatternUnionr      )
CACHE_SIZEMIN_DATE)	Extractorzminimum date setting: %s)maxsize
date_inputoutputformatearliestlatestreturnc                 C   s   | du rdS t | tr| }n0z%|dkr,tt| dd t| dd t| dd }nt| |}W n
 ty=   Y dS w |j|j  krK|jkr_n n| |   kr^| kr_d	S  td
|  dS )zEValidate a string w.r.t. the chosen outputformat and basic heuristicsNF%Y-%m-%d            
   Tzdate not valid: %s)	
isinstancer   intstrptime
ValueErroryear	timestampLOGGERdebug)r   r   r   r   
dateobject r#   T/home/air/segue/gemini/back/venv/lib/python3.10/site-packages/htmldate/validators.pyis_valid_date   s(   	
*r%   c              
   C   s`   t | |||r.ztd|  | |W S  ty- } ztd| | W Y d}~dS d}~ww dS )z5Robust validation and conversion for plausible dates.zcustom parse result: %sz$value error during conversion: %s %sN)r%   r    r!   strftimer   error)r   r   r   r   errr#   r#   r$   validate_and_convert<   s   r)      c              
   C   s~   t ddddd}z||  W n ttfy+ } ztd| | W Y d}~dS d}~ww t| tr5d| vr=td	|  dS d
S )z*Validate the output format in the settingsi  	   r
   r   z"wrong output format or type: %s %sNF%zmalformed output format: %sT)r   r&   	TypeErrorr   r    r'   r   str)r   r"   r(   r#   r#   r$   is_valid_formatL   s   r/   F)
incomplete
htmlstringpatternyearpatr0   c                C   s   t || }t|D ]F}||}|du r td| ||= q|d }	|s+t|	}
n|	d dkr3dnd}t||	 }
|j|
  krG|jksQn td| ||= q|S )	z5Filter the date patterns to find plausible years onlyNznot a year pattern: %sr
   r   91920zno potential year: %s)r   findalllistsearchr    r!   r   r   )r1   r2   r3   r   r   r0   occurrencesitem
year_match
lastdigitspotential_yearcenturyr#   r#   r$   plausible_year_filter]   s"   


r@   	referenceattemptoptionsc              
   C   s   zt tt||j }W n ty* } ztd|| | W  Y d}~S d}~ww |j	r;| r7t
| |} | S |} | S t| |} | S )z*Compare the date expression to a referencez-datetime.strptime exception: %s for string %sN)r   r   r   r   format	timetuple	Exceptionr    r!   originalminmax)rA   rB   rC   r   r(   r#   r#   r$   compare_values~   s   
rJ   	bestmatchoriginal_datecopyearmin_datemax_datec                 C   sj   | dur3d | d | d | d g}t|d||dr3|dks&t| d |kr3td	|| t|d|S dS )
z-Filter free text candidates in the YMD formatN-r
         r   r   r   r   zdate found for pattern "%s": %s)joinr%   r   r    r!   convert_date)rK   r2   rL   rM   r   rN   rO   pagedater#   r#   r$   filter_ymd_candidate   s   
rW   
datestringinputformatc                 C   s6   ||kr| S t | tr| |S t| |}||S )z.Parse date and return string in desired format)r   r   r&   r   )rX   rY   r   r"   r#   r#   r$   rU      s   


rU   c                 C   s<   | dkrt | }||j}t||j|j|jdr|S dS )z4Test if the extracted reference date can be returnedr   rS   N)r   fromtimestampr&   rD   r%   rH   rI   )rA   rC   r"   	convertedr#   r#   r$   check_extracted_reference   s   
r\   date_objectdefaultc                 C   sJ   t | tr| S t | tr#zt| W S  ty"   td|  Y |S w |S )zTCheck if the input is a usable datetime or ISO date string, return default otherwisezinvalid datetime string: %s)r   r   r.   fromisoformatr   r    warning)r]   r^   r#   r#   r$   check_date_input   s   

ra   c                 C   s
   t | tS )zEValidates the minimum date and/or defaults to earliest plausible date)ra   r   )rN   r#   r#   r$   get_min_date   s   
rb   c                 C   s   t | t S )zCValidates the maximum date and/or defaults to latest plausible date)ra   r   now)rO   r#   r#   r$   get_max_date   s   rd   )&__doc__loggingcollectionsr   r   	functoolsr   timer   typingr   r   r   r	   Counter_Typesettingsr   r   utilsr   	getLogger__name__r    r!   r.   boolr%   r)   r/   r@   r   rJ   rW   rU   r\   ra   rb   rd   r#   r#   r#   r$   <module>   s   
%

!
"