o
    Eb9                     @   s   d Z g dZddlZddlmZmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZ eg d	dd
fddZd#ddZeg d	dfddZ		d$ddZg d	dfddZg d	ddfddZd%ddZd&ddZd&dd Zd&d!d"ZdS )'zB
Additional statistics functions with support for masked arrays.

)
compare_medians_mshdquantileshdmedianhdquantiles_sdidealfourthsmedian_cihsmjcimquantiles_cimjrshtrimmed_mean_ci    N)float_int_ndarray)MaskedArray   )_mstats_basic)normbetatbinom)g      ?      ?g      ?Fc                 C   s   dd }t j| dtd} tj|ddd}|du s| jdkr$|| ||}n| jdkr0td	| j t ||| ||}t j|dd
S )a  
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    See Also
    --------
    hdquantiles_sd

    c                 S   sH  t t |  t}|j}t dt|ft	}|dk r*t j
|_|r&|S |d S t |d t| }tj}t|D ]:\}}	|||d |	 |d d|	  }
|
dd |
dd  }t ||}||d|f< t ||| d |d|f< q<|d |d|dkf< |d |d|dkf< |rt j
 |d|dkf< |d|dkf< |S |d S )zGComputes the HD quantiles for a 1D array. Returns nan for invalid data.   r   r   N)npsqueezesort
compressedviewr   sizeemptylenr   nanflatarangefloatr   cdf	enumeratedot)dataprobvarxsortednZhdvbetacdfip_wwZhd_mean r3   </usr/lib/python3/dist-packages/scipy/stats/_mstats_extras.py_hd_1D;   s,    "zhdquantiles.<locals>._hd_1DFcopydtyper   r7   ZndminNr   DArray 'data' must be at most two dimensional, but got data.ndim = %dr7   )maarrayr   r   ndim
ValueErrorapply_along_axisfix_invalid)r(   r)   axisr*   r5   r0   resultr3   r3   r4   r      s   
r   r   c                 C   s   t | dg||d}| S )a9  
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdmedian : MaskedArray
        The median values.  If ``var=True``, the variance is returned inside
        the masked array.  E.g. for a 1-D array the shape change from (1,) to
        (2,).

    r   )rB   r*   )r   r   )r(   rB   r*   rC   r3   r3   r4   r   g   s   r   c                 C   sv   dd }t j| dtd} tj|ddd}|du r|| |}n| jdkr*td	| j t ||| |}t j|dd
 S )a  
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    c           
         s   t |  t}t t|t}|dk rt j|_t |t	|d  }t
j}t|D ]>\}}|||d | |d d|  }|dd |dd   t j fddt|D td}	t |	 |d  ||< q-|S )z%Computes the std error for 1D arrays.r   r   Nr   c                    s@   g | ]} d | d |   |d  |d d    qS )Nr   r3   ).0kr2   r+   r3   r4   
<listcomp>   s    8z4hdquantiles_sd.<locals>._hdsd_1D.<locals>.<listcomp>r8   )r   r   r   r    r   r   r!   r"   r#   r$   r   r%   r&   Zfromiterrangesqrtr*   )
r(   r)   r,   ZhdsdZvvr.   r/   r0   r1   Zmx_r3   rF   r4   _hdsd_1D   s"    z hdquantiles_sd.<locals>._hdsd_1DFr6   r   r9   Nr   r:   r;   )	r<   r=   r   r   r>   r?   r@   rA   Zravel)r(   r)   rB   rK   r0   rC   r3   r3   r4   r      s   
r   皙?rM   TT皙?c           
      C   s|   t j| dd} tj| |||d}||}tj| |||d}||d }td|d  |}	t	||	|  ||	|  fS )a  
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    Fr;   )limits	inclusiverB   r          @)
r<   r=   mstatsZtrimrZmeanZtrimmed_stdecountr   ppfr   )
r(   rP   rQ   alpharB   ZtrimmedZtmeanZtstdeZdfZtppfr3   r3   r4   r
      s   *
r
   c                 C   s`   dd }t j| dd} | jdkrtd| j tj|ddd}|d	u r(|| |S t ||| |S )
a  
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    c                 S   s   t |  } | j}t || d t}tj}t 	t
|t}t jd|d td| }|d|  }t|D ]1\}}	|||	d ||	 |||	d ||	  }
t |
| }t |
| d }t ||d  ||< q7|S )Nr   r   rH   g      ?r   )r   r   r   r   r=   Zastyper   r   r%   r   r    r   r#   r&   r'   rJ   )r(   r0   r,   r)   r.   Zmjxyr/   mWZC1ZC2r3   r3   r4   _mjci_1D  s   (zmjci.<locals>._mjci_1DFr;   r   r:   r   r9   N)r<   r=   r>   r?   r   r@   )r(   r)   rB   r[   r0   r3   r3   r4   r      s   

r   c                 C   sZ   t |d| }td|d  }tj| |dd|d}t| ||d}|||  |||  fS )a  
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    Returns
    -------
    ci_lower : ndarray
        The lower boundaries of the confidence interval.  Of the same length as
        `prob`.
    ci_upper : ndarray
        The upper boundaries of the confidence interval.  Of the same length as
        `prob`.

    r   rR   r   )ZalphapZbetaprB   rB   )minr   rU   rS   Z
mquantilesr   )r(   r)   rV   rB   zZxqZsmjr3   r3   r4   r     s
   r   c                 C   sX   dd }t j| dd} |du r|| |}|S | jdkr"td| j t ||| |}|S )aA  
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval.

    c           	      S   s>  t |  } t| }t|d| }tt|d |d}t|| |dt|d |d }|d| k rK|d8 }t|| |dt|d |d }t|| d |dt||d }|d | ||  }|| | t	||d|  |   }|| |  d| | |d    || || d   d| | ||    f}|S )Nr   rR   r   r   )
r   r   r   r    r]   intr   Z_ppfr%   r$   )	r(   rV   r,   rE   ZgkZgkkIZlambdZlimsr3   r3   r4   _cihs_1DW  s   $$$$&zmedian_cihs.<locals>._cihs_1DFr;   Nr   r:   )r<   r=   r>   r?   r@   )r(   rV   rB   ra   rC   r3   r3   r4   r   @  s   

r   c                 C   sn   t j| |dt j||d}}tj| |dtj||d}}t|| t |d |d   }dt| S )a+  
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    r\   r   r   )	r<   ZmedianrS   Zstde_medianr   absrJ   r   r%   )Zgroup_1Zgroup_2rB   Zmed_1Zmed_2Zstd_1Zstd_2rZ   r3   r3   r4   r   s  s   $r   c                 C   s:   dd }t j| |dt} |du r|| S t ||| S )aC  
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    c                 S   s   |   }t|}|dk rtjtjgS t|d d d\}}t|}d| ||d   |||   }|| }d| ||  |||d    }||gS )N   g      @g?r   )r   r    r   r!   divmodr_   )r(   rW   r,   jhZqlorE   Zqupr3   r3   r4   _idf  s     zidealfourths.<locals>._idfr\   N)r<   r   r   r   r@   )r(   rB   rg   r3   r3   r4   r     s
   r   c                 C   s   t j| dd} |du r| }ntj|ddd}| jdkrtd|  }t| dd}d|d	 |d
   |d  }| dddf |dddf | kd
}| dddf |dddf | k d
}|| d| |  S )a  
    Evaluates Rosenblatt's shifted histogram estimators for each data point.

    Rosenblatt's estimator is a centered finite-difference approximation to the
    derivative of the empirical cumulative distribution function.

    Parameters
    ----------
    data : sequence
        Input data, should be 1-D. Masked values are ignored.
    points : sequence or None, optional
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    Fr;   Nr   r9   z#The input array should be 1D only !r\   g333333?r   r   rM   rR   )r<   r=   r   r>   AttributeErrorrT   r   sum)r(   Zpointsr,   rrf   ZnhiZnlor3   r3   r4   r	     s   
**r	   )r   F)rL   rN   rO   N)rO   N)N)__doc____all__Znumpyr   r   r   r   Znumpy.mar<   r    r   rS   Zscipy.stats.distributionsr   r   r   r   listr   r   r   r
   r   r   r   r   r   r	   r3   r3   r3   r4   <module>   s(    
K=
3-
"
3
!(