kNNpy.kNN_3D

   1import numpy as np
   2import time
   3import sys
   4import scipy.spatial
   5import os
   6import scipy
   7
   8
   9# Ensure module path is correctly added for relative imports
  10module_path = os.path.abspath(os.path.join(''))
  11'''
  12@private
  13'''
  14if module_path not in sys.path:
  15    sys.path.append(module_path)
  16
  17# Import necessary helper functions
  18from kNNpy.HelperFunctions import calc_kNN_CDF
  19from kNNpy.HelperFunctions import CIC_3D_Interp
  20from kNNpy.HelperFunctions import smoothing_3D
  21from kNNpy.HelperFunctions import create_smoothed_field_dict_3D
  22
  23####################################################################################################
  24
  25#--------------------------------------  Function Definitions  -------------------------------------
  26
  27def TracerAuto3D(boxsize, kList, BinsRad, QueryPos, TracerPos, ReturnNNdist=False,Verbose=False):
  28    
  29    r'''
  30    Computes the $k$NN-CDFs in 3D coordinates (Banerjee & Abel (2021)[^1]) of the provided discrete tracer set (`TracerPos`), 
  31    evaluated at the provided radial distance scales `BinsRad`, for all $k$ in `kList`. Each $k$NN-CDF measures the probability
  32    $P_{\geq k}(r)$ of finding at least $k$ tracers in a randomly placed sphere of radius $r$. The $k$NN-CDFs quantify the spatial 
  33    clustering of the tracers.
  34    		
  35    Parameters
  36    ----------
  37    boxsize:  float
  38        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
  39    kList : list of ints
  40        the list of nearest neighbours to calculate the distances to. For example, if ``kList = [1, 2, 4]``, the first, second and 
  41        fourth-nearest neighbour distributions will be computed.
  42    BinsRad : list of numpy float array
  43        list of radial distance arrays (in Mpc/h) for each nearest neighbour. The $i^{th}$ element of the 
  44        list should contain a numpy array of the desired distances for the nearest neighbour specified by the $i^{th}$ element of `kList`.
  45    QueryPos : numpy float array of shape ``(n_query, 3)``
  46        array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
  47        Please ensure $0<x,y,z<boxsize$.
  48    TracerPos : numpy float array of shape ``(n_tracer, 3)``
  49        array of 3D locations for the discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
  50        Please ensure $0<x,y,z<boxsize$.
  51    ReturnNNdist : bool, optional
  52        if set to ``True``, the sorted arrays of NN distances will be returned along with the $k$NN-CDFs, by default ``False``.
  53    Verbose : bool, optional
  54        if set to ``True``, the time taken to complete each step of the calculation will be printed, by default ``False``.
  55
  56    Returns
  57    -------
  58    kNN_results: tuple of lists or list of numpy float arrays
  59        results of the kNN computation. If `ReturnNNdist` is ``True``, returns the tuple ``(p_gtr_k_list, vol)`` where `p_gtr_k_list` 
  60        is the list of auto kNN-CDFs, and `vol` is the list of NN distances. If `ReturnNNdist` is ``False``, returns `p_gtr_k_list` only
  61        
  62    Raises
  63    ------
  64    ValueError
  65        if the given query points are not on a three-dimensional grid.
  66    ValueError
  67        if x,y, or z coordinate of any of the query points is not in ``[0, boxsize)``.
  68    ValueError
  69        if x,y, or z coordinate of any of the tracer points is not in ``[0, boxsize)``..
  70    ValueError
  71        if the given tracer points are not on a three-dimensional grid.
  72
  73    References
  74    ----------
  75    [^1]: Arka Banerjee, Tom Abel, Nearest neighbour distributions: New statistical measures for cosmological clustering, 
  76    [Monthly Notices of the Royal Astronomical Society](https://doi.org/10.1093/mnras/staa3604), Volume 500, Issue 4, February 2021, Pages 5479–5499
  77        
  78    '''
  79    
  80    #-----------------------------------------------------------------------------------------------
  81
  82    if Verbose: total_start_time = time.perf_counter()
  83
  84    #-----------------------------------------------------------------------------------------------
  85        
  86    #Step 0: Check all inputs are consistent with the function requirement
  87
  88    if Verbose: print('Checking inputs ...')
  89
  90    if QueryPos.shape[1]!=3: 
  91        raise ValueError('Incorrect spatial dimension for query points: array containing the query point positions must be of shape (n_query, 3), ' \
  92        'where n_query is the number of query points.')
  93
  94    if np.any((QueryPos[:, 0] < 0) | (QueryPos[:, 0] >= boxsize)):
  95        raise ValueError('Invalid query point position(s): please ensure 0 < x < boxsize.')
  96
  97    if np.any((QueryPos[:, 1] < 0) | (QueryPos[:, 1] >= boxsize)):
  98        raise ValueError('Invalid query point position(s): please ensure 0 < y < boxsize.')
  99
 100    if np.any((QueryPos[:, 2] < 0) | (QueryPos[:, 2] >= boxsize)):
 101        raise ValueError('Invalid query point position(s): please ensure 0 < z < boxsize.')
 102
 103    if np.any((TracerPos[:, 0] < 0) | (TracerPos[:, 0] >= boxsize)):
 104        raise ValueError('Invalid tracer point position(s): please ensure 0 < x < boxsize.')
 105
 106    if np.any((TracerPos[:, 1]< 0) | (TracerPos[:, 1]>= boxsize)):
 107        raise ValueError('Invalid tracer point position(s): please ensure 0 < y < boxsize.')
 108
 109    if np.any((TracerPos[:, 2]< 0) | (TracerPos[:, 2]>= boxsize)):
 110        raise ValueError('Invalid tracer point position(s): please ensure 0 < z < boxsize.')
 111
 112    if TracerPos.shape[1]!=3: 
 113        raise ValueError('Incorrect spatial dimension for tracers: array containing the tracer positions must be of shape (n_tracer, 3), ' \
 114        'where n_tracer is the number of tracers.')
 115
 116    if Verbose: print('\tdone.')
 117
 118    #-----------------------------------------------------------------------------------------------
 119        
 120    #Building the tree
 121    if Verbose: 
 122        start_time = time.perf_counter()
 123        print('\nbuilding the tree ...')
 124    xtree    = scipy.spatial.cKDTree(TracerPos, boxsize=boxsize)
 125    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 126
 127    #-----------------------------------------------------------------------------------------------
 128    
 129    #Calculating the NN distances
 130    if Verbose: 
 131        start_time = time.perf_counter()
 132        print('\ncomputing the tracer NN distances ...')
 133    dists, _= xtree.query(QueryPos, k=max(kList), workers=-1)
 134    vol = dists[:, np.array(kList)-1]
 135    
 136    
 137    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 138
 139    #-----------------------------------------------------------------------------------------------
 140    
 141    #Calculating the kNN-CDFs
 142    if Verbose: 
 143        start_time = time.perf_counter()
 144        print('\ncomputing the tracer auto-CDFs P_{>=k} ...')
 145    p_gtr_k_list = calc_kNN_CDF(vol, BinsRad)
 146    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 147
 148    #-----------------------------------------------------------------------------------------------
 149
 150    #Collecting the results
 151    if ReturnNNdist:
 152        kNN_results = (p_gtr_k_list, vol)
 153    else:
 154        kNN_results = (p_gtr_k_list, None)
 155
 156    #-----------------------------------------------------------------------------------------------
 157
 158    if Verbose:
 159        print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
 160    
 161    return kNN_results
 162
 163####################################################################################################
 164
 165def TracerTracerCross3D(boxsize, kA_kB_list, BinsRad, QueryPos, TracerPos_A, TracerPos_B, Verbose=False):
 166    
 167    r'''
 168    Returns the probabilities $P_{\geq k_A}$, $P_{\geq k_B}$ and $P_{\geq k_A, \geq k_B}$ for ($k_A$, $k_B$) in `kA_kB_list` 
 169    that quantify the extent of the spatial cross-correlation between the given sets of discrete tracers, `TracerPos_A`, `TracerPos_B`.
 170    	
 171    1. $P_{\geq k_A}(r)$: 
 172    	the $k_A$NN-CDF of the first set of discrete tracers, evaluated at radial distance scale $r$
 173    		
 174    2. $P_{\geq k_B}(\theta)$: 
 175    	the $k_B$NN-CDF of the second set of discrete tracers, evaluated at radial distance scale $r$
 176    		
 177    3.  $P_{\geq k_A, \geq k_B}(\theta)$:
 178    	the joint probability of finding at least $k_A$ set A tracers and at least $k_B$ set B tracers within a sphere of radius $r$
 179    		
 180    The excess cross-correlation (Banerjee & Abel 2023)[^1] can be computed trivially from the quatities (see the `kNNpy.HelperFunctions.kNN_excess_cross_corr()` method to do this)
 181    	
 182    $$\psi_{k_A, k_B} = P_{\geq k_A, \geq k_B}/(P_{\geq k_A} \times P_{\geq k_B})$$
 183    		
 184    Parameters
 185    ----------
 186    boxszie:  float
 187        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
 188    kA_kB_list : list of int tuples
 189        nearest-neighbour combinations for which the cross-correlations need to be computed (see notes for more details)
 190    BinsRad : list of numpy float array
 191        list of radial distance scale arrays (in Mpc/h) for each nearest neighbour combination in `kA_kB_list`. The $i^{th}$ element of the 
 192        list should contain a numpy array of the desired distances for the $i^{th}$ nearest neighbour combination.
 193    QueryPos : numpy float array of shape ``(n_query, 3)``
 194        array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 195        Please ensure $0<x,y,z<boxsize$.
 196    TracerPos_A : numpy float array of shape ``(n_tracer, 3)``
 197        array of 3D locations for the first set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 198        Please ensure $0<x,y,z<boxsize$.
 199    TracerPos_B : numpy float array of shape ``(n_tracer, 3)``
 200        array of 3D locations for the second set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 201        Please ensure $0<x,y,z<boxsize$.
 202    Verbose : bool, optional
 203        if set to ``True``, the time taken to complete each step of the calculation will be printed, by default ``False``.
 204
 205    Returns
 206    -------
 207    p_gtr_kA_list: list of numpy float arrays
 208        list of auto kNN-CDFs of the first set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_A^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
 209        
 210    p_gtr_kB_list: list of numpy float arrays
 211        list of auto kNN-CDFs of the second set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_B^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
 212    
 213    p_gtr_kA_kB_list: list of numpy float arrays
 214        list of joint tracer-tracer nearest neighbour distributions evaluated at the desired distance bins. The $i^{th}$ element represents the joint {$k_A^i$, $k_B^i$}NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
 215        
 216    Raises
 217    ------
 218    ValueError
 219        if the lengths of `BinsRad` and `kA_kB_list` do not match.
 220    ValueError
 221        if the given query points are not on a three-dimensional grid.
 222    ValueError
 223        if x,y, or z coordinates of any of the query points is not in ``[0, boxsize)``.
 224    ValueError
 225        if x,y, or z coordinates of any of the tracer points is not in ``[0, boxsize)``.
 226    ValueError
 227        if any of the given tracer points are not on a three-dimensional grid.
 228
 229    Notes
 230    -----
 231    The parameter `kA_kB_list` should provide the desired combinations of NN indices for the two tracers sets being cross-correlated. For example, if you wish to compute the joint {1,1}, {1,2} and {2,1}NN-CDFs, then set
 232            
 233        kA_kB_list = [(1,1), (1,2), (2,1)]
 234
 235    Please note that if the number density of one set of tracers is significantly smaller than the other, the joint kNN-CDFs approach the auto kNN-CDFs of the less dense tracer set. In this scenario, it may be better to treat the denser tracer set as a continuous field and use the `TracerFieldCross2DA()` method instead to conduct the cross-correlation analysis  (see Gupta & Banerjee (2024)[^2] for a detailed discussion).
 236
 237    References
 238    ----------
 239    [^1]: Arka Banerjee, Tom Abel, Cosmological cross-correlations and nearest neighbour distributions, [Monthly Notices of the Royal Astronomical Society](https://doi.org/10.1093/mnras/stab961), Volume 504, Issue 2, June 2021, Pages 2911–2923
 240        
 241    '''
 242    
 243    #-----------------------------------------------------------------------------------------------
 244
 245    if Verbose: total_start_time = time.perf_counter()
 246
 247    #-----------------------------------------------------------------------------------------------
 248        
 249    #Step 0: Check all inputs are consistent with the function requirement
 250
 251    if Verbose: print('Checking inputs ...')
 252
 253    if len(BinsRad)!=len(kA_kB_list): 
 254        raise ValueError("length of 'BinsRad' must match length of 'kA_kB_list'.")
 255
 256    if QueryPos.shape[1]!=3: 
 257        raise ValueError('Incorrect spatial dimension for query points: array containing the query point positions must be of shape (n_query,3),' \
 258        ' where n_query is the number of query points.')
 259    
 260    if np.any((QueryPos[:, 0] < 0) | (QueryPos[:, 0] >= boxsize)):
 261        raise ValueError('Invalid query point position(s): please ensure 0 < x < boxsize.')
 262
 263    if np.any((QueryPos[:, 1] < 0) | (QueryPos[:, 1] >= boxsize)):
 264        raise ValueError('Invalid query point position(s): please ensure 0 < y < boxsize.')
 265
 266    if np.any((QueryPos[:, 2] < 0 ) | (QueryPos[:, 2] >= boxsize)):
 267        raise ValueError('Invalid query point position(s): please ensure 0 < z < boxsize.')
 268
 269    if np.any((TracerPos_A[:, 0] < 0) | (TracerPos_A[:, 0] >= boxsize)):
 270        raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < x < boxsize.')
 271
 272    if np.any((TracerPos_A[:, 1]< 0) | (TracerPos_A[:, 1]>= boxsize)):
 273        raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < y < boxsize.')
 274
 275    if np.any((TracerPos_A[:, 2]< 0) | (TracerPos_A[:, 2]>= boxsize)):
 276        raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < z < boxsize.')
 277
 278    if np.any((TracerPos_B[:, 0] < 0) | (TracerPos_B[:, 0] >= boxsize)):
 279        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < x < boxsize.')
 280
 281    if np.any((TracerPos_B[:, 1]< 0) | (TracerPos_B[:, 1]>= boxsize)):
 282        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < y < boxsize.')
 283
 284    if np.any((TracerPos_B[:, 2]< 0) | (TracerPos_B[:, 2]>= boxsize)):
 285        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < z < boxsize.')
 286
 287    
 288    if TracerPos_A.shape[1]!=3 or TracerPos_B.shape[1]!=3: 
 289        raise ValueError('Incorrect spatial dimension for tracers: array containing the tracer positions must be of shape (n_tracer, 3),' \
 290        ' where n_tracer is the number of tracers.')
 291
 292    if Verbose: print('\tdone.')
 293
 294    #-----------------------------------------------------------------------------------------------
 295
 296    #Figuring out the NN indices from the kA_kB_list
 297    kList_A, kList_B = [], []
 298    for kA, kB in kA_kB_list:
 299        kList_A.append(kA)
 300        kList_B.append(kB)
 301    kMax_A, kMax_B = max(kList_A), max(kList_B)
 302
 303    #-----------------------------------------------------------------------------------------------
 304        
 305    #Building the trees
 306    if Verbose: 
 307        start_time = time.perf_counter()
 308        print('\nbuilding the trees ...')
 309        start_time_A = time.perf_counter()
 310    xtree_A = scipy.spatial.cKDTree(TracerPos_A, boxsize=boxsize)
 311    if Verbose: 
 312        print('\tfirst set of tracers done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_A))
 313        start_time_B = time.perf_counter()
 314    xtree_B = scipy.spatial.cKDTree(TracerPos_B, boxsize=boxsize)
 315    if Verbose: 
 316        print('\tsecond set of tracers done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_B))
 317        print('\tcombined time: {:.2e} s.'.format(time.perf_counter()-start_time))
 318
 319    #-----------------------------------------------------------------------------------------------
 320    
 321    #Calculating the NN distances
 322    if Verbose: 
 323        start_time = time.perf_counter()
 324        print('\ncomputing the tracer NN distances ...')
 325    vol_A, _ = xtree_A.query(QueryPos, k=kMax_A)
 326    vol_B, _ = xtree_B.query(QueryPos, k=kMax_B)
 327    req_vol_A = vol_A[:, np.array(kList_A)-1]
 328    req_vol_B = vol_B[:, np.array(kList_B)-1]
 329    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 330
 331    #-----------------------------------------------------------------------------------------------
 332    
 333    #Calculating the auto kNN-CDFs
 334    if Verbose: 
 335        start_time = time.perf_counter()
 336        print('\ncomputing the tracer auto-CDFs P_{>=kA}, P_{>=kB} ...')
 337    p_gtr_kA_list = calc_kNN_CDF(req_vol_A, BinsRad)
 338    p_gtr_kB_list = calc_kNN_CDF(req_vol_B, BinsRad)
 339    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 340
 341    #-----------------------------------------------------------------------------------------------
 342
 343    #Calculating the joint kNN-CDFs
 344    if Verbose: 
 345        start_time = time.perf_counter()
 346        print('\ncomputing the joint-CDFs P_{>=kA, >=kB} ...')
 347    joint_vol = np.zeros((vol_A.shape[0], len(kA_kB_list)))
 348    for i, _ in enumerate(kA_kB_list):
 349        joint_vol[:, i] = np.maximum(req_vol_A[:, i], req_vol_B[:, i])
 350    p_gtr_kA_kB_list = calc_kNN_CDF(joint_vol, BinsRad)
 351    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 352
 353    #-----------------------------------------------------------------------------------------------
 354
 355    if Verbose:
 356        print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
 357    
 358    return p_gtr_kA_list, p_gtr_kB_list, p_gtr_kA_kB_list
 359
 360####################################################################################################
 361
 362def TracerTracerCross3D_DataVector(boxsize, kA_kB_list, BinsRad, QueryPos, TracerPos_A_dict, TracerPos_B, Verbose=False ):
 363    r'''
 364    Returns the probabilities $P_{\geq k_{A_i}}$, $P_{\geq k_B}$ and $P_{\geq k_{A_i}, \geq k_B}$ for ($k_{A_i}$, $k_B$) in `kA_kB_list` for various 
 365    realizations of Tracer A, while keeping the set Tracer B constant. Refer to Notes to understand why this might be useful. These quantify
 366    the extent of the spatial cross-correlation between the given sets of discrete tracers, the $i^{\text{th}}$ realization of `TracerPos_A`, `TracerPos_B`.
 367    We do not vary the 'kA_kB_list' as a function of the realizations of Tracer A.
 368    	
 369    1. $P_{\geq k_{A_i}}(r)$: 
 370    	the $k_A$NN-CDF of the $i^{\text{th}}$ realization of the first set of discrete tracers, evaluated at radial distance scale $r$
 371    		
 372    2. $P_{\geq k_B}(\theta)$: 
 373    	the $k_B$NN-CDF of the second set of discrete tracers, evaluated at radial distance scale $r$
 374    		
 375    3.  $P_{\geq k_{A_i}, \geq k_B}(\theta)$:
 376    	the joint probability of finding at least $k_A$ set A tracers and at least $k_B$ set B tracers within a sphere of radius $r$, for the
 377        $i^{\text{th}}$ realization of Tracer A
 378    		
 379    The excess cross-correlation (Banerjee & Abel 2023)[^1] can be computed trivially from the quatities (see the `kNNpy.HelperFunctions.kNN_excess_cross_corr()` method to do this)
 380    	
 381    $$\psi_{k_A, k_B} = P_{\geq k_A, \geq k_B}/(P_{\geq k_A} \times P_{\geq k_B})$$
 382    		
 383    Parameters
 384    ----------
 385    boxsize:  float
 386        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
 387    kA_kB_list : list of int tuples
 388        nearest-neighbour combinations for which the cross-correlations need to be computed (see notes for more details)
 389    BinsRad : list of numpy float array
 390        list of radial distance scale arrays (in Mpc/h) for each nearest neighbour combination in `kA_kB_list`. The $i^{th}$ element of the 
 391        list should contain a numpy array of the desired distances for the $i^{th}$ nearest neighbour combination.
 392    QueryPos : numpy float array of shape ``(n_query, 3)``
 393        array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 394        Please ensure $0<x,y,z<boxsize$.
 395    TracerPos_A_dict : dictionary, where each key corresponds to the realization, and stores the corresponding numpy array of size ``(n_tracer,3)``, that 
 396        is the tracer positions for the $i^{\text{th}}$ realization 
 397        The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 398        Please ensure $0<x,y,z<boxsize$.
 399    TracerPos_B : numpy float array of shape ``(n_tracer, 3)``
 400        array of 3D locations for the second set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 401        Please ensure $0<x,y,z<boxsize$.
 402    Verbose : bool, optional
 403        if set to ``True``, the time taken to complete each step of the calculation will be printed, by default ``False``.
 404
 405    Returns
 406    -------
 407    Realizations: a numpy array of arrays where the $i^{\text{th}}$ element corresponds to the 3D cross-correlation calculated between the $i^{\text{th}} 
 408    realization of Tracer A and Tracer B. The values correspond to an numpy array: [p_gtr_kA_list, p_gtr_kB_list, p_gtr_kA_kB_list]
 409    p_gtr_kA_list: list of numpy float arrays
 410        list of auto kNN-CDFs of the first set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_A^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
 411        
 412    p_gtr_kB_list: list of numpy float arrays
 413        list of auto kNN-CDFs of the second set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_B^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
 414    
 415    p_gtr_kA_kB_list: list of numpy float arrays
 416        list of joint tracer-tracer nearest neighbour distributions evaluated at the desired distance bins. The $i^{th}$ element represents the joint {$k_A^i$, $k_B^i$}NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
 417        
 418    Raises
 419    ------
 420    ValueError
 421        if the lengths of `BinsRad` and `kA_kB_list` do not match.
 422    ValueError
 423        if the given query points are not on a three-dimensional grid.
 424    ValueError
 425        if x,y, or z coordinates of any of the query points is not in ``[0, boxsize)``.
 426    ValueError
 427        if x,y, or z coordinates of any of the tracer points is not in `'[0, boxsize)``.
 428    ValueError
 429        if any of the given tracer points are not on a three-dimensional grid.
 430
 431    Notes
 432    -----
 433    The parameter `kA_kB_list` should provide the desired combinations of NN indices for the two tracers sets being cross-correlated. For example, if you wish to compute the joint {1,1}, {1,2} and {2,1}NN-CDFs, then set
 434            
 435        kA_kB_list = [(1,1), (1,2), (2,1)]
 436
 437    Please note that if the number density of one set of tracers is significantly smaller than the other, the joint kNN-CDFs approach the auto kNN-CDFs of the less dense tracer set. In this scenario, it may be better to treat the denser tracer set as a continuous field and use the `TracerFieldCross2DA()` method instead to conduct the cross-correlation analysis  (see Gupta & Banerjee (2024)[^2] for a detailed discussion).
 438    #Write why this module might be useful
 439
 440    References
 441    ----------
 442    [^1]: Arka Banerjee, Tom Abel, Cosmological cross-correlations and nearest neighbour distributions, [Monthly Notices of the Royal Astronomical Society](https://doi.org/10.1093/mnras/stab961), Volume 504, Issue 2, June 2021, Pages 2911–2923
 443        
 444    '''
 445    
 446    #-----------------------------------------------------------------------------------------------
 447
 448    if Verbose: total_start_time = time.perf_counter()
 449    keys=list(TracerPos_A_dict.keys())
 450
 451    #-----------------------------------------------------------------------------------------------
 452        
 453    #Step 0: Check all inputs are consistent with the function requirement
 454
 455    if Verbose: print('Checking inputs ...')
 456
 457    if len(BinsRad)!=len(kA_kB_list): 
 458        raise ValueError("length of 'BinsRad' must match length of 'kA_kB_list'.")
 459
 460    if QueryPos.shape[1]!=3: 
 461        raise ValueError('Incorrect spatial dimension for query points: array containing the query point positions must be of shape (n_query,3),' \
 462        ' where n_query is the number of query points.')
 463    
 464    if np.any((QueryPos[:, 0] < 0) | (QueryPos[:, 0] >= boxsize)):
 465        raise ValueError('Invalid query point position(s): please ensure 0 < x < boxsize.')
 466
 467    if np.any((QueryPos[:, 1] < 0) | (QueryPos[:, 1] >= boxsize)):
 468        raise ValueError('Invalid query point position(s): please ensure 0 < y < boxsize.')
 469
 470    if np.any((QueryPos[:, 2] < 0) | (QueryPos[:, 2] >= boxsize)):
 471        raise ValueError('Invalid query point position(s): please ensure 0 < z < boxsize.')
 472    for i in range(len(keys)):
 473        if np.any((TracerPos_A_dict[keys[i]][:, 0] < 0) | (TracerPos_A_dict[keys[i]][:, 0] >= boxsize)):
 474            raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < x < boxsize.')
 475
 476    for i in range(len(keys)):
 477        if np.any((TracerPos_A_dict[keys[i]][:, 1]< 0) | (TracerPos_A_dict[keys[i]][:, 1]>= boxsize)):
 478            raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < y < boxsize.')
 479
 480    for i in range(len(keys)):
 481        if np.any((TracerPos_A_dict[keys[i]][:, 2]< 0) | (TracerPos_A_dict[keys[i]][:, 2]>= boxsize)):
 482            raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < z < boxsize.')
 483
 484    if np.any((TracerPos_B[:, 0] < 0) | (TracerPos_B[:, 0] >= boxsize)):
 485        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < x < boxsize.')
 486
 487    if np.any((TracerPos_B[:, 1]< 0) | (TracerPos_B[:, 1]>= boxsize)):
 488        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < y < boxsize.')
 489
 490    if np.any((TracerPos_B[:, 2]< 0) | (TracerPos_B[:, 2]>= boxsize)):
 491        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < z < boxsize.')
 492
 493    for i in range(len(keys)):
 494        if TracerPos_A_dict[keys[i]].shape[1]!=3 or TracerPos_B.shape[1]!=3: 
 495            raise ValueError('Incorrect spatial dimension for tracers: array containing the tracer positions must be of shape (n_tracer, 3),' \
 496            ' where n_tracer is the number of tracers.')
 497
 498    if Verbose: print('\tdone.')
 499
 500    #-----------------------------------------------------------------------------------------------
 501    #Figuring out the NN indices from the kA_kB_list
 502    kList_A, kList_B = [], []
 503    for kA, kB in kA_kB_list:
 504        kList_A.append(kA)
 505        kList_B.append(kB)
 506    kMax_A, kMax_B = max(kList_A), max(kList_B)
 507
 508    #-----------------------------------------------------------------------------------------------
 509        
 510    #Building the trees
 511    if Verbose: 
 512        start_time = time.perf_counter()
 513        print('\nbuilding the trees ...')
 514        start_time_B = time.perf_counter()
 515    xtree_B = scipy.spatial.cKDTree(TracerPos_B, boxsize=boxsize)  
 516    if Verbose: 
 517        print('\tsecond set of tracers done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_B))
 518
 519    #Initializing the containinment array
 520    #Realizations=np.zeros((len(TracerPos_A_dict.values()),3,len(kA_kB_list)))
 521    Realizations=[]
 522    
 523    for i, values in enumerate(TracerPos_A_dict.values()):
 524        if Verbose:
 525            print(f'\n Building the tree for the {i}th relaization of Tracer A')
 526            start_time_A=time.perf_counter()
 527        xtree_A=scipy.spatial.cKDTree(values, boxsize=boxsize)
 528        if Verbose:
 529            print('\tset of tracers being iterated over done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_A))
 530            print('\tcombined time: {:.2e} s.'.format(time.perf_counter()-start_time))
 531
 532        #Calculating the NN distances
 533        if Verbose: 
 534            start_time = time.perf_counter()
 535            print('\ncomputing the tracer NN distances ...')
 536        vol_A, _ = xtree_A.query(QueryPos, k=kMax_A)
 537        vol_B, _ = xtree_B.query(QueryPos, k=kMax_B)
 538        req_vol_A = vol_A[:, np.array(kList_A)-1]
 539        req_vol_B = vol_B[:, np.array(kList_B)-1]
 540        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 541
 542        #-----------------------------------------------------------------------------------------------
 543    
 544        #Calculating the auto kNN-CDFs
 545        if Verbose: 
 546            start_time = time.perf_counter()
 547            print('\ncomputing the tracer auto-CDFs P_{>=kA}, P_{>=kB} ...')
 548        p_gtr_kA_list = calc_kNN_CDF(req_vol_A, BinsRad)
 549        p_gtr_kB_list = calc_kNN_CDF(req_vol_B, BinsRad)
 550        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 551
 552        #-----------------------------------------------------------------------------------------------
 553
 554        #Calculating the joint kNN-CDFs
 555        if Verbose: 
 556            start_time = time.perf_counter()
 557            print('\ncomputing the joint-CDFs P_{>=kA, >=kB} ...')
 558        joint_vol = np.zeros((vol_A.shape[0], len(kA_kB_list)))
 559        for i, _ in enumerate(kA_kB_list):
 560            joint_vol[:, i] = np.maximum(req_vol_A[:, i], req_vol_B[:, i])
 561        p_gtr_kA_kB_list = calc_kNN_CDF(joint_vol, BinsRad)
 562        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
 563
 564        #-----------------------------------------------------------------------------------------------
 565
 566        if Verbose:
 567            print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
 568        
 569        Realizations.append([p_gtr_kA_list, p_gtr_kB_list, p_gtr_kA_kB_list])
 570    Realizations=np.array(Realizations) 
 571    return Realizations
 572
 573####################################################################################################
 574
 575def TracerFieldCross3D(kList, RBins, BoxSize, QueryPos, TracerPos, Field3D, FieldConstPercThreshold, ReturnSmoothedFieldDict=False, Verbose=False):
 576    r'''
 577    Returns the probabilities $P_{\geq k}$, $P_{>{\rm dt}}$ and $P_{\geq k,>{\rm dt}}$ for $k$ in `kList`, that quantify the extent of the spatial cross-correlation between the given discrete tracer positions (`TracerPos`) and the given continuous overdensity field (`SmoothedFieldDict`) in three-dimensional space.
 578    
 579    1. $P_{\geq k}(r)$: 
 580        the kNN-CDF of the discrete tracers, evaluated at separation $r$
 581    
 582    2. $P_{>{\rm dt}}(r)$: 
 583        the probability of the overdensity field smoothed with a top-hat filter of radius $r$ exceeding the given constant percentile density threshold
 584    
 585    3. $P_{\geq k, >{\rm dt}}(r)$:
 586        the joint probability of finding at least 'k' tracers within a sphere of radius $r$ AND the overdensity field smoothed at scale $r$ exceeding the given density threshold (as specified by the parameter `FieldConstPercThreshold`)
 587    
 588    The excess cross-correlation (Banerjee & Abel 2023)[^1] can be computed trivially from these quantities:
 589    
 590    $$\psi_{k, {\rm dt}} = \frac{P_{\geq k, >{\rm dt}}}{P_{\geq k} \times P_{>{\rm dt}}}$$
 591
 592    Parameters
 593    ----------
 594    kList : list of int
 595        List of nearest neighbours to compute. For example, if ``kList = [1, 2, 4]``, the first, second and fourth-nearest neighbour distributions will be computed.
 596
 597    RBins : list of numpy float arrays
 598        List of radial distance arrays (in comoving Mpc/$h$), one for each value in `kList`. The i-th element of the list should be a numpy array specifying the distances to be used for the nearest neighbour calculation corresponding to `kList[i]`.
 599
 600    BoxSize : float
 601        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
 602
 603    QueryPos : numpy float array of shape ``(n_query, 3)``
 604        Array of 3D positions (e.g., in Cartesian coordinates) used to query the nearest-neighbour distances, and also compute field's CDF.
 605
 606    TracerPos : numpy float array of shape ``(n_tracer, 3)``
 607        Array of 3D positions of discrete tracers, with columns representing the x, y, and z coordinates, respectively.
 608    
 609    Field3D : numpy float array of shape ``(n_grid, n_grid, n_grid)``
 610        A 3D numpy array representing the continuous field (for e.g., the matter overdensity field). The shape of the array should match the grid size used for smoothing.
 611
 612    FieldConstPercThreshold : float
 613        The percentile threshold for identifying overdense regions in the continuous field. For example, ``75.0`` indicates the 75th percentile.
 614
 615    ReturnSmoothedFieldDict : bool, optional
 616        if set to ``True``, the dictionary containing the continuous field smoothed at the provided radial bins, will be returned along with the nearest-neighbour measurements, by default ``False``.
 617    
 618    Verbose : bool, optional
 619        If True, prints timing information for each step. Default is False.
 620
 621    Returns
 622    -------
 623    p_gtr_k_list : list of numpy float arrays
 624        Auto kNN-CDFs of the discrete tracers evaluated at the desired distance bins.
 625
 626    p_gtr_dt_list : list of numpy float arrays
 627        Overdensity-field auto kNN-CDFs evaluated at the same scales.
 628
 629    p_gtr_k_dt_list : list of numpy float arrays
 630        Joint CDFs of finding $\geq k$ tracers AND field value exceeding the threshold at a given scale.
 631
 632    SmoothedFieldDict : dict
 633        dictionary containing the continuous field smoothed at the provided radial bins, returned only if `ReturnSmoothedDict` is ``True``. For example, ``SmoothedFieldDict['5']`` represents the continuous map smoothed at a scale of 5 Mpc/h.
 634
 635    Raises
 636    ------
 637    ValueError
 638        If TracerPos are not 3D.
 639    ValueError
 640        If QueryPos are not 3D.
 641    ValueError
 642        If tracer positions are outside the specified box size.
 643    ValueError
 644        If QueryPos are outside the specified box size.
 645
 646    References
 647    ----------
 648    [^1]: Arka Banerjee, Tom Abel, Tracer-field cross-correlations with k-nearest neighbour distributions, [MNRAS](https://doi.org/10.1093/mnras/stac3813), Volume 519, Issue 4, March 2023, Pages 4856-4868
 649
 650    [^2]: Eishica Chand, Arka Banerjee, Simon Foreman, Francisco Villaescusa-Navarro, [MNRAS](https://doi.org/10.1093/mnras/staf433), Volume 538, Issue 3, April 2025, Pages 2204-221 
 651    '''
 652
 653    if Verbose: total_start_time = time.perf_counter()
 654
 655    #-----------------------------------------------------------------------------------------------
 656
 657    # Step 0: Input validation
 658    if Verbose: print('Checking inputs ...')
 659
 660    if QueryPos.shape[1] != 3:
 661        raise ValueError("Query positions must be 3D (shape: n_query x 3).")
 662    if TracerPos.shape[1] != 3:
 663        raise ValueError("Tracer positions must be 3D (shape: n_tracer x 3).")
 664    if np.any((TracerPos <= 0) | (TracerPos > BoxSize)):
 665        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
 666    if np.any((QueryPos <= 0) | (QueryPos > BoxSize)):
 667        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
 668
 669    if Verbose: print('\tdone.')
 670
 671    #-----------------------------------------------------------------------------------------------
 672    # Step 1: Compute kNN-CDFs for tracer positions
 673    if Verbose:
 674        step_1_start_time = time.perf_counter()
 675        print('\ninitiating step 1 ...')
 676
 677    #-----------------------------------------------------------------------------------------------
 678
 679    # Building the kdTree
 680    if Verbose:
 681        print('\n\tbuilding the kdTree ...')
 682        t_start = time.perf_counter()
 683
 684    xtree = scipy.spatial.cKDTree(TracerPos, boxsize=BoxSize)
 685
 686    if Verbose:
 687        print('\t\tdone; time taken: {:.2e} s.'.format(time.perf_counter() - t_start))
 688
 689    #------------------------------------------------------------------------------------------------
 690
 691    # To store the CDFs for each k  
 692    if Verbose:
 693        print('\n\tcomputing the tracer NN distances ...')
 694        t_start = time.perf_counter()
 695    
 696
 697    #-------------------------------------------------------------------------------------------------
 698
 699    Nquery = QueryPos.shape[0]
 700    dists, _ = xtree.query(QueryPos, k=max(kList), workers=-1)
 701    vol = dists[:, np.array(kList)-1]
 702    
 703    #------------------------------------------------------------------------------------------------
 704
 705    # Compute the kNN-CDFs for the tracers
 706    if Verbose:
 707        print('\t\tdone; time taken: {:.2e} s.'.format(time.perf_counter() - t_start))
 708   
 709    if Verbose:
 710        print('\n\tcomputing P_{>=k} ...')
 711        t_start = time.perf_counter()
 712
 713    p_gtr_k_list = calc_kNN_CDF(vol, RBins)
 714
 715    if Verbose:
 716        print('\t\tdone; time taken: {:.2e} s.'.format(time.perf_counter() - t_start))
 717        print('time taken for step 1: {:.2e} s.'.format(time.perf_counter() - step_1_start_time))
 718
 719    #------------------------------------------------------------------------------------------------
 720
 721    # Step 2: Compute kNN-CDFs for the overdensity field, and the joint CDFs with tracers 
 722
 723    if Verbose:
 724        step_2_start_time = time.perf_counter()
 725        print('\ninitiating step 2 ...')
 726
 727    # Store computed smoothed fields, interpolated values, and percentile thresholds
 728    SmoothedFieldDictOut = {}
 729    Interpolated_Smoothed_Field = {}
 730    Delta_Threshold = {}
 731
 732    # To store the CDFs for each k
 733    p_gtr_k_dt_list = []
 734    p_gtr_dt_list = []
 735
 736    #------------------------------------------------------------------------------------------------
 737
 738    # Compute the CDFs
 739    for k_ind, k in enumerate(kList):
 740
 741        if Verbose:
 742            print(f"\nComputing P_{{>=k, >dt}} and P_{{>dt}} for k = {k} ...")
 743            k_start_time = time.perf_counter()
 744
 745        p_gtr_k_dt = np.zeros(len(RBins[k_ind]))
 746        p_gtr_dt   = np.zeros(len(RBins[k_ind]))
 747
 748        for j, ss in enumerate(RBins[k_ind]):
 749
 750            #------------------------------------------------------------------------------------------------
 751            ss_str = str(ss)
 752
 753            if ss_str not in SmoothedFieldDictOut:
 754                SmoothedFieldDictOut[ss_str] = smoothing_3D(Field3D, Filter='Top-Hat', grid=Field3D.shape[0], BoxSize=BoxSize, R=ss, Verbose=False)
 755
 756            #-------------------------------------------------------------------------------------------------
 757
 758            if ss_str not in Interpolated_Smoothed_Field:
 759                Interpolated_Smoothed_Field[ss_str] = CIC_3D_Interp(QueryPos, SmoothedFieldDictOut[ss_str], BoxSize)
 760
 761            interp_field = Interpolated_Smoothed_Field[ss_str]
 762
 763            
 764            #-------------------------------------------------------------------------------------------------
 765
 766            if ss_str not in Delta_Threshold:
 767                Delta_Threshold[ss_str] = np.percentile(Interpolated_Smoothed_Field[ss_str], FieldConstPercThreshold)
 768
 769            delta_star_ss = Delta_Threshold[ss_str]
 770
 771            #-------------------------------------------------------------------------------------------------
 772
 773            # Compute fractions
 774            vol_mask      = vol[:, k_ind] < ss
 775            field_mask    = interp_field > delta_star_ss
 776
 777            p_gtr_dt[j]   = np.count_nonzero(field_mask) / Nquery
 778            p_gtr_k_dt[j] = np.count_nonzero(vol_mask & field_mask) / Nquery
 779
 780        #-------------------------------------------------------------------------------------------------
 781
 782        p_gtr_k_dt_list.append(p_gtr_k_dt)
 783        p_gtr_dt_list.append(p_gtr_dt)
 784
 785        if Verbose:
 786            print(f"\tdone for k = {k}; time taken: {time.perf_counter() - k_start_time:.2e} s")
 787
 788    #------------------------------------------------------------------------------------------------
 789
 790    if Verbose:
 791        print(f"\nTotal time taken: {time.perf_counter() - step_2_start_time:.2e} s")
 792    
 793    #-----------------------------------------------------------------------------------------------
 794
 795    if Verbose:
 796        print(f"\nTotal time taken for all steps: {time.perf_counter() - total_start_time:.2e} s")
 797
 798    if ReturnSmoothedFieldDict:
 799        return p_gtr_k_list, p_gtr_dt_list, p_gtr_k_dt_list, SmoothedFieldDictOut
 800    else:
 801        return p_gtr_k_list, p_gtr_dt_list, p_gtr_k_dt_list
 802
 803####################################################################################################
 804
 805def TracerFieldCross3D_DataVector(kList, RBins, BoxSize, QueryPos, TracerPosVector, Field, FieldConstPercThreshold, ReturnSmoothedDict=False, Verbose=False):
 806    
 807    r'''
 808    Returns 'data vectors' of the  the probabilities $P_{\geq k}$, $P_{>{\rm dt}}$ and $P_{\geq k,>{\rm dt}}$ [refer to kNNpy.kNN_3D.TracerFieldCross for definitions] for $k$ in `kList` for multiple realisations of the given discrete tracer set [`TracerPosVector`] and a single realisation of the given continuous overdensity field (`Field`). Please refer to notes to understand why this might be useful.
 809    	
 810    Parameters
 811    ----------
 812    kList : list of int
 813        List of nearest neighbours to compute. For example, if ``kList = [1, 2, 4]``, the first, second and fourth-nearest neighbour distributions will be computed.
 814
 815    RBins : list of numpy float arrays
 816        List of radial distance arrays (in comoving Mpc/$h$), one for each value in `kList`. The i-th element of the list should be a numpy array specifying the distances to be used for the nearest neighbour calculation corresponding to `kList[i]`.
 817
 818    BoxSize : float
 819        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
 820
 821    QueryPos : numpy float array of shape ``(n_query, 3)``
 822        Array of 3D positions (e.g., in Cartesian coordinates) used to query the nearest-neighbour distances, and also compute field's CDF.
 823    
 824    TracerPosVector : numpy float array of shape ``(n_realisations, n_tracer, 3)``
 825        Array of 3D positions of n_realisations of discrete tracers, with columns representing the x, y, and z coordinates, respectively.
 826    
 827    Field : numpy float array of shape ``(n_grid, n_grid, n_grid)``
 828        A 3D numpy array representing the continuous field (for e.g., the matter overdensity field). The shape of the array should match the grid size used for smoothing.
 829
 830    FieldConstPercThreshold : float
 831        The percentile threshold for identifying overdense regions in the continuous field. For example, ``75.0`` indicates the 75th percentile.
 832
 833    ReturnSmoothedFieldDict : bool, optional
 834        if set to ``True``, the dictionary containing the continuous field smoothed at the provided radial bins, will be returned along with the nearest-neighbour measurements, by default ``False``.
 835    
 836    Verbose : bool, optional
 837        If True, prints timing information for each step. Default is False.
 838
 839    Returns
 840    -------
 841    p_gtr_k_veclist: list of numpy float arrays
 842        list of auto kNN-CDFs of the discrete tracers evaluated at the desired distance bins. Each list member is a 2D array of shape ``(n_realisations, n_bins)``.
 843
 844    p_gtr_dt_list: list of numpy float arrays
 845        continuum version of auto kNN-CDFs for the continuous field evaluated at the desired distance bins.
 846
 847    p_gtr_k_dt_veclist: list of numpy float arrays
 848        list of joint tracer-field nearest neighbour distributions evaluated at the desired distance bins. Each list member is a 2D array of shape ``(n_realisations, n_bins)``.
 849
 850    SmoothedFieldDict : dict
 851        dictionary containing the continuous field smoothed at the provided radial bins, returned only if `ReturnSmoothedDict` is ``True``. For example, ``SmoothedFieldDict['5']`` represents the continuous map smoothed at a scale of 5 Mpc/h.
 852
 853    Raises
 854    ------
 855    ValueError
 856        If TracerPos are not on a 3dimensional grid.
 857    ValueError
 858        If QueryPos are not on a 3dimensional grid.
 859    ValueError
 860        If tracer positions are outside the specified box size.
 861    ValueError
 862        If QueryPos are outside the specified box size.
 863
 864    References
 865    ----------
 866    [^1]: Arka Banerjee, Tom Abel, Tracer-field cross-correlations with k-nearest neighbour distributions, [MNRAS](https://doi.org/10.1093/mnras/stac3813), Volume 519, Issue 4, March 2023, Pages 4856-4868
 867
 868    [^2]: Eishica Chand, Arka Banerjee, Simon Foreman, Francisco Villaescusa-Navarro, [MNRAS](https://doi.org/10.1093/mnras/staf433), Volume 538, Issue 3, April 2025, Pages 2204-221 
 869    '''
 870
 871    if Verbose: total_start_time = time.perf_counter()
 872
 873    #-----------------------------------------------------------------------------------------------
 874        
 875    #Step 0: Check all inputs are consistent with the function requirement
 876
 877    if Verbose:
 878        print('Checking inputs ...')
 879
 880    # Query positions must be (n_query, 3)
 881    if QueryPos.ndim != 2 or QueryPos.shape[1] != 3:
 882        raise ValueError("Query positions must be of shape: (n_query, 3), where n_query is the number of query points.")
 883
 884    # Tracer positions must be (n_realizations, n_tracer, 3)
 885    if TracerPosVector.ndim != 3 or TracerPosVector.shape[2] != 3:
 886        raise ValueError("Tracer positions must be of shape: (n_realizations, n_tracer, 3), where n_realizations is the number of tracer realizations and n_tracer is the number of tracers per realization.")
 887
 888    # Tracer positions must lie within [0, BoxSize)
 889    if np.any((TracerPosVector <= 0) | (TracerPosVector > BoxSize)):
 890        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
 891    if np.any((QueryPos <= 0) | (QueryPos > BoxSize)):
 892        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
 893
 894    if Verbose:
 895        print('\tdone.')
 896
 897    #-----------------------------------------------------------------------------------------------
 898        
 899    #Step 1: smooth the continuous field and store in dictionary
 900    if Verbose:
 901        step_1_start_time = time.perf_counter()
 902        print('\ninitiating step 1 (smoothing the continuous field at the given radial scales)...')
 903
 904    #-----------------------------------------------------------------------------------------------
 905
 906    grid = Field.shape[0]  
 907    Filter = 'Top-Hat'
 908
 909    SmoothedFieldDict = create_smoothed_field_dict_3D(Field, Filter, grid, BoxSize, RBins, thickness=None, Verbose=False)
 910
 911    if Verbose: print('\tdone; time taken for step 1: {:.2e} s.'.format(time.perf_counter()-step_1_start_time))
 912
 913    #-----------------------------------------------------------------------------------------------
 914        
 915    #Step 2: 
 916
 917    # A. Compute the fraction of query points at which the smoothed fields at the different radial
 918    #    scales are greater than the overdensity threshold.
 919
 920    # B. For each realization of the discrete tracer set, calculate 
 921    #   (i)  nearest neighbour distances of query points, and the kNN-CDFs for the discrete tracers
 922    #   (ii) the fraction of query points with nearest neighbour distance less than the angular
 923    #        distance and smoothed field greater than the overdensity threshold
 924
 925    if Verbose: 
 926        step_2_start_time = time.perf_counter()
 927        print('\ninitiating step 2 (looping the tracer-field cross-correlation computations over the multiple tracer realisations)...')
 928
 929    #-----------------------------------------------------------------------------------------------
 930
 931    n_reals = TracerPosVector.shape[0]
 932    p_gtr_k_veclist, p_gtr_dt_list, p_gtr_k_dt_veclist = [], [], []
 933
 934    Interpolated_Smoothed_Field = {}
 935    Delta_Threshold = {}
 936
 937    #------------------------------------------------------------------------------------------------
 938    for k_ind, k in enumerate(kList):
 939             
 940            p_gtr_k_veclist.append(np.zeros((n_reals, len(RBins[k_ind]))))
 941            p_gtr_dt_list.append(np.zeros(len(RBins[k_ind])))
 942            p_gtr_k_dt_veclist.append(np.zeros((n_reals, len(RBins[k_ind]))))
 943
 944    #------------------------------------------------------------------------------------------------
 945
 946    for realisation, TracerPos in enumerate(TracerPosVector):
 947
 948        if Verbose:
 949            start_time_real = time.perf_counter()
 950            print(f'\n\n--------------  Realisation {realisation+1}/{n_reals}  --------------\n')
 951
 952        #-------------------------------------------------------------------------------------------
 953
 954        #Tracer calculations
 955
 956        #Building the tree
 957        if Verbose: 
 958            start_time_tree = time.perf_counter()
 959            print('\nbuilding the kdTree for the discrete tracer set ...')
 960
 961        xtree = scipy.spatial.cKDTree(TracerPos, boxsize=BoxSize)
 962
 963        if Verbose: 
 964            print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_tree))
 965
 966        #-------------------------------------------------------------------------------------------
 967
 968        #Calculating the NN distances
 969        if Verbose: 
 970            start_time_NN = time.perf_counter()
 971            print('\ncomputing the tracer NN distances ...')
 972        dists, _ = xtree.query(QueryPos, k=max(kList))
 973        vol = dists[:, np.array(kList)-1]
 974        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_NN))
 975
 976        #-------------------------------------------------------------------------------------------
 977    
 978        #Calculating the auto kNN-CDFs
 979        if Verbose: 
 980            start_time_CDF = time.perf_counter()
 981            print('\ncomputing the tracer auto-CDFs P_{>=k} ...')
 982        p_gtr_k_list = calc_kNN_CDF(vol, RBins)
 983        for k_ind, k in enumerate(kList):
 984            p_gtr_k_veclist[k_ind][realisation] = p_gtr_k_list[k_ind]
 985        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_CDF))
 986
 987        #-------------------------------------------------------------------------------------------
 988
 989        #Tracer-field calculations
 990
 991        if Verbose: 
 992            start_time_tf_cross = time.perf_counter()
 993            print('\ncomputing the tracer-field cross-correlation ...\n')
 994
 995        for k_ind, k in enumerate(kList):
 996             
 997            if Verbose: 
 998                if realisation==0:
 999                    print('\tComputing P_(>dt) and P_(>=k, >dt) for k = {} ...'.format(k))
1000                else:
1001                    print('\tComputing P_(>=k, >dt) for k = {} ...'.format(k))
1002
1003            for i, ss in enumerate(RBins[k_ind]):
1004
1005                ss_str = str(ss)
1006
1007                #-----------------------------------------------------------------------------------
1008
1009                # Interpolate the smoothed field at the query positions
1010                if ss_str not in Interpolated_Smoothed_Field:
1011                    Interpolated_Smoothed_Field[ss_str] = CIC_3D_Interp(QueryPos, SmoothedFieldDict[ss_str], BoxSize)
1012
1013                interp_field = Interpolated_Smoothed_Field[ss_str]
1014            
1015                #-------------------------------------------------------------------------------------------------
1016
1017                # Compute the overdensity threshold for the smoothed field
1018                if ss_str not in Delta_Threshold:
1019                    Delta_Threshold[ss_str] = np.percentile(Interpolated_Smoothed_Field[ss_str], FieldConstPercThreshold)
1020                   
1021                delta_star_ss = Delta_Threshold[ss_str]    
1022
1023                #-----------------------------------------------------------------------------------
1024
1025                #Compute the fraction of query points satisfying the joint condition
1026                ind_gtr_k_dt = np.where((vol[:, k_ind] < ss) & (interp_field > delta_star_ss))
1027                p_gtr_k_dt_veclist[k_ind][realisation, i] = len(ind_gtr_k_dt[0])/QueryPos.shape[0]
1028
1029                #-----------------------------------------------------------------------------------
1030
1031                #Compute the fraction of query points with smoothed field exceeding the overdensity threshold
1032                if realisation==0: 
1033                    ind_gtr_dt = np.where(interp_field > delta_star_ss)
1034                    p_gtr_dt_list[k_ind][i] = len(ind_gtr_dt[0])/QueryPos.shape[0]
1035
1036        if Verbose: print('\n\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_tf_cross))
1037
1038        #-------------------------------------------------------------------------------------------
1039
1040        if Verbose: 
1041            print('\ntime taken for realisation {}: {:.2e} s.'.format(realisation+1, time.perf_counter()-start_time_real))
1042
1043    #-----------------------------------------------------------------------------------------------
1044
1045    if Verbose: 
1046        print('\n\n--------------  all realisations done!  --------------\n')
1047        print('\n\ttime taken for step 2: {:.2e} s.'.format(time.perf_counter()-step_2_start_time))
1048
1049    #-----------------------------------------------------------------------------------------------
1050
1051    if Verbose: print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
1052    
1053    if ReturnSmoothedDict: 
1054        return p_gtr_k_veclist, p_gtr_dt_list, p_gtr_k_dt_veclist, SmoothedFieldDict
1055    else: 
1056        return p_gtr_k_veclist, p_gtr_dt_list, p_gtr_k_dt_veclist
1057
1058####################################################################################################
1059
1060#----------------------------------------  END OF PROGRAM!  ----------------------------------------
1061
1062####################################################################################################
def TracerAuto3D( boxsize, kList, BinsRad, QueryPos, TracerPos, ReturnNNdist=False, Verbose=False):
 28def TracerAuto3D(boxsize, kList, BinsRad, QueryPos, TracerPos, ReturnNNdist=False,Verbose=False):
 29    
 30    r'''
 31    Computes the $k$NN-CDFs in 3D coordinates (Banerjee & Abel (2021)[^1]) of the provided discrete tracer set (`TracerPos`), 
 32    evaluated at the provided radial distance scales `BinsRad`, for all $k$ in `kList`. Each $k$NN-CDF measures the probability
 33    $P_{\geq k}(r)$ of finding at least $k$ tracers in a randomly placed sphere of radius $r$. The $k$NN-CDFs quantify the spatial 
 34    clustering of the tracers.
 35    		
 36    Parameters
 37    ----------
 38    boxsize:  float
 39        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
 40    kList : list of ints
 41        the list of nearest neighbours to calculate the distances to. For example, if ``kList = [1, 2, 4]``, the first, second and 
 42        fourth-nearest neighbour distributions will be computed.
 43    BinsRad : list of numpy float array
 44        list of radial distance arrays (in Mpc/h) for each nearest neighbour. The $i^{th}$ element of the 
 45        list should contain a numpy array of the desired distances for the nearest neighbour specified by the $i^{th}$ element of `kList`.
 46    QueryPos : numpy float array of shape ``(n_query, 3)``
 47        array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 48        Please ensure $0<x,y,z<boxsize$.
 49    TracerPos : numpy float array of shape ``(n_tracer, 3)``
 50        array of 3D locations for the discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
 51        Please ensure $0<x,y,z<boxsize$.
 52    ReturnNNdist : bool, optional
 53        if set to ``True``, the sorted arrays of NN distances will be returned along with the $k$NN-CDFs, by default ``False``.
 54    Verbose : bool, optional
 55        if set to ``True``, the time taken to complete each step of the calculation will be printed, by default ``False``.
 56
 57    Returns
 58    -------
 59    kNN_results: tuple of lists or list of numpy float arrays
 60        results of the kNN computation. If `ReturnNNdist` is ``True``, returns the tuple ``(p_gtr_k_list, vol)`` where `p_gtr_k_list` 
 61        is the list of auto kNN-CDFs, and `vol` is the list of NN distances. If `ReturnNNdist` is ``False``, returns `p_gtr_k_list` only
 62        
 63    Raises
 64    ------
 65    ValueError
 66        if the given query points are not on a three-dimensional grid.
 67    ValueError
 68        if x,y, or z coordinate of any of the query points is not in ``[0, boxsize)``.
 69    ValueError
 70        if x,y, or z coordinate of any of the tracer points is not in ``[0, boxsize)``..
 71    ValueError
 72        if the given tracer points are not on a three-dimensional grid.
 73
 74    References
 75    ----------
 76    [^1]: Arka Banerjee, Tom Abel, Nearest neighbour distributions: New statistical measures for cosmological clustering, 
 77    [Monthly Notices of the Royal Astronomical Society](https://doi.org/10.1093/mnras/staa3604), Volume 500, Issue 4, February 2021, Pages 5479–5499
 78        
 79    '''
 80    
 81    #-----------------------------------------------------------------------------------------------
 82
 83    if Verbose: total_start_time = time.perf_counter()
 84
 85    #-----------------------------------------------------------------------------------------------
 86        
 87    #Step 0: Check all inputs are consistent with the function requirement
 88
 89    if Verbose: print('Checking inputs ...')
 90
 91    if QueryPos.shape[1]!=3: 
 92        raise ValueError('Incorrect spatial dimension for query points: array containing the query point positions must be of shape (n_query, 3), ' \
 93        'where n_query is the number of query points.')
 94
 95    if np.any((QueryPos[:, 0] < 0) | (QueryPos[:, 0] >= boxsize)):
 96        raise ValueError('Invalid query point position(s): please ensure 0 < x < boxsize.')
 97
 98    if np.any((QueryPos[:, 1] < 0) | (QueryPos[:, 1] >= boxsize)):
 99        raise ValueError('Invalid query point position(s): please ensure 0 < y < boxsize.')
100
101    if np.any((QueryPos[:, 2] < 0) | (QueryPos[:, 2] >= boxsize)):
102        raise ValueError('Invalid query point position(s): please ensure 0 < z < boxsize.')
103
104    if np.any((TracerPos[:, 0] < 0) | (TracerPos[:, 0] >= boxsize)):
105        raise ValueError('Invalid tracer point position(s): please ensure 0 < x < boxsize.')
106
107    if np.any((TracerPos[:, 1]< 0) | (TracerPos[:, 1]>= boxsize)):
108        raise ValueError('Invalid tracer point position(s): please ensure 0 < y < boxsize.')
109
110    if np.any((TracerPos[:, 2]< 0) | (TracerPos[:, 2]>= boxsize)):
111        raise ValueError('Invalid tracer point position(s): please ensure 0 < z < boxsize.')
112
113    if TracerPos.shape[1]!=3: 
114        raise ValueError('Incorrect spatial dimension for tracers: array containing the tracer positions must be of shape (n_tracer, 3), ' \
115        'where n_tracer is the number of tracers.')
116
117    if Verbose: print('\tdone.')
118
119    #-----------------------------------------------------------------------------------------------
120        
121    #Building the tree
122    if Verbose: 
123        start_time = time.perf_counter()
124        print('\nbuilding the tree ...')
125    xtree    = scipy.spatial.cKDTree(TracerPos, boxsize=boxsize)
126    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
127
128    #-----------------------------------------------------------------------------------------------
129    
130    #Calculating the NN distances
131    if Verbose: 
132        start_time = time.perf_counter()
133        print('\ncomputing the tracer NN distances ...')
134    dists, _= xtree.query(QueryPos, k=max(kList), workers=-1)
135    vol = dists[:, np.array(kList)-1]
136    
137    
138    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
139
140    #-----------------------------------------------------------------------------------------------
141    
142    #Calculating the kNN-CDFs
143    if Verbose: 
144        start_time = time.perf_counter()
145        print('\ncomputing the tracer auto-CDFs P_{>=k} ...')
146    p_gtr_k_list = calc_kNN_CDF(vol, BinsRad)
147    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
148
149    #-----------------------------------------------------------------------------------------------
150
151    #Collecting the results
152    if ReturnNNdist:
153        kNN_results = (p_gtr_k_list, vol)
154    else:
155        kNN_results = (p_gtr_k_list, None)
156
157    #-----------------------------------------------------------------------------------------------
158
159    if Verbose:
160        print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
161    
162    return kNN_results

Computes the $k$NN-CDFs in 3D coordinates (Banerjee & Abel (2021)1) of the provided discrete tracer set (TracerPos), evaluated at the provided radial distance scales BinsRad, for all $k$ in kList. Each $k$NN-CDF measures the probability $P_{\geq k}(r)$ of finding at least $k$ tracers in a randomly placed sphere of radius $r$. The $k$NN-CDFs quantify the spatial clustering of the tracers.

Parameters
  • boxsize (float): The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
  • kList (list of ints): the list of nearest neighbours to calculate the distances to. For example, if kList = [1, 2, 4], the first, second and fourth-nearest neighbour distributions will be computed.
  • BinsRad (list of numpy float array): list of radial distance arrays (in Mpc/h) for each nearest neighbour. The $i^{th}$ element of the list should contain a numpy array of the desired distances for the nearest neighbour specified by the $i^{th}$ element of kList.
  • QueryPos (numpy float array of shape (n_query, 3)): array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • TracerPos (numpy float array of shape (n_tracer, 3)): array of 3D locations for the discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • ReturnNNdist (bool, optional): if set to True, the sorted arrays of NN distances will be returned along with the $k$NN-CDFs, by default False.
  • Verbose (bool, optional): if set to True, the time taken to complete each step of the calculation will be printed, by default False.
Returns
  • kNN_results (tuple of lists or list of numpy float arrays): results of the kNN computation. If ReturnNNdist is True, returns the tuple (p_gtr_k_list, vol) where p_gtr_k_list is the list of auto kNN-CDFs, and vol is the list of NN distances. If ReturnNNdist is False, returns p_gtr_k_list only
Raises
  • ValueError: if the given query points are not on a three-dimensional grid.
  • ValueError: if x,y, or z coordinate of any of the query points is not in [0, boxsize).
  • ValueError: if x,y, or z coordinate of any of the tracer points is not in [0, boxsize)..
  • ValueError: if the given tracer points are not on a three-dimensional grid.
References

Monthly Notices of the Royal Astronomical Society, Volume 500, Issue 4, February 2021, Pages 5479–5499


  1. Arka Banerjee, Tom Abel, Nearest neighbour distributions: New statistical measures for cosmological clustering, 

def TracerTracerCross3D( boxsize, kA_kB_list, BinsRad, QueryPos, TracerPos_A, TracerPos_B, Verbose=False):
166def TracerTracerCross3D(boxsize, kA_kB_list, BinsRad, QueryPos, TracerPos_A, TracerPos_B, Verbose=False):
167    
168    r'''
169    Returns the probabilities $P_{\geq k_A}$, $P_{\geq k_B}$ and $P_{\geq k_A, \geq k_B}$ for ($k_A$, $k_B$) in `kA_kB_list` 
170    that quantify the extent of the spatial cross-correlation between the given sets of discrete tracers, `TracerPos_A`, `TracerPos_B`.
171    	
172    1. $P_{\geq k_A}(r)$: 
173    	the $k_A$NN-CDF of the first set of discrete tracers, evaluated at radial distance scale $r$
174    		
175    2. $P_{\geq k_B}(\theta)$: 
176    	the $k_B$NN-CDF of the second set of discrete tracers, evaluated at radial distance scale $r$
177    		
178    3.  $P_{\geq k_A, \geq k_B}(\theta)$:
179    	the joint probability of finding at least $k_A$ set A tracers and at least $k_B$ set B tracers within a sphere of radius $r$
180    		
181    The excess cross-correlation (Banerjee & Abel 2023)[^1] can be computed trivially from the quatities (see the `kNNpy.HelperFunctions.kNN_excess_cross_corr()` method to do this)
182    	
183    $$\psi_{k_A, k_B} = P_{\geq k_A, \geq k_B}/(P_{\geq k_A} \times P_{\geq k_B})$$
184    		
185    Parameters
186    ----------
187    boxszie:  float
188        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
189    kA_kB_list : list of int tuples
190        nearest-neighbour combinations for which the cross-correlations need to be computed (see notes for more details)
191    BinsRad : list of numpy float array
192        list of radial distance scale arrays (in Mpc/h) for each nearest neighbour combination in `kA_kB_list`. The $i^{th}$ element of the 
193        list should contain a numpy array of the desired distances for the $i^{th}$ nearest neighbour combination.
194    QueryPos : numpy float array of shape ``(n_query, 3)``
195        array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
196        Please ensure $0<x,y,z<boxsize$.
197    TracerPos_A : numpy float array of shape ``(n_tracer, 3)``
198        array of 3D locations for the first set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
199        Please ensure $0<x,y,z<boxsize$.
200    TracerPos_B : numpy float array of shape ``(n_tracer, 3)``
201        array of 3D locations for the second set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
202        Please ensure $0<x,y,z<boxsize$.
203    Verbose : bool, optional
204        if set to ``True``, the time taken to complete each step of the calculation will be printed, by default ``False``.
205
206    Returns
207    -------
208    p_gtr_kA_list: list of numpy float arrays
209        list of auto kNN-CDFs of the first set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_A^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
210        
211    p_gtr_kB_list: list of numpy float arrays
212        list of auto kNN-CDFs of the second set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_B^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
213    
214    p_gtr_kA_kB_list: list of numpy float arrays
215        list of joint tracer-tracer nearest neighbour distributions evaluated at the desired distance bins. The $i^{th}$ element represents the joint {$k_A^i$, $k_B^i$}NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
216        
217    Raises
218    ------
219    ValueError
220        if the lengths of `BinsRad` and `kA_kB_list` do not match.
221    ValueError
222        if the given query points are not on a three-dimensional grid.
223    ValueError
224        if x,y, or z coordinates of any of the query points is not in ``[0, boxsize)``.
225    ValueError
226        if x,y, or z coordinates of any of the tracer points is not in ``[0, boxsize)``.
227    ValueError
228        if any of the given tracer points are not on a three-dimensional grid.
229
230    Notes
231    -----
232    The parameter `kA_kB_list` should provide the desired combinations of NN indices for the two tracers sets being cross-correlated. For example, if you wish to compute the joint {1,1}, {1,2} and {2,1}NN-CDFs, then set
233            
234        kA_kB_list = [(1,1), (1,2), (2,1)]
235
236    Please note that if the number density of one set of tracers is significantly smaller than the other, the joint kNN-CDFs approach the auto kNN-CDFs of the less dense tracer set. In this scenario, it may be better to treat the denser tracer set as a continuous field and use the `TracerFieldCross2DA()` method instead to conduct the cross-correlation analysis  (see Gupta & Banerjee (2024)[^2] for a detailed discussion).
237
238    References
239    ----------
240    [^1]: Arka Banerjee, Tom Abel, Cosmological cross-correlations and nearest neighbour distributions, [Monthly Notices of the Royal Astronomical Society](https://doi.org/10.1093/mnras/stab961), Volume 504, Issue 2, June 2021, Pages 2911–2923
241        
242    '''
243    
244    #-----------------------------------------------------------------------------------------------
245
246    if Verbose: total_start_time = time.perf_counter()
247
248    #-----------------------------------------------------------------------------------------------
249        
250    #Step 0: Check all inputs are consistent with the function requirement
251
252    if Verbose: print('Checking inputs ...')
253
254    if len(BinsRad)!=len(kA_kB_list): 
255        raise ValueError("length of 'BinsRad' must match length of 'kA_kB_list'.")
256
257    if QueryPos.shape[1]!=3: 
258        raise ValueError('Incorrect spatial dimension for query points: array containing the query point positions must be of shape (n_query,3),' \
259        ' where n_query is the number of query points.')
260    
261    if np.any((QueryPos[:, 0] < 0) | (QueryPos[:, 0] >= boxsize)):
262        raise ValueError('Invalid query point position(s): please ensure 0 < x < boxsize.')
263
264    if np.any((QueryPos[:, 1] < 0) | (QueryPos[:, 1] >= boxsize)):
265        raise ValueError('Invalid query point position(s): please ensure 0 < y < boxsize.')
266
267    if np.any((QueryPos[:, 2] < 0 ) | (QueryPos[:, 2] >= boxsize)):
268        raise ValueError('Invalid query point position(s): please ensure 0 < z < boxsize.')
269
270    if np.any((TracerPos_A[:, 0] < 0) | (TracerPos_A[:, 0] >= boxsize)):
271        raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < x < boxsize.')
272
273    if np.any((TracerPos_A[:, 1]< 0) | (TracerPos_A[:, 1]>= boxsize)):
274        raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < y < boxsize.')
275
276    if np.any((TracerPos_A[:, 2]< 0) | (TracerPos_A[:, 2]>= boxsize)):
277        raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < z < boxsize.')
278
279    if np.any((TracerPos_B[:, 0] < 0) | (TracerPos_B[:, 0] >= boxsize)):
280        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < x < boxsize.')
281
282    if np.any((TracerPos_B[:, 1]< 0) | (TracerPos_B[:, 1]>= boxsize)):
283        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < y < boxsize.')
284
285    if np.any((TracerPos_B[:, 2]< 0) | (TracerPos_B[:, 2]>= boxsize)):
286        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < z < boxsize.')
287
288    
289    if TracerPos_A.shape[1]!=3 or TracerPos_B.shape[1]!=3: 
290        raise ValueError('Incorrect spatial dimension for tracers: array containing the tracer positions must be of shape (n_tracer, 3),' \
291        ' where n_tracer is the number of tracers.')
292
293    if Verbose: print('\tdone.')
294
295    #-----------------------------------------------------------------------------------------------
296
297    #Figuring out the NN indices from the kA_kB_list
298    kList_A, kList_B = [], []
299    for kA, kB in kA_kB_list:
300        kList_A.append(kA)
301        kList_B.append(kB)
302    kMax_A, kMax_B = max(kList_A), max(kList_B)
303
304    #-----------------------------------------------------------------------------------------------
305        
306    #Building the trees
307    if Verbose: 
308        start_time = time.perf_counter()
309        print('\nbuilding the trees ...')
310        start_time_A = time.perf_counter()
311    xtree_A = scipy.spatial.cKDTree(TracerPos_A, boxsize=boxsize)
312    if Verbose: 
313        print('\tfirst set of tracers done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_A))
314        start_time_B = time.perf_counter()
315    xtree_B = scipy.spatial.cKDTree(TracerPos_B, boxsize=boxsize)
316    if Verbose: 
317        print('\tsecond set of tracers done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_B))
318        print('\tcombined time: {:.2e} s.'.format(time.perf_counter()-start_time))
319
320    #-----------------------------------------------------------------------------------------------
321    
322    #Calculating the NN distances
323    if Verbose: 
324        start_time = time.perf_counter()
325        print('\ncomputing the tracer NN distances ...')
326    vol_A, _ = xtree_A.query(QueryPos, k=kMax_A)
327    vol_B, _ = xtree_B.query(QueryPos, k=kMax_B)
328    req_vol_A = vol_A[:, np.array(kList_A)-1]
329    req_vol_B = vol_B[:, np.array(kList_B)-1]
330    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
331
332    #-----------------------------------------------------------------------------------------------
333    
334    #Calculating the auto kNN-CDFs
335    if Verbose: 
336        start_time = time.perf_counter()
337        print('\ncomputing the tracer auto-CDFs P_{>=kA}, P_{>=kB} ...')
338    p_gtr_kA_list = calc_kNN_CDF(req_vol_A, BinsRad)
339    p_gtr_kB_list = calc_kNN_CDF(req_vol_B, BinsRad)
340    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
341
342    #-----------------------------------------------------------------------------------------------
343
344    #Calculating the joint kNN-CDFs
345    if Verbose: 
346        start_time = time.perf_counter()
347        print('\ncomputing the joint-CDFs P_{>=kA, >=kB} ...')
348    joint_vol = np.zeros((vol_A.shape[0], len(kA_kB_list)))
349    for i, _ in enumerate(kA_kB_list):
350        joint_vol[:, i] = np.maximum(req_vol_A[:, i], req_vol_B[:, i])
351    p_gtr_kA_kB_list = calc_kNN_CDF(joint_vol, BinsRad)
352    if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
353
354    #-----------------------------------------------------------------------------------------------
355
356    if Verbose:
357        print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
358    
359    return p_gtr_kA_list, p_gtr_kB_list, p_gtr_kA_kB_list

Returns the probabilities $P_{\geq k_A}$, $P_{\geq k_B}$ and $P_{\geq k_A, \geq k_B}$ for ($k_A$, $k_B$) in kA_kB_list that quantify the extent of the spatial cross-correlation between the given sets of discrete tracers, TracerPos_A, TracerPos_B.

  1. $P_{\geq k_A}(r)$: the $k_A$NN-CDF of the first set of discrete tracers, evaluated at radial distance scale $r$

  2. $P_{\geq k_B}(\theta)$: the $k_B$NN-CDF of the second set of discrete tracers, evaluated at radial distance scale $r$

  3. $P_{\geq k_A, \geq k_B}(\theta)$: the joint probability of finding at least $k_A$ set A tracers and at least $k_B$ set B tracers within a sphere of radius $r$

The excess cross-correlation (Banerjee & Abel 2023)1 can be computed trivially from the quatities (see the kNNpy.HelperFunctions.kNN_excess_cross_corr() method to do this)

$$\psi_{k_A, k_B} = P_{\geq k_A, \geq k_B}/(P_{\geq k_A} \times P_{\geq k_B})$$

Parameters
  • boxszie (float): The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
  • kA_kB_list (list of int tuples): nearest-neighbour combinations for which the cross-correlations need to be computed (see notes for more details)
  • BinsRad (list of numpy float array): list of radial distance scale arrays (in Mpc/h) for each nearest neighbour combination in kA_kB_list. The $i^{th}$ element of the list should contain a numpy array of the desired distances for the $i^{th}$ nearest neighbour combination.
  • QueryPos (numpy float array of shape (n_query, 3)): array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • TracerPos_A (numpy float array of shape (n_tracer, 3)): array of 3D locations for the first set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • TracerPos_B (numpy float array of shape (n_tracer, 3)): array of 3D locations for the second set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • Verbose (bool, optional): if set to True, the time taken to complete each step of the calculation will be printed, by default False.
Returns
  • p_gtr_kA_list (list of numpy float arrays): list of auto kNN-CDFs of the first set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_A^i$NN-CDF, where the $i^{th}$ element of kA_kB_list is ($k_A^i$, $k_B^i$).
  • p_gtr_kB_list (list of numpy float arrays): list of auto kNN-CDFs of the second set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_B^i$NN-CDF, where the $i^{th}$ element of kA_kB_list is ($k_A^i$, $k_B^i$).
  • p_gtr_kA_kB_list (list of numpy float arrays): list of joint tracer-tracer nearest neighbour distributions evaluated at the desired distance bins. The $i^{th}$ element represents the joint {$k_A^i$, $k_B^i$}NN-CDF, where the $i^{th}$ element of kA_kB_list is ($k_A^i$, $k_B^i$).
Raises
  • ValueError: if the lengths of BinsRad and kA_kB_list do not match.
  • ValueError: if the given query points are not on a three-dimensional grid.
  • ValueError: if x,y, or z coordinates of any of the query points is not in [0, boxsize).
  • ValueError: if x,y, or z coordinates of any of the tracer points is not in [0, boxsize).
  • ValueError: if any of the given tracer points are not on a three-dimensional grid.
Notes

The parameter kA_kB_list should provide the desired combinations of NN indices for the two tracers sets being cross-correlated. For example, if you wish to compute the joint {1,1}, {1,2} and {2,1}NN-CDFs, then set

kA_kB_list = [(1,1), (1,2), (2,1)]

Please note that if the number density of one set of tracers is significantly smaller than the other, the joint kNN-CDFs approach the auto kNN-CDFs of the less dense tracer set. In this scenario, it may be better to treat the denser tracer set as a continuous field and use the TracerFieldCross2DA() method instead to conduct the cross-correlation analysis (see Gupta & Banerjee (2024)[^2] for a detailed discussion).

References

  1. Arka Banerjee, Tom Abel, Cosmological cross-correlations and nearest neighbour distributions, Monthly Notices of the Royal Astronomical Society, Volume 504, Issue 2, June 2021, Pages 2911–2923 

def TracerTracerCross3D_DataVector( boxsize, kA_kB_list, BinsRad, QueryPos, TracerPos_A_dict, TracerPos_B, Verbose=False):
363def TracerTracerCross3D_DataVector(boxsize, kA_kB_list, BinsRad, QueryPos, TracerPos_A_dict, TracerPos_B, Verbose=False ):
364    r'''
365    Returns the probabilities $P_{\geq k_{A_i}}$, $P_{\geq k_B}$ and $P_{\geq k_{A_i}, \geq k_B}$ for ($k_{A_i}$, $k_B$) in `kA_kB_list` for various 
366    realizations of Tracer A, while keeping the set Tracer B constant. Refer to Notes to understand why this might be useful. These quantify
367    the extent of the spatial cross-correlation between the given sets of discrete tracers, the $i^{\text{th}}$ realization of `TracerPos_A`, `TracerPos_B`.
368    We do not vary the 'kA_kB_list' as a function of the realizations of Tracer A.
369    	
370    1. $P_{\geq k_{A_i}}(r)$: 
371    	the $k_A$NN-CDF of the $i^{\text{th}}$ realization of the first set of discrete tracers, evaluated at radial distance scale $r$
372    		
373    2. $P_{\geq k_B}(\theta)$: 
374    	the $k_B$NN-CDF of the second set of discrete tracers, evaluated at radial distance scale $r$
375    		
376    3.  $P_{\geq k_{A_i}, \geq k_B}(\theta)$:
377    	the joint probability of finding at least $k_A$ set A tracers and at least $k_B$ set B tracers within a sphere of radius $r$, for the
378        $i^{\text{th}}$ realization of Tracer A
379    		
380    The excess cross-correlation (Banerjee & Abel 2023)[^1] can be computed trivially from the quatities (see the `kNNpy.HelperFunctions.kNN_excess_cross_corr()` method to do this)
381    	
382    $$\psi_{k_A, k_B} = P_{\geq k_A, \geq k_B}/(P_{\geq k_A} \times P_{\geq k_B})$$
383    		
384    Parameters
385    ----------
386    boxsize:  float
387        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
388    kA_kB_list : list of int tuples
389        nearest-neighbour combinations for which the cross-correlations need to be computed (see notes for more details)
390    BinsRad : list of numpy float array
391        list of radial distance scale arrays (in Mpc/h) for each nearest neighbour combination in `kA_kB_list`. The $i^{th}$ element of the 
392        list should contain a numpy array of the desired distances for the $i^{th}$ nearest neighbour combination.
393    QueryPos : numpy float array of shape ``(n_query, 3)``
394        array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
395        Please ensure $0<x,y,z<boxsize$.
396    TracerPos_A_dict : dictionary, where each key corresponds to the realization, and stores the corresponding numpy array of size ``(n_tracer,3)``, that 
397        is the tracer positions for the $i^{\text{th}}$ realization 
398        The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
399        Please ensure $0<x,y,z<boxsize$.
400    TracerPos_B : numpy float array of shape ``(n_tracer, 3)``
401        array of 3D locations for the second set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. 
402        Please ensure $0<x,y,z<boxsize$.
403    Verbose : bool, optional
404        if set to ``True``, the time taken to complete each step of the calculation will be printed, by default ``False``.
405
406    Returns
407    -------
408    Realizations: a numpy array of arrays where the $i^{\text{th}}$ element corresponds to the 3D cross-correlation calculated between the $i^{\text{th}} 
409    realization of Tracer A and Tracer B. The values correspond to an numpy array: [p_gtr_kA_list, p_gtr_kB_list, p_gtr_kA_kB_list]
410    p_gtr_kA_list: list of numpy float arrays
411        list of auto kNN-CDFs of the first set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_A^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
412        
413    p_gtr_kB_list: list of numpy float arrays
414        list of auto kNN-CDFs of the second set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_B^i$NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
415    
416    p_gtr_kA_kB_list: list of numpy float arrays
417        list of joint tracer-tracer nearest neighbour distributions evaluated at the desired distance bins. The $i^{th}$ element represents the joint {$k_A^i$, $k_B^i$}NN-CDF, where the $i^{th}$ element of `kA_kB_list` is ($k_A^i$, $k_B^i$).
418        
419    Raises
420    ------
421    ValueError
422        if the lengths of `BinsRad` and `kA_kB_list` do not match.
423    ValueError
424        if the given query points are not on a three-dimensional grid.
425    ValueError
426        if x,y, or z coordinates of any of the query points is not in ``[0, boxsize)``.
427    ValueError
428        if x,y, or z coordinates of any of the tracer points is not in `'[0, boxsize)``.
429    ValueError
430        if any of the given tracer points are not on a three-dimensional grid.
431
432    Notes
433    -----
434    The parameter `kA_kB_list` should provide the desired combinations of NN indices for the two tracers sets being cross-correlated. For example, if you wish to compute the joint {1,1}, {1,2} and {2,1}NN-CDFs, then set
435            
436        kA_kB_list = [(1,1), (1,2), (2,1)]
437
438    Please note that if the number density of one set of tracers is significantly smaller than the other, the joint kNN-CDFs approach the auto kNN-CDFs of the less dense tracer set. In this scenario, it may be better to treat the denser tracer set as a continuous field and use the `TracerFieldCross2DA()` method instead to conduct the cross-correlation analysis  (see Gupta & Banerjee (2024)[^2] for a detailed discussion).
439    #Write why this module might be useful
440
441    References
442    ----------
443    [^1]: Arka Banerjee, Tom Abel, Cosmological cross-correlations and nearest neighbour distributions, [Monthly Notices of the Royal Astronomical Society](https://doi.org/10.1093/mnras/stab961), Volume 504, Issue 2, June 2021, Pages 2911–2923
444        
445    '''
446    
447    #-----------------------------------------------------------------------------------------------
448
449    if Verbose: total_start_time = time.perf_counter()
450    keys=list(TracerPos_A_dict.keys())
451
452    #-----------------------------------------------------------------------------------------------
453        
454    #Step 0: Check all inputs are consistent with the function requirement
455
456    if Verbose: print('Checking inputs ...')
457
458    if len(BinsRad)!=len(kA_kB_list): 
459        raise ValueError("length of 'BinsRad' must match length of 'kA_kB_list'.")
460
461    if QueryPos.shape[1]!=3: 
462        raise ValueError('Incorrect spatial dimension for query points: array containing the query point positions must be of shape (n_query,3),' \
463        ' where n_query is the number of query points.')
464    
465    if np.any((QueryPos[:, 0] < 0) | (QueryPos[:, 0] >= boxsize)):
466        raise ValueError('Invalid query point position(s): please ensure 0 < x < boxsize.')
467
468    if np.any((QueryPos[:, 1] < 0) | (QueryPos[:, 1] >= boxsize)):
469        raise ValueError('Invalid query point position(s): please ensure 0 < y < boxsize.')
470
471    if np.any((QueryPos[:, 2] < 0) | (QueryPos[:, 2] >= boxsize)):
472        raise ValueError('Invalid query point position(s): please ensure 0 < z < boxsize.')
473    for i in range(len(keys)):
474        if np.any((TracerPos_A_dict[keys[i]][:, 0] < 0) | (TracerPos_A_dict[keys[i]][:, 0] >= boxsize)):
475            raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < x < boxsize.')
476
477    for i in range(len(keys)):
478        if np.any((TracerPos_A_dict[keys[i]][:, 1]< 0) | (TracerPos_A_dict[keys[i]][:, 1]>= boxsize)):
479            raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < y < boxsize.')
480
481    for i in range(len(keys)):
482        if np.any((TracerPos_A_dict[keys[i]][:, 2]< 0) | (TracerPos_A_dict[keys[i]][:, 2]>= boxsize)):
483            raise ValueError('Invalid tracer point position(s) for the first set: please ensure 0 < z < boxsize.')
484
485    if np.any((TracerPos_B[:, 0] < 0) | (TracerPos_B[:, 0] >= boxsize)):
486        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < x < boxsize.')
487
488    if np.any((TracerPos_B[:, 1]< 0) | (TracerPos_B[:, 1]>= boxsize)):
489        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < y < boxsize.')
490
491    if np.any((TracerPos_B[:, 2]< 0) | (TracerPos_B[:, 2]>= boxsize)):
492        raise ValueError('Invalid tracer point position(s) for the second set: please ensure 0 < z < boxsize.')
493
494    for i in range(len(keys)):
495        if TracerPos_A_dict[keys[i]].shape[1]!=3 or TracerPos_B.shape[1]!=3: 
496            raise ValueError('Incorrect spatial dimension for tracers: array containing the tracer positions must be of shape (n_tracer, 3),' \
497            ' where n_tracer is the number of tracers.')
498
499    if Verbose: print('\tdone.')
500
501    #-----------------------------------------------------------------------------------------------
502    #Figuring out the NN indices from the kA_kB_list
503    kList_A, kList_B = [], []
504    for kA, kB in kA_kB_list:
505        kList_A.append(kA)
506        kList_B.append(kB)
507    kMax_A, kMax_B = max(kList_A), max(kList_B)
508
509    #-----------------------------------------------------------------------------------------------
510        
511    #Building the trees
512    if Verbose: 
513        start_time = time.perf_counter()
514        print('\nbuilding the trees ...')
515        start_time_B = time.perf_counter()
516    xtree_B = scipy.spatial.cKDTree(TracerPos_B, boxsize=boxsize)  
517    if Verbose: 
518        print('\tsecond set of tracers done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_B))
519
520    #Initializing the containinment array
521    #Realizations=np.zeros((len(TracerPos_A_dict.values()),3,len(kA_kB_list)))
522    Realizations=[]
523    
524    for i, values in enumerate(TracerPos_A_dict.values()):
525        if Verbose:
526            print(f'\n Building the tree for the {i}th relaization of Tracer A')
527            start_time_A=time.perf_counter()
528        xtree_A=scipy.spatial.cKDTree(values, boxsize=boxsize)
529        if Verbose:
530            print('\tset of tracers being iterated over done; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_A))
531            print('\tcombined time: {:.2e} s.'.format(time.perf_counter()-start_time))
532
533        #Calculating the NN distances
534        if Verbose: 
535            start_time = time.perf_counter()
536            print('\ncomputing the tracer NN distances ...')
537        vol_A, _ = xtree_A.query(QueryPos, k=kMax_A)
538        vol_B, _ = xtree_B.query(QueryPos, k=kMax_B)
539        req_vol_A = vol_A[:, np.array(kList_A)-1]
540        req_vol_B = vol_B[:, np.array(kList_B)-1]
541        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
542
543        #-----------------------------------------------------------------------------------------------
544    
545        #Calculating the auto kNN-CDFs
546        if Verbose: 
547            start_time = time.perf_counter()
548            print('\ncomputing the tracer auto-CDFs P_{>=kA}, P_{>=kB} ...')
549        p_gtr_kA_list = calc_kNN_CDF(req_vol_A, BinsRad)
550        p_gtr_kB_list = calc_kNN_CDF(req_vol_B, BinsRad)
551        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
552
553        #-----------------------------------------------------------------------------------------------
554
555        #Calculating the joint kNN-CDFs
556        if Verbose: 
557            start_time = time.perf_counter()
558            print('\ncomputing the joint-CDFs P_{>=kA, >=kB} ...')
559        joint_vol = np.zeros((vol_A.shape[0], len(kA_kB_list)))
560        for i, _ in enumerate(kA_kB_list):
561            joint_vol[:, i] = np.maximum(req_vol_A[:, i], req_vol_B[:, i])
562        p_gtr_kA_kB_list = calc_kNN_CDF(joint_vol, BinsRad)
563        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time))
564
565        #-----------------------------------------------------------------------------------------------
566
567        if Verbose:
568            print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
569        
570        Realizations.append([p_gtr_kA_list, p_gtr_kB_list, p_gtr_kA_kB_list])
571    Realizations=np.array(Realizations) 
572    return Realizations

Returns the probabilities $P_{\geq k_{A_i}}$, $P_{\geq k_B}$ and $P_{\geq k_{A_i}, \geq k_B}$ for ($k_{A_i}$, $k_B$) in kA_kB_list for various realizations of Tracer A, while keeping the set Tracer B constant. Refer to Notes to understand why this might be useful. These quantify the extent of the spatial cross-correlation between the given sets of discrete tracers, the $i^{\text{th}}$ realization of TracerPos_A, TracerPos_B. We do not vary the 'kA_kB_list' as a function of the realizations of Tracer A.

  1. $P_{\geq k_{A_i}}(r)$: the $k_A$NN-CDF of the $i^{\text{th}}$ realization of the first set of discrete tracers, evaluated at radial distance scale $r$

  2. $P_{\geq k_B}(\theta)$: the $k_B$NN-CDF of the second set of discrete tracers, evaluated at radial distance scale $r$

  3. $P_{\geq k_{A_i}, \geq k_B}(\theta)$: the joint probability of finding at least $k_A$ set A tracers and at least $k_B$ set B tracers within a sphere of radius $r$, for the $i^{\text{th}}$ realization of Tracer A

The excess cross-correlation (Banerjee & Abel 2023)1 can be computed trivially from the quatities (see the kNNpy.HelperFunctions.kNN_excess_cross_corr() method to do this)

$$\psi_{k_A, k_B} = P_{\geq k_A, \geq k_B}/(P_{\geq k_A} \times P_{\geq k_B})$$

Parameters
  • boxsize (float): The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
  • kA_kB_list (list of int tuples): nearest-neighbour combinations for which the cross-correlations need to be computed (see notes for more details)
  • BinsRad (list of numpy float array): list of radial distance scale arrays (in Mpc/h) for each nearest neighbour combination in kA_kB_list. The $i^{th}$ element of the list should contain a numpy array of the desired distances for the $i^{th}$ nearest neighbour combination.
  • QueryPos (numpy float array of shape (n_query, 3)): array of 3D locations for the query points. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • TracerPos_A_dict (dictionary, where each key corresponds to the realization, and stores the corresponding numpy array of size (n_tracer,3), that): is the tracer positions for the $i^{\text{th}}$ realization The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • TracerPos_B (numpy float array of shape (n_tracer, 3)): array of 3D locations for the second set of discrete tracers. The 3D locations must be on a grid. The format is (x,y,z) Cartesian coordinates. Please ensure $0
  • Verbose (bool, optional): if set to True, the time taken to complete each step of the calculation will be printed, by default False.
Returns
  • Realizations (a numpy array of arrays where the $i^{\text{th}}$ element corresponds to the 3D cross-correlation calculated between the $i^{\text{th}}):

  • realization of Tracer A and Tracer B. The values correspond to an numpy array ([p_gtr_kA_list, p_gtr_kB_list, p_gtr_kA_kB_list]):

  • p_gtr_kA_list (list of numpy float arrays): list of auto kNN-CDFs of the first set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_A^i$NN-CDF, where the $i^{th}$ element of kA_kB_list is ($k_A^i$, $k_B^i$).

  • p_gtr_kB_list (list of numpy float arrays): list of auto kNN-CDFs of the second set of discrete tracers evaluated at the desired distance bins. The $i^{th}$ element represents the $k_B^i$NN-CDF, where the $i^{th}$ element of kA_kB_list is ($k_A^i$, $k_B^i$).
  • p_gtr_kA_kB_list (list of numpy float arrays): list of joint tracer-tracer nearest neighbour distributions evaluated at the desired distance bins. The $i^{th}$ element represents the joint {$k_A^i$, $k_B^i$}NN-CDF, where the $i^{th}$ element of kA_kB_list is ($k_A^i$, $k_B^i$).
Raises
  • ValueError: if the lengths of BinsRad and kA_kB_list do not match.
  • ValueError: if the given query points are not on a three-dimensional grid.
  • ValueError: if x,y, or z coordinates of any of the query points is not in [0, boxsize).
  • ValueError: if x,y, or z coordinates of any of the tracer points is not in `'[0, boxsize)``.
  • ValueError: if any of the given tracer points are not on a three-dimensional grid.
Notes

The parameter kA_kB_list should provide the desired combinations of NN indices for the two tracers sets being cross-correlated. For example, if you wish to compute the joint {1,1}, {1,2} and {2,1}NN-CDFs, then set

kA_kB_list = [(1,1), (1,2), (2,1)]

Please note that if the number density of one set of tracers is significantly smaller than the other, the joint kNN-CDFs approach the auto kNN-CDFs of the less dense tracer set. In this scenario, it may be better to treat the denser tracer set as a continuous field and use the TracerFieldCross2DA() method instead to conduct the cross-correlation analysis (see Gupta & Banerjee (2024)[^2] for a detailed discussion).

Write why this module might be useful

References

  1. Arka Banerjee, Tom Abel, Cosmological cross-correlations and nearest neighbour distributions, Monthly Notices of the Royal Astronomical Society, Volume 504, Issue 2, June 2021, Pages 2911–2923 

def TracerFieldCross3D( kList, RBins, BoxSize, QueryPos, TracerPos, Field3D, FieldConstPercThreshold, ReturnSmoothedFieldDict=False, Verbose=False):
576def TracerFieldCross3D(kList, RBins, BoxSize, QueryPos, TracerPos, Field3D, FieldConstPercThreshold, ReturnSmoothedFieldDict=False, Verbose=False):
577    r'''
578    Returns the probabilities $P_{\geq k}$, $P_{>{\rm dt}}$ and $P_{\geq k,>{\rm dt}}$ for $k$ in `kList`, that quantify the extent of the spatial cross-correlation between the given discrete tracer positions (`TracerPos`) and the given continuous overdensity field (`SmoothedFieldDict`) in three-dimensional space.
579    
580    1. $P_{\geq k}(r)$: 
581        the kNN-CDF of the discrete tracers, evaluated at separation $r$
582    
583    2. $P_{>{\rm dt}}(r)$: 
584        the probability of the overdensity field smoothed with a top-hat filter of radius $r$ exceeding the given constant percentile density threshold
585    
586    3. $P_{\geq k, >{\rm dt}}(r)$:
587        the joint probability of finding at least 'k' tracers within a sphere of radius $r$ AND the overdensity field smoothed at scale $r$ exceeding the given density threshold (as specified by the parameter `FieldConstPercThreshold`)
588    
589    The excess cross-correlation (Banerjee & Abel 2023)[^1] can be computed trivially from these quantities:
590    
591    $$\psi_{k, {\rm dt}} = \frac{P_{\geq k, >{\rm dt}}}{P_{\geq k} \times P_{>{\rm dt}}}$$
592
593    Parameters
594    ----------
595    kList : list of int
596        List of nearest neighbours to compute. For example, if ``kList = [1, 2, 4]``, the first, second and fourth-nearest neighbour distributions will be computed.
597
598    RBins : list of numpy float arrays
599        List of radial distance arrays (in comoving Mpc/$h$), one for each value in `kList`. The i-th element of the list should be a numpy array specifying the distances to be used for the nearest neighbour calculation corresponding to `kList[i]`.
600
601    BoxSize : float
602        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
603
604    QueryPos : numpy float array of shape ``(n_query, 3)``
605        Array of 3D positions (e.g., in Cartesian coordinates) used to query the nearest-neighbour distances, and also compute field's CDF.
606
607    TracerPos : numpy float array of shape ``(n_tracer, 3)``
608        Array of 3D positions of discrete tracers, with columns representing the x, y, and z coordinates, respectively.
609    
610    Field3D : numpy float array of shape ``(n_grid, n_grid, n_grid)``
611        A 3D numpy array representing the continuous field (for e.g., the matter overdensity field). The shape of the array should match the grid size used for smoothing.
612
613    FieldConstPercThreshold : float
614        The percentile threshold for identifying overdense regions in the continuous field. For example, ``75.0`` indicates the 75th percentile.
615
616    ReturnSmoothedFieldDict : bool, optional
617        if set to ``True``, the dictionary containing the continuous field smoothed at the provided radial bins, will be returned along with the nearest-neighbour measurements, by default ``False``.
618    
619    Verbose : bool, optional
620        If True, prints timing information for each step. Default is False.
621
622    Returns
623    -------
624    p_gtr_k_list : list of numpy float arrays
625        Auto kNN-CDFs of the discrete tracers evaluated at the desired distance bins.
626
627    p_gtr_dt_list : list of numpy float arrays
628        Overdensity-field auto kNN-CDFs evaluated at the same scales.
629
630    p_gtr_k_dt_list : list of numpy float arrays
631        Joint CDFs of finding $\geq k$ tracers AND field value exceeding the threshold at a given scale.
632
633    SmoothedFieldDict : dict
634        dictionary containing the continuous field smoothed at the provided radial bins, returned only if `ReturnSmoothedDict` is ``True``. For example, ``SmoothedFieldDict['5']`` represents the continuous map smoothed at a scale of 5 Mpc/h.
635
636    Raises
637    ------
638    ValueError
639        If TracerPos are not 3D.
640    ValueError
641        If QueryPos are not 3D.
642    ValueError
643        If tracer positions are outside the specified box size.
644    ValueError
645        If QueryPos are outside the specified box size.
646
647    References
648    ----------
649    [^1]: Arka Banerjee, Tom Abel, Tracer-field cross-correlations with k-nearest neighbour distributions, [MNRAS](https://doi.org/10.1093/mnras/stac3813), Volume 519, Issue 4, March 2023, Pages 4856-4868
650
651    [^2]: Eishica Chand, Arka Banerjee, Simon Foreman, Francisco Villaescusa-Navarro, [MNRAS](https://doi.org/10.1093/mnras/staf433), Volume 538, Issue 3, April 2025, Pages 2204-221 
652    '''
653
654    if Verbose: total_start_time = time.perf_counter()
655
656    #-----------------------------------------------------------------------------------------------
657
658    # Step 0: Input validation
659    if Verbose: print('Checking inputs ...')
660
661    if QueryPos.shape[1] != 3:
662        raise ValueError("Query positions must be 3D (shape: n_query x 3).")
663    if TracerPos.shape[1] != 3:
664        raise ValueError("Tracer positions must be 3D (shape: n_tracer x 3).")
665    if np.any((TracerPos <= 0) | (TracerPos > BoxSize)):
666        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
667    if np.any((QueryPos <= 0) | (QueryPos > BoxSize)):
668        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
669
670    if Verbose: print('\tdone.')
671
672    #-----------------------------------------------------------------------------------------------
673    # Step 1: Compute kNN-CDFs for tracer positions
674    if Verbose:
675        step_1_start_time = time.perf_counter()
676        print('\ninitiating step 1 ...')
677
678    #-----------------------------------------------------------------------------------------------
679
680    # Building the kdTree
681    if Verbose:
682        print('\n\tbuilding the kdTree ...')
683        t_start = time.perf_counter()
684
685    xtree = scipy.spatial.cKDTree(TracerPos, boxsize=BoxSize)
686
687    if Verbose:
688        print('\t\tdone; time taken: {:.2e} s.'.format(time.perf_counter() - t_start))
689
690    #------------------------------------------------------------------------------------------------
691
692    # To store the CDFs for each k  
693    if Verbose:
694        print('\n\tcomputing the tracer NN distances ...')
695        t_start = time.perf_counter()
696    
697
698    #-------------------------------------------------------------------------------------------------
699
700    Nquery = QueryPos.shape[0]
701    dists, _ = xtree.query(QueryPos, k=max(kList), workers=-1)
702    vol = dists[:, np.array(kList)-1]
703    
704    #------------------------------------------------------------------------------------------------
705
706    # Compute the kNN-CDFs for the tracers
707    if Verbose:
708        print('\t\tdone; time taken: {:.2e} s.'.format(time.perf_counter() - t_start))
709   
710    if Verbose:
711        print('\n\tcomputing P_{>=k} ...')
712        t_start = time.perf_counter()
713
714    p_gtr_k_list = calc_kNN_CDF(vol, RBins)
715
716    if Verbose:
717        print('\t\tdone; time taken: {:.2e} s.'.format(time.perf_counter() - t_start))
718        print('time taken for step 1: {:.2e} s.'.format(time.perf_counter() - step_1_start_time))
719
720    #------------------------------------------------------------------------------------------------
721
722    # Step 2: Compute kNN-CDFs for the overdensity field, and the joint CDFs with tracers 
723
724    if Verbose:
725        step_2_start_time = time.perf_counter()
726        print('\ninitiating step 2 ...')
727
728    # Store computed smoothed fields, interpolated values, and percentile thresholds
729    SmoothedFieldDictOut = {}
730    Interpolated_Smoothed_Field = {}
731    Delta_Threshold = {}
732
733    # To store the CDFs for each k
734    p_gtr_k_dt_list = []
735    p_gtr_dt_list = []
736
737    #------------------------------------------------------------------------------------------------
738
739    # Compute the CDFs
740    for k_ind, k in enumerate(kList):
741
742        if Verbose:
743            print(f"\nComputing P_{{>=k, >dt}} and P_{{>dt}} for k = {k} ...")
744            k_start_time = time.perf_counter()
745
746        p_gtr_k_dt = np.zeros(len(RBins[k_ind]))
747        p_gtr_dt   = np.zeros(len(RBins[k_ind]))
748
749        for j, ss in enumerate(RBins[k_ind]):
750
751            #------------------------------------------------------------------------------------------------
752            ss_str = str(ss)
753
754            if ss_str not in SmoothedFieldDictOut:
755                SmoothedFieldDictOut[ss_str] = smoothing_3D(Field3D, Filter='Top-Hat', grid=Field3D.shape[0], BoxSize=BoxSize, R=ss, Verbose=False)
756
757            #-------------------------------------------------------------------------------------------------
758
759            if ss_str not in Interpolated_Smoothed_Field:
760                Interpolated_Smoothed_Field[ss_str] = CIC_3D_Interp(QueryPos, SmoothedFieldDictOut[ss_str], BoxSize)
761
762            interp_field = Interpolated_Smoothed_Field[ss_str]
763
764            
765            #-------------------------------------------------------------------------------------------------
766
767            if ss_str not in Delta_Threshold:
768                Delta_Threshold[ss_str] = np.percentile(Interpolated_Smoothed_Field[ss_str], FieldConstPercThreshold)
769
770            delta_star_ss = Delta_Threshold[ss_str]
771
772            #-------------------------------------------------------------------------------------------------
773
774            # Compute fractions
775            vol_mask      = vol[:, k_ind] < ss
776            field_mask    = interp_field > delta_star_ss
777
778            p_gtr_dt[j]   = np.count_nonzero(field_mask) / Nquery
779            p_gtr_k_dt[j] = np.count_nonzero(vol_mask & field_mask) / Nquery
780
781        #-------------------------------------------------------------------------------------------------
782
783        p_gtr_k_dt_list.append(p_gtr_k_dt)
784        p_gtr_dt_list.append(p_gtr_dt)
785
786        if Verbose:
787            print(f"\tdone for k = {k}; time taken: {time.perf_counter() - k_start_time:.2e} s")
788
789    #------------------------------------------------------------------------------------------------
790
791    if Verbose:
792        print(f"\nTotal time taken: {time.perf_counter() - step_2_start_time:.2e} s")
793    
794    #-----------------------------------------------------------------------------------------------
795
796    if Verbose:
797        print(f"\nTotal time taken for all steps: {time.perf_counter() - total_start_time:.2e} s")
798
799    if ReturnSmoothedFieldDict:
800        return p_gtr_k_list, p_gtr_dt_list, p_gtr_k_dt_list, SmoothedFieldDictOut
801    else:
802        return p_gtr_k_list, p_gtr_dt_list, p_gtr_k_dt_list

Returns the probabilities $P_{\geq k}$, $P_{>{\rm dt}}$ and $P_{\geq k,>{\rm dt}}$ for $k$ in kList, that quantify the extent of the spatial cross-correlation between the given discrete tracer positions (TracerPos) and the given continuous overdensity field (SmoothedFieldDict) in three-dimensional space.

  1. $P_{\geq k}(r)$: the kNN-CDF of the discrete tracers, evaluated at separation $r$

  2. $P_{>{\rm dt}}(r)$: the probability of the overdensity field smoothed with a top-hat filter of radius $r$ exceeding the given constant percentile density threshold

  3. $P_{\geq k, >{\rm dt}}(r)$: the joint probability of finding at least 'k' tracers within a sphere of radius $r$ AND the overdensity field smoothed at scale $r$ exceeding the given density threshold (as specified by the parameter FieldConstPercThreshold)

The excess cross-correlation (Banerjee & Abel 2023)1 can be computed trivially from these quantities:

$$\psi_{k, {\rm dt}} = \frac{P_{\geq k, >{\rm dt}}}{P_{\geq k} \times P_{>{\rm dt}}}$$

Parameters
  • kList (list of int): List of nearest neighbours to compute. For example, if kList = [1, 2, 4], the first, second and fourth-nearest neighbour distributions will be computed.
  • RBins (list of numpy float arrays): List of radial distance arrays (in comoving Mpc/$h$), one for each value in kList. The i-th element of the list should be a numpy array specifying the distances to be used for the nearest neighbour calculation corresponding to kList[i].
  • BoxSize (float): The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
  • QueryPos (numpy float array of shape (n_query, 3)): Array of 3D positions (e.g., in Cartesian coordinates) used to query the nearest-neighbour distances, and also compute field's CDF.
  • TracerPos (numpy float array of shape (n_tracer, 3)): Array of 3D positions of discrete tracers, with columns representing the x, y, and z coordinates, respectively.
  • Field3D (numpy float array of shape (n_grid, n_grid, n_grid)): A 3D numpy array representing the continuous field (for e.g., the matter overdensity field). The shape of the array should match the grid size used for smoothing.
  • FieldConstPercThreshold (float): The percentile threshold for identifying overdense regions in the continuous field. For example, 75.0 indicates the 75th percentile.
  • ReturnSmoothedFieldDict (bool, optional): if set to True, the dictionary containing the continuous field smoothed at the provided radial bins, will be returned along with the nearest-neighbour measurements, by default False.
  • Verbose (bool, optional): If True, prints timing information for each step. Default is False.
Returns
  • p_gtr_k_list (list of numpy float arrays): Auto kNN-CDFs of the discrete tracers evaluated at the desired distance bins.
  • p_gtr_dt_list (list of numpy float arrays): Overdensity-field auto kNN-CDFs evaluated at the same scales.
  • p_gtr_k_dt_list (list of numpy float arrays): Joint CDFs of finding $\geq k$ tracers AND field value exceeding the threshold at a given scale.
  • SmoothedFieldDict (dict): dictionary containing the continuous field smoothed at the provided radial bins, returned only if ReturnSmoothedDict is True. For example, SmoothedFieldDict['5'] represents the continuous map smoothed at a scale of 5 Mpc/h.
Raises
  • ValueError: If TracerPos are not 3D.
  • ValueError: If QueryPos are not 3D.
  • ValueError: If tracer positions are outside the specified box size.
  • ValueError: If QueryPos are outside the specified box size.
References

  1. Arka Banerjee, Tom Abel, Tracer-field cross-correlations with k-nearest neighbour distributions, MNRAS, Volume 519, Issue 4, March 2023, Pages 4856-4868 

def TracerFieldCross3D_DataVector( kList, RBins, BoxSize, QueryPos, TracerPosVector, Field, FieldConstPercThreshold, ReturnSmoothedDict=False, Verbose=False):
 806def TracerFieldCross3D_DataVector(kList, RBins, BoxSize, QueryPos, TracerPosVector, Field, FieldConstPercThreshold, ReturnSmoothedDict=False, Verbose=False):
 807    
 808    r'''
 809    Returns 'data vectors' of the  the probabilities $P_{\geq k}$, $P_{>{\rm dt}}$ and $P_{\geq k,>{\rm dt}}$ [refer to kNNpy.kNN_3D.TracerFieldCross for definitions] for $k$ in `kList` for multiple realisations of the given discrete tracer set [`TracerPosVector`] and a single realisation of the given continuous overdensity field (`Field`). Please refer to notes to understand why this might be useful.
 810    	
 811    Parameters
 812    ----------
 813    kList : list of int
 814        List of nearest neighbours to compute. For example, if ``kList = [1, 2, 4]``, the first, second and fourth-nearest neighbour distributions will be computed.
 815
 816    RBins : list of numpy float arrays
 817        List of radial distance arrays (in comoving Mpc/$h$), one for each value in `kList`. The i-th element of the list should be a numpy array specifying the distances to be used for the nearest neighbour calculation corresponding to `kList[i]`.
 818
 819    BoxSize : float
 820        The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
 821
 822    QueryPos : numpy float array of shape ``(n_query, 3)``
 823        Array of 3D positions (e.g., in Cartesian coordinates) used to query the nearest-neighbour distances, and also compute field's CDF.
 824    
 825    TracerPosVector : numpy float array of shape ``(n_realisations, n_tracer, 3)``
 826        Array of 3D positions of n_realisations of discrete tracers, with columns representing the x, y, and z coordinates, respectively.
 827    
 828    Field : numpy float array of shape ``(n_grid, n_grid, n_grid)``
 829        A 3D numpy array representing the continuous field (for e.g., the matter overdensity field). The shape of the array should match the grid size used for smoothing.
 830
 831    FieldConstPercThreshold : float
 832        The percentile threshold for identifying overdense regions in the continuous field. For example, ``75.0`` indicates the 75th percentile.
 833
 834    ReturnSmoothedFieldDict : bool, optional
 835        if set to ``True``, the dictionary containing the continuous field smoothed at the provided radial bins, will be returned along with the nearest-neighbour measurements, by default ``False``.
 836    
 837    Verbose : bool, optional
 838        If True, prints timing information for each step. Default is False.
 839
 840    Returns
 841    -------
 842    p_gtr_k_veclist: list of numpy float arrays
 843        list of auto kNN-CDFs of the discrete tracers evaluated at the desired distance bins. Each list member is a 2D array of shape ``(n_realisations, n_bins)``.
 844
 845    p_gtr_dt_list: list of numpy float arrays
 846        continuum version of auto kNN-CDFs for the continuous field evaluated at the desired distance bins.
 847
 848    p_gtr_k_dt_veclist: list of numpy float arrays
 849        list of joint tracer-field nearest neighbour distributions evaluated at the desired distance bins. Each list member is a 2D array of shape ``(n_realisations, n_bins)``.
 850
 851    SmoothedFieldDict : dict
 852        dictionary containing the continuous field smoothed at the provided radial bins, returned only if `ReturnSmoothedDict` is ``True``. For example, ``SmoothedFieldDict['5']`` represents the continuous map smoothed at a scale of 5 Mpc/h.
 853
 854    Raises
 855    ------
 856    ValueError
 857        If TracerPos are not on a 3dimensional grid.
 858    ValueError
 859        If QueryPos are not on a 3dimensional grid.
 860    ValueError
 861        If tracer positions are outside the specified box size.
 862    ValueError
 863        If QueryPos are outside the specified box size.
 864
 865    References
 866    ----------
 867    [^1]: Arka Banerjee, Tom Abel, Tracer-field cross-correlations with k-nearest neighbour distributions, [MNRAS](https://doi.org/10.1093/mnras/stac3813), Volume 519, Issue 4, March 2023, Pages 4856-4868
 868
 869    [^2]: Eishica Chand, Arka Banerjee, Simon Foreman, Francisco Villaescusa-Navarro, [MNRAS](https://doi.org/10.1093/mnras/staf433), Volume 538, Issue 3, April 2025, Pages 2204-221 
 870    '''
 871
 872    if Verbose: total_start_time = time.perf_counter()
 873
 874    #-----------------------------------------------------------------------------------------------
 875        
 876    #Step 0: Check all inputs are consistent with the function requirement
 877
 878    if Verbose:
 879        print('Checking inputs ...')
 880
 881    # Query positions must be (n_query, 3)
 882    if QueryPos.ndim != 2 or QueryPos.shape[1] != 3:
 883        raise ValueError("Query positions must be of shape: (n_query, 3), where n_query is the number of query points.")
 884
 885    # Tracer positions must be (n_realizations, n_tracer, 3)
 886    if TracerPosVector.ndim != 3 or TracerPosVector.shape[2] != 3:
 887        raise ValueError("Tracer positions must be of shape: (n_realizations, n_tracer, 3), where n_realizations is the number of tracer realizations and n_tracer is the number of tracers per realization.")
 888
 889    # Tracer positions must lie within [0, BoxSize)
 890    if np.any((TracerPosVector <= 0) | (TracerPosVector > BoxSize)):
 891        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
 892    if np.any((QueryPos <= 0) | (QueryPos > BoxSize)):
 893        raise ValueError("Tracer positions must be within the box [0, BoxSize).")
 894
 895    if Verbose:
 896        print('\tdone.')
 897
 898    #-----------------------------------------------------------------------------------------------
 899        
 900    #Step 1: smooth the continuous field and store in dictionary
 901    if Verbose:
 902        step_1_start_time = time.perf_counter()
 903        print('\ninitiating step 1 (smoothing the continuous field at the given radial scales)...')
 904
 905    #-----------------------------------------------------------------------------------------------
 906
 907    grid = Field.shape[0]  
 908    Filter = 'Top-Hat'
 909
 910    SmoothedFieldDict = create_smoothed_field_dict_3D(Field, Filter, grid, BoxSize, RBins, thickness=None, Verbose=False)
 911
 912    if Verbose: print('\tdone; time taken for step 1: {:.2e} s.'.format(time.perf_counter()-step_1_start_time))
 913
 914    #-----------------------------------------------------------------------------------------------
 915        
 916    #Step 2: 
 917
 918    # A. Compute the fraction of query points at which the smoothed fields at the different radial
 919    #    scales are greater than the overdensity threshold.
 920
 921    # B. For each realization of the discrete tracer set, calculate 
 922    #   (i)  nearest neighbour distances of query points, and the kNN-CDFs for the discrete tracers
 923    #   (ii) the fraction of query points with nearest neighbour distance less than the angular
 924    #        distance and smoothed field greater than the overdensity threshold
 925
 926    if Verbose: 
 927        step_2_start_time = time.perf_counter()
 928        print('\ninitiating step 2 (looping the tracer-field cross-correlation computations over the multiple tracer realisations)...')
 929
 930    #-----------------------------------------------------------------------------------------------
 931
 932    n_reals = TracerPosVector.shape[0]
 933    p_gtr_k_veclist, p_gtr_dt_list, p_gtr_k_dt_veclist = [], [], []
 934
 935    Interpolated_Smoothed_Field = {}
 936    Delta_Threshold = {}
 937
 938    #------------------------------------------------------------------------------------------------
 939    for k_ind, k in enumerate(kList):
 940             
 941            p_gtr_k_veclist.append(np.zeros((n_reals, len(RBins[k_ind]))))
 942            p_gtr_dt_list.append(np.zeros(len(RBins[k_ind])))
 943            p_gtr_k_dt_veclist.append(np.zeros((n_reals, len(RBins[k_ind]))))
 944
 945    #------------------------------------------------------------------------------------------------
 946
 947    for realisation, TracerPos in enumerate(TracerPosVector):
 948
 949        if Verbose:
 950            start_time_real = time.perf_counter()
 951            print(f'\n\n--------------  Realisation {realisation+1}/{n_reals}  --------------\n')
 952
 953        #-------------------------------------------------------------------------------------------
 954
 955        #Tracer calculations
 956
 957        #Building the tree
 958        if Verbose: 
 959            start_time_tree = time.perf_counter()
 960            print('\nbuilding the kdTree for the discrete tracer set ...')
 961
 962        xtree = scipy.spatial.cKDTree(TracerPos, boxsize=BoxSize)
 963
 964        if Verbose: 
 965            print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_tree))
 966
 967        #-------------------------------------------------------------------------------------------
 968
 969        #Calculating the NN distances
 970        if Verbose: 
 971            start_time_NN = time.perf_counter()
 972            print('\ncomputing the tracer NN distances ...')
 973        dists, _ = xtree.query(QueryPos, k=max(kList))
 974        vol = dists[:, np.array(kList)-1]
 975        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_NN))
 976
 977        #-------------------------------------------------------------------------------------------
 978    
 979        #Calculating the auto kNN-CDFs
 980        if Verbose: 
 981            start_time_CDF = time.perf_counter()
 982            print('\ncomputing the tracer auto-CDFs P_{>=k} ...')
 983        p_gtr_k_list = calc_kNN_CDF(vol, RBins)
 984        for k_ind, k in enumerate(kList):
 985            p_gtr_k_veclist[k_ind][realisation] = p_gtr_k_list[k_ind]
 986        if Verbose: print('\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_CDF))
 987
 988        #-------------------------------------------------------------------------------------------
 989
 990        #Tracer-field calculations
 991
 992        if Verbose: 
 993            start_time_tf_cross = time.perf_counter()
 994            print('\ncomputing the tracer-field cross-correlation ...\n')
 995
 996        for k_ind, k in enumerate(kList):
 997             
 998            if Verbose: 
 999                if realisation==0:
1000                    print('\tComputing P_(>dt) and P_(>=k, >dt) for k = {} ...'.format(k))
1001                else:
1002                    print('\tComputing P_(>=k, >dt) for k = {} ...'.format(k))
1003
1004            for i, ss in enumerate(RBins[k_ind]):
1005
1006                ss_str = str(ss)
1007
1008                #-----------------------------------------------------------------------------------
1009
1010                # Interpolate the smoothed field at the query positions
1011                if ss_str not in Interpolated_Smoothed_Field:
1012                    Interpolated_Smoothed_Field[ss_str] = CIC_3D_Interp(QueryPos, SmoothedFieldDict[ss_str], BoxSize)
1013
1014                interp_field = Interpolated_Smoothed_Field[ss_str]
1015            
1016                #-------------------------------------------------------------------------------------------------
1017
1018                # Compute the overdensity threshold for the smoothed field
1019                if ss_str not in Delta_Threshold:
1020                    Delta_Threshold[ss_str] = np.percentile(Interpolated_Smoothed_Field[ss_str], FieldConstPercThreshold)
1021                   
1022                delta_star_ss = Delta_Threshold[ss_str]    
1023
1024                #-----------------------------------------------------------------------------------
1025
1026                #Compute the fraction of query points satisfying the joint condition
1027                ind_gtr_k_dt = np.where((vol[:, k_ind] < ss) & (interp_field > delta_star_ss))
1028                p_gtr_k_dt_veclist[k_ind][realisation, i] = len(ind_gtr_k_dt[0])/QueryPos.shape[0]
1029
1030                #-----------------------------------------------------------------------------------
1031
1032                #Compute the fraction of query points with smoothed field exceeding the overdensity threshold
1033                if realisation==0: 
1034                    ind_gtr_dt = np.where(interp_field > delta_star_ss)
1035                    p_gtr_dt_list[k_ind][i] = len(ind_gtr_dt[0])/QueryPos.shape[0]
1036
1037        if Verbose: print('\n\tdone; time taken: {:.2e} s.'.format(time.perf_counter()-start_time_tf_cross))
1038
1039        #-------------------------------------------------------------------------------------------
1040
1041        if Verbose: 
1042            print('\ntime taken for realisation {}: {:.2e} s.'.format(realisation+1, time.perf_counter()-start_time_real))
1043
1044    #-----------------------------------------------------------------------------------------------
1045
1046    if Verbose: 
1047        print('\n\n--------------  all realisations done!  --------------\n')
1048        print('\n\ttime taken for step 2: {:.2e} s.'.format(time.perf_counter()-step_2_start_time))
1049
1050    #-----------------------------------------------------------------------------------------------
1051
1052    if Verbose: print('\ntotal time taken: {:.2e} s.'.format(time.perf_counter()-total_start_time))
1053    
1054    if ReturnSmoothedDict: 
1055        return p_gtr_k_veclist, p_gtr_dt_list, p_gtr_k_dt_veclist, SmoothedFieldDict
1056    else: 
1057        return p_gtr_k_veclist, p_gtr_dt_list, p_gtr_k_dt_veclist

Returns 'data vectors' of the the probabilities $P_{\geq k}$, $P_{>{\rm dt}}$ and $P_{\geq k,>{\rm dt}}$ [refer to kNNpy.kNN_3D.TracerFieldCross for definitions] for $k$ in kList for multiple realisations of the given discrete tracer set [TracerPosVector] and a single realisation of the given continuous overdensity field (Field). Please refer to notes to understand why this might be useful.

Parameters
  • kList (list of int): List of nearest neighbours to compute. For example, if kList = [1, 2, 4], the first, second and fourth-nearest neighbour distributions will be computed.
  • RBins (list of numpy float arrays): List of radial distance arrays (in comoving Mpc/$h$), one for each value in kList. The i-th element of the list should be a numpy array specifying the distances to be used for the nearest neighbour calculation corresponding to kList[i].
  • BoxSize (float): The size of the cubic box (in comoving Mpc/h) in which the tracers and the continuous field are defined.
  • QueryPos (numpy float array of shape (n_query, 3)): Array of 3D positions (e.g., in Cartesian coordinates) used to query the nearest-neighbour distances, and also compute field's CDF.
  • TracerPosVector (numpy float array of shape (n_realisations, n_tracer, 3)): Array of 3D positions of n_realisations of discrete tracers, with columns representing the x, y, and z coordinates, respectively.
  • Field (numpy float array of shape (n_grid, n_grid, n_grid)): A 3D numpy array representing the continuous field (for e.g., the matter overdensity field). The shape of the array should match the grid size used for smoothing.
  • FieldConstPercThreshold (float): The percentile threshold for identifying overdense regions in the continuous field. For example, 75.0 indicates the 75th percentile.
  • ReturnSmoothedFieldDict (bool, optional): if set to True, the dictionary containing the continuous field smoothed at the provided radial bins, will be returned along with the nearest-neighbour measurements, by default False.
  • Verbose (bool, optional): If True, prints timing information for each step. Default is False.
Returns
  • p_gtr_k_veclist (list of numpy float arrays): list of auto kNN-CDFs of the discrete tracers evaluated at the desired distance bins. Each list member is a 2D array of shape (n_realisations, n_bins).
  • p_gtr_dt_list (list of numpy float arrays): continuum version of auto kNN-CDFs for the continuous field evaluated at the desired distance bins.
  • p_gtr_k_dt_veclist (list of numpy float arrays): list of joint tracer-field nearest neighbour distributions evaluated at the desired distance bins. Each list member is a 2D array of shape (n_realisations, n_bins).
  • SmoothedFieldDict (dict): dictionary containing the continuous field smoothed at the provided radial bins, returned only if ReturnSmoothedDict is True. For example, SmoothedFieldDict['5'] represents the continuous map smoothed at a scale of 5 Mpc/h.
Raises
  • ValueError: If TracerPos are not on a 3dimensional grid.
  • ValueError: If QueryPos are not on a 3dimensional grid.
  • ValueError: If tracer positions are outside the specified box size.
  • ValueError: If QueryPos are outside the specified box size.
References