Source code for vast.voidfinder.voidfinder

#VoidFinder Function to do just about everything


import numpy as np

from astropy.table import Table

import time

from .hole_combine import combine_holes_2

from .voidfinder_functions import mesh_galaxies, save_maximals

from .volume_cut import check_hole_bounds

from .avsepcalc import av_sep_calc

#from .mag_cutoff_function import field_gal_cut

from ._voidfinder import _hole_finder

from .constants import c

from ._voidfinder_cython_find_next import MaskChecker



################################################################################
# Debugging imports
################################################################################
#from sklearn.neighbors import KDTree



################################################################################
DtoR = np.pi/180.
RtoD = 180./np.pi
################################################################################



[docs]def filter_galaxies(galaxy_table, survey_name, out_directory, mag_cut=True, dist_limits=None, rm_isolated=True, write_table=True, sep_neighbor=3, dist_metric='comoving', h=1.0, magnitude_limit=-20.09, verbose=0): """ A hodge podge of miscellaneous tasks which need to be done to format the data into something the main find_voids() function can use. 1) Optional magnitude cut 2) Convert from ra-dec-redshift space into xyz space 3) Calculate the hole search grid shape 4) Optional remove isolated galaxies by partitioning them into wall (non-isolated) and field (isolated) groups 5) Optionally write out the wall and field galaxies to disk Parameters ========== galaxy_table : astropy.table of shape (N,?) variable number of required columns. If doing magnitude cut, must include 'rabsmag' column. If distance metric is 'comoving', must include 'Rgal' column, otherwise must include 'redshift'. Also must always include 'ra' and 'dec' survey_name : str Name of the galxy catalog, string value to prepend or append to output names out_directory : string Directory path for output files mag_cut : bool whether or not to cut on magnitude, removing galaxies less than magnitude_limit dist_limits : list of length 2 [Minimum distance, maximum distance] of galaxy sample (in units of Mpc/h) magnitude_limit : float value at which to perform magnitude cut rm_isolated : bool whether or not to perform Nth neighbor distance calculation, and use it to partition the input galaxies into wall and field galaxies write_table : bool use astropy.table.Table.write to write out the wall and field galaxies to file sep_neighbor : int, positive if rm_isolated_flag is true, find the Nth galaxy neighbors based on this value dist_metric : str Distance metric to use in calculations. Options are 'comoving' (default; distance dependent on cosmology) and 'redshift' (distance independent of cosmology). h : float Fractional value of Hubble's constant. Default value is 1 (where H0 = 100h). verbose : int values greater than zero indicate to print output Returns ======= wall_gals_xyz : numpy.ndarray of shape (K,3) the galaxies which were designated not to be isolated field_gals_xyz : numpy.ndarray of shape (L,3) the galaxies designated as isolated hole_grid_shape : tuple of 3 integers (i,j,k) shape of the hole search grid coords_min : numpy.ndarray of shape (3,) coordinates of the minimum of the survey used for converting from xyz space into ijk space """ print('Filter Galaxies Start', flush=True) ############################################################################ # PRE-PROCESS DATA # Filter based on magnitude and convert from redshift to radius if necessary #--------------------------------------------------------------------------- # Remove faint galaxies if mag_cut: galaxy_table = galaxy_table[galaxy_table['rabsmag'] <= magnitude_limit] # Remove galaxies outside redshift range if dist_limits is not None: if dist_metric == 'comoving': distance_boolean = np.logical_and(galaxy_table['Rgal'] >= dist_limits[0], galaxy_table['Rgal'] <= dist_limits[1]) else: H0 = 100*h distance_boolean = np.logical_and(c*galaxy_table['redshift']/H0 >= dist_limits[0], c*galaxy_table['redshift']/H0 <= dist_limits[1]) galaxy_table = galaxy_table[distance_boolean] # Convert galaxy coordinates to Cartesian coords_xyz = ra_dec_to_xyz(galaxy_table, dist_metric, h) ############################################################################ ############################################################################ # Separation #--------------------------------------------------------------------------- if rm_isolated: wall_gals_xyz, field_gals_xyz = wall_field_separation(coords_xyz, sep_neighbor=sep_neighbor, verbose=verbose) else: wall_gals_xyz = coords_xyz field_gals_xyz = np.array([]) ############################################################################ ############################################################################ # Write results to disk if desired #--------------------------------------------------------------------------- if write_table: write_start = time.time() wall_xyz_table = Table(data=wall_gals_xyz, names=["x", "y", "z"]) wall_xyz_table.write(out_directory + survey_name + 'wall_gal_file.txt', format='ascii.commented_header', overwrite=True) field_xyz_table = Table(data=field_gals_xyz, names=["x", "y", "z"]) field_xyz_table.write(out_directory + survey_name + 'field_gal_file.txt', format='ascii.commented_header', overwrite=True) if verbose > 0: print("Time to write field and wall tables:", time.time() - write_start, flush=True) nf = len(field_gals_xyz) nwall = len(wall_gals_xyz) print('Number of field gals:', nf, 'Number of wall gals:', nwall, flush=True) ############################################################################ return wall_gals_xyz, field_gals_xyz
[docs]def ra_dec_to_xyz(galaxy_table, distance_metric='comoving', h=1.0, ): """ Convert galaxy coordinates from ra-dec-redshift space into xyz space. Parameters ========== galaxy_table : astropy.table of shape (N,?) must contain columns 'ra' and 'dec' in degrees, and either 'Rgal' in who knows what unit if distance_metric is 'comoving' or 'redshift' for everything else distance_metric : str Distance metric to use in calculations. Options are 'comoving' (default; distance dependent on cosmology) and 'redshift' (distance independent of cosmology). h : float Fractional value of Hubble's constant. Default value is 1 (where H0 = 100h). Returns ======= coords_xyz : numpy.ndarray of shape (N,3) values of the galaxies in xyz space """ if distance_metric == 'comoving': r_gal = galaxy_table['Rgal'].data else: r_gal = c*galaxy_table['redshift'].data/(100*h) ra = galaxy_table['ra'].data dec = galaxy_table['dec'].data ############################################################################ # Convert from ra-dec-radius space to xyz space #--------------------------------------------------------------------------- ra_radian = ra*DtoR dec_radian = dec*DtoR x = r_gal*np.cos(ra_radian)*np.cos(dec_radian) y = r_gal*np.sin(ra_radian)*np.cos(dec_radian) z = r_gal*np.sin(dec_radian) num_gal = x.shape[0] coords_xyz = np.concatenate((x.reshape(num_gal,1), y.reshape(num_gal,1), z.reshape(num_gal,1)), axis=1) ############################################################################ return coords_xyz
def calculate_grid(galaxy_coords, mask_type='ra_dec_z', xyz_limits=None, hole_grid_edge_length=5.0, galaxy_map_grid_edge_length=None, verbose=0): """ Given a galaxy survey in xyz/Cartesian coordinates and the length of a cubical grid cell, calculate the cubical grid shape which will contain the survey. The cubical grid is obtained by finding the minimum and maximum values in each of the 3 dimensions, calculating the distance of the survey in each dimension, and dividing each dimension by the cell grid length to get the number of required grid cells in each dimension. In order to transform additional points to their closest grid cell later in VoidFinder, a user will need the origin (0,0,0) of the grid, which is the point (min_x, min_y, min_z) from the survey, so this function also returns that min value. Parameters ========== galaxy_coords : astropy Table or numpy.ndarray of shape (N,3) coordinates of survey galaxies in either sky coordinates or xyz space mask_type : string, one of ['ra_dec_z', 'xyz', 'periodic'] Determines the mode of mask checking to use and which mask parameters to use. 'ra_dec_z' means the mask, mask_resolution, and dist_limits parameters must be provided. The 'mask' represents an angular space in Right Ascension and Declination, the corresponding mask_resolution integer represents the scale needed to index into the Right Ascension and Declination of the mask, and the dist_limits represent the min and max redshift values (as radial distances in xyz space). 'xyz' means that the xyz_limits parameter must be provided which directly encodes a bounding box for the survey in xyz space 'periodic' means that the xyz_limits parameter must be provided, which directly encodes a bounding box representing the periodic boundary of the survey, and the survey will be treated as if its bounding box were tiled to infinity in all directions. Spheres will still only be grown starting from within the original bounding box. xyz_limits : numpy array of shape (2,3) format [x_min, y_min, z_min] [x_max, y_max, z_max] to be used for checking against the mask when mask_type == 'xyz' or for periodic conditions when mask_type == 'periodic' hole_grid_edge_length : float Length in xyz space of the edge of 1 cubical cell in the grid. Default value is 5 Mpc/h. galaxy_map_grid_edge_length : float or None Edge length in Mpc/h for the secondary grid for finding nearest neighbor galaxies. If None, will default to 3*hole_grid_edge_length (which results in a cell volume of 3^3 = 27 times larger cube volume). This parameter yields a tradeoff between number of galaxies in a cell, and number of cells to search when growing a sphere. Too large and many redundant galaxies may be searched, too small and too many cells will need to be searched. verbose : int Level of verbosity to print during running, 0 indicates off, 1 indicates to print after every 'print_after' cells have been processed, and 2 indicates to print all debugging statements. Default is 0. Returns ======= hole_grid_shape : tuple of ints (i,j,k) number of grid cells in each dimension galaxy_map_grid_shape : tuple of ints (i,j,k) number of galaxy map grid cells in each dimension coords_min : numpy.ndarray of shape (3,) the (min_x, min_y, min_z) point which is the (0,0,0) of the grid """ ############################################################################ # Do some sanity checking on the mask modes and various inputs since we have # a lot of None type optional inputs #--------------------------------------------------------------------------- if (mask_type in ['xyz', 'periodic']) and (xyz_limits is None): raise ValueError("Mask type is %s but required mask parameter xyz_limits is None" % (mask_type)) ############################################################################ ############################################################################ # Depending on the mask mode, calculate the transform origin and grid cell # parameters for our Hole grid and our Galaxy Map grids. # # For 'ra-dec-z' - just use the min and max of the provided coordinates of # the survey for the transform # # For 'xyz' mode - use the required/provided xyz_limits # # For 'periodic' mode - use the required/provided xyz_limits, but we also # have to ensure the GalaxyMap cells align with the xyz_limits # boundaries, so that VoidFinder doesn't accidentally introduce dead # space into cells because of misalignment. # # The important values calculated below are: # 'coords_min' - origin for transforming between real and index spaces # 'hole_grid_shape' - grid cells for the hole growing grid # 'galaxy_map_grid_shape' - grid cells for the galaxy finding grid #--------------------------------------------------------------------------- if mask_type == 'ra_dec_z': #ra-dec-redshift # Convert galaxy coords to Cartesian galaxy_coords = ra_dec_to_xyz(galaxy_coords) # Find the maximum coordinate in each direction coords_max = np.max(galaxy_coords, axis=0) # Find the minimum coordinate in each direction coords_min = np.min(galaxy_coords, axis=0) # Size of the grid in each direction box = coords_max - coords_min # Number of grid cells in each direction ngrid = box/hole_grid_edge_length # Shape of the grid hole_grid_shape = tuple(np.ceil(ngrid).astype(int)) # Calculate the size of a galaxy map grid cell if galaxy_map_grid_edge_length is None: galaxy_map_grid_edge_length = 3.0*hole_grid_edge_length # Number of galaxy map grid cells in each direction ngrid_galaxymap = box/galaxy_map_grid_edge_length # Shape of the galaxy map grid galaxy_map_grid_shape = tuple(np.ceil(ngrid_galaxymap).astype(int)) elif mask_type == 'xyz': #xyz # Size of the grid in each direction box = xyz_limits[1,:] - xyz_limits[0,:] # Number of grid cells in each direction ngrid = box/hole_grid_edge_length # Shape of the grid hole_grid_shape = tuple(np.ceil(ngrid).astype(int)) # Origin of the grid coords_min = xyz_limits[0,:] # Calculate the size of a galaxy map grid cell if galaxy_map_grid_edge_length is None: galaxy_map_grid_edge_length = 3.0*hole_grid_edge_length # Number of galaxy map grid cells in each direction ngrid_galaxymap = box/galaxy_map_grid_edge_length # Shape of the galaxy map grid galaxy_map_grid_shape = tuple(np.ceil(ngrid_galaxymap).astype(int)) elif mask_type == 'periodic': #periodic # Size of the grid in each direction box = xyz_limits[1,:] - xyz_limits[0,:] # Number of grid cells in each direction ngrid = box/hole_grid_edge_length # Shape of the grid hole_grid_shape = tuple(np.ceil(ngrid).astype(int)) # Origin of the grid coords_min = xyz_limits[0,:] # Calculate the size of a galaxy map grid cell if galaxy_map_grid_edge_length is None: desired_length = 3.0*hole_grid_edge_length # Find the common integer divisors of the length dimensions of the # survey limits common_divisors = get_common_divisors(box) if len(common_divisors) == 0 or \ (len(common_divisors) == 1 and common_divisors[0] == 1): error_str = """Could not automatically determine meaningful galaxy_map_grid_edge_length from the provided xyz_limits. In mask_mode==periodic, the survey limits provided by the xyz_limits variable must be divisible by a common integer in all dimensions""" raise ValueError(error_str) common_divisors = np.array(common_divisors) argmin = np.abs(common_divisors - desired_length).argmin() galaxy_map_grid_edge_length = float(common_divisors[argmin]) # Number of galaxy map grid cells in each direction ngrid_galaxymap = box/galaxy_map_grid_edge_length # Shape of the galaxy map grid galaxy_map_grid_shape = tuple(np.ceil(ngrid_galaxymap).astype(int)) else: # Number of galaxy map grid cells in each direction ngrid_galaxymap = box/galaxy_map_grid_edge_length rounded = np.rint(ngrid_galaxymap) print(rounded) close_to_round = np.isclose(ngrid_galaxymap, rounded) print(close_to_round) if np.all(close_to_round): # Vals are good, just proceed with given galaxy_map_grid_shape = tuple(np.rint(ngrid_galaxymap).astype(int)) else: # Attempt to adjust galaxy_map_grid_edge_length error_str = """The provided combination of xyz_limits and galaxy_map_grid_edge length will not work. In mask_mode==periodic, the edge length must be an integer divisor of all dimensions of the survey as provided by the xyz_limits input.""" raise ValueError(error_str) # Recast coords_min coords_min = coords_min.reshape(1,3).astype(np.float64) if verbose > 0: print("Hole-growing Grid:", hole_grid_shape, flush=True) print("Galaxy-searching Grid:", galaxy_map_grid_shape, flush=True) print("Galaxy-searching edge length:", galaxy_map_grid_edge_length, flush=True) ############################################################################ ''' coords_max = np.max(galaxy_coords_xyz, axis=0) coords_min = np.min(galaxy_coords_xyz, axis=0) box = coords_max - coords_min ngrid = box/hole_grid_edge_length #print("Ngrid: ", ngrid) hole_grid_shape = tuple(np.ceil(ngrid).astype(int)) ''' return hole_grid_shape, galaxy_map_grid_shape, coords_min#, coords_max def wall_field_separation(galaxy_coords_xyz, sep_neighbor=3, verbose=0): """ Given a set of galaxy coordinates in xyz space, find all the galaxies whose distance to their Nth nearest neighbor is above or below some limit. Galaxies whose Nth nearest neighbor is close (below), will become 'wall' galaxies, and galaxies whose Nth nearest neighbor is far (above) will become field/void/isolated galaxies. The distance limit used below is the mean distance to Nth nearest neighbor plus 1.5 times the standard deviation of the Nth nearest neighbor distance. Parameters ========== galaxy_coords_xyz : numpy.ndarray of shape (N,3) coordinates in xyz space of the galaxies sep_neighbor : int Nth neighbor verbose : int whether to print timing output, 0 for off and >= 1 for on Returns ======= wall_gals_xyz : ndarray of shape (K, 3) xyz coordinate subset of the input corresponding to tightly packed galaxies field_gals_xyz : ndarray of shape (L, 3) xyz coordinate subset of the input corresponding to isolated galaxies """ ############################################################################ # Calculate the average distance to the 3rd nearest neighbor #--------------------------------------------------------------------------- print('Finding isolated galaxy distance',flush=True) sep_start = time.time() dists3 = av_sep_calc(galaxy_coords_xyz, sep_neighbor) sep_end = time.time() print('Time to find isolated galaxy distance:', sep_end-sep_start, flush=True) ############################################################################ ############################################################################ # Calculate the isolated galaxy criterion #--------------------------------------------------------------------------- avsep = np.mean(dists3) sd = np.std(dists3) l = avsep + 1.5*sd if verbose > 0: print('Average separation of 3rd neighbor gal is', avsep, flush=True) print('The standard deviation is', sd, flush=True) print('Removing all galaxies with 3rd nearest neighbors further than', l, flush=True) ############################################################################ ############################################################################ # Separate galaxies into field and wall #--------------------------------------------------------------------------- fw_start = time.time() #f_coord_table, w_coord_table = field_gal_cut(coord_in_table, dists3, l) gal_close_neighbor_index = dists3 < l wall_gals_xyz = galaxy_coords_xyz[gal_close_neighbor_index] field_gals_xyz = galaxy_coords_xyz[np.logical_not(gal_close_neighbor_index)] fw_end = time.time() if verbose > 0: print('Time to sort field and wall gals:', fw_end-fw_start, flush=True) ############################################################################ return wall_gals_xyz, field_gals_xyz
[docs]def find_voids(galaxy_coords_xyz, survey_name, mask_type='ra_dec_z', mask=None, mask_resolution=None, dist_limits=None, xyz_limits=None, check_only_empty_cells=True, max_hole_mask_overlap=0.1, hole_grid_edge_length=5.0, grid_origin=None, min_maximal_radius=10.0, galaxy_map_grid_edge_length=None, pts_per_unit_volume=0.01, maximal_spheres_filename="maximal_spheres.txt", void_table_filename="voids_table.txt", potential_voids_filename="potential_voids_list.txt", num_cpus=None, save_after=None, use_start_checkpoint=False, batch_size=10000, verbose=0, print_after=5.0 ): """ Main entry point for VoidFinder. Using the VoidFinder algorithm, this function grows a sphere in each empty grid cell of a grid imposed over the target galaxy distribution. It then combines these spheres into unique voids, identifying a maximal sphere for each void. This algorithm at a high level uses 3 data to find voids in the large-scale structure of the universe: 1) The galaxy coordinates 2) A survey-limiting mask 3) A cubic-cell grid of potential void locations Before running VoidFinder, a preprocessing stage of removing isolated galaxies from the target galaxy survey is performed. Currently this is done by removing galaxies whose distance to their 3rd nearest neighbor is greater than 1.5 times the standard deviation of 3rd nearest neighbor distance for the survey. This step should be performed prior to calling this function. Next, VoidFinder will impose a grid of cubic cells over the remaining non-isolated, or "wall" galaxies. The cell size of this grid should be small enough to allow a thorough search, but is also the primary consumer of time in this algorithm. At each grid cell, VoidFinder will evaluate whether that cubic cell is "empty" or "nonempty." Empty cells contain no galaxies, non-empty cells contain at least 1 galaxy. This makes the removal of isolated galaxies in the preprocessing stage important. VoidFinder will proceed to grow a sphere (or hole), at every Empty grid cell. These pre-void holes will be filtered such that the potential voids along the edge of the survey will be removed, since any void on the edge of the survey could potentially grow unbounded, and there may be galaxies not present which would have bounded the void. After the filtering, these pre-voids will be combined into the actual voids based on an analysis of their overlap. This implementation uses a reference point, 'coords_min', from xyz space, and the 'hole_grid_edge_length' to convert between the x,y,z coordinates of a galaxy, and the i,j,k coordinates of a cell in the search grid such that: ijk = ((xyz - coords_min)/hole_grid_edge_length).astype(integer) During the sphere growth, VoidFinder also uses a secondary grid to help find the bounding galaxies for a sphere. This secondary grid facilitates nearest-neighbor and radius queries, and uses a coordinate space referred to in the code as pqr, which uses a similar transformation: pqr = ((xyz - coords_min)/neighbor_grid_edge_length).astype(integer) In VoidFinder terminology, a Void is a union of spheres, and a single sphere is just a hole. The Voids are found by taking the set of holes, and ordering them based on radius. Starting from the largest found hole, label it a maximal sphere, and continue to the next hole. If the next hole does not overlap with any of the previous maximal spheres by some factor, it is also considered a maximal sphere. This process is repeated until there are no more maximal spheres, and all other spheres are joined to the maximal spheres. A note on the purpose of VoidFinder - VoidFinder is intended to find distinct, discrete void *locations* within the large scale structure of the universe. This is in contrast to finding the large scale void *structure*. VoidFinder answers the question "Where are the voids?" with a concrete "Here is a list of x,y,z coordinates", but it does not answer the questions "What do the voids look like? How are they shaped? How much do they overlap?" These questions can be partially answered with additional analysis on the output of VoidFinder, but the main algorithm is intended to find discrete, disjoint x-y-z coordinates of the centers of void regions. If you wanted a local density estimate for a given galaxy, you could just use the distance to Nth nearest neighbor, for example. To do this, VoidFinder makes the following assumptions: 1. A Void region can be approximated by a union of spheres. Note: the center of the maximal sphere in that void region will yield the x-y-z coordinate of that void region. 2. Void regions are distinct/discrete - we are not looking for huge tunneling structures throughout space, if does happen to be the structure of space (it basically does happen to be that way) we want the locations of the biggest rooms Parameters ========== galaxy_coords : numpy.ndarray of shape (num_galaxies, 3) coordinates of the galaxies in the survey, units of Mpc/h (xyz space) survey_name : str identifier for the survey running, may be prepended or appended to output filenames including the checkpoint filename mask_type : string, one of ['ra_dec_z', 'xyz', 'periodic'] Determines the mode of mask checking to use and which mask parameters to use. 'ra_dec_z' means the mask, mask_resolution, and dist_limits parameters must be provided. The 'mask' represents an angular space in Right Ascension and Declination, the corresponding mask_resolution integer represents the scale needed to index into the Right Ascension and Declination of the mask, and the dist_limits represent the min and max redshift values (as radial distances in xyz space). 'xyz' means that the xyz_limits parameter must be provided which directly encodes a bounding box for the survey in xyz space 'periodic' means that the xyz_limits parameter must be provided, which directly encodes a bounding box representing the periodic boundary of the survey, and the survey will be treated as if its bounding box were tiled to infinity in all directions. Spheres will still only be grown starting from within the original bounding box. mask : numpy.ndarray of shape (N,M) type bool Represents the survey footprint in scaled ra/dec space. Value of True indicates that a location is within the survey (ra/dec space) mask_resolution : integer Scale factor of coordinates needed to index mask dist_limits : numpy array of shape (2,) [min_dist, max_dist] in units of Mpc/h (xyz space) xyz_limits : numpy array of shape (2,3) format [x_min, y_min, z_min] [x_max, y_max, z_max] to be used for checking against the mask when mask_type == 'xyz' or for periodic conditions when mask_type == 'periodic' hole_grid_edge_length : float Size in Mpc/h of the edge of 1 cube in the search grid, or distance between 2 grid cells (xyz space) grid_origin : ndarray of shape (3,) or None The spatial location to use as (0,0,0) in the search grid. if None, will use the numpy.min() function on the provided galaxies as the grid origin min_maximal_radius : float The minimum radius in units of distance for a hole to be considered for maximal status. Default value is 10 Mpc/h. max_hole_mask_overlap : float in range (0, 0.5) When the volume of a hole overlaps the mask by this fraction, discard that hole. Maximum value of 0.5 because a value of 0.5 means that the hole center will be outside the mask, but more importantly because the numpy.roots() function used below won't return a valid polynomial root. galaxy_map_grid_edge_length : float or None Edge length in Mpc/h for the secondary grid for finding nearest neighbor galaxies. If None, will default to 3*hole_grid_edge_length (which results in a cell volume of 3^3 = 27 times larger cube volume). This parameter yields a tradeoff between number of galaxies in a cell, and number of cells to search when growing a sphere. Too large and many redundant galaxies may be searched, too small and too many cells will need to be searched. (xyz space) hole_center_iter_dist : float Distance to move the sphere center each iteration while growing a void sphere in units of Mpc/h (xyz space) pts_per_unit_volume : float Number of points per unit volume that are distributed within the holes to calculate the fraction of the hole's volume that falls outside the survey bounds. Default is 0.01. maximal_spheres_filename : str Location to save maximal spheres file void_table_filename : str Location to save void table to potential_voids_filename : str Location to save potential voids file to num_cpus : int or None Number of cpus to use while running the main algorithm. None will result in using number of physical cores on the machine. Some speedup benefit may be obtained from using additional logical cores via Intel Hyperthreading but with diminishing returns. This can safely be set above the number of physical cores without issue if desired. save_after : int or None Save a VoidFinderCheckpoint.h5 file after *approximately* every save_after cells have been processed. This will over-write this checkpoint file every save_after cells, NOT append to it. Also, saving the checkpoint file forces the worker processes to pause and synchronize with the master process to ensure the correct values get written, so choose a good balance between saving too often and not often enough if using this parameter. Note that it is an approximate value because it depends on the number of worker processes and the provided batch_size value, if your batch size is 10,000 and your save_after is 1,000,000 you might actually get a checkpoint at say 1,030,000. If None, disables saving the checkpoint file. check_only_empty_cells : bool Whether or not to start growing a hole in a cell which has galaxies in it, aka "non-empty". If True (default), don't grow holes in these cells. use_start_checkpoint : bool Whether to attempt looking for a VoidFinderCheckpoint.h5 file which can be used to restart the VF run. If False, VoidFinder will start fresh from 0. batch_size : int Number of potential void cells to evaluate at a time. Lower values may be a bit slower as it involves some memory allocation overhead, and values which are too high may cause the status update printing to take more than print_after seconds. Default value 10,000 verbose : int or bool Level of verbosity to print during running, 0 indicates off, 1 indicates to print after every 'print_after' cells have been processed, and 2 indicates to print all debugging statements print_after : float Number of seconds to wait before printing a status update Returns ======= All output is currently written to disk: potential voids table, ascii.commented_header format combined voids table, ascii.commented_header format maximal spheres table """ if mask_type == "ra_dec_z": mask_mode = 0 elif mask_type == "xyz": mask_mode = 1 elif mask_type == "periodic": mask_mode = 2 else: raise ValueError("mask_type must be 'ra_dec_z', 'xyz', or 'periodic'") if dist_limits is None: min_dist = None max_dist = None else: min_dist = dist_limits[0] max_dist = dist_limits[1] try: verbose = int(verbose) except: raise ValueError("verbose argument invalid, must be int or bool: "+str(verbose)) ############################################################################ # GROW HOLES #--------------------------------------------------------------------------- if verbose > 0: print('Growing holes', flush=True) tot_hole_start = time.time() x_y_z_r_array, n_holes = _hole_finder(galaxy_coords_xyz, hole_grid_edge_length, galaxy_map_grid_edge_length, survey_name, grid_origin=grid_origin, mask_mode=mask_mode, mask=mask, mask_resolution=mask_resolution, min_dist=min_dist, max_dist=max_dist, xyz_limits=xyz_limits, check_only_empty_cells=check_only_empty_cells, save_after=save_after, use_start_checkpoint=use_start_checkpoint, batch_size=batch_size, verbose=verbose, print_after=print_after, num_cpus=num_cpus) if verbose > 0: print('Found a total of', n_holes, 'potential voids.', flush=True) print('Time to grow all holes:', time.time() - tot_hole_start, flush=True) ############################################################################ ############################################################################ # Debug Section ############################################################################ """ #print(galaxy_coords_xyz[0].shape) #print(x_y_z_r_array.shape) galaxy_tree = KDTree(galaxy_coords_xyz[0]) distances, indices = galaxy_tree.query(x_y_z_r_array[:,0:3], k=1) #print(distances.shape, indices.shape) has_bad_hole = False for row in x_y_z_r_array: curr_pos = row[0:3].reshape(1,3) curr_rad = row[3] ind, dist = galaxy_tree.query_radius(curr_pos, 0.99*curr_rad, return_distance=True) ind = ind[0] if ind.shape[0] > 0: print("Bad Hole, num galaxies inside: ", ind.shape) has_bad_hole = True if has_bad_hole: print("ERROR: DEBUG SECTION FOUND HOLE WITH ONE OR MORE GALAXIES INSIDE") """ ############################################################################ # End Debug Section ############################################################################ ############################################################################ # Initialize mask object #--------------------------------------------------------------------------- if mask_mode == 0: # sky mask mask_checker = MaskChecker(mask_mode, survey_mask_ra_dec=mask.astype(np.uint8), n=mask_resolution, rmin=min_dist, rmax=max_dist) elif mask_mode in [1,2]: # Cartesian mask mask_checker = MaskChecker(mask_mode, xyz_limits=xyz_limits) ############################################################################ ############################################################################ # CHECK IF 90% OF VOID VOLUME IS WITHIN SURVEY LIMITS #--------------------------------------------------------------------------- if verbose > 0: print("Starting volume cut", flush=True) vol_cut_start = time.time() valid_idx, monte_index = check_hole_bounds(x_y_z_r_array, mask_checker, cut_pct=0.1, pts_per_unit_volume=pts_per_unit_volume, num_surf_pts=20, num_cpus=num_cpus, verbose=verbose) if verbose > 0: print("Found", np.sum(np.logical_not(valid_idx)), "holes to cut", time.time() - vol_cut_start, flush=True) x_y_z_r_array = x_y_z_r_array[valid_idx] # Array that stores whether or not any part of holes fall outside survey boundary_hole = monte_index[valid_idx] ############################################################################ ############################################################################ # SORT HOLES BY SIZE #--------------------------------------------------------------------------- if verbose > 0: print("Sorting holes based on radius", flush=True) sort_order = x_y_z_r_array[:,3].argsort()[::-1] x_y_z_r_array = x_y_z_r_array[sort_order] boundary_hole = boundary_hole[sort_order] ############################################################################ ############################################################################ # FILTER AND SORT HOLES INTO UNIQUE VOIDS #--------------------------------------------------------------------------- if verbose > 0: print("Combining holes into unique voids", flush=True) combine_start = time.time() maximal_spheres_table, myvoids_table = combine_holes_2(x_y_z_r_array, boundary_hole, mask_checker, min_maximal_radius=min_maximal_radius, verbose=verbose) if verbose > 0: print("Combine time:", time.time() - combine_start, flush=True) print('Number of unique voids is', len(maximal_spheres_table), flush=True) ############################################################################ ############################################################################ # Save list of all void holes #--------------------------------------------------------------------------- myvoids_table.write(void_table_filename, format='ascii.commented_header', overwrite=True) ############################################################################ ############################################################################ # Compute volume of each void #--------------------------------------------------------------------------- ############################################################################ ############################################################################ # Save list of maximal hole in each void #--------------------------------------------------------------------------- save_maximals(maximal_spheres_table, maximal_spheres_filename, verbose=verbose) ############################################################################ ############################################################################ # Void region size #--------------------------------------------------------------------------- ############################################################################ return maximal_spheres_table, myvoids_table