import numpy as np


def _attach(descriptor):
    """Return a NumPy array backed by the buffer of a shared-memory descriptor.

    ``descriptor`` is a ``[shared_memory, shape, dtype]`` sequence; only the
    ``.buf`` attribute of the first item is used, and no data is copied.
    """
    return np.ndarray(descriptor[1], dtype=descriptor[2], buffer=descriptor[0].buf)


# Main loop over image broken into segments. Will try to calculate pixel values outside of
# the masked area using the preconfigured weighting system.
def worker(hunk, shm_pixels, shm_mask, shm_bools, shm_margin, margin, limit, hit_target):
    """Fill the unmasked pixels listed in ``hunk`` from nearby masked pixels.

    Args:
        hunk: iterable of ``(row, col)`` indices. Each index is the top-left
            corner of a ``(2 * margin + 1)`` square window in the padded
            arrays; the pixel that gets written is the window centre.
        shm_pixels, shm_mask, shm_bools, shm_margin: ``[shared_memory, shape,
            dtype]`` descriptors for the padded pixel array, the padded mask,
            the padded boolean "is masked" array and the stack of ring masks.
        margin: half-size of the sampling window.
        limit: maximum number of rings to search; a falsy value means "all
            rings". Values larger than the number of rings are clamped.
        hit_target: number of source samples required before a pixel is
            written.

    The pixel and mask arrays are modified in place; nothing is returned.
    """
    np_pixels = _attach(shm_pixels)
    np_mask = _attach(shm_mask)
    np_bools = _attach(shm_bools)
    margins_bool = _attach(shm_margin)

    ring_count = len(margins_bool)
    # Clamp so an over-large limit cannot index past the last ring mask.
    lim = min(limit, ring_count) if limit else ring_count
    span = margin * 2 + 1

    for multi_index in hunk:
        row, col = multi_index[0], multi_index[1]

        # Local view of the mask centred on the pixel being filled
        bool_view = np_bools[row:row + span, col:col + span]
        hit_max = np.count_nonzero(bool_view)  # Masked pixels available in the view
        if not hit_max:
            continue

        pixel_view = np_pixels[row:row + span, col:col + span]
        hit_targ = min(hit_max, hit_target)

        # Walk the rings outwards, collecting RGB samples until enough are found.
        # Samples are gathered in a list and joined once to avoid re-copying the
        # accumulated array on every ring.
        samples = []
        count = 0
        iteration = 0
        while count < hit_targ and iteration < lim:
            ring = margins_bool[iteration]
            sub_bool = bool_view[ring]
            if np.count_nonzero(sub_bool):
                found = pixel_view[ring][sub_bool, :3]
                samples.append(found)
                count += found.shape[0]
            iteration += 1

        # Write the average colour only when the full target was reached; the
        # extra ``count`` check avoids a division by zero when hit_target <= 0.
        if count and count >= hit_target:
            hits = samples[0] if len(samples) == 1 else np.concatenate(samples, axis=0)
            np_pixels[row + margin, col + margin, :3] = hits.sum(0) / count
            np_mask[row + margin, col + margin] = 1.0


# Simply writes pixels to a bpy.image. This is to keep bpy outside of the main working loop
def write_back(image, pixels):
    """Copy ``pixels`` (flattened) into ``image.pixels`` and refresh the image.

    ``image`` only needs ``pixels.foreach_set`` and ``update``; presumably a
    ``bpy.types.Image`` in production.
    """
    image.pixels.foreach_set(pixels.ravel())
    image.update()


# Create numpy arrays of the image and mask as well as set up a weighting system for sampling
# pixels within the margin step area
def set_up(image, mask, margin):
    """Load image and mask into wrap-padded arrays and build the ring masks.

    Args:
        image: object exposing ``size`` as ``(width, height)`` and
            ``pixels.foreach_get``; read as RGBA float data.
        mask: same interface as ``image``; assumed to have the same size as
            ``image`` (TODO confirm with callers). Only its first channel is
            kept.
        margin: padding added on every side, and the number of ring masks.

    Returns:
        ``(np_pixels, np_mask, np_bool, margins_bool, w, h, margin)`` where
        the first three arrays are padded by ``margin`` and laid out as
        ``(rows, cols[, 4])``, and ``margins_bool`` holds one boolean window
        per rounded Euclidean distance ``1..margin``.
    """
    w, h = image.size

    # Blender stores pixels row by row, so the flat buffer reshapes to
    # (height, width, 4); using (width, height, 4) only works for square images.
    flat_pixels = np.zeros(w * h * 4, 'f')
    flat_mask = np.zeros(w * h * 4, 'f')
    image.pixels.foreach_get(flat_pixels)
    mask.pixels.foreach_get(flat_mask)
    np_pixels = flat_pixels.reshape(h, w, 4)
    # Reduce mask values to just reds to save space
    np_mask = flat_mask.reshape(h, w, 4)[..., 0]

    # Rounded Euclidean distance of every window cell from the window centre
    offsets = np.absolute(np.arange(0, margin * 2 + 1) - margin)
    px_euclid = np.sqrt(offsets[:, None] ** 2 + offsets[None, :] ** 2)
    px_euclid_r = np.int_(np.round(px_euclid))

    # Bool arrays for each weight level starting at 1
    margins_bool = [px_euclid_r == i for i in range(1, margin + 1)]

    # Expand the data by the margin on every side, wrapping around the edges,
    # so windows near the border never exceed the array bounds
    np_pixels = np.pad(np_pixels, ((margin, margin), (margin, margin), (0, 0)), mode='wrap')
    np_mask = np.pad(np_mask, ((margin, margin), (margin, margin)), mode='wrap')

    np_bool = np_mask > 0.9

    return np_pixels, np_mask, np_bool, np.asarray(margins_bool), w, h, margin


def _share(smm, array):
    """Copy ``array`` into a new shared-memory segment; return ``(segment, view)``."""
    # SharedMemory rejects a size of zero, so always request at least one byte.
    segment = smm.SharedMemory(size=max(array.nbytes, 1))
    view = np.ndarray(array.shape, dtype=array.dtype, buffer=segment.buf)
    view[:] = array
    return segment, view


def _unfilled(np_bools, w, h, margin_step):
    """Return the ``(row, col)`` indices of interior pixels not yet masked."""
    interior = np_bools[margin_step:h + margin_step, margin_step:w + margin_step]
    return np.argwhere(np.logical_not(interior))


def _run_step(executor, hunks, descriptors, m_step, limit, hit_target):
    """Run ``worker`` over every non-empty hunk and wait for all of them.

    Calling ``result()`` re-raises any exception from a worker instead of
    letting it pass unnoticed.
    """
    futures = [
        executor.submit(worker, hunk, *descriptors, m_step, limit, hit_target)
        for hunk in hunks
        if len(hunk)
    ]
    for future in futures:
        future.result()


# Takes all the outputs from the setup routine (not called from within to avoid interacting with
# bpy in the subprocesses). Creates shared memory versions of the data and spawns a bunch of
# processes to work on smaller hunks of pixels in parallel.
def add_margin(pixels, mask, bools, margins, w, h, margin_step, margin, hit_target):
    """Grow the masked area of an image by ``margin`` pixels, in parallel.

    Args:
        pixels, mask, bools, margins: arrays as returned by ``set_up``.
        w, h: width and height of the unpadded image.
        margin_step: padding used when the arrays were built; also the number
            of pixels grown per pass. Must be positive when ``margin > 0``.
        margin: total margin to add; ``-1`` requests a complete fill, ``0``
            (or any other non-positive value) does nothing.
        hit_target: samples required per pixel, forwarded to ``worker``.

    Returns:
        A copy of the unpadded ``(h, w, 4)`` pixel array after processing.

    Raises:
        Any exception raised inside a worker process is re-raised here.
    """
    import concurrent.futures
    import os
    from multiprocessing.managers import SharedMemoryManager

    # NOTE(review): the worker window is sized from m_step while pixel
    # coordinates are offset by margin_step; when margin < margin_step these
    # differ. Left as in the original - confirm what callers pass.
    m_step = margin_step if margin >= margin_step or margin == -1 else margin

    with SharedMemoryManager() as smm:
        # Create shared memory versions of these arrays for the processes to share
        shm_pixels, np_pixels = _share(smm, pixels)
        shm_mask, np_mask = _share(smm, mask)
        shm_bools, np_bools = _share(smm, bools)
        shm_margin, margins_bool = _share(smm, margins)
        del pixels, mask, bools, margins

        descriptors = (
            [shm_pixels, np_pixels.shape, np_pixels.dtype],
            [shm_mask, np_mask.shape, np_mask.dtype],
            [shm_bools, np_bools.shape, np_bools.dtype],
            [shm_margin, margins_bool.shape, margins_bool.dtype],
        )

        # Split work into smaller hunks to split between cpu cores
        cpus = (os.cpu_count() or 1) * 2
        todo = _unfilled(np_bools, w, h, margin_step)

        # Do the processing in parallel
        with concurrent.futures.ProcessPoolExecutor() as executor:
            limit = 0
            # Negative margin indicates complete fill is wanted
            if margin == -1:
                # Keep processing until nothing is left, or until a pass fills
                # nothing (otherwise an unreachable pixel would loop forever)
                remaining = len(todo)
                while remaining:
                    hunks = np.array_split(todo, cpus)
                    _run_step(executor, hunks, descriptors, m_step, limit, hit_target)
                    np_bools[:] = np_mask > 0.9
                    todo = _unfilled(np_bools, w, h, margin_step)
                    if len(todo) >= remaining:
                        break
                    remaining = len(todo)
            # Check the margin actually has a size before doing anything
            elif margin > 0:
                # Work out how many steps are needed and if a last sub step
                # size pass will be needed at the end
                steps, lasts = divmod(margin, m_step)
                if lasts:
                    steps += 1
                for step in range(steps):
                    final = step == steps - 1
                    # If the margin step didn't fit evenly a last sub sized
                    # step will be done to fill it
                    if lasts and final:
                        limit = lasts
                    hunks = np.array_split(todo, cpus)
                    _run_step(executor, hunks, descriptors, m_step, limit, hit_target)
                    # Recalculate the remaining pixels if there are more steps
                    if not final:
                        np_bools[:] = np_mask > 0.9
                        todo = _unfilled(np_bools, w, h, margin_step)

        # Copy pixels from shared memory before the smm exits
        output_px = np_pixels[margin_step:h + margin_step, margin_step:w + margin_step].copy()
    return output_px


if __name__ == '__main__':
    pass