Coverage for ezstitcher/tests/generators/generate_synthetic_data.py: 96%

306 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2025-04-30 13:20 +0000

1#!/usr/bin/env python3 

2""" 

3Generate synthetic microscopy images for testing ezstitcher. 

4 

5This module generates synthetic microscopy images with the following features: 

6- Multiple wavelengths (channels) 

7- Z-stack support with varying focus levels 

8- Cell-like structures (circular particles with varying eccentricity) 

9- Proper tiling with configurable overlap 

10- Realistic stage positioning errors 

11- HTD file generation for metadata 

12- Automatic image size calculation based on grid and tile parameters 

13 

14Usage: 

15 from ezstitcher.tests.generators.generate_synthetic_data import SyntheticMicroscopyGenerator 

16 

17 generator = SyntheticMicroscopyGenerator( 

18 output_dir="path/to/output", 

19 grid_size=(3, 3), 

20 wavelengths=2, 

21 z_stack_levels=3, 

22 auto_image_size=True 

23 ) 

24 generator.generate_dataset() 

25""" 

26 

27import os 

28import sys 

29import random 

30import numpy as np 

31import tifffile 

32from pathlib import Path 

33from skimage import draw, filters, transform, util 

34from datetime import datetime 

35 

36 

37class SyntheticMicroscopyGenerator: 

38 """Generate synthetic microscopy images for testing.""" 

39 

40 def __init__(self, 

41 output_dir, 

42 grid_size=(3, 3), 

43 image_size=(1024, 1024), 

44 tile_size=(512, 512), 

45 overlap_percent=10, 

46 stage_error_px=2, 

47 wavelengths=2, 

48 z_stack_levels=1, 

49 z_step_size=1, 

50 num_cells=50, 

51 cell_size_range=(10, 30), 

52 cell_eccentricity_range=(0.1, 0.5), 

53 cell_intensity_range=(5000, 20000), 

54 background_intensity=500, 

55 noise_level=100, 

56 wavelength_params=None, 

57 shared_cell_fraction=0.95, # Fraction of cells shared between wavelengths 

58 wavelength_intensities=None, # Fixed intensities for each wavelength 

59 wavelength_backgrounds=None, # Background intensities for each wavelength 

60 wells=['A01'], # List of wells to generate 

61 format='ImageXpress', # Format of the filenames ('ImageXpress' or 'OperaPhenix') 

62 auto_image_size=True, # Automatically calculate image size based on grid and tile size 

63 random_seed=None): 

64 """ 

65 Initialize the synthetic microscopy generator. 

66 

67 Args: 

68 output_dir: Directory to save generated images 

69 grid_size: Tuple of (rows, cols) for the grid of tiles 

70 image_size: Size of the full image before tiling 

71 tile_size: Size of each tile 

72 overlap_percent: Percentage of overlap between tiles 

73 stage_error_px: Random error in stage positioning (pixels) 

74 wavelengths: Number of wavelength channels to generate 

75 z_stack_levels: Number of Z-stack levels to generate 

76 z_step_size: Spacing between Z-steps in microns 

77 num_cells: Number of cells to generate 

78 cell_size_range: Range of cell sizes (min, max) 

79 cell_eccentricity_range: Range of cell eccentricity (min, max) 

80 cell_intensity_range: Range of cell intensity (min, max) 

81 background_intensity: Background intensity level 

82 noise_level: Amount of noise to add 

83 wavelength_params: Optional dictionary of parameters for each wavelength 

84 Example: { 

85 1: { 

86 'num_cells': 100, 

87 'cell_size_range': (10, 30), 

88 'cell_eccentricity_range': (0.1, 0.5), 

89 'cell_intensity_range': (5000, 20000), 

90 'background_intensity': 500 

91 }, 

92 2: { 

93 'num_cells': 50, 

94 'cell_size_range': (5, 15), 

95 'cell_eccentricity_range': (0.3, 0.8), 

96 'cell_intensity_range': (3000, 12000), 

97 'background_intensity': 300 

98 } 

99 } 

100 shared_cell_fraction: Fraction of cells shared between wavelengths (0.0-1.0) 

101 0.0 means all cells are unique to each wavelength 

102 1.0 means all cells are shared between wavelengths 

103 Default is 0.95 (95% shared) 

104 wavelength_intensities: Dictionary mapping wavelength indices to fixed intensities 

105 Example: {1: 20000, 2: 10000} 

106 wavelength_backgrounds: Dictionary mapping wavelength indices to background intensities 

107 Example: {1: 800, 2: 400} 

108 wells: List of well IDs to generate (e.g., ['A01', 'A02']) 

109 format: Format of the filenames ('ImageXpress' or 'OperaPhenix') 

110 auto_image_size: If True, automatically calculate image size based on grid and tile parameters 

111 random_seed: Random seed for reproducibility 

112 """ 

113 self.output_dir = Path(output_dir) 

114 self.grid_size = grid_size 

115 self.tile_size = tile_size 

116 self.overlap_percent = overlap_percent 

117 self.stage_error_px = stage_error_px 

118 

119 # Calculate image size if auto_image_size is True 

120 if auto_image_size: 

121 self.image_size = self._calculate_image_size(grid_size, tile_size, overlap_percent, stage_error_px) 

122 print(f"Auto-calculated image size: {self.image_size[0]}x{self.image_size[1]}") 

123 else: 

124 self.image_size = image_size 

125 self.wavelengths = wavelengths 

126 self.z_stack_levels = z_stack_levels 

127 self.z_step_size = z_step_size 

128 self.num_cells = num_cells 

129 self.cell_size_range = cell_size_range 

130 self.cell_eccentricity_range = cell_eccentricity_range 

131 self.cell_intensity_range = cell_intensity_range 

132 self.background_intensity = background_intensity 

133 self.noise_level = noise_level 

134 self.wavelength_params = wavelength_params or {} 

135 self.shared_cell_fraction = shared_cell_fraction 

136 

137 # Set default wavelength intensities if not provided 

138 if wavelength_intensities is None: 

139 self.wavelength_intensities = {1: 20000, 2: 10000} 

140 # Add defaults for additional wavelengths if needed 

141 for w in range(3, wavelengths + 1): 

142 self.wavelength_intensities[w] = 15000 

143 else: 

144 self.wavelength_intensities = wavelength_intensities 

145 

146 # Set default wavelength backgrounds if not provided 

147 if wavelength_backgrounds is None: 

148 self.wavelength_backgrounds = {1: 800, 2: 400} 

149 # Add defaults for additional wavelengths if needed 

150 for w in range(3, wavelengths + 1): 

151 self.wavelength_backgrounds[w] = 600 

152 else: 

153 self.wavelength_backgrounds = wavelength_backgrounds 

154 

155 # Store the wells to generate 

156 self.wells = wells 

157 

158 # Store the format 

159 self.format = format 

160 

161 # Store the base random seed 

162 self.base_random_seed = random_seed 

163 

164 # Set random seed if provided 

165 if random_seed is not None: 

166 np.random.seed(random_seed) 

167 random.seed(random_seed) 

168 

169 # Create output directory structure 

170 # For ImageXpress, create TimePoint_1 directory 

171 # For Opera Phenix, create Images directory 

172 if self.format == 'ImageXpress': 

173 self.timepoint_dir = self.output_dir / "TimePoint_1" 

174 self.timepoint_dir.mkdir(parents=True, exist_ok=True) 

175 else: # OperaPhenix 

176 # Create the Images directory for Opera Phenix 

177 self.images_dir = self.output_dir / "Images" 

178 self.images_dir.mkdir(parents=True, exist_ok=True) 

179 self.timepoint_dir = self.images_dir # Store images in the Images directory 

180 

181 # Calculate effective step size with overlap 

182 self.step_x = int(tile_size[0] * (1 - overlap_percent / 100)) 

183 self.step_y = int(tile_size[1] * (1 - overlap_percent / 100)) 

184 

185 # First, generate a common set of base cells for all wavelengths 

186 # This ensures consistent patterns across wavelengths for better registration 

187 base_cells = [] 

188 max_num_cells = max([ 

189 self.wavelength_params.get(w+1, {}).get('num_cells', self.num_cells) 

190 for w in range(wavelengths) 

191 ]) 

192 

193 # Generate shared cell positions and common attributes 

194 for i in range(max_num_cells): 

195 # Generate cell in a way that ensures good features in overlap regions 

196 # Bias cell positions to increase density in overlap regions for better registration 

197 

198 # Calculate overlap regions (where tiles will overlap) 

199 overlap_x = int(tile_size[0] * (overlap_percent / 100)) 

200 overlap_y = int(tile_size[1] * (overlap_percent / 100)) 

201 

202 # Very strongly favor overlap regions with 80% probability 

203 # to ensure very high density of features in the 10% overlap region for reliable registration 

204 if np.random.random() < 0.8: 

205 # Position in an overlap region between tiles 

206 col = np.random.randint(0, grid_size[1]) 

207 row = np.random.randint(0, grid_size[0]) 

208 

209 # Calculate base tile position 

210 base_x = col * self.step_x 

211 base_y = row * self.step_y 

212 

213 # Position cells in right/bottom overlapping regions 

214 if np.random.random() < 0.5: 

215 # Right overlap region 

216 x = base_x + tile_size[0] - overlap_x + np.random.randint(0, overlap_x) 

217 y = base_y + np.random.randint(0, tile_size[1]) 

218 else: 

219 # Bottom overlap region 

220 x = base_x + np.random.randint(0, tile_size[0]) 

221 y = base_y + tile_size[1] - overlap_y + np.random.randint(0, overlap_y) 

222 

223 # Ensure we're within image bounds 

224 x = min(x, image_size[0] - 1) 

225 y = min(y, image_size[1] - 1) 

226 else: 

227 # Random position anywhere in the image 

228 x = np.random.randint(0, image_size[0]) 

229 y = np.random.randint(0, image_size[1]) 

230 

231 # Common cell attributes 

232 size = np.random.uniform(*self.cell_size_range) 

233 eccentricity = np.random.uniform(*self.cell_eccentricity_range) 

234 rotation = np.random.uniform(0, 2*np.pi) 

235 

236 base_cells.append({ 

237 'x': x, 

238 'y': y, 

239 'size': size, 

240 'eccentricity': eccentricity, 

241 'rotation': rotation 

242 }) 

243 

244 # We'll generate cell parameters for each well and wavelength on demand 

245 # This is just a placeholder initialization 

246 self.cell_params = {} 

247 

248 # Store wavelength-specific parameters for later use 

249 self.wavelength_specific_params = [] 

250 for w in range(wavelengths): 

251 wavelength_idx = w + 1 # 1-based wavelength index 

252 

253 # Get wavelength-specific parameters or use defaults 

254 w_params = self.wavelength_params.get(wavelength_idx, {}) 

255 w_num_cells = w_params.get('num_cells', self.num_cells) 

256 w_cell_size_range = w_params.get('cell_size_range', self.cell_size_range) 

257 w_cell_intensity_range = w_params.get('cell_intensity_range', self.cell_intensity_range) 

258 

259 self.wavelength_specific_params.append({ 

260 'wavelength_idx': wavelength_idx, 

261 'num_cells': w_num_cells, 

262 'cell_size_range': w_cell_size_range, 

263 'cell_intensity_range': w_cell_intensity_range 

264 }) 

265 

266 # We'll generate cells on demand in generate_cell_image 

267 

268 def generate_cell_image(self, wavelength, z_level, well=None): 

269 """ 

270 Generate a full image with cells for a specific wavelength and Z level. 

271 

272 Args: 

273 wavelength: Wavelength channel index 

274 z_level: Z-stack level index 

275 well: Well identifier (e.g., 'A01') 

276 

277 Returns: 

278 Full image with cells 

279 """ 

280 # Generate a unique key for this well and wavelength 

281 key = f"{well}_{wavelength}" if well else f"default_{wavelength}" 

282 

283 # Get wavelength-specific parameters 

284 wavelength_idx = wavelength + 1 # Convert to 1-based index for params lookup 

285 w_params = self.wavelength_params.get(wavelength_idx, {}) 

286 

287 # Generate cells for this well and wavelength if not already generated 

288 if key not in self.cell_params: 

289 # Get parameters for cell generation 

290 w_num_cells = w_params.get('num_cells', self.num_cells) 

291 w_cell_size_range = w_params.get('cell_size_range', self.cell_size_range) 

292 w_cell_intensity_range = w_params.get('cell_intensity_range', self.cell_intensity_range) 

293 

294 # Generate cells for this wavelength 

295 cells = [] 

296 for i in range(w_num_cells): 

297 # Generate random position for this wavelength 

298 x = np.random.randint(0, self.image_size[0]) 

299 y = np.random.randint(0, self.image_size[1]) 

300 

301 # Generate random cell properties 

302 size = np.random.uniform(w_cell_size_range[0], w_cell_size_range[1]) 

303 eccentricity = np.random.uniform(self.cell_eccentricity_range[0], self.cell_eccentricity_range[1]) 

304 rotation = np.random.uniform(0, 2*np.pi) 

305 

306 # Set very different intensities for each wavelength to make them easily distinguishable 

307 if wavelength_idx == 1: 

308 # First wavelength: very high intensity 

309 intensity = 25000 

310 elif wavelength_idx == 2: 

311 # Second wavelength: medium intensity 

312 intensity = 10000 

313 else: 

314 # Other wavelengths: lower intensity 

315 intensity = 5000 + (wavelength_idx * 1000) # Increase slightly for each additional wavelength 

316 

317 cells.append({ 

318 'x': x, 

319 'y': y, 

320 'size': size, 

321 'eccentricity': eccentricity, 

322 'rotation': rotation, 

323 'intensity': intensity 

324 }) 

325 

326 # Store cells for this well and wavelength 

327 self.cell_params[key] = cells 

328 

329 # Get cells for this well and wavelength 

330 cells = self.cell_params[key] 

331 

332 # Get background intensity from wavelength_backgrounds or use default 

333 w_background = self.wavelength_backgrounds.get(wavelength_idx, self.background_intensity) 

334 

335 # Create empty image with wavelength-specific background intensity 

336 # Ensure image is 2D (not 3D) to avoid shape mismatch in ashlar 

337 image = np.ones(self.image_size, dtype=np.uint16) * w_background 

338 

339 # Get cell parameters for this well and wavelength 

340 cells = self.cell_params[key] 

341 

342 # Calculate Z-focus factor (1.0 at center Z, decreasing toward edges) 

343 if self.z_stack_levels > 1: 

344 z_center = (self.z_stack_levels - 1) / 2 

345 z_distance = abs(z_level - z_center) 

346 z_factor = 1.0 - (z_distance / z_center) if z_center > 0 else 1.0 

347 else: 

348 z_factor = 1.0 

349 

350 # Draw each cell 

351 for cell in cells: 

352 # Adjust intensity based on Z level (cells are brightest at focus) 

353 intensity = cell['intensity'] * z_factor 

354 

355 # Calculate ellipse parameters 

356 a = cell['size'] 

357 b = cell['size'] * (1 - cell['eccentricity']) 

358 

359 # Generate ellipse coordinates 

360 rr, cc = draw.ellipse( 

361 cell['y'], cell['x'], 

362 b, a, 

363 rotation=cell['rotation'], 

364 shape=self.image_size 

365 ) 

366 

367 # Add cell to image 

368 image[rr, cc] = intensity 

369 

370 # Add noise 

371 # Use wavelength-specific noise level if provided 

372 w_noise_level = w_params.get('noise_level', self.noise_level) 

373 noise = np.random.normal(0, w_noise_level, self.image_size) 

374 image = image + noise 

375 

376 # Apply blur based on Z distance from focus 

377 if self.z_stack_levels > 1: 

378 # More blur for Z levels further from center 

379 # Scale blur by z_step_size to create more realistic Z-stack effect 

380 # z_step_size controls the amount of blur between Z-steps 

381 blur_sigma = (self.z_step_size/500) * (1.0 + 2.0 * (1.0 - z_factor)) 

382 print(f" Z-level {z_level}: blur_sigma={blur_sigma:.2f} (z_factor={z_factor:.2f}, z_step_size={self.z_step_size})") 

383 image = filters.gaussian(image, sigma=blur_sigma, preserve_range=True) 

384 

385 # Ensure valid pixel values 

386 image = np.clip(image, 0, 65535).astype(np.uint16) 

387 

388 return image 

389 

390 # We've replaced the generate_tiles method with position pre-generation in generate_dataset 

391 

392 def generate_htd_file(self): 

393 """Generate HTD file with metadata in the format expected by ezstitcher.""" 

394 # Derive plate name from output directory name 

395 plate_name = self.output_dir.name 

396 

397 if self.format == 'OperaPhenix': 

398 # Generate Index.xml for Opera Phenix 

399 return self.generate_opera_phenix_index_xml(plate_name) 

400 else: 

401 # Generate HTD file for ImageXpress 

402 htd_filename = f"{plate_name}.HTD" 

403 

404 # Generate the main HTD file in the plate dir 

405 htd_path = self.output_dir / htd_filename 

406 

407 # Basic HTD file content matching the format of real HTD files 

408 htd_content = f""""HTSInfoFile", Version 1.0 

409"Description", "Synthetic microscopy data for testing" 

410"PlateType", 6 

411"TimePoints", 1 

412"ZSeries", {"TRUE" if self.z_stack_levels > 1 else "FALSE"} 

413"ZSteps", {self.z_stack_levels} 

414"ZProjection", FALSE 

415"XWells", 4 

416"YWells", 3""" 

417 

418 # Add wells selection (only the wells we're using are TRUE) 

419 for y in range(3): # 3 rows (A, B, C) 

420 row_wells = [] 

421 for x in range(4): # 4 columns (1, 2, 3, 4) 

422 well = f"{chr(65+y)}{x+1:02d}" # A01, A02, etc. 

423 row_wells.append("TRUE" if well in self.wells else "FALSE") 

424 htd_content += f"\n\"WellsSelection{y+1}\", {', '.join(row_wells)}" 

425 

426 # Add sites information 

427 htd_content += f"\n\"Sites\", TRUE" 

428 htd_content += f"\n\"XSites\", {self.grid_size[1]}" 

429 htd_content += f"\n\"YSites\", {self.grid_size[0]}" 

430 

431 # Add site selection rows (all set to FALSE except the ones we're using) 

432 for y in range(self.grid_size[0]): 

433 row = [] 

434 for x in range(self.grid_size[1]): 

435 row.append("TRUE") # All sites are used in our synthetic data 

436 htd_content += f"\n\"SiteSelection{y+1}\", {', '.join(row)}" 

437 

438 # Add wavelength information 

439 htd_content += f"\n\"Waves\", TRUE" 

440 htd_content += f"\n\"NWavelengths\", {self.wavelengths}" 

441 

442 # Add wavelength names and collection flags 

443 for w in range(self.wavelengths): 

444 htd_content += f"\n\"WaveName{w+1}\", \"W{w+1}\"" 

445 htd_content += f"\n\"WaveCollect{w+1}\", 1" 

446 

447 # Add unique identifier and end file marker 

448 htd_content += f"\n\"UniquePlateIdentifier\", \"{plate_name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}\"" 

449 htd_content += "\n\"EndFile\"" 

450 

451 # Write HTD file in plate root directory 

452 with open(htd_path, 'w') as f: 

453 f.write(htd_content) 

454 

455 # For ImageXpress, also create a copy in the TimePoint directory 

456 timepoint_htd_path = self.timepoint_dir / htd_filename 

457 with open(timepoint_htd_path, 'w') as f: 

458 f.write(htd_content) 

459 

460 return htd_path 

461 

462 def generate_opera_phenix_index_xml(self, plate_name): 

463 """Generate Index.xml file for Opera Phenix format.""" 

464 # Create the Index.xml file in the Images directory 

465 index_xml_path = self.images_dir / "Index.xml" 

466 

467 # Get current date and time for the measurement ID 

468 current_time = datetime.now().strftime("%Y-%m-%dT%H_%M_%S") 

469 measurement_id = f"{plate_name}__{current_time}-Measurement 1" 

470 

471 # Create a unique ID for the measurement 

472 import uuid 

473 unique_id = str(uuid.uuid4()) 

474 

475 # Extract row and column numbers from wells 

476 # Convert well names like 'A01' to row and column indices 

477 well_indices = [] 

478 for well in self.wells: 

479 row = ord(well[0]) - ord('A') + 1 # A -> 1, B -> 2, etc. 

480 col = int(well[1:3]) 

481 well_indices.append((row, col)) 

482 

483 # Calculate pixel size in meters (for ImageResolutionX/Y) 

484 # Default is 0.65 µm, but we'll use a more realistic value for Opera Phenix 

485 pixel_size_meters = 1.1867525298988041E-06 # ~1.19 µm 

486 

487 # Calculate Z-step size in meters 

488 z_step_size_meters = self.z_step_size * 1e-6 # Convert from µm to m 

489 

490 # Start building the XML content 

491 xml_content = f"""<?xml version="1.0" encoding="utf-8"?> 

492<EvaluationInputData xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="1" xmlns="http://www.perkinelmer.com/PEHH/HarmonyV6"> 

493 <User>Synthetic</User> 

494 <InstrumentType>Phenix</InstrumentType> 

495 <Plates> 

496 <Plate> 

497 <PlateID>{plate_name}</PlateID> 

498 <MeasurementID>{unique_id}</MeasurementID> 

499 <MeasurementStartTime>{datetime.now().isoformat()}-04:00</MeasurementStartTime> 

500 <n>{plate_name}</n> 

501 <PlateTypeName>96well</PlateTypeName> 

502 <PlateRows>8</PlateRows> 

503 <PlateColumns>12</PlateColumns>""" 

504 

505 # Add wells 

506 for row, col in well_indices: 

507 xml_content += f"\n <Well id=\"{row:02d}{col:02d}\" />" 

508 

509 xml_content += "\n </Plate>\n </Plates>\n <Wells>" 

510 

511 # Add well details 

512 for row, col in well_indices: 

513 xml_content += f"\n <Well>\n <id>{row:02d}{col:02d}</id>\n <Row>{row}</Row>\n <Col>{col}</Col>" 

514 

515 # Add images for each site and channel 

516 for site in range(1, self.grid_size[0] * self.grid_size[1] + 1): 

517 for channel in range(1, self.wavelengths + 1): 

518 for z in range(1, self.z_stack_levels + 1): 

519 xml_content += f"\n <Image id=\"{row:02d}{col:02d}K1F{site}P{z}R{channel}\" />" 

520 

521 xml_content += "\n </Well>" 

522 

523 # Add image details section 

524 xml_content += """ 

525 </Wells> 

526 <Images> 

527 <Map> 

528 <Entry ChannelID="1"> 

529 <ChannelName>HOECHST 33342</ChannelName> 

530 <ImageType>Signal</ImageType> 

531 <AcquisitionType>NonConfocal</AcquisitionType> 

532 <IlluminationType>Epifluorescence</IlluminationType> 

533 <ChannelType>Fluorescence</ChannelType> 

534 <ImageResolutionX Unit="m">{}</ImageResolutionX> 

535 <ImageResolutionY Unit="m">{}</ImageResolutionY> 

536 <ImageSizeX>{}</ImageSizeX> 

537 <ImageSizeY>{}</ImageSizeY> 

538 <BinningX>2</BinningX> 

539 <BinningY>2</BinningY> 

540 <MaxIntensity>65536</MaxIntensity> 

541 <CameraType>AndorZylaCam</CameraType> 

542 <MainExcitationWavelength Unit="nm">375</MainExcitationWavelength> 

543 <MainEmissionWavelength Unit="nm">456</MainEmissionWavelength> 

544 <ObjectiveMagnification Unit="">10</ObjectiveMagnification> 

545 <ObjectiveNA Unit="">0.3</ObjectiveNA> 

546 <ExposureTime Unit="s">0.03</ExposureTime> 

547 <OrientationMatrix>[[1.009457,0,0,34.3],[0,-1.009457,0,-15.1],[0,0,1.33,-6.014]]</OrientationMatrix> 

548 <CropArea>[[0,0],[2160,2160],[2160,2160]]</CropArea> 

549 </Entry>""".format(pixel_size_meters, pixel_size_meters, self.image_size[0], self.image_size[1]) 

550 

551 # Add entries for each channel 

552 channel_names = ["Calcein", "Alexa 647", "FITC", "TRITC", "Cy5"] 

553 excitation_wavelengths = [488, 647, 488, 561, 647] 

554 emission_wavelengths = [525, 665, 525, 590, 665] 

555 exposure_times = [0.05, 0.1, 0.05, 0.08, 0.1] 

556 

557 for channel in range(2, self.wavelengths + 1): 

558 channel_idx = min(channel - 2, len(channel_names) - 1) # Ensure we don't go out of bounds 

559 xml_content += f""" 

560 <Entry ChannelID="{channel}"> 

561 <ChannelName>{channel_names[channel_idx]}</ChannelName> 

562 <ImageType>Signal</ImageType> 

563 <AcquisitionType>NonConfocal</AcquisitionType> 

564 <IlluminationType>Epifluorescence</IlluminationType> 

565 <ChannelType>Fluorescence</ChannelType> 

566 <ImageResolutionX Unit="m">{pixel_size_meters}</ImageResolutionX> 

567 <ImageResolutionY Unit="m">{pixel_size_meters}</ImageResolutionY> 

568 <ImageSizeX>{self.image_size[0]}</ImageSizeX> 

569 <ImageSizeY>{self.image_size[1]}</ImageSizeY> 

570 <BinningX>2</BinningX> 

571 <BinningY>2</BinningY> 

572 <MaxIntensity>65536</MaxIntensity> 

573 <CameraType>AndorZylaCam</CameraType> 

574 <MainExcitationWavelength Unit="nm">{excitation_wavelengths[channel_idx]}</MainExcitationWavelength> 

575 <MainEmissionWavelength Unit="nm">{emission_wavelengths[channel_idx]}</MainEmissionWavelength> 

576 <ObjectiveMagnification Unit="">10</ObjectiveMagnification> 

577 <ObjectiveNA Unit="">0.3</ObjectiveNA> 

578 <ExposureTime Unit="s">{exposure_times[channel_idx]}</ExposureTime> 

579 <OrientationMatrix>[[1.009457,0,0,34.3],[0,-1.009457,0,-15.1],[0,0,1.33,-6.014]]</OrientationMatrix> 

580 <CropArea>[[0,0],[2160,2160],[2160,2160]]</CropArea> 

581 </Entry>""" 

582 

583 # Add image information section 

584 xml_content += f""" 

585 </Map> 

586 <PixelSizeCalibration> 

587 <PixelSize Unit="µm">{pixel_size_meters * 1e6:.4f}</PixelSize> 

588 <MagnificationRatio>1.0</MagnificationRatio> 

589 </PixelSizeCalibration>""" 

590 

591 # Add detailed image information for each image 

592 for row, col in well_indices: 

593 for site in range(1, self.grid_size[0] * self.grid_size[1] + 1): 

594 # Calculate position for this site 

595 site_row = (site - 1) // self.grid_size[1] 

596 site_col = (site - 1) % self.grid_size[1] 

597 

598 # Calculate position in meters (typical Opera Phenix values) 

599 # These are arbitrary values for demonstration 

600 pos_x = 0.000576762 + site_col * 0.001 # Arbitrary X position 

601 pos_y = 0.000576762 + site_row * 0.001 # Arbitrary Y position 

602 

603 for z in range(1, self.z_stack_levels + 1): 

604 # Calculate Z position based on Z level 

605 pos_z = 0.0001 + (z - 1) * z_step_size_meters 

606 abs_pos_z = 0.135809004 + (z - 1) * z_step_size_meters # Arbitrary base Z position 

607 

608 for channel in range(1, self.wavelengths + 1): 

609 # Create image ID 

610 image_id = f"{row:02d}{col:02d}K1F{site}P{z}R{channel}" 

611 

612 # Create URL (filename) 

613 url = f"r{row:02d}c{col:02d}f{site}p{z:02d}-ch{channel}sk1fk1fl1.tiff" 

614 

615 # Add image element 

616 xml_content += f""" 

617 <Image Version="1"> 

618 <id>{image_id}</id> 

619 <State>Ok</State> 

620 <URL>{url}</URL> 

621 <Row>{row}</Row> 

622 <Col>{col}</Col> 

623 <FieldID>{site}</FieldID> 

624 <PlaneID>{z}</PlaneID> 

625 <TimepointID>1</TimepointID> 

626 <SequenceID>1</SequenceID> 

627 <GroupID>1</GroupID> 

628 <ChannelID>{channel}</ChannelID> 

629 <FlimID>1</FlimID> 

630 <PositionX Unit="m">{pos_x}</PositionX> 

631 <PositionY Unit="m">{pos_y}</PositionY> 

632 <PositionZ Unit="m">{pos_z}</PositionZ> 

633 <AbsPositionZ Unit="m">{abs_pos_z}</AbsPositionZ> 

634 <MeasurementTimeOffset Unit="s">0</MeasurementTimeOffset> 

635 <AbsTime>{datetime.now().isoformat()}-04:00</AbsTime> 

636 </Image>""" 

637 

638 # Close the XML 

639 xml_content += """ 

640 </Images> 

641</EvaluationInputData>""" 

642 

643 # Write the XML file 

644 with open(index_xml_path, 'w', encoding='utf-8') as f: 

645 f.write(xml_content) 

646 

647 return index_xml_path 

648 

649 def _calculate_image_size(self, grid_size, tile_size, overlap_percent, stage_error_px): 

650 """ 

651 Calculate the appropriate image size based on grid dimensions, tile size, and overlap. 

652 

653 Args: 

654 grid_size: Tuple of (rows, cols) for the grid of tiles 

655 tile_size: Size of each tile (width, height) 

656 overlap_percent: Percentage of overlap between tiles 

657 stage_error_px: Random error in stage positioning (pixels) 

658 

659 Returns: 

660 tuple: (width, height) of the calculated image size 

661 """ 

662 # Calculate effective step size with overlap 

663 step_x = int(tile_size[0] * (1 - overlap_percent / 100)) 

664 step_y = int(tile_size[1] * (1 - overlap_percent / 100)) 

665 

666 # Calculate minimum required size 

667 min_width = step_x * (grid_size[1] - 1) + tile_size[0] 

668 min_height = step_y * (grid_size[0] - 1) + tile_size[1] 

669 

670 # Add margin for stage positioning errors 

671 margin = stage_error_px * 2 

672 width = min_width + margin 

673 height = min_height + margin 

674 

675 return (width, height) 

676 

677 def generate_dataset(self): 

678 """Generate the complete dataset.""" 

679 print(f"Generating synthetic microscopy dataset in {self.output_dir}") 

680 print(f"Grid size: {self.grid_size[0]}x{self.grid_size[1]}") 

681 print(f"Wavelengths: {self.wavelengths}") 

682 print(f"Z-stack levels: {self.z_stack_levels}") 

683 print(f"Wells: {', '.join(self.wells)}") 

684 

685 # Generate HTD file 

686 htd_path = self.generate_htd_file() 

687 print(f"Generated HTD file: {htd_path}") 

688 

689 # Process each well 

690 for well_index, well in enumerate(self.wells): 

691 print(f"\nGenerating data for well {well}...") 

692 

693 # Use a different random seed for each well if base seed is provided 

694 if self.base_random_seed is not None: 

695 well_seed = self.base_random_seed + well_index 

696 np.random.seed(well_seed) 

697 random.seed(well_seed) 

698 print(f"Using random seed {well_seed} for well {well}") 

699 

700 # Pre-generate the positions for each site to ensure consistency across Z-levels 

701 # This creates a mapping of site_index -> (base_x_pos, base_y_pos) 

702 site_positions = {} 

703 site_index = 1 

704 for row in range(self.grid_size[0]): 

705 for col in range(self.grid_size[1]): 

706 # Calculate base position 

707 x = col * self.step_x 

708 y = row * self.step_y 

709 

710 # Add random stage positioning error 

711 # We apply this error to the base position, it will be constant across Z-steps 

712 x_error = np.random.randint(-self.stage_error_px, self.stage_error_px) 

713 y_error = np.random.randint(-self.stage_error_px, self.stage_error_px) 

714 

715 x_pos = x + x_error 

716 y_pos = y + y_error 

717 

718 # Ensure we don't go out of bounds 

719 x_pos = max(0, min(x_pos, self.image_size[0] - self.tile_size[0])) 

720 y_pos = max(0, min(y_pos, self.image_size[1] - self.tile_size[1])) 

721 

722 site_positions[site_index] = (x_pos, y_pos) 

723 site_index += 1 

724 

725 # For multiple Z-stack levels 

726 if self.z_stack_levels > 1: 

727 # Handle differently based on format 

728 if self.format == 'ImageXpress': 

729 # For ImageXpress, create ZStep folders 

730 # Make sure all ZStep folders are created first 

731 for z in range(self.z_stack_levels): 

732 z_level = z + 1 # 1-based Z level index 

733 zstep_dir = self.timepoint_dir / f"ZStep_{z_level}" 

734 zstep_dir.mkdir(exist_ok=True) 

735 print(f"Created ZStep folder: {zstep_dir}") 

736 else: # OperaPhenix 

737 # Opera Phenix doesn't use ZStep folders - all images go directly in the Images folder 

738 print(f"Opera Phenix format: all Z-stack images will be placed directly in the Images folder") 

739 

740 # Now generate images for each Z-level 

741 for z in range(self.z_stack_levels): 

742 z_level = z + 1 # 1-based Z level index 

743 

744 # For ImageXpress, use ZStep folders; for Opera Phenix, use the Images folder directly 

745 if self.format == 'ImageXpress': 

746 target_dir = self.timepoint_dir / f"ZStep_{z_level}" 

747 else: # OperaPhenix 

748 target_dir = self.timepoint_dir # This is already set to self.images_dir for Opera Phenix 

749 

750 # Generate images for each wavelength at this Z level 

751 for w in range(self.wavelengths): 

752 wavelength = w + 1 # 1-based wavelength index 

753 

754 # Generate full image 

755 print(f"Generating full image for wavelength {wavelength}, Z level {z_level}...") 

756 full_image = self.generate_cell_image(w, z, well=well) 

757 

758 # Save tiles for this Z level using the pre-generated positions 

759 site_index = 1 

760 for row in range(self.grid_size[0]): 

761 for col in range(self.grid_size[1]): 

762 # Get the pre-generated position 

763 x_pos, y_pos = site_positions[site_index] 

764 

765 # Extract tile 

766 tile = full_image[ 

767 y_pos:y_pos + self.tile_size[1], 

768 x_pos:x_pos + self.tile_size[0] 

769 ] 

770 

771 # Create filename based on format 

772 if self.format == 'ImageXpress': 

773 # ImageXpress format: WellID_sXXX_wY.tif (Z-level is indicated by the ZStep folder) 

774 # Create filename without zero-padding site indices 

775 # This tests the padding functionality in the stitcher 

776 filename = f"{well}_s{site_index}_w{wavelength}.tif" 

777 else: # OperaPhenix 

778 # Opera Phenix format: rXXcYYfZZZpWW-chVskNfkNflN.tiff 

779 # Extract row and column from well ID (e.g., 'A01' -> row=1, col=1) 

780 row = ord(well[0]) - ord('A') + 1 

781 col = int(well[1:3]) 

782 filename = f"r{row:02d}c{col:02d}f{site_index}p{z_level:02d}-ch{wavelength}sk1fk1fl1.tiff" 

783 filepath = target_dir / filename 

784 

785 # Save image without compression 

786 tifffile.imwrite(filepath, tile, compression=None) 

787 

788 # Print progress with full path for debugging 

789 print(f" Saved tile: {target_dir.name}/{filename} (position: {x_pos}, {y_pos})") 

790 print(f" Full path: {filepath.resolve()}") 

791 site_index += 1 

792 else: 

793 # For single Z level (no Z-stack), just save files directly in TimePoint_1 

794 for w in range(self.wavelengths): 

795 wavelength = w + 1 # 1-based wavelength index 

796 

797 # Generate full image for the single Z level 

798 print(f"Generating full image for wavelength {wavelength} (no Z-stack)...") 

799 full_image = self.generate_cell_image(w, 0, well=well) 

800 

801 # Save tiles without Z-stack index 

802 site_index = 1 

803 for row in range(self.grid_size[0]): 

804 for col in range(self.grid_size[1]): 

805 # Get the pre-generated position 

806 x_pos, y_pos = site_positions[site_index] 

807 

808 # Extract tile 

809 tile = full_image[ 

810 y_pos:y_pos + self.tile_size[1], 

811 x_pos:x_pos + self.tile_size[0] 

812 ] 

813 

814 # Create filename based on format 

815 if self.format == 'ImageXpress': 

816 # ImageXpress format: WellID_sXXX_wY.tif 

817 # Create filename without Z-index and without zero-padding site indices 

818 # This tests the padding functionality in the stitcher 

819 filename = f"{well}_s{site_index}_w{wavelength}.tif" 

820 else: # OperaPhenix 

821 # Opera Phenix format: rXXcYYfZZZpWW-chVskNfkNflN.tiff 

822 # Extract row and column from well ID (e.g., 'A01' -> row=1, col=1) 

823 row = ord(well[0]) - ord('A') + 1 

824 col = int(well[1:3]) 

825 filename = f"r{row:02d}c{col:02d}f{site_index}p01-ch{wavelength}sk1fk1fl1.tiff" 

826 filepath = self.timepoint_dir / filename 

827 

828 # Save image without compression 

829 tifffile.imwrite(filepath, tile, compression=None) 

830 

831 # Print progress 

832 print(f" Saved tile: {filename} (position: {x_pos}, {y_pos})") 

833 site_index += 1 

834 

835 print("Dataset generation complete!")