Coverage for ezstitcher/tests/generators/generate_synthetic

1#!/usr/bin/env python3

2"""

3Generate synthetic microscopy images for testing ezstitcher.

5This module generates synthetic microscopy images with the following features:

6- Multiple wavelengths (channels)

7- Z-stack support with varying focus levels

8- Cell-like structures (circular particles with varying eccentricity)

9- Proper tiling with configurable overlap

10- Realistic stage positioning errors

11- HTD file generation for metadata

12- Automatic image size calculation based on grid and tile parameters

14Usage:

15 from ezstitcher.tests.generators.generate_synthetic_data import SyntheticMicroscopyGenerator

17 generator = SyntheticMicroscopyGenerator(

18 output_dir="path/to/output",

19 grid_size=(3, 3),

20 wavelengths=2,

21 z_stack_levels=3,

22 auto_image_size=True

23 )

24 generator.generate_dataset()

25"""

27import os

28import sys

29import random

30import numpy as np

31import tifffile

32from pathlib import Path

33from skimage import draw, filters, transform, util

34from datetime import datetime

37class SyntheticMicroscopyGenerator:

38 """Generate synthetic microscopy images for testing."""

40 def __init__(self,

41 output_dir,

42 grid_size=(3, 3),

43 image_size=(1024, 1024),

44 tile_size=(512, 512),

45 overlap_percent=10,

46 stage_error_px=2,

47 wavelengths=2,

48 z_stack_levels=1,

49 z_step_size=1,

50 num_cells=50,

51 cell_size_range=(10, 30),

52 cell_eccentricity_range=(0.1, 0.5),

53 cell_intensity_range=(5000, 20000),

54 background_intensity=500,

55 noise_level=100,

56 wavelength_params=None,

57 shared_cell_fraction=0.95, # Fraction of cells shared between wavelengths

58 wavelength_intensities=None, # Fixed intensities for each wavelength

59 wavelength_backgrounds=None, # Background intensities for each wavelength

60 wells=['A01'], # List of wells to generate

61 format='ImageXpress', # Format of the filenames ('ImageXpress' or 'OperaPhenix')

62 auto_image_size=True, # Automatically calculate image size based on grid and tile size

63 random_seed=None):

64 """

65 Initialize the synthetic microscopy generator.

67 Args:

68 output_dir: Directory to save generated images

69 grid_size: Tuple of (rows, cols) for the grid of tiles

70 image_size: Size of the full image before tiling

71 tile_size: Size of each tile

72 overlap_percent: Percentage of overlap between tiles

73 stage_error_px: Random error in stage positioning (pixels)

74 wavelengths: Number of wavelength channels to generate

75 z_stack_levels: Number of Z-stack levels to generate

76 z_step_size: Spacing between Z-steps in microns

77 num_cells: Number of cells to generate

78 cell_size_range: Range of cell sizes (min, max)

79 cell_eccentricity_range: Range of cell eccentricity (min, max)

80 cell_intensity_range: Range of cell intensity (min, max)

81 background_intensity: Background intensity level

82 noise_level: Amount of noise to add

83 wavelength_params: Optional dictionary of parameters for each wavelength

84 Example: {

85 1: {

86 'num_cells': 100,

87 'cell_size_range': (10, 30),

88 'cell_eccentricity_range': (0.1, 0.5),

89 'cell_intensity_range': (5000, 20000),

90 'background_intensity': 500

91 },

92 2: {

93 'num_cells': 50,

94 'cell_size_range': (5, 15),

95 'cell_eccentricity_range': (0.3, 0.8),

96 'cell_intensity_range': (3000, 12000),

97 'background_intensity': 300

98 }

99 }

100 shared_cell_fraction: Fraction of cells shared between wavelengths (0.0-1.0)

101 0.0 means all cells are unique to each wavelength

102 1.0 means all cells are shared between wavelengths

103 Default is 0.95 (95% shared)

104 wavelength_intensities: Dictionary mapping wavelength indices to fixed intensities

105 Example: {1: 20000, 2: 10000}

106 wavelength_backgrounds: Dictionary mapping wavelength indices to background intensities

107 Example: {1: 800, 2: 400}

108 wells: List of well IDs to generate (e.g., ['A01', 'A02'])

109 format: Format of the filenames ('ImageXpress' or 'OperaPhenix')

110 auto_image_size: If True, automatically calculate image size based on grid and tile parameters

111 random_seed: Random seed for reproducibility

112 """

113 self.output_dir = Path(output_dir)

114 self.grid_size = grid_size

115 self.tile_size = tile_size

116 self.overlap_percent = overlap_percent

117 self.stage_error_px = stage_error_px

118

119 # Calculate image size if auto_image_size is True

120 if auto_image_size:

121 self.image_size = self._calculate_image_size(grid_size, tile_size, overlap_percent, stage_error_px)

122 print(f"Auto-calculated image size: {self.image_size[0]}x{self.image_size[1]}")

123 else:

124 self.image_size = image_size

125 self.wavelengths = wavelengths

126 self.z_stack_levels = z_stack_levels

127 self.z_step_size = z_step_size

128 self.num_cells = num_cells

129 self.cell_size_range = cell_size_range

130 self.cell_eccentricity_range = cell_eccentricity_range

131 self.cell_intensity_range = cell_intensity_range

132 self.background_intensity = background_intensity

133 self.noise_level = noise_level

134 self.wavelength_params = wavelength_params or {}

135 self.shared_cell_fraction = shared_cell_fraction

136

137 # Set default wavelength intensities if not provided

138 if wavelength_intensities is None:

139 self.wavelength_intensities = {1: 20000, 2: 10000}

140 # Add defaults for additional wavelengths if needed

141 for w in range(3, wavelengths + 1):

142 self.wavelength_intensities[w] = 15000

143 else:

144 self.wavelength_intensities = wavelength_intensities

145

146 # Set default wavelength backgrounds if not provided

147 if wavelength_backgrounds is None:

148 self.wavelength_backgrounds = {1: 800, 2: 400}

149 # Add defaults for additional wavelengths if needed

150 for w in range(3, wavelengths + 1):

151 self.wavelength_backgrounds[w] = 600

152 else:

153 self.wavelength_backgrounds = wavelength_backgrounds

154

155 # Store the wells to generate

156 self.wells = wells

157

158 # Store the format

159 self.format = format

160

161 # Store the base random seed

162 self.base_random_seed = random_seed

163

164 # Set random seed if provided

165 if random_seed is not None:

166 np.random.seed(random_seed)

167 random.seed(random_seed)

168

169 # Create output directory structure

170 # For ImageXpress, create TimePoint_1 directory

171 # For Opera Phenix, create Images directory

172 if self.format == 'ImageXpress':

173 self.timepoint_dir = self.output_dir / "TimePoint_1"

174 self.timepoint_dir.mkdir(parents=True, exist_ok=True)

175 else: # OperaPhenix

176 # Create the Images directory for Opera Phenix

177 self.images_dir = self.output_dir / "Images"

178 self.images_dir.mkdir(parents=True, exist_ok=True)

179 self.timepoint_dir = self.images_dir # Store images in the Images directory

180

181 # Calculate effective step size with overlap

182 self.step_x = int(tile_size[0] * (1 - overlap_percent / 100))

183 self.step_y = int(tile_size[1] * (1 - overlap_percent / 100))

184

185 # First, generate a common set of base cells for all wavelengths

186 # This ensures consistent patterns across wavelengths for better registration

187 base_cells = []

188 max_num_cells = max([

189 self.wavelength_params.get(w+1, {}).get('num_cells', self.num_cells)

190 for w in range(wavelengths)

191 ])

192

193 # Generate shared cell positions and common attributes

194 for i in range(max_num_cells):

195 # Generate cell in a way that ensures good features in overlap regions

196 # Bias cell positions to increase density in overlap regions for better registration

197

198 # Calculate overlap regions (where tiles will overlap)

199 overlap_x = int(tile_size[0] * (overlap_percent / 100))

200 overlap_y = int(tile_size[1] * (overlap_percent / 100))

201

202 # Very strongly favor overlap regions with 80% probability

203 # to ensure very high density of features in the 10% overlap region for reliable registration

204 if np.random.random() < 0.8:

205 # Position in an overlap region between tiles

206 col = np.random.randint(0, grid_size[1])

207 row = np.random.randint(0, grid_size[0])

208

209 # Calculate base tile position

210 base_x = col * self.step_x

211 base_y = row * self.step_y

212

213 # Position cells in right/bottom overlapping regions

214 if np.random.random() < 0.5:

215 # Right overlap region

216 x = base_x + tile_size[0] - overlap_x + np.random.randint(0, overlap_x)

217 y = base_y + np.random.randint(0, tile_size[1])

218 else:

219 # Bottom overlap region

220 x = base_x + np.random.randint(0, tile_size[0])

221 y = base_y + tile_size[1] - overlap_y + np.random.randint(0, overlap_y)

222

223 # Ensure we're within image bounds

224 x = min(x, image_size[0] - 1)

225 y = min(y, image_size[1] - 1)

226 else:

227 # Random position anywhere in the image

228 x = np.random.randint(0, image_size[0])

229 y = np.random.randint(0, image_size[1])

230

231 # Common cell attributes

232 size = np.random.uniform(*self.cell_size_range)

233 eccentricity = np.random.uniform(*self.cell_eccentricity_range)

234 rotation = np.random.uniform(0, 2*np.pi)

235

236 base_cells.append({

237 'x': x,

238 'y': y,

239 'size': size,

240 'eccentricity': eccentricity,

241 'rotation': rotation

242 })

243

244 # We'll generate cell parameters for each well and wavelength on demand

245 # This is just a placeholder initialization

246 self.cell_params = {}

247

248 # Store wavelength-specific parameters for later use

249 self.wavelength_specific_params = []

250 for w in range(wavelengths):

251 wavelength_idx = w + 1 # 1-based wavelength index

252

253 # Get wavelength-specific parameters or use defaults

254 w_params = self.wavelength_params.get(wavelength_idx, {})

255 w_num_cells = w_params.get('num_cells', self.num_cells)

256 w_cell_size_range = w_params.get('cell_size_range', self.cell_size_range)

257 w_cell_intensity_range = w_params.get('cell_intensity_range', self.cell_intensity_range)

258

259 self.wavelength_specific_params.append({

260 'wavelength_idx': wavelength_idx,

261 'num_cells': w_num_cells,

262 'cell_size_range': w_cell_size_range,

263 'cell_intensity_range': w_cell_intensity_range

264 })

265

266 # We'll generate cells on demand in generate_cell_image

267

268 def generate_cell_image(self, wavelength, z_level, well=None):

269 """

270 Generate a full image with cells for a specific wavelength and Z level.

271

272 Args:

273 wavelength: Wavelength channel index

274 z_level: Z-stack level index

275 well: Well identifier (e.g., 'A01')

276

277 Returns:

278 Full image with cells

279 """

280 # Generate a unique key for this well and wavelength

281 key = f"{well}_{wavelength}" if well else f"default_{wavelength}"

282

283 # Get wavelength-specific parameters

284 wavelength_idx = wavelength + 1 # Convert to 1-based index for params lookup

285 w_params = self.wavelength_params.get(wavelength_idx, {})

286

287 # Generate cells for this well and wavelength if not already generated

288 if key not in self.cell_params:

289 # Get parameters for cell generation

290 w_num_cells = w_params.get('num_cells', self.num_cells)

291 w_cell_size_range = w_params.get('cell_size_range', self.cell_size_range)

292 w_cell_intensity_range = w_params.get('cell_intensity_range', self.cell_intensity_range)

293

294 # Generate cells for this wavelength

295 cells = []

296 for i in range(w_num_cells):

297 # Generate random position for this wavelength

298 x = np.random.randint(0, self.image_size[0])

299 y = np.random.randint(0, self.image_size[1])

300

301 # Generate random cell properties

302 size = np.random.uniform(w_cell_size_range[0], w_cell_size_range[1])

303 eccentricity = np.random.uniform(self.cell_eccentricity_range[0], self.cell_eccentricity_range[1])

304 rotation = np.random.uniform(0, 2*np.pi)

305

306 # Set very different intensities for each wavelength to make them easily distinguishable

307 if wavelength_idx == 1:

308 # First wavelength: very high intensity

309 intensity = 25000

310 elif wavelength_idx == 2:

311 # Second wavelength: medium intensity

312 intensity = 10000

313 else:

314 # Other wavelengths: lower intensity

315 intensity = 5000 + (wavelength_idx * 1000) # Increase slightly for each additional wavelength

316

317 cells.append({

318 'x': x,

319 'y': y,

320 'size': size,

321 'eccentricity': eccentricity,

322 'rotation': rotation,

323 'intensity': intensity

324 })

325

326 # Store cells for this well and wavelength

327 self.cell_params[key] = cells

328

329 # Get cells for this well and wavelength

330 cells = self.cell_params[key]

331

332 # Get background intensity from wavelength_backgrounds or use default

333 w_background = self.wavelength_backgrounds.get(wavelength_idx, self.background_intensity)

334

335 # Create empty image with wavelength-specific background intensity

336 # Ensure image is 2D (not 3D) to avoid shape mismatch in ashlar

337 image = np.ones(self.image_size, dtype=np.uint16) * w_background

338

339 # Get cell parameters for this well and wavelength

340 cells = self.cell_params[key]

341

342 # Calculate Z-focus factor (1.0 at center Z, decreasing toward edges)

343 if self.z_stack_levels > 1:

344 z_center = (self.z_stack_levels - 1) / 2

345 z_distance = abs(z_level - z_center)

346 z_factor = 1.0 - (z_distance / z_center) if z_center > 0 else 1.0

347 else:

348 z_factor = 1.0

349

350 # Draw each cell

351 for cell in cells:

352 # Adjust intensity based on Z level (cells are brightest at focus)

353 intensity = cell['intensity'] * z_factor

354

355 # Calculate ellipse parameters

356 a = cell['size']

357 b = cell['size'] * (1 - cell['eccentricity'])

358

359 # Generate ellipse coordinates

360 rr, cc = draw.ellipse(

361 cell['y'], cell['x'],

362 b, a,

363 rotation=cell['rotation'],

364 shape=self.image_size

365 )

366

367 # Add cell to image

368 image[rr, cc] = intensity

369

370 # Add noise

371 # Use wavelength-specific noise level if provided

372 w_noise_level = w_params.get('noise_level', self.noise_level)

373 noise = np.random.normal(0, w_noise_level, self.image_size)

374 image = image + noise

375

376 # Apply blur based on Z distance from focus

377 if self.z_stack_levels > 1:

378 # More blur for Z levels further from center

379 # Scale blur by z_step_size to create more realistic Z-stack effect

380 # z_step_size controls the amount of blur between Z-steps

381 blur_sigma = (self.z_step_size/500) * (1.0 + 2.0 * (1.0 - z_factor))

382 print(f" Z-level {z_level}: blur_sigma={blur_sigma:.2f} (z_factor={z_factor:.2f}, z_step_size={self.z_step_size})")

383 image = filters.gaussian(image, sigma=blur_sigma, preserve_range=True)

384

385 # Ensure valid pixel values

386 image = np.clip(image, 0, 65535).astype(np.uint16)

387

388 return image

389

390 # We've replaced the generate_tiles method with position pre-generation in generate_dataset

391

392 def generate_htd_file(self):

393 """Generate HTD file with metadata in the format expected by ezstitcher."""

394 # Derive plate name from output directory name

395 plate_name = self.output_dir.name

396

397 if self.format == 'OperaPhenix':

398 # Generate Index.xml for Opera Phenix

399 return self.generate_opera_phenix_index_xml(plate_name)

400 else:

401 # Generate HTD file for ImageXpress

402 htd_filename = f"{plate_name}.HTD"

403

404 # Generate the main HTD file in the plate dir

405 htd_path = self.output_dir / htd_filename

406

407 # Basic HTD file content matching the format of real HTD files

408 htd_content = f""""HTSInfoFile", Version 1.0

409"Description", "Synthetic microscopy data for testing"

410"PlateType", 6

411"TimePoints", 1

412"ZSeries", {"TRUE" if self.z_stack_levels > 1 else "FALSE"}

413"ZSteps", {self.z_stack_levels}

414"ZProjection", FALSE

415"XWells", 4

416"YWells", 3"""

417

418 # Add wells selection (only the wells we're using are TRUE)

419 for y in range(3): # 3 rows (A, B, C)

420 row_wells = []

421 for x in range(4): # 4 columns (1, 2, 3, 4)

422 well = f"{chr(65+y)}{x+1:02d}" # A01, A02, etc.

423 row_wells.append("TRUE" if well in self.wells else "FALSE")

424 htd_content += f"\n\"WellsSelection{y+1}\", {', '.join(row_wells)}"

425

426 # Add sites information

427 htd_content += f"\n\"Sites\", TRUE"

428 htd_content += f"\n\"XSites\", {self.grid_size[1]}"

429 htd_content += f"\n\"YSites\", {self.grid_size[0]}"

430

431 # Add site selection rows (all set to FALSE except the ones we're using)

432 for y in range(self.grid_size[0]):

433 row = []

434 for x in range(self.grid_size[1]):

435 row.append("TRUE") # All sites are used in our synthetic data

436 htd_content += f"\n\"SiteSelection{y+1}\", {', '.join(row)}"

437

438 # Add wavelength information

439 htd_content += f"\n\"Waves\", TRUE"

440 htd_content += f"\n\"NWavelengths\", {self.wavelengths}"

441

442 # Add wavelength names and collection flags

443 for w in range(self.wavelengths):

444 htd_content += f"\n\"WaveName{w+1}\", \"W{w+1}\""

445 htd_content += f"\n\"WaveCollect{w+1}\", 1"

446

447 # Add unique identifier and end file marker

448 htd_content += f"\n\"UniquePlateIdentifier\", \"{plate_name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}\""

449 htd_content += "\n\"EndFile\""

450

451 # Write HTD file in plate root directory

452 with open(htd_path, 'w') as f:

453 f.write(htd_content)

454

455 # For ImageXpress, also create a copy in the TimePoint directory

456 timepoint_htd_path = self.timepoint_dir / htd_filename

457 with open(timepoint_htd_path, 'w') as f:

458 f.write(htd_content)

459

460 return htd_path

461

462 def generate_opera_phenix_index_xml(self, plate_name):

463 """Generate Index.xml file for Opera Phenix format."""

464 # Create the Index.xml file in the Images directory

465 index_xml_path = self.images_dir / "Index.xml"

466

467 # Get current date and time for the measurement ID

468 current_time = datetime.now().strftime("%Y-%m-%dT%H_%M_%S")

469 measurement_id = f"{plate_name}__{current_time}-Measurement 1"

470

471 # Create a unique ID for the measurement

472 import uuid

473 unique_id = str(uuid.uuid4())

474

475 # Extract row and column numbers from wells

476 # Convert well names like 'A01' to row and column indices

477 well_indices = []

478 for well in self.wells:

479 row = ord(well[0]) - ord('A') + 1 # A -> 1, B -> 2, etc.

480 col = int(well[1:3])

481 well_indices.append((row, col))

482

483 # Calculate pixel size in meters (for ImageResolutionX/Y)

484 # Default is 0.65 µm, but we'll use a more realistic value for Opera Phenix

485 pixel_size_meters = 1.1867525298988041E-06 # ~1.19 µm

486

487 # Calculate Z-step size in meters

488 z_step_size_meters = self.z_step_size * 1e-6 # Convert from µm to m

489

490 # Start building the XML content

491 xml_content = f"""<?xml version="1.0" encoding="utf-8"?>

492<EvaluationInputData xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="1" xmlns="http://www.perkinelmer.com/PEHH/HarmonyV6">

493 <User>Synthetic</User>

494 <InstrumentType>Phenix</InstrumentType>

495 <Plates>

496 <Plate>

497 <PlateID>{plate_name}</PlateID>

498 <MeasurementID>{unique_id}</MeasurementID>

499 <MeasurementStartTime>{datetime.now().isoformat()}-04:00</MeasurementStartTime>

500 <n>{plate_name}</n>

501 <PlateTypeName>96well</PlateTypeName>

502 <PlateRows>8</PlateRows>

503 <PlateColumns>12</PlateColumns>"""

504

505 # Add wells

506 for row, col in well_indices:

507 xml_content += f"\n <Well id=\"{row:02d}{col:02d}\" />"

508

509 xml_content += "\n </Plate>\n </Plates>\n <Wells>"

510

511 # Add well details

512 for row, col in well_indices:

513 xml_content += f"\n <Well>\n <id>{row:02d}{col:02d}</id>\n <Row>{row}</Row>\n <Col>{col}</Col>"

514

515 # Add images for each site and channel

516 for site in range(1, self.grid_size[0] * self.grid_size[1] + 1):

517 for channel in range(1, self.wavelengths + 1):

518 for z in range(1, self.z_stack_levels + 1):

519 xml_content += f"\n <Image id=\"{row:02d}{col:02d}K1F{site}P{z}R{channel}\" />"

520

521 xml_content += "\n </Well>"

522

523 # Add image details section

524 xml_content += """

525 </Wells>

526 <Images>

527 <Map>

528 <Entry ChannelID="1">

529 <ChannelName>HOECHST 33342</ChannelName>

530 <ImageType>Signal</ImageType>

531 <AcquisitionType>NonConfocal</AcquisitionType>

532 <IlluminationType>Epifluorescence</IlluminationType>

533 <ChannelType>Fluorescence</ChannelType>

534 <ImageResolutionX Unit="m">{}</ImageResolutionX>

535 <ImageResolutionY Unit="m">{}</ImageResolutionY>

536 <ImageSizeX>{}</ImageSizeX>

537 <ImageSizeY>{}</ImageSizeY>

538 <BinningX>2</BinningX>

539 <BinningY>2</BinningY>

540 <MaxIntensity>65536</MaxIntensity>

541 <CameraType>AndorZylaCam</CameraType>

542 <MainExcitationWavelength Unit="nm">375</MainExcitationWavelength>

543 <MainEmissionWavelength Unit="nm">456</MainEmissionWavelength>

544 <ObjectiveMagnification Unit="">10</ObjectiveMagnification>

545 <ObjectiveNA Unit="">0.3</ObjectiveNA>

546 <ExposureTime Unit="s">0.03</ExposureTime>

547 <OrientationMatrix>[[1.009457,0,0,34.3],[0,-1.009457,0,-15.1],[0,0,1.33,-6.014]]</OrientationMatrix>

548 <CropArea>[[0,0],[2160,2160],[2160,2160]]</CropArea>

549 </Entry>""".format(pixel_size_meters, pixel_size_meters, self.image_size[0], self.image_size[1])

550

551 # Add entries for each channel

552 channel_names = ["Calcein", "Alexa 647", "FITC", "TRITC", "Cy5"]

553 excitation_wavelengths = [488, 647, 488, 561, 647]

554 emission_wavelengths = [525, 665, 525, 590, 665]

555 exposure_times = [0.05, 0.1, 0.05, 0.08, 0.1]

556

557 for channel in range(2, self.wavelengths + 1):

558 channel_idx = min(channel - 2, len(channel_names) - 1) # Ensure we don't go out of bounds

559 xml_content += f"""

560 <Entry ChannelID="{channel}">

561 <ChannelName>{channel_names[channel_idx]}</ChannelName>

562 <ImageType>Signal</ImageType>

563 <AcquisitionType>NonConfocal</AcquisitionType>

564 <IlluminationType>Epifluorescence</IlluminationType>

565 <ChannelType>Fluorescence</ChannelType>

566 <ImageResolutionX Unit="m">{pixel_size_meters}</ImageResolutionX>

567 <ImageResolutionY Unit="m">{pixel_size_meters}</ImageResolutionY>

568 <ImageSizeX>{self.image_size[0]}</ImageSizeX>

569 <ImageSizeY>{self.image_size[1]}</ImageSizeY>

570 <BinningX>2</BinningX>

571 <BinningY>2</BinningY>

572 <MaxIntensity>65536</MaxIntensity>

573 <CameraType>AndorZylaCam</CameraType>

574 <MainExcitationWavelength Unit="nm">{excitation_wavelengths[channel_idx]}</MainExcitationWavelength>

575 <MainEmissionWavelength Unit="nm">{emission_wavelengths[channel_idx]}</MainEmissionWavelength>

576 <ObjectiveMagnification Unit="">10</ObjectiveMagnification>

577 <ObjectiveNA Unit="">0.3</ObjectiveNA>

578 <ExposureTime Unit="s">{exposure_times[channel_idx]}</ExposureTime>

579 <OrientationMatrix>[[1.009457,0,0,34.3],[0,-1.009457,0,-15.1],[0,0,1.33,-6.014]]</OrientationMatrix>

580 <CropArea>[[0,0],[2160,2160],[2160,2160]]</CropArea>

581 </Entry>"""

582

583 # Add image information section

584 xml_content += f"""

585 </Map>

586 <PixelSizeCalibration>

587 <PixelSize Unit="µm">{pixel_size_meters * 1e6:.4f}</PixelSize>

588 <MagnificationRatio>1.0</MagnificationRatio>

589 </PixelSizeCalibration>"""

590

591 # Add detailed image information for each image

592 for row, col in well_indices:

593 for site in range(1, self.grid_size[0] * self.grid_size[1] + 1):

594 # Calculate position for this site

595 site_row = (site - 1) // self.grid_size[1]

596 site_col = (site - 1) % self.grid_size[1]

597

598 # Calculate position in meters (typical Opera Phenix values)

599 # These are arbitrary values for demonstration

600 pos_x = 0.000576762 + site_col * 0.001 # Arbitrary X position

601 pos_y = 0.000576762 + site_row * 0.001 # Arbitrary Y position

602

603 for z in range(1, self.z_stack_levels + 1):

604 # Calculate Z position based on Z level

605 pos_z = 0.0001 + (z - 1) * z_step_size_meters

606 abs_pos_z = 0.135809004 + (z - 1) * z_step_size_meters # Arbitrary base Z position

607

608 for channel in range(1, self.wavelengths + 1):

609 # Create image ID

610 image_id = f"{row:02d}{col:02d}K1F{site}P{z}R{channel}"

611

612 # Create URL (filename)

613 url = f"r{row:02d}c{col:02d}f{site}p{z:02d}-ch{channel}sk1fk1fl1.tiff"

614

615 # Add image element

616 xml_content += f"""

617 <Image Version="1">

618 <id>{image_id}</id>

619 <State>Ok</State>

620 <URL>{url}</URL>

621 <Row>{row}</Row>

622 <Col>{col}</Col>

623 <FieldID>{site}</FieldID>

624 <PlaneID>{z}</PlaneID>

625 <TimepointID>1</TimepointID>

626 <SequenceID>1</SequenceID>

627 <GroupID>1</GroupID>

628 <ChannelID>{channel}</ChannelID>

629 <FlimID>1</FlimID>

630 <PositionX Unit="m">{pos_x}</PositionX>

631 <PositionY Unit="m">{pos_y}</PositionY>

632 <PositionZ Unit="m">{pos_z}</PositionZ>

633 <AbsPositionZ Unit="m">{abs_pos_z}</AbsPositionZ>

634 <MeasurementTimeOffset Unit="s">0</MeasurementTimeOffset>

635 <AbsTime>{datetime.now().isoformat()}-04:00</AbsTime>

636 </Image>"""

637

638 # Close the XML

639 xml_content += """

640 </Images>

641</EvaluationInputData>"""

642

643 # Write the XML file

644 with open(index_xml_path, 'w', encoding='utf-8') as f:

645 f.write(xml_content)

646

647 return index_xml_path

648

649 def _calculate_image_size(self, grid_size, tile_size, overlap_percent, stage_error_px):

650 """

651 Calculate the appropriate image size based on grid dimensions, tile size, and overlap.

652

653 Args:

654 grid_size: Tuple of (rows, cols) for the grid of tiles

655 tile_size: Size of each tile (width, height)

656 overlap_percent: Percentage of overlap between tiles

657 stage_error_px: Random error in stage positioning (pixels)

658

659 Returns:

660 tuple: (width, height) of the calculated image size

661 """

662 # Calculate effective step size with overlap

663 step_x = int(tile_size[0] * (1 - overlap_percent / 100))

664 step_y = int(tile_size[1] * (1 - overlap_percent / 100))

665

666 # Calculate minimum required size

667 min_width = step_x * (grid_size[1] - 1) + tile_size[0]

668 min_height = step_y * (grid_size[0] - 1) + tile_size[1]

669

670 # Add margin for stage positioning errors

671 margin = stage_error_px * 2

672 width = min_width + margin

673 height = min_height + margin

674

675 return (width, height)

676

677 def generate_dataset(self):

678 """Generate the complete dataset."""

679 print(f"Generating synthetic microscopy dataset in {self.output_dir}")

680 print(f"Grid size: {self.grid_size[0]}x{self.grid_size[1]}")

681 print(f"Wavelengths: {self.wavelengths}")

682 print(f"Z-stack levels: {self.z_stack_levels}")

683 print(f"Wells: {', '.join(self.wells)}")

684

685 # Generate HTD file

686 htd_path = self.generate_htd_file()

687 print(f"Generated HTD file: {htd_path}")

688

689 # Process each well

690 for well_index, well in enumerate(self.wells):

691 print(f"\nGenerating data for well {well}...")

692

693 # Use a different random seed for each well if base seed is provided

694 if self.base_random_seed is not None:

695 well_seed = self.base_random_seed + well_index

696 np.random.seed(well_seed)

697 random.seed(well_seed)

698 print(f"Using random seed {well_seed} for well {well}")

699

700 # Pre-generate the positions for each site to ensure consistency across Z-levels

701 # This creates a mapping of site_index -> (base_x_pos, base_y_pos)

702 site_positions = {}

703 site_index = 1

704 for row in range(self.grid_size[0]):

705 for col in range(self.grid_size[1]):

706 # Calculate base position

707 x = col * self.step_x

708 y = row * self.step_y

709

710 # Add random stage positioning error

711 # We apply this error to the base position, it will be constant across Z-steps

712 x_error = np.random.randint(-self.stage_error_px, self.stage_error_px)

713 y_error = np.random.randint(-self.stage_error_px, self.stage_error_px)

714

715 x_pos = x + x_error

716 y_pos = y + y_error

717

718 # Ensure we don't go out of bounds

719 x_pos = max(0, min(x_pos, self.image_size[0] - self.tile_size[0]))

720 y_pos = max(0, min(y_pos, self.image_size[1] - self.tile_size[1]))

721

722 site_positions[site_index] = (x_pos, y_pos)

723 site_index += 1

724

725 # For multiple Z-stack levels

726 if self.z_stack_levels > 1:

727 # Handle differently based on format

728 if self.format == 'ImageXpress':

729 # For ImageXpress, create ZStep folders

730 # Make sure all ZStep folders are created first

731 for z in range(self.z_stack_levels):

732 z_level = z + 1 # 1-based Z level index

733 zstep_dir = self.timepoint_dir / f"ZStep_{z_level}"

734 zstep_dir.mkdir(exist_ok=True)

735 print(f"Created ZStep folder: {zstep_dir}")

736 else: # OperaPhenix

737 # Opera Phenix doesn't use ZStep folders - all images go directly in the Images folder

738 print(f"Opera Phenix format: all Z-stack images will be placed directly in the Images folder")

739

740 # Now generate images for each Z-level

741 for z in range(self.z_stack_levels):

742 z_level = z + 1 # 1-based Z level index

743

744 # For ImageXpress, use ZStep folders; for Opera Phenix, use the Images folder directly

745 if self.format == 'ImageXpress':

746 target_dir = self.timepoint_dir / f"ZStep_{z_level}"

747 else: # OperaPhenix

748 target_dir = self.timepoint_dir # This is already set to self.images_dir for Opera Phenix

749

750 # Generate images for each wavelength at this Z level

751 for w in range(self.wavelengths):

752 wavelength = w + 1 # 1-based wavelength index

753

754 # Generate full image

755 print(f"Generating full image for wavelength {wavelength}, Z level {z_level}...")

756 full_image = self.generate_cell_image(w, z, well=well)

757

758 # Save tiles for this Z level using the pre-generated positions

759 site_index = 1

760 for row in range(self.grid_size[0]):

761 for col in range(self.grid_size[1]):

762 # Get the pre-generated position

763 x_pos, y_pos = site_positions[site_index]

764

765 # Extract tile

766 tile = full_image[

767 y_pos:y_pos + self.tile_size[1],

768 x_pos:x_pos + self.tile_size[0]

769 ]

770

771 # Create filename based on format

772 if self.format == 'ImageXpress':

773 # ImageXpress format: WellID_sXXX_wY.tif (Z-level is indicated by the ZStep folder)

774 # Create filename without zero-padding site indices

775 # This tests the padding functionality in the stitcher

776 filename = f"{well}_s{site_index}_w{wavelength}.tif"

777 else: # OperaPhenix

778 # Opera Phenix format: rXXcYYfZZZpWW-chVskNfkNflN.tiff

779 # Extract row and column from well ID (e.g., 'A01' -> row=1, col=1)

780 row = ord(well[0]) - ord('A') + 1

781 col = int(well[1:3])

782 filename = f"r{row:02d}c{col:02d}f{site_index}p{z_level:02d}-ch{wavelength}sk1fk1fl1.tiff"

783 filepath = target_dir / filename

784

785 # Save image without compression

786 tifffile.imwrite(filepath, tile, compression=None)

787

788 # Print progress with full path for debugging

789 print(f" Saved tile: {target_dir.name}/{filename} (position: {x_pos}, {y_pos})")

790 print(f" Full path: {filepath.resolve()}")

791 site_index += 1

792 else:

793 # For single Z level (no Z-stack), just save files directly in TimePoint_1

794 for w in range(self.wavelengths):

795 wavelength = w + 1 # 1-based wavelength index

796

797 # Generate full image for the single Z level

798 print(f"Generating full image for wavelength {wavelength} (no Z-stack)...")

799 full_image = self.generate_cell_image(w, 0, well=well)

800

801 # Save tiles without Z-stack index

802 site_index = 1

803 for row in range(self.grid_size[0]):

804 for col in range(self.grid_size[1]):

805 # Get the pre-generated position

806 x_pos, y_pos = site_positions[site_index]

807

808 # Extract tile

809 tile = full_image[

810 y_pos:y_pos + self.tile_size[1],

811 x_pos:x_pos + self.tile_size[0]

812 ]

813

814 # Create filename based on format

815 if self.format == 'ImageXpress':

816 # ImageXpress format: WellID_sXXX_wY.tif

817 # Create filename without Z-index and without zero-padding site indices

818 # This tests the padding functionality in the stitcher

819 filename = f"{well}_s{site_index}_w{wavelength}.tif"

820 else: # OperaPhenix

821 # Opera Phenix format: rXXcYYfZZZpWW-chVskNfkNflN.tiff

822 # Extract row and column from well ID (e.g., 'A01' -> row=1, col=1)

823 row = ord(well[0]) - ord('A') + 1

824 col = int(well[1:3])

825 filename = f"r{row:02d}c{col:02d}f{site_index}p01-ch{wavelength}sk1fk1fl1.tiff"

826 filepath = self.timepoint_dir / filename

827

828 # Save image without compression

829 tifffile.imwrite(filepath, tile, compression=None)

830

831 # Print progress

832 print(f" Saved tile: {filename} (position: {x_pos}, {y_pos})")

833 site_index += 1

834

835 print("Dataset generation complete!")

Coverage for ezstitcher/tests/generators/generate_synthetic_data.py: 96%

306 statements