Coverage for ezstitcher/tests/generators/generate_synthetic_data.py: 96%
306 statements
« prev ^ index » next coverage.py v7.3.2, created at 2025-04-30 13:20 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2025-04-30 13:20 +0000
1#!/usr/bin/env python3
2"""
3Generate synthetic microscopy images for testing ezstitcher.
5This module generates synthetic microscopy images with the following features:
6- Multiple wavelengths (channels)
7- Z-stack support with varying focus levels
8- Cell-like structures (circular particles with varying eccentricity)
9- Proper tiling with configurable overlap
10- Realistic stage positioning errors
11- HTD file generation for metadata
12- Automatic image size calculation based on grid and tile parameters
14Usage:
15 from ezstitcher.tests.generators.generate_synthetic_data import SyntheticMicroscopyGenerator
17 generator = SyntheticMicroscopyGenerator(
18 output_dir="path/to/output",
19 grid_size=(3, 3),
20 wavelengths=2,
21 z_stack_levels=3,
22 auto_image_size=True
23 )
24 generator.generate_dataset()
25"""
27import os
28import sys
29import random
30import numpy as np
31import tifffile
32from pathlib import Path
33from skimage import draw, filters, transform, util
34from datetime import datetime
37class SyntheticMicroscopyGenerator:
38 """Generate synthetic microscopy images for testing."""
40 def __init__(self,
41 output_dir,
42 grid_size=(3, 3),
43 image_size=(1024, 1024),
44 tile_size=(512, 512),
45 overlap_percent=10,
46 stage_error_px=2,
47 wavelengths=2,
48 z_stack_levels=1,
49 z_step_size=1,
50 num_cells=50,
51 cell_size_range=(10, 30),
52 cell_eccentricity_range=(0.1, 0.5),
53 cell_intensity_range=(5000, 20000),
54 background_intensity=500,
55 noise_level=100,
56 wavelength_params=None,
57 shared_cell_fraction=0.95, # Fraction of cells shared between wavelengths
58 wavelength_intensities=None, # Fixed intensities for each wavelength
59 wavelength_backgrounds=None, # Background intensities for each wavelength
60 wells=['A01'], # List of wells to generate
61 format='ImageXpress', # Format of the filenames ('ImageXpress' or 'OperaPhenix')
62 auto_image_size=True, # Automatically calculate image size based on grid and tile size
63 random_seed=None):
64 """
65 Initialize the synthetic microscopy generator.
67 Args:
68 output_dir: Directory to save generated images
69 grid_size: Tuple of (rows, cols) for the grid of tiles
70 image_size: Size of the full image before tiling
71 tile_size: Size of each tile
72 overlap_percent: Percentage of overlap between tiles
73 stage_error_px: Random error in stage positioning (pixels)
74 wavelengths: Number of wavelength channels to generate
75 z_stack_levels: Number of Z-stack levels to generate
76 z_step_size: Spacing between Z-steps in microns
77 num_cells: Number of cells to generate
78 cell_size_range: Range of cell sizes (min, max)
79 cell_eccentricity_range: Range of cell eccentricity (min, max)
80 cell_intensity_range: Range of cell intensity (min, max)
81 background_intensity: Background intensity level
82 noise_level: Amount of noise to add
83 wavelength_params: Optional dictionary of parameters for each wavelength
84 Example: {
85 1: {
86 'num_cells': 100,
87 'cell_size_range': (10, 30),
88 'cell_eccentricity_range': (0.1, 0.5),
89 'cell_intensity_range': (5000, 20000),
90 'background_intensity': 500
91 },
92 2: {
93 'num_cells': 50,
94 'cell_size_range': (5, 15),
95 'cell_eccentricity_range': (0.3, 0.8),
96 'cell_intensity_range': (3000, 12000),
97 'background_intensity': 300
98 }
99 }
100 shared_cell_fraction: Fraction of cells shared between wavelengths (0.0-1.0)
101 0.0 means all cells are unique to each wavelength
102 1.0 means all cells are shared between wavelengths
103 Default is 0.95 (95% shared)
104 wavelength_intensities: Dictionary mapping wavelength indices to fixed intensities
105 Example: {1: 20000, 2: 10000}
106 wavelength_backgrounds: Dictionary mapping wavelength indices to background intensities
107 Example: {1: 800, 2: 400}
108 wells: List of well IDs to generate (e.g., ['A01', 'A02'])
109 format: Format of the filenames ('ImageXpress' or 'OperaPhenix')
110 auto_image_size: If True, automatically calculate image size based on grid and tile parameters
111 random_seed: Random seed for reproducibility
112 """
113 self.output_dir = Path(output_dir)
114 self.grid_size = grid_size
115 self.tile_size = tile_size
116 self.overlap_percent = overlap_percent
117 self.stage_error_px = stage_error_px
119 # Calculate image size if auto_image_size is True
120 if auto_image_size:
121 self.image_size = self._calculate_image_size(grid_size, tile_size, overlap_percent, stage_error_px)
122 print(f"Auto-calculated image size: {self.image_size[0]}x{self.image_size[1]}")
123 else:
124 self.image_size = image_size
125 self.wavelengths = wavelengths
126 self.z_stack_levels = z_stack_levels
127 self.z_step_size = z_step_size
128 self.num_cells = num_cells
129 self.cell_size_range = cell_size_range
130 self.cell_eccentricity_range = cell_eccentricity_range
131 self.cell_intensity_range = cell_intensity_range
132 self.background_intensity = background_intensity
133 self.noise_level = noise_level
134 self.wavelength_params = wavelength_params or {}
135 self.shared_cell_fraction = shared_cell_fraction
137 # Set default wavelength intensities if not provided
138 if wavelength_intensities is None:
139 self.wavelength_intensities = {1: 20000, 2: 10000}
140 # Add defaults for additional wavelengths if needed
141 for w in range(3, wavelengths + 1):
142 self.wavelength_intensities[w] = 15000
143 else:
144 self.wavelength_intensities = wavelength_intensities
146 # Set default wavelength backgrounds if not provided
147 if wavelength_backgrounds is None:
148 self.wavelength_backgrounds = {1: 800, 2: 400}
149 # Add defaults for additional wavelengths if needed
150 for w in range(3, wavelengths + 1):
151 self.wavelength_backgrounds[w] = 600
152 else:
153 self.wavelength_backgrounds = wavelength_backgrounds
155 # Store the wells to generate
156 self.wells = wells
158 # Store the format
159 self.format = format
161 # Store the base random seed
162 self.base_random_seed = random_seed
164 # Set random seed if provided
165 if random_seed is not None:
166 np.random.seed(random_seed)
167 random.seed(random_seed)
169 # Create output directory structure
170 # For ImageXpress, create TimePoint_1 directory
171 # For Opera Phenix, create Images directory
172 if self.format == 'ImageXpress':
173 self.timepoint_dir = self.output_dir / "TimePoint_1"
174 self.timepoint_dir.mkdir(parents=True, exist_ok=True)
175 else: # OperaPhenix
176 # Create the Images directory for Opera Phenix
177 self.images_dir = self.output_dir / "Images"
178 self.images_dir.mkdir(parents=True, exist_ok=True)
179 self.timepoint_dir = self.images_dir # Store images in the Images directory
181 # Calculate effective step size with overlap
182 self.step_x = int(tile_size[0] * (1 - overlap_percent / 100))
183 self.step_y = int(tile_size[1] * (1 - overlap_percent / 100))
185 # First, generate a common set of base cells for all wavelengths
186 # This ensures consistent patterns across wavelengths for better registration
187 base_cells = []
188 max_num_cells = max([
189 self.wavelength_params.get(w+1, {}).get('num_cells', self.num_cells)
190 for w in range(wavelengths)
191 ])
193 # Generate shared cell positions and common attributes
194 for i in range(max_num_cells):
195 # Generate cell in a way that ensures good features in overlap regions
196 # Bias cell positions to increase density in overlap regions for better registration
198 # Calculate overlap regions (where tiles will overlap)
199 overlap_x = int(tile_size[0] * (overlap_percent / 100))
200 overlap_y = int(tile_size[1] * (overlap_percent / 100))
202 # Very strongly favor overlap regions with 80% probability
203 # to ensure very high density of features in the 10% overlap region for reliable registration
204 if np.random.random() < 0.8:
205 # Position in an overlap region between tiles
206 col = np.random.randint(0, grid_size[1])
207 row = np.random.randint(0, grid_size[0])
209 # Calculate base tile position
210 base_x = col * self.step_x
211 base_y = row * self.step_y
213 # Position cells in right/bottom overlapping regions
214 if np.random.random() < 0.5:
215 # Right overlap region
216 x = base_x + tile_size[0] - overlap_x + np.random.randint(0, overlap_x)
217 y = base_y + np.random.randint(0, tile_size[1])
218 else:
219 # Bottom overlap region
220 x = base_x + np.random.randint(0, tile_size[0])
221 y = base_y + tile_size[1] - overlap_y + np.random.randint(0, overlap_y)
223 # Ensure we're within image bounds
224 x = min(x, image_size[0] - 1)
225 y = min(y, image_size[1] - 1)
226 else:
227 # Random position anywhere in the image
228 x = np.random.randint(0, image_size[0])
229 y = np.random.randint(0, image_size[1])
231 # Common cell attributes
232 size = np.random.uniform(*self.cell_size_range)
233 eccentricity = np.random.uniform(*self.cell_eccentricity_range)
234 rotation = np.random.uniform(0, 2*np.pi)
236 base_cells.append({
237 'x': x,
238 'y': y,
239 'size': size,
240 'eccentricity': eccentricity,
241 'rotation': rotation
242 })
244 # We'll generate cell parameters for each well and wavelength on demand
245 # This is just a placeholder initialization
246 self.cell_params = {}
248 # Store wavelength-specific parameters for later use
249 self.wavelength_specific_params = []
250 for w in range(wavelengths):
251 wavelength_idx = w + 1 # 1-based wavelength index
253 # Get wavelength-specific parameters or use defaults
254 w_params = self.wavelength_params.get(wavelength_idx, {})
255 w_num_cells = w_params.get('num_cells', self.num_cells)
256 w_cell_size_range = w_params.get('cell_size_range', self.cell_size_range)
257 w_cell_intensity_range = w_params.get('cell_intensity_range', self.cell_intensity_range)
259 self.wavelength_specific_params.append({
260 'wavelength_idx': wavelength_idx,
261 'num_cells': w_num_cells,
262 'cell_size_range': w_cell_size_range,
263 'cell_intensity_range': w_cell_intensity_range
264 })
266 # We'll generate cells on demand in generate_cell_image
268 def generate_cell_image(self, wavelength, z_level, well=None):
269 """
270 Generate a full image with cells for a specific wavelength and Z level.
272 Args:
273 wavelength: Wavelength channel index
274 z_level: Z-stack level index
275 well: Well identifier (e.g., 'A01')
277 Returns:
278 Full image with cells
279 """
280 # Generate a unique key for this well and wavelength
281 key = f"{well}_{wavelength}" if well else f"default_{wavelength}"
283 # Get wavelength-specific parameters
284 wavelength_idx = wavelength + 1 # Convert to 1-based index for params lookup
285 w_params = self.wavelength_params.get(wavelength_idx, {})
287 # Generate cells for this well and wavelength if not already generated
288 if key not in self.cell_params:
289 # Get parameters for cell generation
290 w_num_cells = w_params.get('num_cells', self.num_cells)
291 w_cell_size_range = w_params.get('cell_size_range', self.cell_size_range)
292 w_cell_intensity_range = w_params.get('cell_intensity_range', self.cell_intensity_range)
294 # Generate cells for this wavelength
295 cells = []
296 for i in range(w_num_cells):
297 # Generate random position for this wavelength
298 x = np.random.randint(0, self.image_size[0])
299 y = np.random.randint(0, self.image_size[1])
301 # Generate random cell properties
302 size = np.random.uniform(w_cell_size_range[0], w_cell_size_range[1])
303 eccentricity = np.random.uniform(self.cell_eccentricity_range[0], self.cell_eccentricity_range[1])
304 rotation = np.random.uniform(0, 2*np.pi)
306 # Set very different intensities for each wavelength to make them easily distinguishable
307 if wavelength_idx == 1:
308 # First wavelength: very high intensity
309 intensity = 25000
310 elif wavelength_idx == 2:
311 # Second wavelength: medium intensity
312 intensity = 10000
313 else:
314 # Other wavelengths: lower intensity
315 intensity = 5000 + (wavelength_idx * 1000) # Increase slightly for each additional wavelength
317 cells.append({
318 'x': x,
319 'y': y,
320 'size': size,
321 'eccentricity': eccentricity,
322 'rotation': rotation,
323 'intensity': intensity
324 })
326 # Store cells for this well and wavelength
327 self.cell_params[key] = cells
329 # Get cells for this well and wavelength
330 cells = self.cell_params[key]
332 # Get background intensity from wavelength_backgrounds or use default
333 w_background = self.wavelength_backgrounds.get(wavelength_idx, self.background_intensity)
335 # Create empty image with wavelength-specific background intensity
336 # Ensure image is 2D (not 3D) to avoid shape mismatch in ashlar
337 image = np.ones(self.image_size, dtype=np.uint16) * w_background
339 # Get cell parameters for this well and wavelength
340 cells = self.cell_params[key]
342 # Calculate Z-focus factor (1.0 at center Z, decreasing toward edges)
343 if self.z_stack_levels > 1:
344 z_center = (self.z_stack_levels - 1) / 2
345 z_distance = abs(z_level - z_center)
346 z_factor = 1.0 - (z_distance / z_center) if z_center > 0 else 1.0
347 else:
348 z_factor = 1.0
350 # Draw each cell
351 for cell in cells:
352 # Adjust intensity based on Z level (cells are brightest at focus)
353 intensity = cell['intensity'] * z_factor
355 # Calculate ellipse parameters
356 a = cell['size']
357 b = cell['size'] * (1 - cell['eccentricity'])
359 # Generate ellipse coordinates
360 rr, cc = draw.ellipse(
361 cell['y'], cell['x'],
362 b, a,
363 rotation=cell['rotation'],
364 shape=self.image_size
365 )
367 # Add cell to image
368 image[rr, cc] = intensity
370 # Add noise
371 # Use wavelength-specific noise level if provided
372 w_noise_level = w_params.get('noise_level', self.noise_level)
373 noise = np.random.normal(0, w_noise_level, self.image_size)
374 image = image + noise
376 # Apply blur based on Z distance from focus
377 if self.z_stack_levels > 1:
378 # More blur for Z levels further from center
379 # Scale blur by z_step_size to create more realistic Z-stack effect
380 # z_step_size controls the amount of blur between Z-steps
381 blur_sigma = (self.z_step_size/500) * (1.0 + 2.0 * (1.0 - z_factor))
382 print(f" Z-level {z_level}: blur_sigma={blur_sigma:.2f} (z_factor={z_factor:.2f}, z_step_size={self.z_step_size})")
383 image = filters.gaussian(image, sigma=blur_sigma, preserve_range=True)
385 # Ensure valid pixel values
386 image = np.clip(image, 0, 65535).astype(np.uint16)
388 return image
390 # We've replaced the generate_tiles method with position pre-generation in generate_dataset
392 def generate_htd_file(self):
393 """Generate HTD file with metadata in the format expected by ezstitcher."""
394 # Derive plate name from output directory name
395 plate_name = self.output_dir.name
397 if self.format == 'OperaPhenix':
398 # Generate Index.xml for Opera Phenix
399 return self.generate_opera_phenix_index_xml(plate_name)
400 else:
401 # Generate HTD file for ImageXpress
402 htd_filename = f"{plate_name}.HTD"
404 # Generate the main HTD file in the plate dir
405 htd_path = self.output_dir / htd_filename
407 # Basic HTD file content matching the format of real HTD files
408 htd_content = f""""HTSInfoFile", Version 1.0
409"Description", "Synthetic microscopy data for testing"
410"PlateType", 6
411"TimePoints", 1
412"ZSeries", {"TRUE" if self.z_stack_levels > 1 else "FALSE"}
413"ZSteps", {self.z_stack_levels}
414"ZProjection", FALSE
415"XWells", 4
416"YWells", 3"""
418 # Add wells selection (only the wells we're using are TRUE)
419 for y in range(3): # 3 rows (A, B, C)
420 row_wells = []
421 for x in range(4): # 4 columns (1, 2, 3, 4)
422 well = f"{chr(65+y)}{x+1:02d}" # A01, A02, etc.
423 row_wells.append("TRUE" if well in self.wells else "FALSE")
424 htd_content += f"\n\"WellsSelection{y+1}\", {', '.join(row_wells)}"
426 # Add sites information
427 htd_content += f"\n\"Sites\", TRUE"
428 htd_content += f"\n\"XSites\", {self.grid_size[1]}"
429 htd_content += f"\n\"YSites\", {self.grid_size[0]}"
431 # Add site selection rows (all set to FALSE except the ones we're using)
432 for y in range(self.grid_size[0]):
433 row = []
434 for x in range(self.grid_size[1]):
435 row.append("TRUE") # All sites are used in our synthetic data
436 htd_content += f"\n\"SiteSelection{y+1}\", {', '.join(row)}"
438 # Add wavelength information
439 htd_content += f"\n\"Waves\", TRUE"
440 htd_content += f"\n\"NWavelengths\", {self.wavelengths}"
442 # Add wavelength names and collection flags
443 for w in range(self.wavelengths):
444 htd_content += f"\n\"WaveName{w+1}\", \"W{w+1}\""
445 htd_content += f"\n\"WaveCollect{w+1}\", 1"
447 # Add unique identifier and end file marker
448 htd_content += f"\n\"UniquePlateIdentifier\", \"{plate_name}-{datetime.now().strftime('%Y%m%d-%H%M%S')}\""
449 htd_content += "\n\"EndFile\""
451 # Write HTD file in plate root directory
452 with open(htd_path, 'w') as f:
453 f.write(htd_content)
455 # For ImageXpress, also create a copy in the TimePoint directory
456 timepoint_htd_path = self.timepoint_dir / htd_filename
457 with open(timepoint_htd_path, 'w') as f:
458 f.write(htd_content)
460 return htd_path
462 def generate_opera_phenix_index_xml(self, plate_name):
463 """Generate Index.xml file for Opera Phenix format."""
464 # Create the Index.xml file in the Images directory
465 index_xml_path = self.images_dir / "Index.xml"
467 # Get current date and time for the measurement ID
468 current_time = datetime.now().strftime("%Y-%m-%dT%H_%M_%S")
469 measurement_id = f"{plate_name}__{current_time}-Measurement 1"
471 # Create a unique ID for the measurement
472 import uuid
473 unique_id = str(uuid.uuid4())
475 # Extract row and column numbers from wells
476 # Convert well names like 'A01' to row and column indices
477 well_indices = []
478 for well in self.wells:
479 row = ord(well[0]) - ord('A') + 1 # A -> 1, B -> 2, etc.
480 col = int(well[1:3])
481 well_indices.append((row, col))
483 # Calculate pixel size in meters (for ImageResolutionX/Y)
484 # Default is 0.65 µm, but we'll use a more realistic value for Opera Phenix
485 pixel_size_meters = 1.1867525298988041E-06 # ~1.19 µm
487 # Calculate Z-step size in meters
488 z_step_size_meters = self.z_step_size * 1e-6 # Convert from µm to m
490 # Start building the XML content
491 xml_content = f"""<?xml version="1.0" encoding="utf-8"?>
492<EvaluationInputData xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="1" xmlns="http://www.perkinelmer.com/PEHH/HarmonyV6">
493 <User>Synthetic</User>
494 <InstrumentType>Phenix</InstrumentType>
495 <Plates>
496 <Plate>
497 <PlateID>{plate_name}</PlateID>
498 <MeasurementID>{unique_id}</MeasurementID>
499 <MeasurementStartTime>{datetime.now().isoformat()}-04:00</MeasurementStartTime>
500 <n>{plate_name}</n>
501 <PlateTypeName>96well</PlateTypeName>
502 <PlateRows>8</PlateRows>
503 <PlateColumns>12</PlateColumns>"""
505 # Add wells
506 for row, col in well_indices:
507 xml_content += f"\n <Well id=\"{row:02d}{col:02d}\" />"
509 xml_content += "\n </Plate>\n </Plates>\n <Wells>"
511 # Add well details
512 for row, col in well_indices:
513 xml_content += f"\n <Well>\n <id>{row:02d}{col:02d}</id>\n <Row>{row}</Row>\n <Col>{col}</Col>"
515 # Add images for each site and channel
516 for site in range(1, self.grid_size[0] * self.grid_size[1] + 1):
517 for channel in range(1, self.wavelengths + 1):
518 for z in range(1, self.z_stack_levels + 1):
519 xml_content += f"\n <Image id=\"{row:02d}{col:02d}K1F{site}P{z}R{channel}\" />"
521 xml_content += "\n </Well>"
523 # Add image details section
524 xml_content += """
525 </Wells>
526 <Images>
527 <Map>
528 <Entry ChannelID="1">
529 <ChannelName>HOECHST 33342</ChannelName>
530 <ImageType>Signal</ImageType>
531 <AcquisitionType>NonConfocal</AcquisitionType>
532 <IlluminationType>Epifluorescence</IlluminationType>
533 <ChannelType>Fluorescence</ChannelType>
534 <ImageResolutionX Unit="m">{}</ImageResolutionX>
535 <ImageResolutionY Unit="m">{}</ImageResolutionY>
536 <ImageSizeX>{}</ImageSizeX>
537 <ImageSizeY>{}</ImageSizeY>
538 <BinningX>2</BinningX>
539 <BinningY>2</BinningY>
540 <MaxIntensity>65536</MaxIntensity>
541 <CameraType>AndorZylaCam</CameraType>
542 <MainExcitationWavelength Unit="nm">375</MainExcitationWavelength>
543 <MainEmissionWavelength Unit="nm">456</MainEmissionWavelength>
544 <ObjectiveMagnification Unit="">10</ObjectiveMagnification>
545 <ObjectiveNA Unit="">0.3</ObjectiveNA>
546 <ExposureTime Unit="s">0.03</ExposureTime>
547 <OrientationMatrix>[[1.009457,0,0,34.3],[0,-1.009457,0,-15.1],[0,0,1.33,-6.014]]</OrientationMatrix>
548 <CropArea>[[0,0],[2160,2160],[2160,2160]]</CropArea>
549 </Entry>""".format(pixel_size_meters, pixel_size_meters, self.image_size[0], self.image_size[1])
551 # Add entries for each channel
552 channel_names = ["Calcein", "Alexa 647", "FITC", "TRITC", "Cy5"]
553 excitation_wavelengths = [488, 647, 488, 561, 647]
554 emission_wavelengths = [525, 665, 525, 590, 665]
555 exposure_times = [0.05, 0.1, 0.05, 0.08, 0.1]
557 for channel in range(2, self.wavelengths + 1):
558 channel_idx = min(channel - 2, len(channel_names) - 1) # Ensure we don't go out of bounds
559 xml_content += f"""
560 <Entry ChannelID="{channel}">
561 <ChannelName>{channel_names[channel_idx]}</ChannelName>
562 <ImageType>Signal</ImageType>
563 <AcquisitionType>NonConfocal</AcquisitionType>
564 <IlluminationType>Epifluorescence</IlluminationType>
565 <ChannelType>Fluorescence</ChannelType>
566 <ImageResolutionX Unit="m">{pixel_size_meters}</ImageResolutionX>
567 <ImageResolutionY Unit="m">{pixel_size_meters}</ImageResolutionY>
568 <ImageSizeX>{self.image_size[0]}</ImageSizeX>
569 <ImageSizeY>{self.image_size[1]}</ImageSizeY>
570 <BinningX>2</BinningX>
571 <BinningY>2</BinningY>
572 <MaxIntensity>65536</MaxIntensity>
573 <CameraType>AndorZylaCam</CameraType>
574 <MainExcitationWavelength Unit="nm">{excitation_wavelengths[channel_idx]}</MainExcitationWavelength>
575 <MainEmissionWavelength Unit="nm">{emission_wavelengths[channel_idx]}</MainEmissionWavelength>
576 <ObjectiveMagnification Unit="">10</ObjectiveMagnification>
577 <ObjectiveNA Unit="">0.3</ObjectiveNA>
578 <ExposureTime Unit="s">{exposure_times[channel_idx]}</ExposureTime>
579 <OrientationMatrix>[[1.009457,0,0,34.3],[0,-1.009457,0,-15.1],[0,0,1.33,-6.014]]</OrientationMatrix>
580 <CropArea>[[0,0],[2160,2160],[2160,2160]]</CropArea>
581 </Entry>"""
583 # Add image information section
584 xml_content += f"""
585 </Map>
586 <PixelSizeCalibration>
587 <PixelSize Unit="µm">{pixel_size_meters * 1e6:.4f}</PixelSize>
588 <MagnificationRatio>1.0</MagnificationRatio>
589 </PixelSizeCalibration>"""
591 # Add detailed image information for each image
592 for row, col in well_indices:
593 for site in range(1, self.grid_size[0] * self.grid_size[1] + 1):
594 # Calculate position for this site
595 site_row = (site - 1) // self.grid_size[1]
596 site_col = (site - 1) % self.grid_size[1]
598 # Calculate position in meters (typical Opera Phenix values)
599 # These are arbitrary values for demonstration
600 pos_x = 0.000576762 + site_col * 0.001 # Arbitrary X position
601 pos_y = 0.000576762 + site_row * 0.001 # Arbitrary Y position
603 for z in range(1, self.z_stack_levels + 1):
604 # Calculate Z position based on Z level
605 pos_z = 0.0001 + (z - 1) * z_step_size_meters
606 abs_pos_z = 0.135809004 + (z - 1) * z_step_size_meters # Arbitrary base Z position
608 for channel in range(1, self.wavelengths + 1):
609 # Create image ID
610 image_id = f"{row:02d}{col:02d}K1F{site}P{z}R{channel}"
612 # Create URL (filename)
613 url = f"r{row:02d}c{col:02d}f{site}p{z:02d}-ch{channel}sk1fk1fl1.tiff"
615 # Add image element
616 xml_content += f"""
617 <Image Version="1">
618 <id>{image_id}</id>
619 <State>Ok</State>
620 <URL>{url}</URL>
621 <Row>{row}</Row>
622 <Col>{col}</Col>
623 <FieldID>{site}</FieldID>
624 <PlaneID>{z}</PlaneID>
625 <TimepointID>1</TimepointID>
626 <SequenceID>1</SequenceID>
627 <GroupID>1</GroupID>
628 <ChannelID>{channel}</ChannelID>
629 <FlimID>1</FlimID>
630 <PositionX Unit="m">{pos_x}</PositionX>
631 <PositionY Unit="m">{pos_y}</PositionY>
632 <PositionZ Unit="m">{pos_z}</PositionZ>
633 <AbsPositionZ Unit="m">{abs_pos_z}</AbsPositionZ>
634 <MeasurementTimeOffset Unit="s">0</MeasurementTimeOffset>
635 <AbsTime>{datetime.now().isoformat()}-04:00</AbsTime>
636 </Image>"""
638 # Close the XML
639 xml_content += """
640 </Images>
641</EvaluationInputData>"""
643 # Write the XML file
644 with open(index_xml_path, 'w', encoding='utf-8') as f:
645 f.write(xml_content)
647 return index_xml_path
649 def _calculate_image_size(self, grid_size, tile_size, overlap_percent, stage_error_px):
650 """
651 Calculate the appropriate image size based on grid dimensions, tile size, and overlap.
653 Args:
654 grid_size: Tuple of (rows, cols) for the grid of tiles
655 tile_size: Size of each tile (width, height)
656 overlap_percent: Percentage of overlap between tiles
657 stage_error_px: Random error in stage positioning (pixels)
659 Returns:
660 tuple: (width, height) of the calculated image size
661 """
662 # Calculate effective step size with overlap
663 step_x = int(tile_size[0] * (1 - overlap_percent / 100))
664 step_y = int(tile_size[1] * (1 - overlap_percent / 100))
666 # Calculate minimum required size
667 min_width = step_x * (grid_size[1] - 1) + tile_size[0]
668 min_height = step_y * (grid_size[0] - 1) + tile_size[1]
670 # Add margin for stage positioning errors
671 margin = stage_error_px * 2
672 width = min_width + margin
673 height = min_height + margin
675 return (width, height)
677 def generate_dataset(self):
678 """Generate the complete dataset."""
679 print(f"Generating synthetic microscopy dataset in {self.output_dir}")
680 print(f"Grid size: {self.grid_size[0]}x{self.grid_size[1]}")
681 print(f"Wavelengths: {self.wavelengths}")
682 print(f"Z-stack levels: {self.z_stack_levels}")
683 print(f"Wells: {', '.join(self.wells)}")
685 # Generate HTD file
686 htd_path = self.generate_htd_file()
687 print(f"Generated HTD file: {htd_path}")
689 # Process each well
690 for well_index, well in enumerate(self.wells):
691 print(f"\nGenerating data for well {well}...")
693 # Use a different random seed for each well if base seed is provided
694 if self.base_random_seed is not None:
695 well_seed = self.base_random_seed + well_index
696 np.random.seed(well_seed)
697 random.seed(well_seed)
698 print(f"Using random seed {well_seed} for well {well}")
700 # Pre-generate the positions for each site to ensure consistency across Z-levels
701 # This creates a mapping of site_index -> (base_x_pos, base_y_pos)
702 site_positions = {}
703 site_index = 1
704 for row in range(self.grid_size[0]):
705 for col in range(self.grid_size[1]):
706 # Calculate base position
707 x = col * self.step_x
708 y = row * self.step_y
710 # Add random stage positioning error
711 # We apply this error to the base position, it will be constant across Z-steps
712 x_error = np.random.randint(-self.stage_error_px, self.stage_error_px)
713 y_error = np.random.randint(-self.stage_error_px, self.stage_error_px)
715 x_pos = x + x_error
716 y_pos = y + y_error
718 # Ensure we don't go out of bounds
719 x_pos = max(0, min(x_pos, self.image_size[0] - self.tile_size[0]))
720 y_pos = max(0, min(y_pos, self.image_size[1] - self.tile_size[1]))
722 site_positions[site_index] = (x_pos, y_pos)
723 site_index += 1
725 # For multiple Z-stack levels
726 if self.z_stack_levels > 1:
727 # Handle differently based on format
728 if self.format == 'ImageXpress':
729 # For ImageXpress, create ZStep folders
730 # Make sure all ZStep folders are created first
731 for z in range(self.z_stack_levels):
732 z_level = z + 1 # 1-based Z level index
733 zstep_dir = self.timepoint_dir / f"ZStep_{z_level}"
734 zstep_dir.mkdir(exist_ok=True)
735 print(f"Created ZStep folder: {zstep_dir}")
736 else: # OperaPhenix
737 # Opera Phenix doesn't use ZStep folders - all images go directly in the Images folder
738 print(f"Opera Phenix format: all Z-stack images will be placed directly in the Images folder")
740 # Now generate images for each Z-level
741 for z in range(self.z_stack_levels):
742 z_level = z + 1 # 1-based Z level index
744 # For ImageXpress, use ZStep folders; for Opera Phenix, use the Images folder directly
745 if self.format == 'ImageXpress':
746 target_dir = self.timepoint_dir / f"ZStep_{z_level}"
747 else: # OperaPhenix
748 target_dir = self.timepoint_dir # This is already set to self.images_dir for Opera Phenix
750 # Generate images for each wavelength at this Z level
751 for w in range(self.wavelengths):
752 wavelength = w + 1 # 1-based wavelength index
754 # Generate full image
755 print(f"Generating full image for wavelength {wavelength}, Z level {z_level}...")
756 full_image = self.generate_cell_image(w, z, well=well)
758 # Save tiles for this Z level using the pre-generated positions
759 site_index = 1
760 for row in range(self.grid_size[0]):
761 for col in range(self.grid_size[1]):
762 # Get the pre-generated position
763 x_pos, y_pos = site_positions[site_index]
765 # Extract tile
766 tile = full_image[
767 y_pos:y_pos + self.tile_size[1],
768 x_pos:x_pos + self.tile_size[0]
769 ]
771 # Create filename based on format
772 if self.format == 'ImageXpress':
773 # ImageXpress format: WellID_sXXX_wY.tif (Z-level is indicated by the ZStep folder)
774 # Create filename without zero-padding site indices
775 # This tests the padding functionality in the stitcher
776 filename = f"{well}_s{site_index}_w{wavelength}.tif"
777 else: # OperaPhenix
778 # Opera Phenix format: rXXcYYfZZZpWW-chVskNfkNflN.tiff
779 # Extract row and column from well ID (e.g., 'A01' -> row=1, col=1)
780 row = ord(well[0]) - ord('A') + 1
781 col = int(well[1:3])
782 filename = f"r{row:02d}c{col:02d}f{site_index}p{z_level:02d}-ch{wavelength}sk1fk1fl1.tiff"
783 filepath = target_dir / filename
785 # Save image without compression
786 tifffile.imwrite(filepath, tile, compression=None)
788 # Print progress with full path for debugging
789 print(f" Saved tile: {target_dir.name}/{filename} (position: {x_pos}, {y_pos})")
790 print(f" Full path: {filepath.resolve()}")
791 site_index += 1
792 else:
793 # For single Z level (no Z-stack), just save files directly in TimePoint_1
794 for w in range(self.wavelengths):
795 wavelength = w + 1 # 1-based wavelength index
797 # Generate full image for the single Z level
798 print(f"Generating full image for wavelength {wavelength} (no Z-stack)...")
799 full_image = self.generate_cell_image(w, 0, well=well)
801 # Save tiles without Z-stack index
802 site_index = 1
803 for row in range(self.grid_size[0]):
804 for col in range(self.grid_size[1]):
805 # Get the pre-generated position
806 x_pos, y_pos = site_positions[site_index]
808 # Extract tile
809 tile = full_image[
810 y_pos:y_pos + self.tile_size[1],
811 x_pos:x_pos + self.tile_size[0]
812 ]
814 # Create filename based on format
815 if self.format == 'ImageXpress':
816 # ImageXpress format: WellID_sXXX_wY.tif
817 # Create filename without Z-index and without zero-padding site indices
818 # This tests the padding functionality in the stitcher
819 filename = f"{well}_s{site_index}_w{wavelength}.tif"
820 else: # OperaPhenix
821 # Opera Phenix format: rXXcYYfZZZpWW-chVskNfkNflN.tiff
822 # Extract row and column from well ID (e.g., 'A01' -> row=1, col=1)
823 row = ord(well[0]) - ord('A') + 1
824 col = int(well[1:3])
825 filename = f"r{row:02d}c{col:02d}f{site_index}p01-ch{wavelength}sk1fk1fl1.tiff"
826 filepath = self.timepoint_dir / filename
828 # Save image without compression
829 tifffile.imwrite(filepath, tile, compression=None)
831 # Print progress
832 print(f" Saved tile: {filename} (position: {x_pos}, {y_pos})")
833 site_index += 1
835 print("Dataset generation complete!")