Coverage for openhcs/io/metadata_migration.py: 9.2%
117 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-04 02:09 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-11-04 02:09 +0000
1"""
OpenHCS Legacy Metadata Migration Utilities

This module provides utilities to migrate old OpenHCS metadata files from the flat format
with absolute paths to the new subdirectory-keyed format with relative paths.

The migration handles:
- Converting flat metadata structure to subdirectory-keyed format
- Converting absolute paths to relative paths
- Renaming .zarr directories to clean names
- Detecting and preserving backend information (disk vs zarr)
- Creating atomic backups during migration

Usage as module:
    from openhcs.io.metadata_migration import migrate_plate_metadata, detect_legacy_format

    # Check if migration is needed
    if detect_legacy_format(metadata_dict):
        success = migrate_plate_metadata(plate_dir)

Usage as script:
    python -m openhcs.io.metadata_migration /path/to/plate/directory
    python -m openhcs.io.metadata_migration /path/to/plate/directory --dry-run
24"""
26import argparse
27import json
28import logging
29import sys
30from pathlib import Path
31from typing import Dict, Any
33from .metadata_writer import METADATA_CONFIG
35logger = logging.getLogger(__name__)
37# Use the centralized metadata filename constant
38METADATA_FILENAME = METADATA_CONFIG.METADATA_FILENAME
def detect_legacy_format(metadata_dict: Dict[str, Any]) -> bool:
    """
    Detect if metadata is in legacy format.

    Legacy format characteristics:
    - No 'subdirectories' key
    - 'image_files' contains absolute paths

    Only the first entry of 'image_files' is inspected. Input that does not
    have the expected shape (a non-mapping document, or an 'image_files'
    value that is not a non-empty list/tuple starting with a string) is
    reported as "not legacy" instead of raising.

    Args:
        metadata_dict: Loaded metadata dictionary

    Returns:
        True if legacy format detected, False otherwise
    """
    from collections.abc import Mapping

    # A parsed JSON document can be any JSON value (list, str, None, ...);
    # only a mapping can be a metadata document.
    if not isinstance(metadata_dict, Mapping):
        return False

    # New format has subdirectories key
    if "subdirectories" in metadata_dict:
        return False

    # Check the container type before indexing: image_files[0] on a dict
    # raises KeyError, and on a plain string it would test a single character.
    image_files = metadata_dict.get("image_files")
    if not isinstance(image_files, (list, tuple)) or not image_files:
        return False

    first_entry = image_files[0]
    if not isinstance(first_entry, str):
        return False

    # If first file path is absolute, assume legacy format
    return Path(first_entry).is_absolute()
def _rename_zarr_directories(plate_root: Path, dry_run: bool = False) -> Dict[str, str]:
    """
    Rename any directories containing '.zarr' in their name to remove the suffix.

    Only direct children of ``plate_root`` are considered. Every occurrence of
    '.zarr' in a directory name is removed to form the new name. A directory
    whose target name already exists is left untouched and a warning is logged.

    Candidates are collected into a sorted snapshot before any rename happens,
    so the directory is never modified while it is being listed and the order
    of the returned mapping is deterministic.

    Args:
        plate_root: Root directory of the plate
        dry_run: If True, only simulate the renames

    Returns:
        Dictionary mapping old names to new names. In dry-run mode it lists
        the renames that would be performed.
    """
    renames: Dict[str, str] = {}

    # Snapshot first: pathlib leaves it unspecified whether entries added or
    # removed during iteration are yielded. Sorting makes the mapping order
    # independent of the filesystem's listing order.
    zarr_dirs = sorted(
        (entry for entry in plate_root.iterdir() if entry.is_dir() and '.zarr' in entry.name),
        key=lambda entry: entry.name,
    )

    for item in zarr_dirs:
        old_name = item.name
        new_name = old_name.replace('.zarr', '')
        new_path = plate_root / new_name

        # Only rename if target doesn't already exist
        if new_path.exists():
            logger.warning(f"Cannot rename {old_name} to {new_name}: target already exists")
            continue

        if dry_run:
            logger.info(f"DRY RUN: Would rename directory: {old_name} → {new_name}")
        else:
            logger.info(f"Renaming directory: {old_name} → {new_name}")
            item.rename(new_path)
        renames[old_name] = new_name

    return renames
def migrate_legacy_metadata(legacy_metadata: Dict[str, Any], plate_root: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Migrate legacy flat metadata format to new subdirectory-keyed format.

    The result contains a single subdirectory entry. Its key is the first
    renamed '.zarr' directory if any were renamed (or would be, in dry-run
    mode); otherwise the first existing directory among 'images', 'data',
    'raw'; otherwise 'images'. Every legacy image path is reduced to its
    file name and prefixed with that subdirectory.

    Args:
        legacy_metadata: Legacy metadata dictionary
        plate_root: Root directory of the plate
        dry_run: If True, directories are not renamed on disk; the returned
            metadata is built as if the renames had happened

    Returns:
        Migrated metadata in new format
    """
    # Step 1: Rename any .zarr directories to clean names
    renames = _rename_zarr_directories(plate_root, dry_run)

    # Step 2: Determine subdirectory and backend from renames or find data directories
    has_zarr = bool(renames)  # If we renamed .zarr directories, this is zarr storage

    if renames:
        # Use the first renamed directory as the subdirectory
        sub_dir = next(iter(renames.values()))
    else:
        # Look for existing data directories. is_dir() rather than exists():
        # a regular file with one of these names is not a data directory.
        potential_dirs = ("images", "data", "raw")
        sub_dir = next(
            (name for name in potential_dirs if (plate_root / name).is_dir()),
            "images",  # Default fallback
        )

    # Step 3: Build relative paths using the subdirectory.
    # `or []` covers both a missing key and an explicit null value.
    image_files = legacy_metadata.get("image_files") or []
    relative_files = [
        f"{sub_dir}/{Path(legacy_path_str).name}"
        for legacy_path_str in image_files
    ]

    # Create new subdirectory-keyed structure
    migrated_metadata = {
        "subdirectories": {
            sub_dir: {
                "microscope_handler_name": legacy_metadata.get("microscope_handler_name"),
                "source_filename_parser_name": legacy_metadata.get("source_filename_parser_name"),
                "grid_dimensions": legacy_metadata.get("grid_dimensions"),
                "pixel_size": legacy_metadata.get("pixel_size"),
                "image_files": relative_files,
                "channels": legacy_metadata.get("channels"),
                "wells": legacy_metadata.get("wells"),
                "sites": legacy_metadata.get("sites"),
                "z_indexes": legacy_metadata.get("z_indexes"),
                "available_backends": {"zarr": True} if has_zarr else {"disk": True},
            }
        }
    }

    return migrated_metadata
def migrate_plate_metadata(plate_dir: Path, dry_run: bool = False, backup_suffix: str = ".backup") -> bool:
    """
    Migrate metadata file in a plate directory.

    The original file is renamed to ``<filename><backup_suffix>`` and the
    migrated metadata is written under the original name. If writing fails,
    the backup is moved back over the (possibly partial) new file.

    Args:
        plate_dir: Path to plate directory
        dry_run: If True, only show what would be done
        backup_suffix: Suffix for backup file; must not be empty

    Returns:
        True if migration was needed and successful, False otherwise
        (metadata file missing or unreadable, already in new format, empty
        backup suffix, or any failure during migration)
    """
    metadata_file = plate_dir / METADATA_FILENAME

    if not metadata_file.exists():
        logger.error(f"Metadata file not found: {metadata_file}")
        return False

    # With an empty suffix the backup path equals the metadata path; the
    # write below would then truncate the only copy of the original data.
    if not backup_suffix:
        logger.error(f"Backup suffix must not be empty - the backup would overwrite {metadata_file}")
        return False

    # Load existing metadata. ValueError covers json.JSONDecodeError and
    # UnicodeDecodeError.
    try:
        with open(metadata_file, 'r', encoding='utf-8') as f:
            metadata_dict = json.load(f)
    except (ValueError, OSError) as e:
        logger.error(f"Failed to load metadata from {metadata_file}: {e}")
        return False

    # A valid JSON file may still hold a non-object top-level value.
    if not isinstance(metadata_dict, dict):
        logger.error(f"Failed to load metadata from {metadata_file}: top-level JSON value is not an object")
        return False

    # Check if migration is needed
    if not detect_legacy_format(metadata_dict):
        logger.info(f"Metadata file {metadata_file} is already in new format - no migration needed")
        return False

    logger.info(f"Legacy format detected in {metadata_file}")

    # Perform migration.
    # NOTE(review): unless dry_run is set, this step already renames .zarr
    # directories on disk, before the backup below exists - confirm that a
    # later failure leaving renamed directories behind is acceptable.
    try:
        migrated_metadata = migrate_legacy_metadata(metadata_dict, plate_dir, dry_run)
    except Exception as e:
        logger.error(f"Failed to migrate metadata: {e}")
        return False

    if dry_run:
        logger.info(f"DRY RUN: Would migrate {metadata_file}")
        logger.info(f"DRY RUN: Would create backup {metadata_file}{backup_suffix}")
        logger.info(f"DRY RUN: Migrated metadata would have {len(migrated_metadata['subdirectories'])} subdirectories")
        return True

    # Create backup. with_name() appends the suffix verbatim and, unlike
    # with_suffix(), does not reject suffixes without a leading dot.
    backup_file = metadata_file.with_name(f"{metadata_file.name}{backup_suffix}")
    try:
        metadata_file.rename(backup_file)
        logger.info(f"Created backup: {backup_file}")
    except OSError as e:
        logger.error(f"Failed to create backup: {e}")
        return False

    # Write migrated metadata
    try:
        with open(metadata_file, 'w', encoding='utf-8') as f:
            json.dump(migrated_metadata, f, indent=2)
    except OSError as e:
        logger.error(f"Failed to write migrated metadata: {e}")
        # Restore backup. replace() overwrites a partially written file on
        # every platform; rename() fails on Windows if the target exists.
        try:
            backup_file.replace(metadata_file)
            logger.info("Restored original file from backup")
        except OSError:
            logger.error(f"Failed to restore backup - original file is at {backup_file}")
        return False

    logger.info(f"Successfully migrated metadata file: {metadata_file}")
    return True
def main():
    """
    Command-line entry point: migrate the metadata of one plate directory.

    Exits with status 0 when migrate_plate_metadata() returns True (including
    a successful dry run) and with status 1 when the given path does not
    exist, is not a directory, or migrate_plate_metadata() returns False.
    """
    parser = argparse.ArgumentParser(description="Migrate OpenHCS legacy metadata files")
    parser.add_argument("plate_directory", type=Path, help="Path to plate directory containing openhcs_metadata.json")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    parser.add_argument("--backup-suffix", default=".backup", help="Suffix for backup files (default: .backup)")

    args = parser.parse_args()

    # Without a configured handler, Python's last-resort handler only emits
    # WARNING and above, so the INFO progress and dry-run messages would be
    # dropped. basicConfig() does nothing if the root logger already has
    # handlers.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    plate_dir = args.plate_directory

    if not plate_dir.exists():
        logger.error(f"Plate directory does not exist: {plate_dir}")
        sys.exit(1)

    if not plate_dir.is_dir():
        logger.error(f"Path is not a directory: {plate_dir}")
        sys.exit(1)

    success = migrate_plate_metadata(plate_dir, args.dry_run, args.backup_suffix)

    if not success:
        # NOTE(review): migrate_plate_metadata() also returns False when the
        # file is already in the new format, so that case exits with 1 too.
        logger.error("Migration failed")
        sys.exit(1)

    if args.dry_run:
        logger.info("Dry run completed - no changes made")
    else:
        logger.info("Migration completed successfully")
    sys.exit(0)


if __name__ == "__main__":
    main()