Coverage for openhcs/io/metadata_migration.py: 9.7%
118 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
1"""
OpenHCS Legacy Metadata Migration Utilities

This module provides utilities to migrate old OpenHCS metadata files from the flat format
with absolute paths to the new subdirectory-keyed format with relative paths.

The migration handles:
- Converting flat metadata structure to subdirectory-keyed format
- Converting absolute paths to relative paths
- Renaming .zarr directories to clean names
- Detecting and preserving backend information (disk vs zarr)
- Creating atomic backups during migration

Usage as module:
    from openhcs.io.metadata_migration import migrate_plate_metadata, detect_legacy_format

    # Check if migration is needed
    if detect_legacy_format(metadata_dict):
        success = migrate_plate_metadata(plate_dir)

Usage as script:
    python -m openhcs.io.metadata_migration /path/to/plate/directory
    python -m openhcs.io.metadata_migration /path/to/plate/directory --dry-run
24"""
26import argparse
27import json
28import logging
29import os
30import sys
31from pathlib import Path
32from typing import Dict, Any, List
34from .metadata_writer import METADATA_CONFIG
36logger = logging.getLogger(__name__)
38# Use the centralized metadata filename constant
39METADATA_FILENAME = METADATA_CONFIG.METADATA_FILENAME
def detect_legacy_format(metadata_dict: Dict[str, Any]) -> bool:
    """
    Detect if metadata is in legacy format.

    Legacy format characteristics:
    - No 'subdirectories' key
    - 'image_files' contains absolute paths

    Only the first entry of 'image_files' is inspected. The path is tested
    against both POSIX and Windows rules, so a metadata file written on one
    platform is still recognised when the plate is opened on the other.

    Args:
        metadata_dict: Loaded metadata dictionary

    Returns:
        True if legacy format detected, False otherwise
    """
    # Function-scope import: the pure path flavours are only needed here.
    from pathlib import PurePosixPath, PureWindowsPath

    # New format has subdirectories key
    if "subdirectories" in metadata_dict:
        return False

    image_files = metadata_dict.get("image_files")
    # A missing, null, empty or non-sequence value cannot describe legacy
    # absolute paths (and indexing a dict with [0] would raise KeyError).
    if not isinstance(image_files, (list, tuple)) or not image_files:
        return False

    first_entry = image_files[0]
    if not isinstance(first_entry, str):
        return False

    # Host-flavoured Path.is_absolute() misses "C:\\..." on POSIX and "/..."
    # on Windows, so check both flavours explicitly.
    return (
        PurePosixPath(first_entry).is_absolute()
        or PureWindowsPath(first_entry).is_absolute()
    )
def _rename_zarr_directories(plate_root: Path, dry_run: bool = False) -> Dict[str, str]:
    """
    Rename any directories containing '.zarr' in their name to remove it.

    Every occurrence of '.zarr' is stripped from the directory name. A
    directory is skipped (with a warning) when the cleaned name already
    exists in ``plate_root`` or was already claimed by an earlier directory
    in the same pass.

    Args:
        plate_root: Root directory of the plate
        dry_run: If True, only simulate the renames

    Returns:
        Dictionary mapping old names to new names, in sorted order of the
        old names. In dry-run mode it holds the renames that would happen.
    """
    renames: Dict[str, str] = {}

    # Snapshot the listing before touching anything so the directory is not
    # mutated while it is being iterated, and sort it so the result (and the
    # "first renamed directory" callers rely on) is deterministic.
    candidates = sorted(
        (entry for entry in plate_root.iterdir()
         if entry.is_dir() and '.zarr' in entry.name),
        key=lambda entry: entry.name,
    )

    for item in candidates:
        old_name = item.name
        new_name = old_name.replace('.zarr', '')
        new_path = plate_root / new_name

        # Only rename if target doesn't already exist. A target claimed
        # earlier in this pass counts as existing, which keeps a dry run
        # consistent with what a real run would do.
        if new_path.exists() or new_name in renames.values():
            logger.warning(f"Cannot rename {old_name} to {new_name}: target already exists")
            continue

        if dry_run:
            logger.info(f"DRY RUN: Would rename directory: {old_name} → {new_name}")
        else:
            logger.info(f"Renaming directory: {old_name} → {new_name}")
            item.rename(new_path)
        renames[old_name] = new_name

    return renames
def migrate_legacy_metadata(legacy_metadata: Dict[str, Any], plate_root: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Migrate legacy flat metadata format to new subdirectory-keyed format.

    Note that unless ``dry_run`` is set, this renames '.zarr' directories
    inside ``plate_root`` on disk as a side effect.

    Args:
        legacy_metadata: Legacy metadata dictionary
        plate_root: Root directory of the plate
        dry_run: If True, directory renames are only simulated

    Returns:
        Migrated metadata in new format: a single entry under
        'subdirectories', keyed by the detected data subdirectory
    """
    # Function-scope import: only the filename extraction below needs it.
    from pathlib import PureWindowsPath

    # Step 1: Rename any .zarr directories to clean names
    renames = _rename_zarr_directories(plate_root, dry_run)

    # Step 2: Determine subdirectory and backend from renames or find data directories
    has_zarr = bool(renames)  # If we renamed .zarr directories, this is zarr storage

    if renames:
        # Use the first renamed directory as the subdirectory
        sub_dir = next(iter(renames.values()))
    else:
        # Look for existing data directories, falling back to "images"
        sub_dir = next(
            (name for name in ("images", "data", "raw") if (plate_root / name).exists()),
            "images",
        )

    # Step 3: Build relative paths using the subdirectory.
    # PureWindowsPath splits on both "/" and "\\", so the bare filename is
    # recovered whichever platform wrote the legacy absolute path; the host
    # flavoured Path would keep "C:\\dir\\img.tif" whole on POSIX.
    # `or []` tolerates an explicit null 'image_files' value.
    relative_files = [
        f"{sub_dir}/{PureWindowsPath(legacy_path_str).name}"
        for legacy_path_str in (legacy_metadata.get("image_files") or [])
    ]

    # Create new subdirectory-keyed structure
    return {
        "subdirectories": {
            sub_dir: {
                "microscope_handler_name": legacy_metadata.get("microscope_handler_name"),
                "source_filename_parser_name": legacy_metadata.get("source_filename_parser_name"),
                "grid_dimensions": legacy_metadata.get("grid_dimensions"),
                "pixel_size": legacy_metadata.get("pixel_size"),
                "image_files": relative_files,
                "channels": legacy_metadata.get("channels"),
                "wells": legacy_metadata.get("wells"),
                "sites": legacy_metadata.get("sites"),
                "z_indexes": legacy_metadata.get("z_indexes"),
                "available_backends": {"zarr": True} if has_zarr else {"disk": True},
            }
        }
    }
def migrate_plate_metadata(plate_dir: Path, dry_run: bool = False, backup_suffix: str = ".backup") -> bool:
    """
    Migrate metadata file in a plate directory.

    The original file is moved to ``<name><backup_suffix>`` and the migrated
    metadata is written in its place. If writing fails, the backup is moved
    back over the partially written file.

    Args:
        plate_dir: Path to plate directory
        dry_run: If True, only show what would be done
        backup_suffix: Suffix for backup file

    Returns:
        True if migration was needed and successful (or, in dry-run mode,
        would be performed), False otherwise. False is also returned when
        the metadata is already in the new format.
    """
    plate_dir = Path(plate_dir)  # accept plain string paths as well
    metadata_file = plate_dir / METADATA_FILENAME

    if not metadata_file.exists():
        logger.error(f"Metadata file not found: {metadata_file}")
        return False

    # Load existing metadata. ValueError covers both json.JSONDecodeError
    # and UnicodeDecodeError for files that are not valid UTF-8.
    try:
        with open(metadata_file, 'r', encoding="utf-8") as f:
            metadata_dict = json.load(f)
    except (ValueError, OSError) as e:
        logger.error(f"Failed to load metadata from {metadata_file}: {e}")
        return False

    # A JSON root that is not an object cannot be migrated.
    if not isinstance(metadata_dict, dict):
        logger.error(f"Failed to load metadata from {metadata_file}: top-level JSON value is not an object")
        return False

    # Check if migration is needed
    if not detect_legacy_format(metadata_dict):
        logger.info(f"Metadata file {metadata_file} is already in new format - no migration needed")
        return False

    logger.info(f"Legacy format detected in {metadata_file}")

    # Same name the dry-run message reports. Unlike with_suffix(), appending
    # to the name does not raise for suffix-less files or dot-less suffixes.
    backup_file = metadata_file.with_name(f"{metadata_file.name}{backup_suffix}")
    if backup_file == metadata_file:
        # An empty suffix would make the "backup" the file being overwritten.
        logger.error("Failed to create backup: backup suffix must not be empty")
        return False

    # Perform migration.
    # NOTE(review): in non-dry-run mode this already renames .zarr
    # directories on disk; a later backup/write failure does not undo them.
    try:
        migrated_metadata = migrate_legacy_metadata(metadata_dict, plate_dir, dry_run)
    except Exception as e:
        logger.error(f"Failed to migrate metadata: {e}")
        return False

    if dry_run:
        logger.info(f"DRY RUN: Would migrate {metadata_file}")
        logger.info(f"DRY RUN: Would create backup {metadata_file}{backup_suffix}")
        logger.info(f"DRY RUN: Migrated metadata would have {len(migrated_metadata['subdirectories'])} subdirectories")
        return True

    # Create backup
    try:
        metadata_file.rename(backup_file)
        logger.info(f"Created backup: {backup_file}")
    except OSError as e:
        logger.error(f"Failed to create backup: {e}")
        return False

    # Write migrated metadata
    try:
        with open(metadata_file, 'w', encoding="utf-8") as f:
            json.dump(migrated_metadata, f, indent=2)
    except (OSError, TypeError, ValueError) as e:
        logger.error(f"Failed to write migrated metadata: {e}")
        # Restore backup. replace() overwrites the partially written file on
        # every platform, whereas rename() fails on Windows when the target
        # already exists.
        try:
            backup_file.replace(metadata_file)
            logger.info("Restored original file from backup")
        except OSError:
            logger.error(f"Failed to restore backup - original file is at {backup_file}")
        return False

    logger.info(f"Successfully migrated metadata file: {metadata_file}")
    return True
def main() -> None:
    """
    Command-line entry point: migrate the metadata of one plate directory.

    Exits with status 0 when ``migrate_plate_metadata`` returns True and with
    status 1 otherwise. Status 1 therefore also covers a missing directory
    and metadata that is already in the new format.
    """
    parser = argparse.ArgumentParser(description="Migrate OpenHCS legacy metadata files")
    parser.add_argument("plate_directory", type=Path, help="Path to plate directory containing openhcs_metadata.json")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    parser.add_argument("--backup-suffix", default=".backup", help="Suffix for backup files (default: .backup)")

    args = parser.parse_args()

    # With no handler configured, logging's last-resort handler only emits
    # WARNING and above, so every INFO message (all dry-run output and the
    # success messages) would be lost. basicConfig() does nothing if the
    # root logger already has handlers.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    plate_dir = args.plate_directory

    if not plate_dir.exists():
        logger.error(f"Plate directory does not exist: {plate_dir}")
        sys.exit(1)

    if not plate_dir.is_dir():
        logger.error(f"Path is not a directory: {plate_dir}")
        sys.exit(1)

    success = migrate_plate_metadata(plate_dir, args.dry_run, args.backup_suffix)

    if not success:
        logger.error("Migration failed")
        sys.exit(1)

    if args.dry_run:
        logger.info("Dry run completed - no changes made")
    else:
        logger.info("Migration completed successfully")
    sys.exit(0)


if __name__ == "__main__":
    main()