Coverage for openhcs/core/memory/gpu_utils.py: 35.8%
83 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-01 18:33 +0000
1"""
2GPU utility functions for OpenHCS.
4This module provides utility functions for checking GPU availability
5across different frameworks (cupy, torch, tensorflow, jax).
7Doctrinal Clauses:
8- Clause 88 — No Inferred Capabilities
9- Clause 293 — GPU Pre-Declaration Enforcement
10"""
12import logging
13import os
14from typing import Optional
16from openhcs.core.utils import optional_import
18logger = logging.getLogger(__name__)
def check_cupy_gpu_available() -> Optional[int]:
    """
    Probe for a CUDA-capable GPU via cupy.

    Returns:
        GPU device ID if available, None otherwise
    """
    # GPU probing is explicitly disabled when running as a subprocess runner.
    if os.getenv('OPENHCS_SUBPROCESS_NO_GPU') == '1':
        logger.debug("Subprocess runner mode - skipping cupy GPU check")
        return None

    cp = optional_import("cupy")
    if cp is None:
        logger.debug("Cupy not installed")
        return None

    try:
        # Bail out early when CUDA is not reachable from cupy.
        if not cp.cuda.is_available():
            logger.debug("Cupy CUDA not available")
            return None
        device_id = cp.cuda.get_device_id()
    except Exception as e:
        # Any probing failure is treated as "no GPU" rather than an error.
        logger.debug("Error checking cupy GPU availability: %s", e)
        return None

    logger.debug("Cupy GPU available: device_id=%s", device_id)
    return device_id
def check_torch_gpu_available() -> Optional[int]:
    """
    Probe for a CUDA-capable GPU via torch.

    Returns:
        GPU device ID if available, None otherwise
    """
    # GPU probing is explicitly disabled when running as a subprocess runner.
    if os.getenv('OPENHCS_SUBPROCESS_NO_GPU') == '1':
        logger.debug("Subprocess runner mode - skipping torch GPU check")
        return None

    torch = optional_import("torch")
    if torch is None:
        logger.debug("Torch not installed")
        return None

    try:
        # Bail out early when CUDA is not reachable from torch.
        if not torch.cuda.is_available():
            logger.debug("Torch CUDA not available")
            return None
        device_id = torch.cuda.current_device()
    except Exception as e:
        # Any probing failure is treated as "no GPU" rather than an error.
        logger.debug("Error checking torch GPU availability: %s", e)
        return None

    logger.debug("Torch GPU available: device_id=%s", device_id)
    return device_id
def check_tf_gpu_available() -> Optional[int]:
    """
    Probe for a GPU via tensorflow.

    Returns:
        GPU device ID if available, None otherwise
    """
    # GPU probing is explicitly disabled when running as a subprocess runner.
    if os.getenv('OPENHCS_SUBPROCESS_NO_GPU') == '1':
        logger.debug("Subprocess runner mode - skipping tensorflow GPU check")
        return None

    tf = optional_import("tensorflow")
    if tf is None:
        logger.debug("TensorFlow not installed")
        return None

    try:
        # Only the enumeration call can raise; keep the try narrow.
        gpus = tf.config.list_physical_devices('GPU')
    except Exception as e:
        # Any probing failure is treated as "no GPU" rather than an error.
        logger.debug("Error checking TensorFlow GPU availability: %s", e)
        return None

    if not gpus:
        logger.debug("TensorFlow GPU not available")
        return None

    # TensorFlow exposes no CUDA device id; report the first device's index.
    device_id = 0
    logger.debug("TensorFlow GPU available: device_id=%s", device_id)
    return device_id
def check_jax_gpu_available() -> Optional[int]:
    """
    Check if JAX is available and can access a GPU.

    Returns:
        GPU device ID if available, None otherwise
    """
    # Skip GPU checks in subprocess runner mode
    if os.getenv('OPENHCS_SUBPROCESS_NO_GPU') == '1':
        logger.debug("Subprocess runner mode - skipping JAX GPU check")
        return None

    jax = optional_import("jax")
    if jax is None:
        logger.debug("JAX not installed")
        return None

    try:
        # Check if JAX is available and can access a GPU
        gpu_devices = [d for d in jax.devices() if d.platform == 'gpu']
        if not gpu_devices:
            logger.debug("JAX GPU not available")
            return None

        device = gpu_devices[0]
        # Prefer the documented integer ``jax.Device.id`` attribute. Parsing
        # str(device) is fragile: newer JAX versions repr devices as
        # "CudaDevice(id=0)" rather than "gpu:0", which made the old
        # int(split(':')) parse raise and fall into the except branch,
        # incorrectly reporting no GPU.
        device_id = getattr(device, 'id', None)
        if not isinstance(device_id, int):
            # Fallback for device objects without ``id``: parse a trailing
            # ":N" from the string form, defaulting to 0 when absent.
            device_str = str(device)
            if ':' in device_str:
                device_id = int(device_str.split(':')[-1])
            else:
                device_id = 0
        logger.debug("JAX GPU available: device_id=%s", device_id)
        return device_id
    except Exception as e:
        logger.debug("Error checking JAX GPU availability: %s", e)
        return None