cudaError_t obb_scanline_rotate_gpu(const uint8_t *aabb_pixels, uint8_t *obb_pixels, int aabb_width, int aabb_height, int aabb_pitch, int obb_width, int obb_height, float obb_cx, float obb_cy, float angle_rad, cudaStream_t stream)
Performs scanline-based OBB (oriented bounding box) rotation on the GPU.