This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

feat: iOS MLKit pose + split EXIF + skeleton extensions (v4.15.0)

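- iOS: switch from Vision VNDetectHumanBodyPoseRequest to MLKit Accurate
pose on device builds (iosArm64), embedded as cinterop static archives
(no cocoapods propagation to downstream consumers). Simulator targets
fall back to Vision because upstream MLKit has no arm64-simulator slice.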
- iOS: fix coordinate-space bugs across all four device orientations.
Pose and object paths now have independent EXIF derivations +
independent preview-coord mapping (aspect-fit/fill for pose, original
pointForCaptureDevicePointOfInterest for objects).
- iOS: fix object-detection bounding boxes in non-landscape-right
orientations. Root cause was VNImageRequestHandler silently ignoring
the VNImageOptionCGImagePropertyOrientation options-dict key;
switched to the orientation-parameter constructor.
- Skeleton: add leftHeel/rightHeel, leftToe/rightToe (foot index on
Android), and leftIndex/rightIndex (finger tip). Wired through lerp,
mirror, rotate, bones (new foot + hand bones), joints, and both
MLKit pose builders.
- iOS overlay: replace BlendMode.Softlight/Color bones and radial
gradient joint dots with solid crisp strokes + circles at ~1/3
thickness. Matches Android.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

+1660 -289
+90 -3
posedetection/build.gradle.kts
··· 4 4 5 5 mavenPublishing { 6 6 publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL) 7 - coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "4.14.0") 7 + coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "4.15.0") 8 8 9 9 pom { 10 10 name.set("Pose Detection") ··· 52 52 iosX64(), 53 53 iosArm64(), 54 54 iosSimulatorArm64() 55 - ).forEach { 56 - it.binaries.framework { 55 + ).forEach { target -> 56 + target.binaries.framework { 57 57 baseName = "ComposeApp" 58 58 isStatic = true 59 59 } 60 + // MLKit pose detection is embedded via cinterop `staticLibraries` for 61 + // iosArm64 only — the library's klib ships the MLKit binaries so 62 + // downstream consumers get MLKit without running cocoapods themselves. 63 + // Sim targets fall back to Apple Vision (upstream MLKit has no 64 + // arm64-simulator slice). 65 + if (target.name == "iosArm64") { 66 + target.compilations.getByName("main").cinterops.create("mlkitAccurate") { 67 + defFile(layout.buildDirectory.file("mlkit-archives/mlkitAccurate.def").get().asFile) 68 + packageName("cocoapods.MLKitPoseDetectionAccurate") 69 + } 70 + } 60 71 } 61 72 62 73 sourceSets { ··· 114 125 androidTestImplementation(libs.androidx.uitest.junit4) 115 126 debugImplementation(libs.androidx.uitest.testManifest) 116 127 } 128 + 129 + // ============================================================================ 130 + // MLKit iOS integration 131 + // ---------------------------------------------------------------------------- 132 + // The library's iosArm64 klib ships MLKit statically-embedded via cinterop 133 + // `staticLibraries`. Downstream consumers get MLKit symbols without needing 134 + // cocoapods — standard Maven/Gradle KMP dependency resolution is enough. 135 + // 136 + // The `syncMlkitBinaries` task runs tools/sync-mlkit.sh, which fetches MLKit 137 + // pods into build/mlkit-staging and extracts per-target static archives into 138 + // build/mlkit-archives. 
`generateMlkitDefFile` then writes a cinterop .def 139 + // referencing those archives with absolute paths resolved at configuration 140 + // time. `cinteropMlkitAccurateIosArm64` is wired to depend on both. 141 + // ============================================================================ 142 + 143 + val mlkitStagingDir = layout.buildDirectory.dir("mlkit-staging") 144 + val mlkitArchivesDir = layout.buildDirectory.dir("mlkit-archives") 145 + val mlkitDefFile = layout.buildDirectory.file("mlkit-archives/mlkitAccurate.def") 146 + 147 + val syncMlkitBinaries = tasks.register<Exec>("syncMlkitBinaries") { 148 + group = "mlkit" 149 + description = "Fetch MLKit pods + extract static archives for each Kotlin iOS target." 150 + inputs.file("tools/sync-mlkit.sh") 151 + outputs.dir(mlkitArchivesDir) 152 + executable = project.file("tools/sync-mlkit.sh").absolutePath 153 + environment("MLKIT_STAGING_DIR", mlkitStagingDir.get().asFile.absolutePath) 154 + environment("MLKIT_ARCHIVES_DIR", mlkitArchivesDir.get().asFile.absolutePath) 155 + // Only iosArm64 (iPhone device) is supported upstream; sim targets fall 156 + // back to Vision via the MlKitPose expect/actual stub. 157 + environment("MLKIT_TARGETS", "ios_arm64") 158 + } 159 + 160 + val generateMlkitDefFile = tasks.register("generateMlkitDefFile") { 161 + group = "mlkit" 162 + description = "Generate the MLKit cinterop .def with absolute archive paths." 163 + dependsOn(syncMlkitBinaries) 164 + outputs.file(mlkitDefFile) 165 + // Capture as locals so the closure doesn't hold project-level refs — 166 + // configuration-cache-safe. 
167 + val pods = mlkitStagingDir.get().asFile.resolve("Pods").absolutePath 168 + val archives = mlkitArchivesDir.get().asFile.absolutePath 169 + val defFile = mlkitDefFile.get().asFile 170 + doLast { 171 + val defContent = buildString { 172 + appendLine("language = Objective-C") 173 + appendLine("modules = MLKitPoseDetectionAccurate MLKitPoseDetectionCommon MLKitVision") 174 + appendLine("package = cocoapods.MLKitPoseDetectionAccurate") 175 + appendLine( 176 + "compilerOpts = -fmodules " + 177 + "-F$pods/MLKitPoseDetectionAccurate/Frameworks " + 178 + "-F$pods/MLKitPoseDetectionCommon/Frameworks " + 179 + "-F$pods/MLKitVision/Frameworks " + 180 + "-F$pods/MLKitCommon/Frameworks " + 181 + "-F$pods/MLImage/Frameworks " + 182 + "-F$pods/MLKitXenoCommon/Frameworks" 183 + ) 184 + appendLine( 185 + "staticLibraries = " + 186 + "libMLKitPoseDetectionAccurate.a libMLKitPoseDetectionCommon.a " + 187 + "libMLKitVision.a libMLKitCommon.a libMLImage.a libMLKitXenoCommon.a " + 188 + "libGTMSessionFetcher.a libGoogleDataTransport.a libGoogleToolboxForMac.a " + 189 + "libGoogleUtilities.a libFBLPromises.a libnanopb.a" 190 + ) 191 + appendLine("libraryPaths.ios_arm64 = $archives/ios_arm64") 192 + appendLine( 193 + "linkerOpts = -ObjC -lc++ -lsqlite3 -lz " + 194 + "-framework Accelerate -framework CoreML" 195 + ) 196 + } 197 + defFile.writeText(defContent) 198 + } 199 + } 200 + 201 + tasks.matching { it.name.startsWith("cinteropMlkitAccurate") }.configureEach { 202 + dependsOn(generateMlkitDefFile) 203 + }
+6
posedetection/src/androidMain/kotlin/com.performancecoachlab/posedetection/camera/CameraView.android.kt
··· 586 586 rightKnee = pose?.getPoseLandmark(PoseLandmark.RIGHT_KNEE)?.toSkeletonCoords(), 587 587 leftAnkle = pose?.getPoseLandmark(PoseLandmark.LEFT_ANKLE)?.toSkeletonCoords(), 588 588 rightAnkle = pose?.getPoseLandmark(PoseLandmark.RIGHT_ANKLE)?.toSkeletonCoords(), 589 + leftHeel = pose?.getPoseLandmark(PoseLandmark.LEFT_HEEL)?.toSkeletonCoords(), 590 + rightHeel = pose?.getPoseLandmark(PoseLandmark.RIGHT_HEEL)?.toSkeletonCoords(), 591 + leftToe = pose?.getPoseLandmark(PoseLandmark.LEFT_FOOT_INDEX)?.toSkeletonCoords(), 592 + rightToe = pose?.getPoseLandmark(PoseLandmark.RIGHT_FOOT_INDEX)?.toSkeletonCoords(), 593 + leftIndex = pose?.getPoseLandmark(PoseLandmark.LEFT_INDEX)?.toSkeletonCoords(), 594 + rightIndex = pose?.getPoseLandmark(PoseLandmark.RIGHT_INDEX)?.toSkeletonCoords(), 589 595 width = width.toFloat(), 590 596 height = height.toFloat(), 591 597 )
+19 -122
posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.android.kt
··· 291 291 // Update the follow-crop state so the next frame can tighten around 292 292 // the current skeleton. No-op in MASK/CROP (nothing reads it). 293 293 if (poseFocusMode == PoseFocusMode.CROP_FOLLOW && focusArea != null) { 294 - FollowCropState.updateFromSkeleton( 294 + followCropState.updateFromSkeleton( 295 295 skeleton = result.skeleton, 296 296 analysisW = analysisBitmap.width, 297 297 analysisH = analysisBitmap.height, ··· 311 311 // Shared executor for running pose detection in parallel with object detection. 312 312 private val poseExecutor = Executors.newSingleThreadExecutor() 313 313 314 - // Dynamic follow-the-player crop: once a confident full skeleton is detected, 315 - // remember its bbox (padded) so the next frame crops tightly around it. Reset 316 - // when the skeleton is lost, comes back partial for MISS_TOLERANCE frames in a 317 - // row (implying it moved out of the tight crop), or goes stale past TIMEOUT_MS. 318 - private object FollowCropState { 319 - private const val TIMEOUT_MS = 500L 320 - // Pad generously — basketball shots have fast lateral hand/foot motion, 321 - // and a clipped joint drops the whole frame via the conf filter. 322 - private const val PAD_FRACTION = 0.5f 323 - // Floor on normalized crop size (either dim) so we never hand MLKit an 324 - // extremely narrow/tall image. Tall-narrow aspect hurts recall. 325 - private const val MIN_NORMALIZED_SIDE = 0.25f 326 - // Hysteresis: a single bad frame doesn't bounce us back to the wide crop. 327 - private const val MISS_TOLERANCE = 2 328 - private var tightRect: Rect? = null 329 - private var lastUpdatedMs: Long = 0L 330 - private var consecutiveMisses: Int = 0 331 - 332 - @Synchronized 333 - fun current(nowMs: Long): Rect? 
{ 334 - if (nowMs - lastUpdatedMs > TIMEOUT_MS) { 335 - tightRect = null 336 - consecutiveMisses = 0 337 - } 338 - return tightRect 339 - } 340 - 341 - @Synchronized 342 - fun reset() { 343 - tightRect = null 344 - consecutiveMisses = 0 345 - } 346 - 347 - /** Called after each pose inference. Skeleton coords are in analysis-bitmap 348 - * pixel space; clampTo is the static user-configured focus area in 0..1. */ 349 - @Synchronized 350 - fun updateFromSkeleton( 351 - skeleton: Skeleton?, 352 - analysisW: Int, 353 - analysisH: Int, 354 - clampTo: Rect, 355 - nowMs: Long, 356 - ) { 357 - val joints = skeleton?.joints().orEmpty() 358 - if (joints.size < 12) { 359 - consecutiveMisses += 1 360 - if (consecutiveMisses >= MISS_TOLERANCE) { 361 - tightRect = null 362 - consecutiveMisses = 0 363 - } 364 - return 365 - } 366 - consecutiveMisses = 0 367 - val w = analysisW.toFloat(); val h = analysisH.toFloat() 368 - if (w <= 0f || h <= 0f) { tightRect = null; return } 369 - val minX = joints.minOf { it.x } 370 - val minY = joints.minOf { it.y } 371 - val maxX = joints.maxOf { it.x } 372 - val maxY = joints.maxOf { it.y } 373 - val padW = (maxX - minX) * PAD_FRACTION 374 - val padH = (maxY - minY) * PAD_FRACTION 375 - var l = (minX - padW) / w 376 - var t = (minY - padH) / h 377 - var r = (maxX + padW) / w 378 - var b = (maxY + padH) / h 379 - // Enforce a minimum normalized side length by expanding around the 380 - // centroid when the padded rect is narrower/shorter than the floor. 
381 - val cx = (l + r) / 2f 382 - val cy = (t + b) / 2f 383 - if (r - l < MIN_NORMALIZED_SIDE) { 384 - l = cx - MIN_NORMALIZED_SIDE / 2f 385 - r = cx + MIN_NORMALIZED_SIDE / 2f 386 - } 387 - if (b - t < MIN_NORMALIZED_SIDE) { 388 - t = cy - MIN_NORMALIZED_SIDE / 2f 389 - b = cy + MIN_NORMALIZED_SIDE / 2f 390 - } 391 - l = l.coerceIn(clampTo.left, clampTo.right) 392 - t = t.coerceIn(clampTo.top, clampTo.bottom) 393 - r = r.coerceIn(clampTo.left, clampTo.right) 394 - b = b.coerceIn(clampTo.top, clampTo.bottom) 395 - if (r - l > 0.01f && b - t > 0.01f) { 396 - tightRect = Rect(l, t, r, b) 397 - lastUpdatedMs = nowMs 398 - } else { 399 - tightRect = null 400 - } 401 - } 402 - } 403 - 404 - // Drop landmarks whose MLKit inFrameLikelihood is below this threshold — keeps 405 - // obvious phantoms (occluded joints guessed with low conf) out of downstream. 406 - private const val LANDMARK_CONF_THRESHOLD = 0.5f 407 - 408 - // Temporal smoothing across consecutive skeletons reduces jitter during fast 409 - // shot motion. α is the weight on the new frame; (1-α) on the previous. 410 - private const val SMOOTHING_ALPHA = 0.6f 411 - // Reset the smoother after a long gap so a re-acquired pose doesn't inherit a 412 - // stale position from where the shooter used to be. 413 - private const val SMOOTHING_GAP_MS = 500L 414 - 415 - private object PoseSmoother { 416 - private var last: Skeleton? = null 417 - private var lastWallMs: Long = 0L 418 - 419 - @Synchronized 420 - fun smooth(skel: Skeleton?, timestamp: Long): Skeleton? { 421 - if (skel == null) { 422 - if (timestamp - lastWallMs > SMOOTHING_GAP_MS) last = null 423 - return null 424 - } 425 - val prev = last 426 - val gapOk = (timestamp - lastWallMs) <= SMOOTHING_GAP_MS 427 - val out = if (prev != null && gapOk) Skeleton.lerp(prev, skel, SMOOTHING_ALPHA) else skel 428 - last = out 429 - lastWallMs = timestamp 430 - return out 431 - } 432 - } 314 + // Process-wide trackers. 
Lifted from local singletons to the shared commonMain 315 + // classes so iOS uses the same implementation. 316 + private val followCropState = FollowCropState() 317 + private val poseSmoother = PoseSmoother() 433 318 434 319 // Core processing used by both live camera (ImageProxy) and offline frames (FrameAnalyser). 435 320 fun process( ··· 465 350 // Empty skeletons (no landmarks passed conf threshold) smooth as 466 351 // null so the smoother doesn't lerp toward a zeroed pose. 467 352 val hasAny = raw.joints().isNotEmpty() 468 - PoseSmoother.smooth(if (hasAny) raw else null, timestamp) 353 + poseSmoother.smooth(if (hasAny) raw else null, timestamp) 469 354 }.onFailure { t -> 470 355 Logger.e(t) { "MLKit poseDetector.process failed" } 471 356 }.getOrNull() ··· 881 766 val effectiveFocus: Rect? = when { 882 767 focusArea == null -> null 883 768 poseFocusMode == PoseFocusMode.CROP_FOLLOW -> 884 - FollowCropState.current(System.currentTimeMillis()) ?: focusArea 769 + followCropState.current(System.currentTimeMillis()) ?: focusArea 885 770 else -> focusArea 886 771 } 887 772 val useCrop = effectiveFocus != null && ··· 1015 900 leftAnkle = pose?.getPoseLandmark(PoseLandmark.LEFT_ANKLE) 1016 901 ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 1017 902 rightAnkle = pose?.getPoseLandmark(PoseLandmark.RIGHT_ANKLE) 903 + ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 904 + leftHeel = pose?.getPoseLandmark(PoseLandmark.LEFT_HEEL) 905 + ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 906 + rightHeel = pose?.getPoseLandmark(PoseLandmark.RIGHT_HEEL) 907 + ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 908 + leftToe = pose?.getPoseLandmark(PoseLandmark.LEFT_FOOT_INDEX) 909 + ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 910 + rightToe = pose?.getPoseLandmark(PoseLandmark.RIGHT_FOOT_INDEX) 911 + ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 912 + leftIndex = 
pose?.getPoseLandmark(PoseLandmark.LEFT_INDEX) 913 + ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 914 + rightIndex = pose?.getPoseLandmark(PoseLandmark.RIGHT_INDEX) 1018 915 ?.toSkeletonCoordsScaled(scaleX, scaleY, offsetX, offsetY), 1019 916 width = width.toFloat(), 1020 917 height = height.toFloat(),
+21
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.kt
··· 35 35 * 36 36 * MASK — black out everything outside the focus area, then downscale the full 37 37 * frame. Object detection is unaffected (it always sees the unmasked frame). 38 + * On iOS this is implemented as a post-filter only (Vision's body-pose model 39 + * already picks the most-confident body, so the pixel-level mask is omitted). 38 40 * 39 41 * CROP — crop the frame to the focus area, then downscale only the crop. Gives 40 42 * the pose model higher effective resolution of the focus region at the same ··· 95 97 interface CameraViewController { 96 98 fun requestData(onResult: (CameraViewData) -> Unit) 97 99 fun setRequestDataProvider(provider: (() -> CameraViewData)?) 100 + 101 + // Save the current camera frame composited with the drawn skeleton/object 102 + // overlay as a PNG into the app's Documents directory. Returns the file 103 + // path (or null if unavailable). Currently iOS-only; Android no-ops. 104 + fun captureComposite(filename: String, onResult: (String?) -> Unit) {} 105 + fun setCaptureCompositeProvider(provider: ((String, (String?) -> Unit) -> Unit)?) {} 98 106 } 99 107 100 108 class CameraViewControllerImpl : CameraViewController { 101 109 private var dataProvider: (() -> CameraViewData)? = null 110 + private var compositeProvider: ((String, (String?) -> Unit) -> Unit)? = null 111 + 102 112 override fun requestData(onResult: (CameraViewData) -> Unit) { 103 113 dataProvider?.let { onResult(it()) } 104 114 } 105 115 106 116 override fun setRequestDataProvider(provider: (() -> CameraViewData)?) { 107 117 dataProvider = provider 118 + } 119 + 120 + override fun captureComposite(filename: String, onResult: (String?) -> Unit) { 121 + val p = compositeProvider 122 + if (p != null) p(filename, onResult) else onResult(null) 123 + } 124 + 125 + override fun setCaptureCompositeProvider( 126 + provider: ((String, (String?) -> Unit) -> Unit)? 127 + ) { 128 + compositeProvider = provider 108 129 } 109 130 }
+137
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/PoseTracking.kt
··· 1 + package com.performancecoachlab.posedetection.camera 2 + 3 + import androidx.compose.ui.geometry.Rect 4 + import com.performancecoachlab.posedetection.skeleton.Skeleton 5 + import kotlin.concurrent.Volatile 6 + 7 + // Drop landmarks whose detector-reported confidence is below this threshold — 8 + // keeps obvious phantoms (occluded joints guessed with low conf) out of 9 + // downstream. Calibrated against MLKit's inFrameLikelihood on Android; Vision's 10 + // VNRecognizedPoint.confidence is a close analogue on iOS. 11 + const val LANDMARK_CONF_THRESHOLD: Float = 0.5f 12 + 13 + // Tight-rect tracking: once a confident full skeleton is detected, remember its 14 + // bbox (padded) so the next frame crops tightly around it. Reset when the 15 + // skeleton is lost, comes back partial for MISS_TOLERANCE frames in a row, or 16 + // goes stale past TIMEOUT_MS. 17 + // 18 + // Lifted from the Android singleton into a class so each FrameProcessor owns 19 + // its own tracker (safer for any future multi-camera / multi-view setup). 20 + // 21 + // Thread-safety: on Android the updater runs on the pose executor and the 22 + // reader runs on the image-analyzer executor; on iOS everything runs on 23 + // `frameProcessingQueue`. `@Volatile` gives us visibility across threads 24 + // without a lock — the worst-case race is reading a one-frame-stale tightRect, 25 + // which the TIMEOUT_MS path already handles. 26 + class FollowCropState { 27 + private val padFraction: Float = 0.5f 28 + private val minNormalizedSide: Float = 0.25f 29 + private val missTolerance: Int = 2 30 + private val timeoutMs: Long = 500L 31 + 32 + @Volatile private var tightRect: Rect? = null 33 + @Volatile private var lastUpdatedMs: Long = 0L 34 + @Volatile private var consecutiveMisses: Int = 0 35 + 36 + fun current(nowMs: Long): Rect? 
{ 37 + if (nowMs - lastUpdatedMs > timeoutMs) { 38 + tightRect = null 39 + consecutiveMisses = 0 40 + } 41 + return tightRect 42 + } 43 + 44 + fun reset() { 45 + tightRect = null 46 + consecutiveMisses = 0 47 + } 48 + 49 + /** 50 + * Called after each pose inference. Skeleton coords are in analysis-frame 51 + * pixel space; [clampTo] is the static user-configured focus area in 0..1. 52 + */ 53 + fun updateFromSkeleton( 54 + skeleton: Skeleton?, 55 + analysisW: Int, 56 + analysisH: Int, 57 + clampTo: Rect, 58 + nowMs: Long, 59 + ) { 60 + val joints = skeleton?.joints().orEmpty() 61 + if (joints.size < 12) { 62 + consecutiveMisses += 1 63 + if (consecutiveMisses >= missTolerance) { 64 + tightRect = null 65 + consecutiveMisses = 0 66 + } 67 + return 68 + } 69 + consecutiveMisses = 0 70 + val w = analysisW.toFloat() 71 + val h = analysisH.toFloat() 72 + if (w <= 0f || h <= 0f) { 73 + tightRect = null 74 + return 75 + } 76 + val minX = joints.minOf { it.x } 77 + val minY = joints.minOf { it.y } 78 + val maxX = joints.maxOf { it.x } 79 + val maxY = joints.maxOf { it.y } 80 + val padW = (maxX - minX) * padFraction 81 + val padH = (maxY - minY) * padFraction 82 + var l = (minX - padW) / w 83 + var t = (minY - padH) / h 84 + var r = (maxX + padW) / w 85 + var b = (maxY + padH) / h 86 + val cx = (l + r) / 2f 87 + val cy = (t + b) / 2f 88 + if (r - l < minNormalizedSide) { 89 + l = cx - minNormalizedSide / 2f 90 + r = cx + minNormalizedSide / 2f 91 + } 92 + if (b - t < minNormalizedSide) { 93 + t = cy - minNormalizedSide / 2f 94 + b = cy + minNormalizedSide / 2f 95 + } 96 + l = l.coerceIn(clampTo.left, clampTo.right) 97 + t = t.coerceIn(clampTo.top, clampTo.bottom) 98 + r = r.coerceIn(clampTo.left, clampTo.right) 99 + b = b.coerceIn(clampTo.top, clampTo.bottom) 100 + if (r - l > 0.01f && b - t > 0.01f) { 101 + tightRect = Rect(l, t, r, b) 102 + lastUpdatedMs = nowMs 103 + } else { 104 + tightRect = null 105 + } 106 + } 107 + } 108 + 109 + // Temporal EMA smoother. 
Reduces jitter during fast shot motion. α is the 110 + // weight on the new frame; (1-α) on the previous. Reset after a long gap so a 111 + // re-acquired pose doesn't inherit a stale position from where the shooter 112 + // used to be. 113 + class PoseSmoother { 114 + private val alpha: Float = 0.6f 115 + private val gapMs: Long = 500L 116 + 117 + @Volatile private var last: Skeleton? = null 118 + @Volatile private var lastWallMs: Long = 0L 119 + 120 + fun smooth(skel: Skeleton?, timestamp: Long): Skeleton? { 121 + if (skel == null) { 122 + if (timestamp - lastWallMs > gapMs) last = null 123 + return null 124 + } 125 + val prev = last 126 + val gapOk = (timestamp - lastWallMs) <= gapMs 127 + val out = if (prev != null && gapOk) Skeleton.lerp(prev, skel, alpha) else skel 128 + last = out 129 + lastWallMs = timestamp 130 + return out 131 + } 132 + 133 + fun reset() { 134 + last = null 135 + lastWallMs = 0L 136 + } 137 + }
+14 -30
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.kt
··· 170 170 } 171 171 } 172 172 skeleton?.apply { 173 - val paintWhite = Paint().apply { 174 - color = Color.White 175 - strokeWidth = scaledStrokeWidth 176 - style = Stroke 177 - } 178 - val paintBlue = Paint().apply { 179 - color = Color.Blue 180 - strokeWidth = 0.8f * scaledStrokeWidth 181 - style = Fill 182 - } 173 + // Crisp solid rendering matching Android. Previously used 174 + // BlendMode.Softlight/Color on bones and a radialGradient on 175 + // joints, both of which produced a washed-out/fuzzy look. 176 + // Stroke + joint radius both at 1/3 of the adaptive base — the 177 + // original thickness felt chunky compared to Android. 178 + val boneWidth = scaledStrokeWidth / 3f 179 + val jointRadius = 0.9f * scaledStrokeWidth / 3f 183 180 bones().forEach { line -> 184 181 drawLine( 185 - color = paintWhite.color, start = androidx.compose.ui.geometry.Offset( 186 - line.first.x, line.first.y 187 - ), end = androidx.compose.ui.geometry.Offset( 188 - line.second.x, line.second.y 189 - ), strokeWidth = paintWhite.strokeWidth, blendMode = BlendMode.Softlight 190 - ) 191 - drawLine( 192 - color = paintBlue.color, start = androidx.compose.ui.geometry.Offset( 193 - line.first.x, line.first.y 194 - ), end = androidx.compose.ui.geometry.Offset( 195 - line.second.x, line.second.y 196 - ), strokeWidth = paintBlue.strokeWidth, blendMode = BlendMode.Color 182 + color = Color.White, 183 + start = androidx.compose.ui.geometry.Offset(line.first.x, line.first.y), 184 + end = androidx.compose.ui.geometry.Offset(line.second.x, line.second.y), 185 + strokeWidth = boneWidth, 197 186 ) 198 187 } 199 - 200 188 joints().forEach { joint -> 201 189 drawCircle( 202 - brush = Brush.radialGradient( 203 - colors = listOf(Color.Blue, Color.Transparent), 204 - center = androidx.compose.ui.geometry.Offset(joint.x, joint.y), 205 - radius = 1.2f * scaledStrokeWidth 206 - ), 207 - radius = 1.2f * scaledStrokeWidth, 208 - center = androidx.compose.ui.geometry.Offset(joint.x, joint.y) 190 + color = 
Color.Blue, 191 + radius = jointRadius, 192 + center = androidx.compose.ui.geometry.Offset(joint.x, joint.y), 209 193 ) 210 194 } 211 195 }
+40
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/skeleton/Skeleton.kt
··· 18 18 val rightKnee: SkeletonCoordinate? = null, 19 19 val leftAnkle: SkeletonCoordinate? = null, 20 20 val rightAnkle: SkeletonCoordinate? = null, 21 + val leftHeel: SkeletonCoordinate? = null, 22 + val rightHeel: SkeletonCoordinate? = null, 23 + val leftToe: SkeletonCoordinate? = null, 24 + val rightToe: SkeletonCoordinate? = null, 25 + val leftIndex: SkeletonCoordinate? = null, 26 + val rightIndex: SkeletonCoordinate? = null, 21 27 val width: Float, 22 28 val height: Float, 23 29 ) { ··· 65 71 rightKnee = lerpCoord(a.rightKnee, b.rightKnee, t), 66 72 leftAnkle = lerpCoord(a.leftAnkle, b.leftAnkle, t), 67 73 rightAnkle = lerpCoord(a.rightAnkle, b.rightAnkle, t), 74 + leftHeel = lerpCoord(a.leftHeel, b.leftHeel, t), 75 + rightHeel = lerpCoord(a.rightHeel, b.rightHeel, t), 76 + leftToe = lerpCoord(a.leftToe, b.leftToe, t), 77 + rightToe = lerpCoord(a.rightToe, b.rightToe, t), 78 + leftIndex = lerpCoord(a.leftIndex, b.leftIndex, t), 79 + rightIndex = lerpCoord(a.rightIndex, b.rightIndex, t), 68 80 width = a.width, 69 81 height = a.height, 70 82 ) ··· 88 100 if (rightHip != null && rightKnee != null) lines += Pair(rightHip, rightKnee) 89 101 if (leftKnee != null && leftAnkle != null) lines += Pair(leftKnee, leftAnkle) 90 102 if (rightKnee != null && rightAnkle != null) lines += Pair(rightKnee, rightAnkle) 103 + // Feet: ankle → heel → toe, closed by ankle → toe 104 + if (leftAnkle != null && leftHeel != null) lines += Pair(leftAnkle, leftHeel) 105 + if (rightAnkle != null && rightHeel != null) lines += Pair(rightAnkle, rightHeel) 106 + if (leftHeel != null && leftToe != null) lines += Pair(leftHeel, leftToe) 107 + if (rightHeel != null && rightToe != null) lines += Pair(rightHeel, rightToe) 108 + if (leftAnkle != null && leftToe != null) lines += Pair(leftAnkle, leftToe) 109 + if (rightAnkle != null && rightToe != null) lines += Pair(rightAnkle, rightToe) 110 + // Hands: wrist → index finger 111 + if (leftWrist != null && leftIndex != null) lines += 
Pair(leftWrist, leftIndex) 112 + if (rightWrist != null && rightIndex != null) lines += Pair(rightWrist, rightIndex) 91 113 return lines.toList() 92 114 } 93 115 ··· 105 127 if (rightKnee != null) joints += rightKnee 106 128 if (leftAnkle != null) joints += leftAnkle 107 129 if (rightAnkle != null) joints += rightAnkle 130 + if (leftHeel != null) joints += leftHeel 131 + if (rightHeel != null) joints += rightHeel 132 + if (leftToe != null) joints += leftToe 133 + if (rightToe != null) joints += rightToe 134 + if (leftIndex != null) joints += leftIndex 135 + if (rightIndex != null) joints += rightIndex 108 136 return joints.toList() 109 137 } 110 138 ··· 204 232 rightKnee = rightKnee?.let { SkeletonCoordinate(w - it.x, it.y) }, 205 233 leftAnkle = leftAnkle?.let { SkeletonCoordinate(w - it.x, it.y) }, 206 234 rightAnkle = rightAnkle?.let { SkeletonCoordinate(w - it.x, it.y) }, 235 + leftHeel = leftHeel?.let { SkeletonCoordinate(w - it.x, it.y) }, 236 + rightHeel = rightHeel?.let { SkeletonCoordinate(w - it.x, it.y) }, 237 + leftToe = leftToe?.let { SkeletonCoordinate(w - it.x, it.y) }, 238 + rightToe = rightToe?.let { SkeletonCoordinate(w - it.x, it.y) }, 239 + leftIndex = leftIndex?.let { SkeletonCoordinate(w - it.x, it.y) }, 240 + rightIndex = rightIndex?.let { SkeletonCoordinate(w - it.x, it.y) }, 207 241 width = width, 208 242 height = height, 209 243 ) ··· 244 278 rightKnee = rightKnee?.let(transform), 245 279 leftAnkle = leftAnkle?.let(transform), 246 280 rightAnkle = rightAnkle?.let(transform), 281 + leftHeel = leftHeel?.let(transform), 282 + rightHeel = rightHeel?.let(transform), 283 + leftToe = leftToe?.let(transform), 284 + rightToe = rightToe?.let(transform), 285 + leftIndex = leftIndex?.let(transform), 286 + rightIndex = rightIndex?.let(transform), 247 287 width = newDimensions.first, 248 288 height = newDimensions.second 249 289 )
+197
posedetection/src/iosArm64Main/kotlin/com/performancecoachlab/posedetection/camera/MlKitPose.kt
package com.performancecoachlab.posedetection.camera

import cocoapods.MLKitPoseDetectionAccurate.MLKAccuratePoseDetectorOptions
import cocoapods.MLKitPoseDetectionAccurate.MLKCommonPoseDetectorOptions
import cocoapods.MLKitPoseDetectionAccurate.MLKPose
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseDetector
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseDetectorModeStream
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkType
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftAnkle
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftElbow
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftHeel
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftHip
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftIndexFinger
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftKnee
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftShoulder
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftToe
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeLeftWrist
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightAnkle
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightElbow
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightHeel
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightHip
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightIndexFinger
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightKnee
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightShoulder
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightToe
import cocoapods.MLKitPoseDetectionAccurate.MLKPoseLandmarkTypeRightWrist
import cocoapods.MLKitPoseDetectionAccurate.MLKVisionImage
import cocoapods.MLKitPoseDetectionAccurate.MLKVisionPoint
import com.performancecoachlab.posedetection.skeleton.Skeleton
import kotlinx.cinterop.ExperimentalForeignApi
import kotlinx.cinterop.ObjCObjectVar
import kotlinx.cinterop.alloc
import kotlinx.cinterop.memScoped
import kotlinx.cinterop.ptr
import kotlinx.cinterop.useContents
import kotlinx.cinterop.value
import platform.CoreGraphics.CGImageRelease
import platform.CoreGraphics.CGRectMake
import platform.CoreImage.CIContext
import platform.CoreImage.CIImage
import platform.CoreImage.createCGImage
import platform.CoreVideo.CVImageBufferRef
import platform.Foundation.NSError
import platform.UIKit.UIImage

/**
 * iOS pose detector backed by MLKit's accurate model running in stream mode.
 *
 * Each call to [detect] pre-rotates the camera buffer with Core Image so the
 * bitmap handed to MLKit is already upright, then converts the first detected
 * pose into a [Skeleton] expressed in oriented, top-left-origin pixel
 * coordinates of the full frame.
 */
@OptIn(ExperimentalForeignApi::class)
internal actual class MlKitPose actual constructor() {
    // CIContext is expensive; reuse across frames.
    private val ciContext: CIContext = CIContext.contextWithOptions(null)

    // Set after the first MLKit failure has been printed, so a persistent
    // error does not flood the log once per frame.
    private var detectErrorReported = false

    // Accurate-model, stream-mode detector — same config as Android.
    // Created lazily because the model loads on first use.
    private val detector: MLKPoseDetector by lazy {
        val options = MLKAccuratePoseDetectorOptions().apply {
            setDetectorMode(MLKPoseDetectorModeStream)
        }
        @Suppress("UNCHECKED_CAST")
        MLKPoseDetector.poseDetectorWithOptions(
            options as MLKCommonPoseDetectorOptions
        )
    }

    /** Always `true`: this actual implementation has MLKit linked in. */
    actual fun isAvailable(): Boolean = true

    /**
     * Runs pose detection on one camera frame.
     *
     * @param buffer raw camera pixel buffer.
     * @param exifOrientation EXIF orientation applied to [buffer] to make it upright.
     * @param orientedW width recorded on the returned [Skeleton].
     * @param orientedH height recorded on the returned [Skeleton].
     * @param useCrop when `true`, only the crop rectangle is rendered and analysed.
     * @param cropLeftPx left edge of the crop, in oriented top-left pixel space.
     * @param cropTopPx top edge of the crop, in oriented top-left pixel space.
     * @param cropWPx crop width in pixels.
     * @param cropHPx crop height in pixels.
     * @param timestamp value stored on the returned [Skeleton].
     * @return the first detected pose as a [Skeleton], or `null` when the frame
     *   cannot be rendered, detection fails, or no pose is found.
     */
    actual fun detect(
        buffer: CVImageBufferRef,
        exifOrientation: Int,
        orientedW: Float,
        orientedH: Float,
        useCrop: Boolean,
        cropLeftPx: Float,
        cropTopPx: Float,
        cropWPx: Float,
        cropHPx: Float,
        timestamp: Long,
    ): Skeleton? {
        // A crop with no area (or NaN dimensions) has nothing to render; bail
        // out before doing any Core Image work. `!(x > 0f)` also rejects NaN.
        if (useCrop && (!(cropWPx > 0f) || !(cropHPx > 0f))) return null

        // Pre-rotate the pixels so the CGImage handed to MLKit is already
        // physically upright at oriented (logical/display) dimensions. MLKit
        // returns landmarks in the input CGImage's raw pixel space, so when
        // raw == oriented, landmarks land in the same oriented top-left pixel
        // space the rest of the library works in — no post-hoc remapping,
        // no UIImage/MLKVisionImage orientation juggling.
        val ciRaw = CIImage.imageWithCVPixelBuffer(buffer) ?: return null
        val ciOriented: CIImage = ciRaw.imageByApplyingOrientation(exifOrientation)

        // Rotated extent may have a non-zero origin depending on orientation.
        // Read the actual values so our render rect aligns with the pixels.
        var eOriginX = 0f
        var eOriginY = 0f
        var eW = 0f
        var eH = 0f
        ciOriented.extent.useContents {
            eOriginX = origin.x.toFloat()
            eOriginY = origin.y.toFloat()
            eW = size.width.toFloat()
            eH = size.height.toFloat()
        }
        // An empty extent cannot produce a bitmap.
        if (!(eW > 0f) || !(eH > 0f)) return null

        // Compute the render rect in CIImage (bottom-left origin, absolute)
        // coords, anchored at the extent origin.
        val renderRect = if (useCrop) {
            // cropLeftPx / cropTopPx / cropWPx / cropHPx arrive in oriented
            // top-left space; flip Y into CIImage bottom-left space and shift
            // by the extent origin.
            CGRectMake(
                x = (eOriginX + cropLeftPx).toDouble(),
                y = (eOriginY + (eH - cropTopPx - cropHPx)).toDouble(),
                width = cropWPx.toDouble(),
                height = cropHPx.toDouble(),
            )
        } else {
            CGRectMake(
                x = eOriginX.toDouble(),
                y = eOriginY.toDouble(),
                width = eW.toDouble(),
                height = eH.toDouble(),
            )
        }
        val ciForRender = if (useCrop) ciOriented.imageByCroppingToRect(renderRect) else ciOriented
        val cgImage = ciContext.createCGImage(ciForRender, renderRect) ?: return null

        return try {
            // Plain UIImage with default .up orientation — we already rotated.
            val uiImage = UIImage(cgImage)
            val visionImage = MLKVisionImage(image = uiImage)
            // Leave visionImage.orientation at default .up. Setting it (or
            // giving the UIImage non-.up orientation metadata) either
            // double-rotates or leaves MLKit producing landmarks in a
            // different coord space from ours.

            val pose = memScoped {
                val errPtr = alloc<ObjCObjectVar<NSError?>>()
                @Suppress("UNCHECKED_CAST")
                val poses = detector.resultsInImage(
                    visionImage as objcnames.protocols.MLKCompatibleImageProtocol,
                    errPtr.ptr,
                ) as? List<MLKPose>
                // Only consult the error when the call produced no result, and
                // surface it once instead of dropping it silently.
                if (poses == null) {
                    val error = errPtr.value
                    if (error != null && !detectErrorReported) {
                        detectErrorReported = true
                        println("MlKitPose: pose detection failed: ${error.localizedDescription}")
                    }
                }
                poses?.firstOrNull()
            } ?: return null
            buildSkeletonFromPose(
                pose = pose,
                timestamp = timestamp,
                orientedW = orientedW,
                orientedH = orientedH,
                useCrop = useCrop,
                cropLeftPx = cropLeftPx,
                cropTopPx = cropTopPx,
            )
        } finally {
            // createCGImage handed us an owned CGImage; release it on every
            // exit path, including the early `return null` above.
            CGImageRelease(cgImage)
        }
    }

    /**
     * Converts an MLKit pose into a [Skeleton].
     *
     * Landmarks whose in-frame likelihood is below `LANDMARK_CONF_THRESHOLD`
     * become `null`. When [useCrop] is set, [cropLeftPx] / [cropTopPx] are
     * added so crop-local positions become full-frame positions.
     */
    private fun buildSkeletonFromPose(
        pose: MLKPose,
        timestamp: Long,
        orientedW: Float,
        orientedH: Float,
        useCrop: Boolean,
        cropLeftPx: Float,
        cropTopPx: Float,
    ): Skeleton {
        fun c(type: MLKPoseLandmarkType): Skeleton.SkeletonCoordinate? {
            val lm = pose.landmarkOfType(type)
            if (lm.inFrameLikelihood < LANDMARK_CONF_THRESHOLD) return null
            // MLKit returns pixel coords in the input image space. Because we
            // pre-rotated, that == oriented top-left space for MASK, or crop-
            // local for CROP. Add the crop offset to reach oriented full frame.
            val pos = lm.position as MLKVisionPoint
            val x = if (useCrop) cropLeftPx + pos.x.toFloat() else pos.x.toFloat()
            val y = if (useCrop) cropTopPx + pos.y.toFloat() else pos.y.toFloat()
            return Skeleton.SkeletonCoordinate(x, y)
        }
        return Skeleton(
            timestamp = timestamp,
            leftShoulder = c(MLKPoseLandmarkTypeLeftShoulder),
            rightShoulder = c(MLKPoseLandmarkTypeRightShoulder),
            leftElbow = c(MLKPoseLandmarkTypeLeftElbow),
            rightElbow = c(MLKPoseLandmarkTypeRightElbow),
            leftWrist = c(MLKPoseLandmarkTypeLeftWrist),
            rightWrist = c(MLKPoseLandmarkTypeRightWrist),
            leftHip = c(MLKPoseLandmarkTypeLeftHip),
            rightHip = c(MLKPoseLandmarkTypeRightHip),
            leftKnee = c(MLKPoseLandmarkTypeLeftKnee),
            rightKnee = c(MLKPoseLandmarkTypeRightKnee),
            leftAnkle = c(MLKPoseLandmarkTypeLeftAnkle),
            rightAnkle = c(MLKPoseLandmarkTypeRightAnkle),
            leftHeel = c(MLKPoseLandmarkTypeLeftHeel),
            rightHeel = c(MLKPoseLandmarkTypeRightHeel),
            leftToe = c(MLKPoseLandmarkTypeLeftToe),
            rightToe = c(MLKPoseLandmarkTypeRightToe),
            leftIndex = c(MLKPoseLandmarkTypeLeftIndexFinger),
            rightIndex = c(MLKPoseLandmarkTypeRightIndexFinger),
            height = orientedH,
            width = orientedW,
        )
    }
}
+473 -76
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraEngine.kt
··· 61 61 import platform.CoreFoundation.CFDataGetBytePtr 62 62 import platform.CoreFoundation.CFDataGetLength 63 63 import platform.CoreFoundation.CFRelease 64 + import platform.CoreFoundation.CFRetain 65 + import platform.CoreGraphics.CGContextAddLineToPoint 66 + import platform.CoreGraphics.CGContextFillEllipseInRect 67 + import platform.CoreGraphics.CGContextMoveToPoint 68 + import platform.CoreGraphics.CGContextSetFillColorWithColor 69 + import platform.CoreGraphics.CGContextSetLineWidth 70 + import platform.CoreGraphics.CGContextSetStrokeColorWithColor 71 + import platform.CoreGraphics.CGContextStrokeEllipseInRect 72 + import platform.CoreGraphics.CGContextStrokePath 73 + import platform.CoreGraphics.CGContextStrokeRect 74 + import platform.CoreGraphics.CGRectMake 75 + import platform.CoreVideo.CVImageBufferRef 64 76 import platform.CoreGraphics.CGBitmapContextCreate 65 77 import platform.CoreGraphics.CGBitmapContextCreateImage 66 78 import platform.CoreGraphics.CGColorSpaceCreateDeviceRGB ··· 74 86 import platform.CoreGraphics.CGImageRelease 75 87 import platform.CoreGraphics.CGPointMake 76 88 import platform.CoreGraphics.CGSize 89 + import platform.CoreGraphics.CGSizeMake 90 + import platform.CoreImage.CIContext 91 + import platform.CoreImage.CIImage 92 + import platform.CoreImage.createCGImage 93 + import platform.ImageIO.kCGImagePropertyOrientationDown 94 + import platform.ImageIO.kCGImagePropertyOrientationDownMirrored 95 + import platform.ImageIO.kCGImagePropertyOrientationLeft 96 + import platform.ImageIO.kCGImagePropertyOrientationLeftMirrored 97 + import platform.ImageIO.kCGImagePropertyOrientationRight 98 + import platform.ImageIO.kCGImagePropertyOrientationRightMirrored 99 + import platform.ImageIO.kCGImagePropertyOrientationUp 100 + import platform.ImageIO.kCGImagePropertyOrientationUpMirrored 101 + import platform.Foundation.NSDocumentDirectory 102 + import platform.Foundation.NSFileManager 103 + import platform.Foundation.NSUserDomainMask 104 + 
import platform.Foundation.writeToURL 105 + import platform.UIKit.UIColor 106 + import platform.UIKit.UIGraphicsGetCurrentContext 107 + import platform.UIKit.UIGraphicsImageRenderer 108 + import platform.UIKit.UIImageOrientation 109 + import platform.UIKit.UIImagePNGRepresentation 77 110 import platform.CoreMedia.CMSampleBufferGetImageBuffer 78 111 import platform.CoreMedia.CMSampleBufferRef 79 112 import platform.Foundation.NSData ··· 97 130 import platform.darwin.dispatch_queue_create 98 131 import platform.darwin.dispatch_sync 99 132 import platform.posix.memcpy 133 + import kotlin.concurrent.Volatile 100 134 import kotlin.math.abs 101 135 import kotlin.native.runtime.NativeRuntimeApi 102 136 ··· 133 167 MemoryManager.updateMemoryStatus() 134 168 } 135 169 170 + @OptIn(ExperimentalForeignApi::class) 171 + override fun viewDidAppear(animated: Boolean) { 172 + super.viewDidAppear(animated) 173 + // The view is now in a window, so `view.window?.windowScene?.interfaceOrientation` 174 + // is reliable — unlike at viewDidLoad time, where `keyWindow` may be nil. 175 + // Re-sync so the initial orientation (esp. portrait) reaches both preview 176 + // layer and video-data-output connections, which otherwise default to 177 + // LandscapeRight and make captureOutput frames come through misoriented. 
178 + cameraController.setDesiredVideoOrientation(resolveInterfaceOrientation()) 179 + } 180 + 136 181 override fun viewDidDisappear(animated: Boolean) { 137 182 super.viewDidDisappear(animated) 138 183 ··· 145 190 withTransitionCoordinator: UIViewControllerTransitionCoordinatorProtocol 146 191 ) { 147 192 super.viewWillTransitionToSize(size, withTransitionCoordinator) 148 - val orientation = interfaceOrientationToVideoOrientation() 149 - cameraController.cameraPreviewLayer?.connection?.videoOrientation = orientation 150 - // Also update the video-data output connection (used for Vision/CoreML) 151 - cameraController.updateVideoOutputOrientation(orientation) 193 + cameraController.setDesiredVideoOrientation(resolveInterfaceOrientation()) 194 + } 195 + 196 + @OptIn(ExperimentalForeignApi::class) 197 + private fun resolveInterfaceOrientation(): AVCaptureVideoOrientation { 198 + // Prefer this view's window scene (reliable once viewDidAppear has run), 199 + // falling back to the global keyWindow lookup, then portrait. 200 + val sceneOrientation = view.window?.windowScene?.interfaceOrientation 201 + ?: UIApplication.sharedApplication.keyWindow?.windowScene?.interfaceOrientation 202 + ?: UIInterfaceOrientationPortrait 203 + return when (sceneOrientation) { 204 + UIInterfaceOrientationPortrait -> AVCaptureVideoOrientationPortrait 205 + UIInterfaceOrientationPortraitUpsideDown -> AVCaptureVideoOrientationPortraitUpsideDown 206 + UIInterfaceOrientationLandscapeLeft -> AVCaptureVideoOrientationLandscapeLeft 207 + UIInterfaceOrientationLandscapeRight -> AVCaptureVideoOrientationLandscapeRight 208 + else -> AVCaptureVideoOrientationPortrait 209 + } 152 210 } 153 211 154 212 fun getCameraPreviewLayer() = cameraController.cameraPreviewLayer ··· 248 306 249 307 fun addCameraViewController(controller: CameraViewController?) { 250 308 cameraController.cameraViewController = controller 309 + // Wire the test-harness composite-capture hook. 
Forwards the call to 310 + // the inner controller which has access to the last camera buffer. 311 + controller?.setCaptureCompositeProvider { filename, onResult -> 312 + cameraController.captureCompositeToPng(filename, onResult) 313 + } 251 314 } 252 315 253 316 fun addFrameListener(listener: FrameRepository) { ··· 280 343 281 344 fun setFocusArea(focusArea: Rect?) { 282 345 cameraController.setFocusArea(focusArea) 346 + } 347 + 348 + fun setPoseFocusMode(mode: PoseFocusMode) { 349 + cameraController.setPoseFocusMode(mode) 283 350 } 284 351 285 352 fun setObjectModel(objectModel: ObjectModel?) { ··· 328 395 private val frameProcessingQueue = 329 396 dispatch_queue_create("com.performancecoachlab.frameProcessing", null) 330 397 398 + // Test-harness composite-capture state. Retains the most recent camera 399 + // buffer + the pre-preview-mapping skeleton/objects so captureComposite 400 + // can render them into a single PNG on demand. Access via 401 + // CameraViewController.captureComposite(...) wired below. 402 + @OptIn(ExperimentalForeignApi::class) 403 + private var lastBuffer: CVImageBufferRef? = null 404 + private var lastCaptureConnection: AVCaptureConnection? = null 405 + private var lastDetectedSkeleton: Skeleton? = null 406 + private var lastPreviewSkeleton: Skeleton? = null 407 + private var lastPreviewObjects: List<AnalysisObject> = emptyList() 408 + private var lastPreviewBoundsW: Double = 0.0 409 + private var lastPreviewBoundsH: Double = 0.0 410 + private var lastDetectedObjects: List<AnalysisObject> = emptyList() 411 + private val lastBufferLock = dispatch_queue_create( 412 + "com.performancecoachlab.lastBufferLock", 413 + null 414 + ) 415 + 331 416 // iOS 14+ introduces a new way to access the ultra-wide camera, which we can use conditionally. 332 417 private var backUltraWideCamera: AVCaptureDevice? = null 333 418 private var backWideCamera: AVCaptureDevice? 
= null 334 419 private var useUltraWideBack: Boolean = false 335 420 421 + // Source-of-truth for the orientation the capture pipeline should be running 422 + // in. Updated from main-thread lifecycle hooks (viewDidAppear, 423 + // viewWillTransitionToSize, setupPreviewLayer, switchCamera). Read from the 424 + // frame-processing queue to force the videoOutput connection in sync each 425 + // frame, since AVCaptureConnection.videoOrientation on the video-data 426 + // output is easy to end up stale (defaults to LandscapeRight). 427 + @Volatile 428 + var desiredVideoOrientation: AVCaptureVideoOrientation = AVCaptureVideoOrientationPortrait 429 + private set 430 + 431 + @OptIn(ExperimentalForeignApi::class) 432 + fun setDesiredVideoOrientation(orientation: AVCaptureVideoOrientation) { 433 + desiredVideoOrientation = orientation 434 + // Preview-layer connection: safe to set — AVCaptureVideoPreviewLayer 435 + // just uses this for visual rotation, keeps the underlying capture 436 + // device frames in raw (landscape) orientation. 437 + cameraPreviewLayer?.connection?.let { conn -> 438 + if (conn.isVideoOrientationSupported()) { 439 + conn.videoOrientation = orientation 440 + } 441 + } 442 + // Deliberately do NOT set videoOrientation on the video-data-output 443 + // connection — AVCaptureVideoDataOutput auto-rotates the delivered 444 + // buffer when that's set, causing double-rotation downstream (the 445 + // preview layer already rotates for display). Vision gets the raw 446 + // landscape buffer + explicit EXIF via VNImageRequestHandler's 447 + // orientation parameter; MLKit gets the raw buffer + the pose EXIF 448 + // for its own imageByApplyingOrientation. 449 + } 450 + 451 + // Resolve the orientation the downstream EXIF pipeline should use. 
We 452 + // always prefer the tracked desired orientation (set from main-thread 453 + // lifecycle hooks) over the connection's reported orientation, because 454 + // (a) on initial portrait launch the video-data output connection 455 + // defaults to LandscapeRight until the first view transition, and 456 + // (b) we deliberately don't rotate the video-data output connection 457 + // since that would make AVCaptureVideoDataOutput auto-rotate the buffer. 458 + @OptIn(ExperimentalForeignApi::class) 459 + fun effectiveVideoOrientation(): AVCaptureVideoOrientation = desiredVideoOrientation 460 + 336 461 sealed class CameraException : Exception() { 337 462 class DeviceNotAvailable : CameraException() 338 463 class ConfigurationError(message: String) : CameraException() ··· 385 510 386 511 fun setFocusArea(focusArea: Rect?) { 387 512 frameProcessor.setFocusArea(focusArea) 513 + } 514 + 515 + fun setPoseFocusMode(mode: PoseFocusMode) { 516 + frameProcessor.setPoseFocusMode(mode) 388 517 } 389 518 390 519 fun setDetectMode(detectMode: DetectMode) { ··· 563 692 @OptIn(ExperimentalForeignApi::class) 564 693 fun setupPreviewLayer(view: UIView) { 565 694 captureSession?.let { session -> 695 + val initialOrientation = view.window?.windowScene?.interfaceOrientation 696 + ?.let { intf -> 697 + when (intf) { 698 + UIInterfaceOrientationPortrait -> AVCaptureVideoOrientationPortrait 699 + UIInterfaceOrientationPortraitUpsideDown -> AVCaptureVideoOrientationPortraitUpsideDown 700 + UIInterfaceOrientationLandscapeLeft -> AVCaptureVideoOrientationLandscapeLeft 701 + UIInterfaceOrientationLandscapeRight -> AVCaptureVideoOrientationLandscapeRight 702 + else -> null 703 + } 704 + } ?: desiredVideoOrientation 705 + 566 706 val newPreviewLayer = AVCaptureVideoPreviewLayer(session = session).apply { 567 707 videoGravity = AVLayerVideoGravityResizeAspectFill 568 708 setFrame(view.bounds) 569 - connection?.videoOrientation = interfaceOrientationToVideoOrientation() 570 - 571 - connection?.let 
{ 572 - if (it.isVideoMirroringSupported()) { 573 - it.automaticallyAdjustsVideoMirroring = false 574 - it.videoMirrored = isUsingFrontCamera 709 + connection?.let { conn -> 710 + if (conn.isVideoOrientationSupported()) { 711 + conn.videoOrientation = initialOrientation 712 + } 713 + if (conn.isVideoMirroringSupported()) { 714 + conn.automaticallyAdjustsVideoMirroring = false 715 + conn.videoMirrored = isUsingFrontCamera 575 716 } 576 717 } 577 - 578 718 } 579 719 580 720 view.layer.addSublayer(newPreviewLayer) 581 721 cameraPreviewLayer = newPreviewLayer 582 722 583 - cameraPreviewLayer?.connection?.let { connection -> 584 - connection.videoOrientation = interfaceOrientationToVideoOrientation() 585 - // Ensure video-data output connection matches the same orientation/mirroring 586 - updateVideoOutputOrientation(interfaceOrientationToVideoOrientation()) 587 - } 723 + // Route through setDesiredVideoOrientation so the cached state, 724 + // preview-layer connection, and video-data-output connection all 725 + // share the same source of truth. viewDidAppear re-calls this with 726 + // the windowScene value once the view is actually in a window. 727 + setDesiredVideoOrientation(initialOrientation) 588 728 } 589 729 } 590 730 ··· 652 792 653 793 // Apply orientation + mirroring on main queue while still hidden. 654 794 dispatch_async(dispatch_get_main_queue()) { 795 + setDesiredVideoOrientation(interfaceOrientationToVideoOrientation()) 655 796 cameraPreviewLayer?.connection?.let { connection -> 656 - connection.videoOrientation = interfaceOrientationToVideoOrientation() 657 797 if (connection.isVideoMirroringSupported()) { 658 798 connection.automaticallyAdjustsVideoMirroring = false 659 799 connection.setVideoMirrored(isUsingFrontCamera) 660 800 } 661 801 } 662 - updateVideoOutputOrientation(interfaceOrientationToVideoOrientation()) 663 802 664 803 cameraPreviewLayer?.apply { 665 804 // Use a tiny fade-in to avoid a harsh blink. 
··· 685 824 686 825 // Ensure the connection is correct after commit, then show the preview again. 687 826 dispatch_async(dispatch_get_main_queue()) { 827 + setDesiredVideoOrientation(interfaceOrientationToVideoOrientation()) 688 828 cameraPreviewLayer?.connection?.let { connection -> 689 - connection.videoOrientation = interfaceOrientationToVideoOrientation() 690 829 if (connection.isVideoMirroringSupported()) { 691 830 connection.automaticallyAdjustsVideoMirroring = false 692 831 connection.setVideoMirrored(isUsingFrontCamera) ··· 728 867 // This avoids brief flashes of stale frames from the previous lens. 729 868 if (isSwitchingCamera) return@dispatch_async 730 869 870 + // Important: do NOT set videoOrientation on fromConnection. 871 + // AVCaptureVideoDataOutput auto-rotates delivered buffers 872 + // when videoOrientation is set, which would double-rotate 873 + // against our own EXIF pipeline. We deliberately override 874 + // the orientation downstream via effectiveVideoOrientation 875 + // while letting the raw landscape buffer flow through here. 876 + val targetOrientation = desiredVideoOrientation 877 + 731 878 var detectedSkeleton: Skeleton? = null 732 879 var detectedObjects: List<AnalysisObject> = emptyList() 733 880 881 + val cvBuf = CMSampleBufferGetImageBuffer(didOutputSampleBuffer) 734 882 frameProcessor.analyseBufferForAll( 735 - CMSampleBufferGetImageBuffer(didOutputSampleBuffer), 883 + cvBuf, 736 884 timestamp, 737 885 preview = cameraPreviewLayer, 738 886 captureConnection = fromConnection, 887 + orientationOverride = targetOrientation, 888 + mirroredOverride = isUsingFrontCamera, 739 889 onSkeletonProcessed = { skeleton -> 740 890 detectedSkeleton = skeleton 741 891 }, ··· 744 894 } 745 895 ) 746 896 897 + // Retain the raw buffer + detections for captureComposite. 
898 + dispatch_sync(lastBufferLock) { 899 + lastBuffer?.also { CFRelease(it) } 900 + lastBuffer = cvBuf?.also { CFRetain(it) } 901 + lastCaptureConnection = fromConnection 902 + lastDetectedSkeleton = detectedSkeleton 903 + lastDetectedObjects = detectedObjects 904 + } 905 + 747 906 cameraPreviewLayer?.also { preview -> 748 907 val previewSkeleton = detectedSkeleton?.let { 749 908 mapSkeletonToPreview( 750 909 skeleton = it, 751 910 previewLayer = preview, 752 911 width = it.width, 753 - height = it.height 912 + height = it.height, 913 + orientationOverride = targetOrientation, 914 + mirroredOverride = isUsingFrontCamera, 754 915 ) 755 916 } 756 917 918 + val bw: Double 919 + val bh: Double 920 + preview.bounds.useContents { 921 + bw = size.width 922 + bh = size.height 923 + } 757 924 previewSkeleton?.also { 758 925 skeletonRepository?.updateSkeleton(it) 759 926 } ··· 769 936 // Keep the original analysis frame size; don't try to derive it in preview space. 770 937 frameSize = obj.frameSize 771 938 ) 939 + } 940 + 941 + dispatch_sync(lastBufferLock) { 942 + lastPreviewSkeleton = previewSkeleton 943 + lastPreviewObjects = previewObjects 944 + lastPreviewBoundsW = bw 945 + lastPreviewBoundsH = bh 772 946 } 773 947 774 948 customObjectRepository?.updateCustomObject(previewObjects) ··· 859 1033 } 860 1034 } 861 1035 862 - // Replace mapBoxToPreview implementation to map oriented analysis pixels -> raw buffer normalized -> preview space. 1036 + // Maps an oriented-analysis-pixel point to capture-device-normalized. 1037 + // Original pipeline used by the object-detection box mapping — this was 1038 + // already correct pre-pose-work and we're deliberately keeping it. Only 1039 + // the skeleton path uses the new aspect-fill math. 
863 1040 @OptIn(ExperimentalForeignApi::class) 864 1041 private fun orientedNormalizedToCaptureDeviceNormalized( 865 1042 uTopLeft: Double, 866 1043 vTopLeft: Double, 867 1044 previewLayer: AVCaptureVideoPreviewLayer, 868 1045 ): Pair<Double, Double> { 869 - // Clamp first to avoid ever returning out-of-range points. 870 1046 val u = uTopLeft.coerceIn(0.0, 1.0) 871 1047 val v = vTopLeft.coerceIn(0.0, 1.0) 872 1048 ··· 874 1050 previewLayer.connection?.videoOrientation ?: AVCaptureVideoOrientationLandscapeRight 875 1051 val mirrored = previewLayer.connection?.videoMirrored ?: false 876 1052 877 - // Convert from oriented (top-left origin) to capture-device normalized expected by pointForCaptureDevicePointOfInterest. 878 - // Empirically, in portrait the old mapping appears 90° clockwise, so we apply a 90° counter-clockwise fix: 879 - // (u,v) -> (x=v, y=1-u) 1053 + // Pre-restore mapping from commit ccf4dc8 "fix: io object detection in 1054 + // non natural orientations". This is what objects were using when they 1055 + // were working; don't change it without also adjusting the object path. 880 1056 var (x, y) = when (orientation) { 881 1057 AVCaptureVideoOrientationPortrait -> Pair(v, 1.0 - u) 882 1058 AVCaptureVideoOrientationPortraitUpsideDown -> Pair(1.0 - v, u) ··· 885 1061 else -> Pair(u, v) 886 1062 } 887 1063 888 - if (mirrored) { 889 - x = 1.0 - x 890 - } 891 - 1064 + if (mirrored) x = 1.0 - x 892 1065 return Pair(x.coerceIn(0.0, 1.0), y.coerceIn(0.0, 1.0)) 893 1066 } 894 1067 1068 + // Original working implementation (pre-any-pose-work). Objects were 1069 + // correctly transformed through this path — the skeleton-path fix does 1070 + // not belong here. Ref: commit ccf4dc8. 895 1071 @OptIn(ExperimentalForeignApi::class) 896 1072 fun mapBoxToPreview( 897 1073 box: Rect, ··· 912 1088 ) 913 1089 } 914 1090 915 - // Map all 4 corners to ensure correct rect under rotations. 
916 1091 val p1 = mapPoint(Offset(box.left, box.top)) 917 1092 val p2 = mapPoint(Offset(box.right, box.top)) 918 1093 val p3 = mapPoint(Offset(box.right, box.bottom)) ··· 935 1110 ) { 936 1111 onVideoSaved?.invoke(didFinishRecordingToOutputFileAtURL.path ?: "") 937 1112 } 1113 + 1114 + // Test-harness / debug helper: composite the last camera frame with the 1115 + // last detected skeleton + object boxes, save as a PNG in Documents/. 1116 + // Called via CameraViewController.captureComposite — see setup in 1117 + // addCameraViewController on the outer CameraEngine. 1118 + @OptIn(ExperimentalForeignApi::class) 1119 + internal fun captureCompositeToPng(filename: String, onResult: (String?) -> Unit) { 1120 + var buffer: CVImageBufferRef? = null 1121 + var connection: AVCaptureConnection? = null 1122 + var skeleton: Skeleton? = null 1123 + var previewSkel: Skeleton? = null 1124 + var previewObjs: List<AnalysisObject> = emptyList() 1125 + var pBoundsW = 0.0 1126 + var pBoundsH = 0.0 1127 + var objects: List<AnalysisObject> = emptyList() 1128 + dispatch_sync(lastBufferLock) { 1129 + val b = lastBuffer 1130 + if (b != null) { 1131 + CFRetain(b) 1132 + buffer = b 1133 + } 1134 + connection = lastCaptureConnection 1135 + skeleton = lastDetectedSkeleton 1136 + previewSkel = lastPreviewSkeleton 1137 + previewObjs = lastPreviewObjects 1138 + pBoundsW = lastPreviewBoundsW 1139 + pBoundsH = lastPreviewBoundsH 1140 + objects = lastDetectedObjects 1141 + } 1142 + val buf = buffer ?: return onResult(null) 1143 + try { 1144 + val ciCtx = CIContext.contextWithOptions(null) 1145 + val ciRaw = CIImage.imageWithCVPixelBuffer(buf) ?: return onResult(null) 1146 + // Pre-rotate to oriented dims so skeleton coords line up 1:1. 
1147 + val exif = captureConnectionToExifOrientation(connection) 1148 + val ciOriented = ciRaw.imageByApplyingOrientation(exif) 1149 + var eOriginX = 0.0 1150 + var eOriginY = 0.0 1151 + var eW = 0.0 1152 + var eH = 0.0 1153 + ciOriented.extent.useContents { 1154 + eOriginX = origin.x 1155 + eOriginY = origin.y 1156 + eW = size.width 1157 + eH = size.height 1158 + } 1159 + val cgImage = ciCtx.createCGImage( 1160 + ciOriented, 1161 + CGRectMake(eOriginX, eOriginY, eW, eH) 1162 + ) ?: return onResult(null) 1163 + 1164 + val cameraUIImage = platform.UIKit.UIImage(cgImage) 1165 + val rendererSize = CGSizeMake(eW, eH) 1166 + 1167 + // 1) Save a CLEAN camera frame (no overlay) for comparison. 1168 + val cleanRenderer = UIGraphicsImageRenderer(size = rendererSize) 1169 + val cleanImage = cleanRenderer.imageWithActions { _ -> 1170 + cameraUIImage.drawInRect(CGRectMake(0.0, 0.0, eW, eH)) 1171 + } 1172 + val cleanPng = UIImagePNGRepresentation(cleanImage) 1173 + val docsUrl = (NSFileManager.defaultManager 1174 + .URLsForDirectory(NSDocumentDirectory, NSUserDomainMask) 1175 + .firstOrNull() as? NSURL) 1176 + ?: run { CGImageRelease(cgImage); return onResult(null) } 1177 + val cleanName = filename.replace(".png", "_clean.png") 1178 + cleanPng?.writeToURL( 1179 + docsUrl.URLByAppendingPathComponent(cleanName) 1180 + ?: run { CGImageRelease(cgImage); return onResult(null) }, 1181 + atomically = true, 1182 + ) 1183 + 1184 + // 2) Save the composite (camera + overlay). 1185 + val renderer = UIGraphicsImageRenderer(size = rendererSize) 1186 + val composite = renderer.imageWithActions { _ -> 1187 + cameraUIImage.drawInRect(CGRectMake(0.0, 0.0, eW, eH)) 1188 + val ctx = UIGraphicsGetCurrentContext() 1189 + if (ctx != null) { 1190 + // DEBUG: draw coord-space fiducials so we can see 1191 + // whether the skeleton coord-space lines up with the 1192 + // pixel space of the rendered camera image. 
1193 + drawDebugFiducials(ctx, eW, eH) 1194 + drawSkeletonOnContext(ctx, skeleton) 1195 + drawObjectsOnContext(ctx, objects) 1196 + } 1197 + } 1198 + val png = UIImagePNGRepresentation(composite) 1199 + ?: run { CGImageRelease(cgImage); return onResult(null) } 1200 + val fileUrl = docsUrl.URLByAppendingPathComponent(filename) 1201 + ?: run { CGImageRelease(cgImage); return onResult(null) } 1202 + png.writeToURL(fileUrl, atomically = true) 1203 + 1204 + // 3) Preview-mapped overlay — replicates what the user sees 1205 + // on-device: preview layer's aspect-fill of the camera image 1206 + // plus the mapSkeletonToPreview-mapped skeleton drawn in the 1207 + // preview layer's bounds coord space. If this diverges from 1208 + // the oriented-pixel composite above, the bug is in the 1209 + // preview mapping / pointForCaptureDevicePointOfInterest path. 1210 + if (pBoundsW > 0 && pBoundsH > 0) { 1211 + val previewSize = CGSizeMake(pBoundsW, pBoundsH) 1212 + val previewRenderer = UIGraphicsImageRenderer(size = previewSize) 1213 + val previewImage = previewRenderer.imageWithActions { _ -> 1214 + // Match the preview layer's videoGravity — FIT for the test 1215 + // harness (PreviewFillMode.FIT), FILL for production default. 
1216 + val gravity = cameraPreviewLayer?.videoGravity 1217 + val fit = gravity == AVLayerVideoGravityResizeAspect 1218 + val scale = if (fit) minOf(pBoundsW / eW, pBoundsH / eH) 1219 + else maxOf(pBoundsW / eW, pBoundsH / eH) 1220 + val drawW = eW * scale 1221 + val drawH = eH * scale 1222 + val drawX = (pBoundsW - drawW) / 2.0 1223 + val drawY = (pBoundsH - drawH) / 2.0 1224 + cameraUIImage.drawInRect(CGRectMake(drawX, drawY, drawW, drawH)) 1225 + val ctx = UIGraphicsGetCurrentContext() 1226 + if (ctx != null) { 1227 + drawDebugFiducials(ctx, pBoundsW, pBoundsH) 1228 + drawSkeletonOnContext(ctx, previewSkel) 1229 + drawObjectsOnContext(ctx, previewObjs) 1230 + } 1231 + } 1232 + val previewPng = UIImagePNGRepresentation(previewImage) 1233 + val previewName = filename.replace(".png", "_preview.png") 1234 + val previewUrl = docsUrl.URLByAppendingPathComponent(previewName) 1235 + if (previewPng != null && previewUrl != null) { 1236 + previewPng.writeToURL(previewUrl, atomically = true) 1237 + } 1238 + } 1239 + 1240 + CGImageRelease(cgImage) 1241 + onResult(fileUrl.path) 1242 + } finally { 1243 + CFRelease(buf) 1244 + } 1245 + } 1246 + 1247 + @OptIn(ExperimentalForeignApi::class) 1248 + private fun drawDebugFiducials( 1249 + ctx: platform.CoreGraphics.CGContextRef, 1250 + w: Double, 1251 + h: Double, 1252 + ) { 1253 + // Draw a cyan crosshair at (0,0) (top-left per oriented-top-left coord 1254 + // space), a magenta cross at the center, and a blue square at bottom- 1255 + // right. If the coord space is correct, cyan sits in the top-left of 1256 + // the image, center-cross in the middle, and blue in the bottom-right. 
1257 + CGContextSetStrokeColorWithColor(ctx, UIColor.cyanColor.CGColor) 1258 + CGContextSetLineWidth(ctx, 6.0) 1259 + CGContextMoveToPoint(ctx, 0.0, 0.0) 1260 + CGContextAddLineToPoint(ctx, 120.0, 0.0) 1261 + CGContextMoveToPoint(ctx, 0.0, 0.0) 1262 + CGContextAddLineToPoint(ctx, 0.0, 120.0) 1263 + CGContextStrokePath(ctx) 1264 + 1265 + CGContextSetStrokeColorWithColor(ctx, UIColor.magentaColor.CGColor) 1266 + CGContextSetLineWidth(ctx, 4.0) 1267 + CGContextMoveToPoint(ctx, w / 2 - 60, h / 2) 1268 + CGContextAddLineToPoint(ctx, w / 2 + 60, h / 2) 1269 + CGContextMoveToPoint(ctx, w / 2, h / 2 - 60) 1270 + CGContextAddLineToPoint(ctx, w / 2, h / 2 + 60) 1271 + CGContextStrokePath(ctx) 1272 + 1273 + CGContextSetStrokeColorWithColor(ctx, UIColor.blueColor.CGColor) 1274 + CGContextStrokeRect(ctx, CGRectMake(w - 80, h - 80, 60.0, 60.0)) 1275 + } 1276 + 1277 + @OptIn(ExperimentalForeignApi::class) 1278 + private fun drawSkeletonOnContext( 1279 + ctx: platform.CoreGraphics.CGContextRef, 1280 + skeleton: Skeleton?, 1281 + ) { 1282 + if (skeleton == null) return 1283 + CGContextSetStrokeColorWithColor(ctx, UIColor.yellowColor.CGColor) 1284 + CGContextSetLineWidth(ctx, 5.0) 1285 + skeleton.bones().forEach { (a, b) -> 1286 + CGContextMoveToPoint(ctx, a.x.toDouble(), a.y.toDouble()) 1287 + CGContextAddLineToPoint(ctx, b.x.toDouble(), b.y.toDouble()) 1288 + } 1289 + CGContextStrokePath(ctx) 1290 + CGContextSetFillColorWithColor(ctx, UIColor.redColor.CGColor) 1291 + skeleton.joints().forEach { j -> 1292 + val r = 6.0 1293 + CGContextFillEllipseInRect( 1294 + ctx, 1295 + CGRectMake(j.x.toDouble() - r, j.y.toDouble() - r, r * 2, r * 2) 1296 + ) 1297 + } 1298 + } 1299 + 1300 + @OptIn(ExperimentalForeignApi::class) 1301 + private fun drawObjectsOnContext( 1302 + ctx: platform.CoreGraphics.CGContextRef, 1303 + objects: List<AnalysisObject>, 1304 + ) { 1305 + CGContextSetStrokeColorWithColor(ctx, UIColor.greenColor.CGColor) 1306 + CGContextSetLineWidth(ctx, 4.0) 1307 + objects.forEach 
{ o -> 1308 + val b = o.boundingBox 1309 + CGContextStrokeRect( 1310 + ctx, 1311 + CGRectMake( 1312 + b.left.toDouble(), 1313 + b.top.toDouble(), 1314 + (b.right - b.left).toDouble(), 1315 + (b.bottom - b.top).toDouble(), 1316 + ) 1317 + ) 1318 + } 1319 + } 1320 + 1321 + @OptIn(ExperimentalForeignApi::class) 1322 + private fun captureConnectionToExifOrientation(connection: AVCaptureConnection?): Int { 1323 + // Prefer the tracked desiredVideoOrientation. The connection's own 1324 + // videoOrientation is unreliable at initial portrait launch 1325 + // (defaults to LandscapeRight until a view transition fires), and we 1326 + // deliberately don't write it back to the video-data output (that 1327 + // would make AVFoundation auto-rotate the buffer against us). 1328 + val videoOrientation = effectiveVideoOrientation() 1329 + val mirrored = connection?.videoMirrored ?: isUsingFrontCamera 1330 + val exif: UInt = when (videoOrientation) { 1331 + AVCaptureVideoOrientationPortrait -> 1332 + if (mirrored) kCGImagePropertyOrientationLeftMirrored else kCGImagePropertyOrientationRight 1333 + AVCaptureVideoOrientationPortraitUpsideDown -> 1334 + if (mirrored) kCGImagePropertyOrientationRightMirrored else kCGImagePropertyOrientationLeft 1335 + AVCaptureVideoOrientationLandscapeLeft -> 1336 + if (mirrored) kCGImagePropertyOrientationUpMirrored else kCGImagePropertyOrientationDown 1337 + AVCaptureVideoOrientationLandscapeRight -> 1338 + if (mirrored) kCGImagePropertyOrientationDownMirrored else kCGImagePropertyOrientationUp 1339 + else -> 1340 + if (mirrored) kCGImagePropertyOrientationDownMirrored else kCGImagePropertyOrientationUp 1341 + } 1342 + return exif.toInt() 1343 + } 938 1344 } 939 1345 940 1346 @OptIn(ExperimentalForeignApi::class) ··· 1082 1488 skeleton: Skeleton, 1083 1489 previewLayer: AVCaptureVideoPreviewLayer, 1084 1490 width: Float, 1085 - height: Float 1491 + height: Float, 1492 + orientationOverride: AVCaptureVideoOrientation? 
= null, 1493 + mirroredOverride: Boolean? = null, 1086 1494 ): Skeleton { 1087 - fun orientedNormalizedToCaptureDeviceNormalized( 1088 - uTopLeft: Double, 1089 - vTopLeft: Double 1090 - ): Pair<Double, Double> { 1091 - val u = uTopLeft.coerceIn(0.0, 1.0) 1092 - val v = vTopLeft.coerceIn(0.0, 1.0) 1093 - 1094 - val orientation = 1095 - previewLayer.connection?.videoOrientation ?: AVCaptureVideoOrientationLandscapeRight 1096 - val mirrored = previewLayer.connection?.videoMirrored ?: false 1097 - 1098 - var (x, y) = when (orientation) { 1099 - AVCaptureVideoOrientationPortrait -> Pair(v, 1.0 - u) 1100 - AVCaptureVideoOrientationPortraitUpsideDown -> Pair(1.0 - v, u) 1101 - AVCaptureVideoOrientationLandscapeRight -> Pair(u, v) 1102 - AVCaptureVideoOrientationLandscapeLeft -> Pair(1.0 - u, 1.0 - v) 1103 - else -> Pair(u, v) 1104 - } 1105 - 1106 - if (mirrored) x = 1.0 - x 1107 - return Pair(x.coerceIn(0.0, 1.0), y.coerceIn(0.0, 1.0)) 1495 + // We bypass previewLayer.pointForCaptureDevicePointOfInterest — it was 1496 + // empirically placing points at the wrong location in portrait/upside-down. 1497 + // Instead we do the aspect-fill math directly in oriented pixel space, 1498 + // which is what MLKit returns and what the preview layer renders after 1499 + // applying connection.videoOrientation. The rotation is already baked 1500 + // into the detection pipeline (see visionExifOrientation + MlKitPose), 1501 + // so all we need here is a scale+offset matching the preview's 1502 + // videoGravity crop/fit. 
1503 + val bw: Double 1504 + val bh: Double 1505 + previewLayer.bounds.useContents { 1506 + bw = size.width 1507 + bh = size.height 1108 1508 } 1509 + val oriW = width.toDouble() 1510 + val oriH = height.toDouble() 1511 + val gravity = previewLayer.videoGravity 1512 + val fit = gravity == AVLayerVideoGravityResizeAspect 1513 + val scale = if (fit) minOf(bw / oriW, bh / oriH) else maxOf(bw / oriW, bh / oriH) 1514 + val offsetX = (bw - oriW * scale) / 2.0 1515 + val offsetY = (bh - oriH * scale) / 2.0 1516 + val mirrored = mirroredOverride ?: (previewLayer.connection?.videoMirrored ?: false) 1109 1517 1110 1518 fun mapPoint(point: Skeleton.SkeletonCoordinate?): Skeleton.SkeletonCoordinate? { 1111 1519 if (point == null) return null 1112 - 1113 - val u = (point.x.toDouble() / width.toDouble()) 1114 - val v = (point.y.toDouble() / height.toDouble()) 1115 - val (cx, cy) = orientedNormalizedToCaptureDeviceNormalized(u, v) 1116 - 1117 - val normalizedPoint = CGPointMake(cx, cy) 1118 - val screenPoint = previewLayer.pointForCaptureDevicePointOfInterest(normalizedPoint) 1119 - 1120 - return Skeleton.SkeletonCoordinate( 1121 - screenPoint.useContents { x.toFloat() }, 1122 - screenPoint.useContents { y.toFloat() } 1123 - ) 1520 + var px = point.x.toDouble() * scale + offsetX 1521 + val py = point.y.toDouble() * scale + offsetY 1522 + if (mirrored) px = bw - px 1523 + return Skeleton.SkeletonCoordinate(px.toFloat(), py.toFloat()) 1124 1524 } 1125 - 1126 - val minbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(0.0, 0.0)) 1127 - .useContents { Pair(x.toFloat(), y.toFloat()) } 1128 - val maxbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(1.0, 1.0)) 1129 - .useContents { Pair(x.toFloat(), y.toFloat()) } 1130 - val bounds = Pair( 1131 - abs(maxbounds.first - minbounds.first), 1132 - abs(maxbounds.second - minbounds.second) 1133 - ) 1134 1525 1135 1526 return Skeleton( 1136 1527 timestamp = skeleton.timestamp, ··· 1146 1537 rightKnee = 
mapPoint(skeleton.rightKnee), 1147 1538 leftAnkle = mapPoint(skeleton.leftAnkle), 1148 1539 rightAnkle = mapPoint(skeleton.rightAnkle), 1149 - width = bounds.first, 1150 - height = bounds.second, 1540 + leftHeel = mapPoint(skeleton.leftHeel), 1541 + rightHeel = mapPoint(skeleton.rightHeel), 1542 + leftToe = mapPoint(skeleton.leftToe), 1543 + rightToe = mapPoint(skeleton.rightToe), 1544 + leftIndex = mapPoint(skeleton.leftIndex), 1545 + rightIndex = mapPoint(skeleton.rightIndex), 1546 + width = bw.toFloat(), 1547 + height = bh.toFloat(), 1151 1548 ) 1152 1549 }
+4
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.ios.kt
··· 95 95 LaunchedEffect(focusArea) { 96 96 cameraEngine.value?.setFocusArea(focusArea) 97 97 } 98 + LaunchedEffect(poseFocusMode) { 99 + cameraEngine.value?.setPoseFocusMode(poseFocusMode) 100 + } 98 101 LaunchedEffect(objectModel) { 99 102 cameraEngine.value?.setObjectModel(objectModel) 100 103 } ··· 102 105 delay(1000L) 103 106 cameraEngine.value?.setObjectModel(objectModel) 104 107 cameraEngine.value?.setFocusArea(focusArea) 108 + cameraEngine.value?.setPoseFocusMode(poseFocusMode) 105 109 cameraEngine.value?.setDetectMode(detectMode) 106 110 } 107 111 LaunchedEffect(previewFillMode) {
+239 -52
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/FrameProcessor.kt
··· 18 18 import kotlinx.cinterop.useContents 19 19 import kotlinx.cinterop.value 20 20 import platform.AVFoundation.AVCaptureConnection 21 + import platform.AVFoundation.AVCaptureVideoOrientation 21 22 import platform.AVFoundation.AVCaptureVideoOrientationLandscapeLeft 22 23 import platform.AVFoundation.AVCaptureVideoOrientationLandscapeRight 23 24 import platform.AVFoundation.AVCaptureVideoOrientationPortrait ··· 169 170 observation.availableJointNames.forEach { 170 171 observation.recognizedPointForJointName(it as VNHumanBodyPoseObservationJointName, null) 171 172 ?.also { point -> 172 - if (point.confidence > 0.2f) { 173 + if (point.confidence > LANDMARK_CONF_THRESHOLD) { 173 174 points[it] = point 174 175 } 175 176 } ··· 226 227 } 227 228 } 228 229 230 + // EXIF for the MLKit pose path. The landscape mapping is swapped relative to 231 + // the object path — empirically required so that the pre-rotated CIImage fed 232 + // to MLKit lands upright in landscape and the aspect-fit/fill skeleton 233 + // mapping places landmarks correctly. 229 234 @OptIn(ExperimentalForeignApi::class) 230 - private fun AVCaptureConnection?.visionExifOrientation(): Int { 231 - // Prefer the actual capture connection orientation (not UI orientation). 232 - val videoOrientation = this?.videoOrientation ?: AVCaptureVideoOrientationLandscapeLeft 233 - val mirrored = this?.videoMirrored == true 235 + private fun AVCaptureConnection?.poseExifOrientation( 236 + orientationOverride: AVCaptureVideoOrientation? = null, 237 + mirroredOverride: Boolean? = null, 238 + ): Int { 239 + val videoOrientation = orientationOverride 240 + ?: this?.videoOrientation 241 + ?: AVCaptureVideoOrientationLandscapeLeft 242 + val mirrored = mirroredOverride ?: (this?.videoMirrored == true) 234 243 235 244 val exif: UInt = when (videoOrientation) { 236 - // These mappings match Apple's docs: EXIF/CGImagePropertyOrientation values describe 237 - // how to rotate the underlying pixel buffer into "Up". 
238 245 AVCaptureVideoOrientationPortrait -> if (mirrored) kCGImagePropertyOrientationLeftMirrored else kCGImagePropertyOrientationRight 239 246 AVCaptureVideoOrientationPortraitUpsideDown -> if (mirrored) kCGImagePropertyOrientationRightMirrored else kCGImagePropertyOrientationLeft 240 - AVCaptureVideoOrientationLandscapeLeft -> if (mirrored) kCGImagePropertyOrientationDownMirrored else kCGImagePropertyOrientationUp 241 - AVCaptureVideoOrientationLandscapeRight -> if (mirrored) kCGImagePropertyOrientationUpMirrored else kCGImagePropertyOrientationDown 247 + AVCaptureVideoOrientationLandscapeLeft -> if (mirrored) kCGImagePropertyOrientationUpMirrored else kCGImagePropertyOrientationDown 248 + AVCaptureVideoOrientationLandscapeRight -> if (mirrored) kCGImagePropertyOrientationDownMirrored else kCGImagePropertyOrientationUp 242 249 else -> if (mirrored) kCGImagePropertyOrientationDownMirrored else kCGImagePropertyOrientationUp 243 250 } 244 251 245 252 return exif.toInt() 246 253 } 247 254 255 + // EXIF for the Vision object-detection path. The mapping historically in 256 + // HEAD (commit ccf4dc8) used swapped landscape EXIF (LandscapeLeft→Up, 257 + // LandscapeRight→Down), which was "correct" only because the original code 258 + // constructed VNImageRequestHandler without an explicit orientation arg and 259 + // Vision silently ignored the options-dict EXIF key. Now that we pass 260 + // orientation explicitly to VNImageRequestHandler, Vision honors it — so 261 + // the EXIF must match the actual sensor-native layout, same as the pose 262 + // path. Both paths effectively share the same mapping, but we keep them as 263 + // separate functions so we can re-diverge if needed. 264 + @OptIn(ExperimentalForeignApi::class) 265 + private fun AVCaptureConnection?.objectExifOrientation( 266 + orientationOverride: AVCaptureVideoOrientation? = null, 267 + mirroredOverride: Boolean? 
= null, 268 + ): Int = poseExifOrientation(orientationOverride, mirroredOverride) 269 + 248 270 @OptIn(ExperimentalForeignApi::class) 249 271 class FrameProcessor(var modelObj: VNCoreMLModel?) { 250 - private val skelBuffer = SkelBuffer(maxSize = 10) 272 + private val poseSmoother = PoseSmoother() 273 + private val followCropState = FollowCropState() 274 + private val mlkitPose = MlKitPose() 251 275 private var regionOfInterest = CGRectMake(0.0, 0.0, 1.0, 1.0) 252 276 private var requests = mutableListOf<VNRequest>() 253 277 val objectRecognition = setUpRecognition() 254 278 private var focusArea: Rect? = null 279 + private var poseFocusMode: PoseFocusMode = PoseFocusMode.MASK 255 280 private var path = "" 256 281 private var detectMode = DetectMode.BOTH 257 282 // Populated by setObjectModel from ObjectModel.inputWidth/Height (filename ··· 262 287 263 288 fun setFocusArea(focusArea: Rect?) { 264 289 this.focusArea = focusArea 290 + } 291 + 292 + fun setPoseFocusMode(mode: PoseFocusMode) { 293 + this.poseFocusMode = mode 265 294 } 266 295 267 296 fun setDetectMode(detectMode: DetectMode) { ··· 433 462 height = height.toFloat(), 434 463 width = width.toFloat() 435 464 ) 436 - onSkeletonProcessed(skelBuffer.smooth(updatedSkeleton)) 465 + val hasAny = updatedSkeleton.joints().isNotEmpty() 466 + onSkeletonProcessed( 467 + poseSmoother.smooth( 468 + if (hasAny) updatedSkeleton else null, 469 + timestamp, 470 + ) 471 + ) 437 472 } 438 473 } 439 474 } ··· 478 513 } 479 514 } 480 515 516 + private fun finalizeSkeletonVisionPath( 517 + raw: Skeleton?, 518 + currentFocus: Rect?, 519 + currentMode: PoseFocusMode, 520 + usePoseCrop: Boolean, 521 + orientedW: Float, 522 + orientedH: Float, 523 + timestamp: Long, 524 + onSkeletonProcessed: (Skeleton?) -> Unit, 525 + ) { 526 + // When we cropped for real (CROP / CROP_FOLLOW), the detector only saw 527 + // the focus region so the result is inside by construction. 
Otherwise 528 + // (MASK / no focus) apply the existing post-filter. 529 + val filtered: Skeleton? = when { 530 + raw == null -> null 531 + usePoseCrop -> raw 532 + currentFocus != null -> if (raw.isInFocusArea(currentFocus)) raw else null 533 + else -> raw 534 + } 535 + if (currentMode == PoseFocusMode.CROP_FOLLOW && currentFocus != null) { 536 + followCropState.updateFromSkeleton( 537 + skeleton = filtered, 538 + analysisW = orientedW.toInt(), 539 + analysisH = orientedH.toInt(), 540 + clampTo = currentFocus, 541 + nowMs = timestamp, 542 + ) 543 + } 544 + val hasAny = filtered?.joints().orEmpty().isNotEmpty() 545 + onSkeletonProcessed( 546 + poseSmoother.smooth(if (hasAny) filtered else null, timestamp) 547 + ) 548 + } 549 + 481 550 fun Skeleton.isInFocusArea(focusArea: Rect?): Boolean { 482 551 if (focusArea == null || joints().isEmpty()) return true 483 552 val focusRect = Rect( ··· 497 566 timestamp: Long, 498 567 preview: AVCaptureVideoPreviewLayer?, 499 568 captureConnection: AVCaptureConnection? = preview?.connection, 569 + orientationOverride: AVCaptureVideoOrientation? = null, 570 + mirroredOverride: Boolean? = null, 500 571 onObjectsProcessed: (List<AnalysisObject>) -> Unit, 501 572 onSkeletonProcessed: (Skeleton?) -> Unit 502 573 ) { ··· 509 580 val retainedBuffer = CFRetain(buffer) 510 581 val rawWidth = CVPixelBufferGetWidth(buffer).toULong() 511 582 val rawHeight = CVPixelBufferGetHeight(buffer).toULong() 512 - val exifOrientation = captureConnection.visionExifOrientation() 513 - val orientedSize = orientedFrameSize(rawWidth, rawHeight, exifOrientation) 583 + // Object and pose paths use independent EXIF derivations — see 584 + // poseExifOrientation / objectExifOrientation for rationale. 
585 + val exifForObjects = captureConnection.objectExifOrientation( 586 + orientationOverride = orientationOverride, 587 + mirroredOverride = mirroredOverride, 588 + ) 589 + val exifForPose = captureConnection.poseExifOrientation( 590 + orientationOverride = orientationOverride, 591 + mirroredOverride = mirroredOverride, 592 + ) 593 + // Both mappings agree on which orientations swap dims 594 + // (Portrait/PortraitUpsideDown → Left/Right EXIF, landscape → Up/Down), 595 + // so orientedSize is identical under either. Derive from exifForObjects 596 + // for direct continuity with the HEAD (pre-MLKit) object code path. 597 + val orientedSize = orientedFrameSize(rawWidth, rawHeight, exifForObjects) 514 598 515 599 memScoped { 516 600 val errorPtr = alloc<ObjCObjectVar<NSError?>>() ··· 532 616 observation.boundingBox.toOrientedPixelRect( 533 617 rawWidth = rawWidth, 534 618 rawHeight = rawHeight, 535 - exifOrientation = exifOrientation 619 + exifOrientation = exifForObjects 536 620 ) 537 621 val labels = observation.labels.mapNotNull { 538 622 (it as VNClassificationObservation).let { ca -> ··· 573 657 } 574 658 } else null 575 659 660 + // VNImageRequestHandler hosts the object (Vision) request and 661 + // — on simulator only — the Vision pose fallback. Use the 662 + // object EXIF here; on device the pose fallback never fires 663 + // (MLKit handles it below with exifForPose). 576 664 val options: Map<Any?, Any?> = mapOf( 577 - VN_IMAGE_OPTION_CG_IMAGE_PROPERTY_ORIENTATION to exifOrientation 665 + VN_IMAGE_OPTION_CG_IMAGE_PROPERTY_ORIENTATION to exifForObjects 578 666 ) 579 667 580 - val requestForSkeleton = if (detectMode.doPose()) { 668 + // CROP / CROP_FOLLOW: pick the effective crop rect in 669 + // oriented-top-left normalized space. 670 + val currentFocus = focusArea 671 + val currentMode = poseFocusMode 672 + val effectiveFocus: Rect? 
= when { 673 + currentFocus == null -> null 674 + currentMode == PoseFocusMode.CROP_FOLLOW -> 675 + followCropState.current(timestamp) ?: currentFocus 676 + currentMode == PoseFocusMode.CROP -> currentFocus 677 + else -> null 678 + } 679 + val usePoseCrop = effectiveFocus != null && detectMode.doPose() && 680 + (currentMode == PoseFocusMode.CROP || 681 + currentMode == PoseFocusMode.CROP_FOLLOW) 682 + val cropLeftPx: Float 683 + val cropTopPx: Float 684 + val cropWPx: Float 685 + val cropHPx: Float 686 + if (usePoseCrop && effectiveFocus != null) { 687 + cropLeftPx = (effectiveFocus.left * orientedSize.width) 688 + .coerceIn(0f, orientedSize.width) 689 + cropTopPx = (effectiveFocus.top * orientedSize.height) 690 + .coerceIn(0f, orientedSize.height) 691 + cropWPx = (effectiveFocus.width * orientedSize.width) 692 + .coerceIn(1f, orientedSize.width - cropLeftPx) 693 + cropHPx = (effectiveFocus.height * orientedSize.height) 694 + .coerceIn(1f, orientedSize.height - cropTopPx) 695 + } else { 696 + cropLeftPx = 0f; cropTopPx = 0f 697 + cropWPx = orientedSize.width; cropHPx = orientedSize.height 698 + } 699 + 700 + // Try MLKit first (iosArm64 only — sim targets stub out). 701 + // Whenever MLKit is available, Vision's pose request is skipped — even if MLKit finds no skeleton. 702 + var mlkitSkeleton: Skeleton? 
= null 703 + if (detectMode.doPose() && mlkitPose.isAvailable()) { 704 + mlkitSkeleton = mlkitPose.detect( 705 + buffer = buffer, 706 + exifOrientation = exifForPose, 707 + orientedW = orientedSize.width, 708 + orientedH = orientedSize.height, 709 + useCrop = usePoseCrop, 710 + cropLeftPx = cropLeftPx, 711 + cropTopPx = cropTopPx, 712 + cropWPx = cropWPx, 713 + cropHPx = cropHPx, 714 + timestamp = timestamp, 715 + ) 716 + } 717 + 718 + val mlkitHandled = detectMode.doPose() && mlkitPose.isAvailable() 719 + 720 + val requestForSkeleton = if (detectMode.doPose() && !mlkitHandled) { 581 721 VNDetectHumanBodyPoseRequest { request, error -> 582 722 if (error != null) { 583 723 onSkeletonProcessed(null) ··· 591 731 leftShoulder = pointsMap[VNHumanBodyPoseObservationJointNameLeftShoulder]?.location?.toOrientedPixelPoint( 592 732 rawWidth, 593 733 rawHeight, 594 - exifOrientation 734 + exifForObjects 595 735 ), 596 736 rightShoulder = pointsMap[VNHumanBodyPoseObservationJointNameRightShoulder]?.location?.toOrientedPixelPoint( 597 737 rawWidth, 598 738 rawHeight, 599 - exifOrientation 739 + exifForObjects 600 740 ), 601 741 leftElbow = pointsMap[VNHumanBodyPoseObservationJointNameLeftElbow]?.location?.toOrientedPixelPoint( 602 742 rawWidth, 603 743 rawHeight, 604 - exifOrientation 744 + exifForObjects 605 745 ), 606 746 rightElbow = pointsMap[VNHumanBodyPoseObservationJointNameRightElbow]?.location?.toOrientedPixelPoint( 607 747 rawWidth, 608 748 rawHeight, 609 - exifOrientation 749 + exifForObjects 610 750 ), 611 751 leftWrist = pointsMap[VNHumanBodyPoseObservationJointNameLeftWrist]?.location?.toOrientedPixelPoint( 612 752 rawWidth, 613 753 rawHeight, 614 - exifOrientation 754 + exifForObjects 615 755 ), 616 756 rightWrist = pointsMap[VNHumanBodyPoseObservationJointNameRightWrist]?.location?.toOrientedPixelPoint( 617 757 rawWidth, 618 758 rawHeight, 619 - exifOrientation 759 + exifForObjects 620 760 ), 621 761 leftHip = 
pointsMap[VNHumanBodyPoseObservationJointNameLeftHip]?.location?.toOrientedPixelPoint( 622 762 rawWidth, 623 763 rawHeight, 624 - exifOrientation 764 + exifForObjects 625 765 ), 626 766 rightHip = pointsMap[VNHumanBodyPoseObservationJointNameRightHip]?.location?.toOrientedPixelPoint( 627 767 rawWidth, 628 768 rawHeight, 629 - exifOrientation 769 + exifForObjects 630 770 ), 631 771 leftKnee = pointsMap[VNHumanBodyPoseObservationJointNameLeftKnee]?.location?.toOrientedPixelPoint( 632 772 rawWidth, 633 773 rawHeight, 634 - exifOrientation 774 + exifForObjects 635 775 ), 636 776 rightKnee = pointsMap[VNHumanBodyPoseObservationJointNameRightKnee]?.location?.toOrientedPixelPoint( 637 777 rawWidth, 638 778 rawHeight, 639 - exifOrientation 779 + exifForObjects 640 780 ), 641 781 leftAnkle = pointsMap[VNHumanBodyPoseObservationJointNameLeftAnkle]?.location?.toOrientedPixelPoint( 642 782 rawWidth, 643 783 rawHeight, 644 - exifOrientation 784 + exifForObjects 645 785 ), 646 786 rightAnkle = pointsMap[VNHumanBodyPoseObservationJointNameRightAnkle]?.location?.toOrientedPixelPoint( 647 787 rawWidth, 648 788 rawHeight, 649 - exifOrientation 789 + exifForObjects 650 790 ), 651 791 height = orientedSize.height, 652 792 width = orientedSize.width ··· 664 804 )?.location?.toOrientedPixelPoint( 665 805 rawWidth, 666 806 rawHeight, 667 - exifOrientation 807 + exifForObjects 668 808 ), 669 809 rightShoulder = recognizedPoints?.get( 670 810 VNHumanBodyPoseObservationJointNameRightShoulder 671 811 )?.location?.toOrientedPixelPoint( 672 812 rawWidth, 673 813 rawHeight, 674 - exifOrientation 814 + exifForObjects 675 815 ), 676 816 leftElbow = recognizedPoints?.get( 677 817 VNHumanBodyPoseObservationJointNameLeftElbow 678 818 )?.location?.toOrientedPixelPoint( 679 819 rawWidth, 680 820 rawHeight, 681 - exifOrientation 821 + exifForObjects 682 822 ), 683 823 rightElbow = recognizedPoints?.get( 684 824 VNHumanBodyPoseObservationJointNameRightElbow 685 825 )?.location?.toOrientedPixelPoint( 686 
826 rawWidth, 687 827 rawHeight, 688 - exifOrientation 828 + exifForObjects 689 829 ), 690 830 leftWrist = recognizedPoints?.get( 691 831 VNHumanBodyPoseObservationJointNameLeftWrist 692 832 )?.location?.toOrientedPixelPoint( 693 833 rawWidth, 694 834 rawHeight, 695 - exifOrientation 835 + exifForObjects 696 836 ), 697 837 rightWrist = recognizedPoints?.get( 698 838 VNHumanBodyPoseObservationJointNameRightWrist 699 839 )?.location?.toOrientedPixelPoint( 700 840 rawWidth, 701 841 rawHeight, 702 - exifOrientation 842 + exifForObjects 703 843 ), 704 844 leftHip = recognizedPoints?.get( 705 845 VNHumanBodyPoseObservationJointNameLeftHip 706 846 )?.location?.toOrientedPixelPoint( 707 847 rawWidth, 708 848 rawHeight, 709 - exifOrientation 849 + exifForObjects 710 850 ), 711 851 rightHip = recognizedPoints?.get( 712 852 VNHumanBodyPoseObservationJointNameRightHip 713 853 )?.location?.toOrientedPixelPoint( 714 854 rawWidth, 715 855 rawHeight, 716 - exifOrientation 856 + exifForObjects 717 857 ), 718 858 leftKnee = recognizedPoints?.get( 719 859 VNHumanBodyPoseObservationJointNameLeftKnee 720 860 )?.location?.toOrientedPixelPoint( 721 861 rawWidth, 722 862 rawHeight, 723 - exifOrientation 863 + exifForObjects 724 864 ), 725 865 rightKnee = recognizedPoints?.get( 726 866 VNHumanBodyPoseObservationJointNameRightKnee 727 867 )?.location?.toOrientedPixelPoint( 728 868 rawWidth, 729 869 rawHeight, 730 - exifOrientation 870 + exifForObjects 731 871 ), 732 872 leftAnkle = recognizedPoints?.get( 733 873 VNHumanBodyPoseObservationJointNameLeftAnkle 734 874 )?.location?.toOrientedPixelPoint( 735 875 rawWidth, 736 876 rawHeight, 737 - exifOrientation 877 + exifForObjects 738 878 ), 739 879 rightAnkle = recognizedPoints?.get( 740 880 VNHumanBodyPoseObservationJointNameRightAnkle 741 881 )?.location?.toOrientedPixelPoint( 742 882 rawWidth, 743 883 rawHeight, 744 - exifOrientation 884 + exifForObjects 745 885 ), 746 886 height = orientedSize.height, 747 887 width = orientedSize.width 748 
888 ) 749 889 750 - onSkeletonProcessed(skelBuffer.smooth(updatedSkeleton)) 890 + // The Vision skeleton is post-processed and emitted by finalizeSkeletonVisionPath. 891 + finalizeSkeletonVisionPath( 892 + raw = updatedSkeleton, 893 + currentFocus = currentFocus, 894 + currentMode = currentMode, 895 + usePoseCrop = usePoseCrop, 896 + orientedW = orientedSize.width, 897 + orientedH = orientedSize.height, 898 + timestamp = timestamp, 899 + onSkeletonProcessed = onSkeletonProcessed, 900 + ) 751 901 } 752 902 } 753 903 } 754 904 } else null 755 905 756 906 requestForSkeleton?.regionOfInterest = regionOfInterest 757 - val handler = VNImageRequestHandler(buffer, options) 907 + // Use the init that takes orientation as an explicit 908 + // parameter — passing EXIF via the options dictionary 909 + // (VNImageOptionCGImagePropertyOrientation key) was silently 910 + // ignored by Vision, leaving it to process raw landscape 911 + // pixels and return coords in that frame regardless of the 912 + // phone's actual orientation. 913 + val handler = VNImageRequestHandler( 914 + buffer, 915 + exifForObjects.toUInt(), 916 + emptyMap<Any?, Any?>() 917 + ) 758 918 handler.performRequests( 759 919 listOfNotNull(requestForObjects, requestForSkeleton), 760 920 errorPtr.ptr 761 921 ) 922 + 923 + // If MLKit handled pose, emit its result (with post-processing) here. 924 + // When MLKit is unavailable, the Vision callback already emitted above; a null MLKit result is still finalized here. 
925 + if (mlkitHandled) { 926 + finalizeSkeletonVisionPath( 927 + raw = mlkitSkeleton, 928 + currentFocus = currentFocus, 929 + currentMode = currentMode, 930 + usePoseCrop = usePoseCrop, 931 + orientedW = orientedSize.width, 932 + orientedH = orientedSize.height, 933 + timestamp = timestamp, 934 + onSkeletonProcessed = onSkeletonProcessed, 935 + ) 936 + } 762 937 763 938 CFRelease(retainedBuffer) 764 939 ··· 869 1044 val colStride = if (strides.size == 3) (strides[2] as NSNumber).intValue else 1 870 1045 fun at(i: Int, j: Int): Float = dataPtr[i * rowStride + j * colStride] 871 1046 1047 + // Ultralytics end2end CoreML export emits pixel-space coordinates over 1048 + // the model input. Vision's VNImageCropAndScaleOptionScaleFit letterboxes 1049 + // the source into the model input, preserving aspect ratio. When source 1050 + // aspect != model input aspect, the letterbox adds padding that we must 1051 + // undo — otherwise boxes get pulled toward the image center by the 1052 + // fraction of padding. Specifically: portrait source (3:4) into landscape 1053 + // model input (4:3) adds horizontal black bars, and a simple 1054 + // `(mx/modelInputW)*orientedW` produces x coords shifted toward center. 1055 + val mW = modelInputW.toFloat() 1056 + val mH = modelInputH.toFloat() 1057 + val scale = minOf(mW / orientedW, mH / orientedH) 1058 + val scaledW = orientedW * scale 1059 + val scaledH = orientedH * scale 1060 + val padX = (mW - scaledW) / 2f 1061 + val padY = (mH - scaledH) / 2f 1062 + fun modelXToScene(mx: Float): Float = ((mx - padX) / scale).coerceIn(0f, orientedW) 1063 + fun modelYToScene(my: Float): Float = ((my - padY) / scale).coerceIn(0f, orientedH) 1064 + 872 1065 for (i in 0 until dim1) { 873 1066 val conf = at(i, 4) 874 1067 if (conf <= 0.25f) continue ··· 877 1070 val x2n = at(i, 2) 878 1071 val y2n = at(i, 3) 879 1072 val cls = at(i, 5).toInt() 880 - // Ultralytics end2end CoreML export emits pixel-space coordinates 881 - // over the model input. 
Normalize by dividing by the model input 882 - // dimensions (passed in from ObjectModel.inputWidth/Height) before 883 - // mapping to oriented source pixel space. 884 - val mW = modelInputW.toFloat() 885 - val mH = modelInputH.toFloat() 886 - val leftPx = (min(x1n, x2n) / mW) * orientedW 887 - val rightPx = (max(x1n, x2n) / mW) * orientedW 888 - val topPx = (min(y1n, y2n) / mH) * orientedH 889 - val bottomPx = (max(y1n, y2n) / mH) * orientedH 1073 + val leftPx = modelXToScene(min(x1n, x2n)) 1074 + val rightPx = modelXToScene(max(x1n, x2n)) 1075 + val topPx = modelYToScene(min(y1n, y2n)) 1076 + val bottomPx = modelYToScene(max(y1n, y2n)) 890 1077 val label = RAW_CLASS_NAMES.getOrNull(cls) ?: "class_$cls" 891 1078 out.add( 892 1079 AnalysisObject(
+31
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/MlKitPose.kt
package com.performancecoachlab.posedetection.camera

import com.performancecoachlab.posedetection.skeleton.Skeleton
import kotlinx.cinterop.ExperimentalForeignApi
import platform.CoreVideo.CVImageBufferRef

/**
 * Platform-specific adapter around MLKit pose detection.
 *
 * MLKit static archives ship for iosArm64 only (embedded into the library's
 * cinterop klib). The simulator targets provide a stub whose [isAvailable]
 * returns `false`, which makes FrameProcessor fall back to Apple Vision.
 *
 * Landmarks are reported in **oriented full-frame pixel space with a top-left
 * origin**, whether or not a crop was supplied — remapping out of the crop is
 * the adapter's responsibility.
 */
@OptIn(ExperimentalForeignApi::class)
internal expect class MlKitPose() {

    /**
     * Whether MLKit pose detection can be used on this target.
     * The simulator stubs always answer `false`.
     */
    fun isAvailable(): Boolean

    /**
     * Runs pose detection on a single camera frame.
     *
     * @param buffer pixel buffer of the frame to analyse.
     * @param exifOrientation EXIF orientation value for [buffer]
     *   (FrameProcessor passes its pose-path EXIF here).
     * @param orientedW width of the frame after orientation is applied, in pixels.
     * @param orientedH height of the frame after orientation is applied, in pixels.
     * @param useCrop whether the crop rectangle below should be applied.
     * @param cropLeftPx left edge of the crop, in oriented pixels.
     * @param cropTopPx top edge of the crop, in oriented pixels.
     * @param cropWPx crop width, in oriented pixels.
     * @param cropHPx crop height, in oriented pixels.
     * @param timestamp timestamp of the frame as supplied by the caller.
     * @return the detected skeleton, or `null` when none is produced
     *   (the simulator stubs always return `null`).
     */
    fun detect(
        buffer: CVImageBufferRef,
        exifOrientation: Int,
        orientedW: Float,
        orientedH: Float,
        useCrop: Boolean,
        cropLeftPx: Float,
        cropTopPx: Float,
        cropWPx: Float,
        cropHPx: Float,
        timestamp: Long,
    ): Skeleton?
}
+26
posedetection/src/iosSimulatorArm64Main/kotlin/com/performancecoachlab/posedetection/camera/MlKitPose.kt
package com.performancecoachlab.posedetection.camera

import com.performancecoachlab.posedetection.skeleton.Skeleton
import kotlinx.cinterop.ExperimentalForeignApi
import platform.CoreVideo.CVImageBufferRef

/**
 * Simulator (arm64) stand-in for the MLKit pose adapter.
 *
 * No MLKit binaries are shipped for simulator targets (upstream provides no
 * arm64-simulator slice), so this implementation reports itself unavailable
 * and never produces a skeleton. FrameProcessor sees `isAvailable() == false`
 * and falls back to Apple Vision.
 */
@OptIn(ExperimentalForeignApi::class)
internal actual class MlKitPose actual constructor() {

    /** Always `false`: MLKit is not linked into simulator builds. */
    actual fun isAvailable(): Boolean {
        return false
    }

    /** Never detects anything; every argument is ignored and `null` is returned. */
    actual fun detect(
        buffer: CVImageBufferRef,
        exifOrientation: Int,
        orientedW: Float,
        orientedH: Float,
        useCrop: Boolean,
        cropLeftPx: Float,
        cropTopPx: Float,
        cropWPx: Float,
        cropHPx: Float,
        timestamp: Long,
    ): Skeleton? {
        return null
    }
}
+24
posedetection/src/iosX64Main/kotlin/com/performancecoachlab/posedetection/camera/MlKitPose.kt
package com.performancecoachlab.posedetection.camera

import com.performancecoachlab.posedetection.skeleton.Skeleton
import kotlinx.cinterop.ExperimentalForeignApi
import platform.CoreVideo.CVImageBufferRef

/**
 * Simulator (x86_64) stand-in for the MLKit pose adapter.
 *
 * Simulator targets don't ship MLKit binaries, so this implementation reports
 * itself unavailable and never produces a skeleton.
 * See iosSimulatorArm64Main/MlKitPose.kt.
 */
@OptIn(ExperimentalForeignApi::class)
internal actual class MlKitPose actual constructor() {

    /** Always `false`: MLKit is not linked into simulator builds. */
    actual fun isAvailable(): Boolean {
        return false
    }

    /** Never detects anything; every argument is ignored and `null` is returned. */
    actual fun detect(
        buffer: CVImageBufferRef,
        exifOrientation: Int,
        orientedW: Float,
        orientedH: Float,
        useCrop: Boolean,
        cropLeftPx: Float,
        cropTopPx: Float,
        cropWPx: Float,
        cropHPx: Float,
        timestamp: Long,
    ): Skeleton? {
        return null
    }
}
+146
posedetection/tools/sync-mlkit.sh
#!/usr/bin/env bash
# Produces the MLKit pose-detection static archives that cinterop embeds into
# the library's klibs. For every requested Kotlin/Native iOS target the
# archives are staged at build/mlkit-archives/<target>/lib<Pod>.a.
#
# Required env: MLKIT_STAGING_DIR, MLKIT_ARCHIVES_DIR
# Optional env: MLKIT_TARGETS (space-separated; default "ios_arm64 ios_simulator_arm64 ios_x64")
#
# Kept free of bash-4-only features (no associative arrays) so it runs under
# whichever bash `env` resolves to.

set -euo pipefail

STAGING="${MLKIT_STAGING_DIR:?MLKIT_STAGING_DIR required}"
ARCHIVES="${MLKIT_ARCHIVES_DIR:?MLKIT_ARCHIVES_DIR required}"
TARGETS="${MLKIT_TARGETS:-ios_arm64 ios_simulator_arm64 ios_x64}"

MLKIT_VERSION="1.0.0-beta16"
IOS_DEPLOYMENT_TARGET="16.2"

# Pods whose prebuilt binaries are embedded as-is. Order matters for link-time
# resolution: dependents are listed before their dependencies.
VENDORED_PODS=(
  MLKitPoseDetectionAccurate
  MLKitPoseDetectionCommon
  MLKitVision
  MLKitCommon
  MLImage
  MLKitXenoCommon
)

# Pods compiled from source. The framework binary inside the build dir is not
# always named after the pod, so each entry is "<pod-name>:<output-framework-name>".
SOURCE_PODS=(
  "GTMSessionFetcher:GTMSessionFetcher"
  "GoogleDataTransport:GoogleDataTransport"
  "GoogleToolboxForMac:GoogleToolboxForMac"
  "GoogleUtilities:GoogleUtilities"
  "PromisesObjC:FBLPromises"
  "nanopb:nanopb"
)

# Succeeds only when a previous `pod install` exists and was produced from a
# Podfile identical to the one just written.
pods_are_current() {
  [[ -d Pods && -f Podfile.lock ]] && diff -q Podfile Podfile.lock.input 2>/dev/null
}

# Copies the requested architecture out of a Mach-O file.
#   $1 = input (fat or single-arch Mach-O)
#   $2 = desired arch (arm64, x86_64)
#   $3 = output path
# Returns non-zero (after printing a "skip" note) when the arch is not present.
extract_slice() {
  local src="$1" want="$2" dest="$3" have
  mkdir -p "$(dirname "$dest")"

  # `lipo -thin` refuses already-thin inputs, so fat files are handled first
  # and thin files fall through to a plain copy.
  if ! lipo -info "$src" 2>&1 | grep -q "Non-fat"; then
    if ! lipo -thin "$want" "$src" -output "$dest" 2>/dev/null; then
      echo " skip: no $want slice in $src"
      return 1
    fi
    return 0
  fi

  have=$(lipo -info "$src" | sed -E 's/.*architecture: //')
  if [ "$have" != "$want" ]; then
    echo " skip: $src is $have, need $want"
    return 1
  fi
  cp "$src" "$dest"
}

# Compiles every source pod for one sdk/arch pair. Vendored pods need no build.
#   $1 = sdk, $2 = arch
build_source_pods() {
  local sdk="$1" arch="$2" spec
  local -a build_args
  for spec in "${SOURCE_PODS[@]}"; do
    build_args=(
      -project Pods/Pods.xcodeproj
      -target "${spec%%:*}"
      -configuration Release
      -sdk "$sdk"
      -arch "$arch"
      ONLY_ACTIVE_ARCH=NO
      BUILD_LIBRARY_FOR_DISTRIBUTION=NO
      build
    )
    xcodebuild "${build_args[@]}" 2>&1 | tail -3
  done
}

# Pulls the matching slice out of each vendored pod's fat .framework binary.
#   $1 = arch, $2 = destination directory
stage_vendored_pods() {
  local arch="$1" dest_dir="$2" pod binary
  for pod in "${VENDORED_PODS[@]}"; do
    binary="Pods/$pod/Frameworks/$pod.framework/$pod"
    if [[ ! -f "$binary" ]]; then
      echo " miss: vendored $pod at $binary"
      continue
    fi
    extract_slice "$binary" "$arch" "$dest_dir/lib${pod}.a" || true
  done
}

# Collects the single-arch output of each source pod from
# build/Release-<sdk>/<pod>/<fw>.framework/<fw>.
#   $1 = sdk, $2 = arch, $3 = destination directory
stage_built_pods() {
  local sdk="$1" arch="$2" dest_dir="$3" spec pod_name fw_name product
  for spec in "${SOURCE_PODS[@]}"; do
    pod_name="${spec%%:*}"
    fw_name="${spec##*:}"
    product="build/Release-$sdk/$pod_name/$fw_name.framework/$fw_name"
    if [[ ! -f "$product" ]]; then
      echo " miss: built $pod_name at $product"
      continue
    fi
    extract_slice "$product" "$arch" "$dest_dir/lib${fw_name}.a" || true
  done
}

# Builds and stages the archives for one Kotlin target name.
#   $1 = Kotlin target (ios_arm64, ios_simulator_arm64, ios_x64)
process_target() {
  local target="$1" sdk arch out_dir
  case "$target" in
    ios_arm64)           sdk="iphoneos";        arch="arm64"  ;;
    ios_simulator_arm64) sdk="iphonesimulator"; arch="arm64"  ;;
    ios_x64)             sdk="iphonesimulator"; arch="x86_64" ;;
    *)
      echo "skipping unknown target $target"
      return 0
      ;;
  esac

  echo "==> Building source pods for $target ($sdk $arch)"
  build_source_pods "$sdk" "$arch"

  # Start from an empty directory so stale archives never survive a re-run.
  out_dir="$ARCHIVES/$target"
  rm -rf "$out_dir"
  mkdir -p "$out_dir"

  echo "==> Extracting $target archives to $out_dir"
  stage_vendored_pods "$arch" "$out_dir"
  stage_built_pods "$sdk" "$arch" "$out_dir"

  echo "==> $target: $(ls "$out_dir" | wc -l | tr -d ' ') archives"
}

mkdir -p "$STAGING"
mkdir -p "$ARCHIVES"
cd "$STAGING"

# The Podfile is regenerated on every run so version changes always propagate.
cat > Podfile <<EOF
platform :ios, '${IOS_DEPLOYMENT_TARGET}'
use_frameworks! :linkage => :static

install! 'cocoapods', :integrate_targets => false, :deterministic_uuids => false

target 'MlkitSync' do
  pod 'MLKitPoseDetectionAccurate', '${MLKIT_VERSION}'
end
EOF

echo "==> pod install in $STAGING"
if ! pods_are_current; then
  pod install --no-repo-update
  cp Podfile Podfile.lock.input
fi

# TARGETS is intentionally unquoted: it is a space-separated list.
for target in $TARGETS; do
  process_target "$target"
done

echo "==> Done. Archives at $ARCHIVES"
+10 -3
sample/composeApp/build.gradle.kts
··· 18 18 iosX64(), 19 19 iosArm64(), 20 20 iosSimulatorArm64() 21 - ).forEach { 22 - it.binaries.framework { 21 + ).forEach { target -> 22 + target.binaries.framework { 23 23 baseName = "ComposeApp" 24 - isStatic = true 24 + // Dynamic framework so MLKit's ObjC class / category metadata is 25 + // preserved in a dylib's __objc_classlist / __objc_catlist 26 + // sections (static archives strip category .o files during the 27 + // framework repack, causing 'unrecognized selector' at runtime). 28 + isStatic = false 29 + if (target.name == "iosArm64") { 30 + linkerOpts += "-ObjC" 31 + } 25 32 } 26 33 } 27 34
sample/composeApp/src/androidMain/assets/dataset_v13_20260417_011448__yolo26n_v13_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_20260417_011448__yolo26n_v13_rect_384x288_fp32.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_20260417_011448__yolo26n_v13_rect_640x480_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_20260417_011448__yolo26n_v13_rect_640x480_fp32.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_20260417_011448__yolo26n_v13_rect_960x736_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_20260417_011448__yolo26n_v13_rect_960x736_fp32.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_baseline_20260418_014221__yolo26n_v13_baseline_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_baseline_int8_20260418_185621__yolo26n_v13_baseline_int8_rect_384x288.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_loss_heavy_20260418_014533__yolo26n_v13_loss_heavy_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_loss_recall_20260418_094455__yolo26n_v13_loss_recall_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_scale_high_20260418_014357__yolo26n_v13_scale_high_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_softloss_mixup_20260418_171706__yolo26n_v13_softloss_mixup_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_softloss_mixup_20260418_171731__yolo26n_v13_softloss_mixup_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_softloss_multiscale_20260418_142742__yolo26n_v13_softloss_multiscale_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_softloss_multiscale_confirm_20260418_162958__yolo26n_v13_softloss_multiscale_confirm_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_softloss_multiscale_confirm_20260418_170929__yolo26n_v13_softloss_multiscale_confirm_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/dataset_v13_translate_high_20260418_014710__yolo26n_v13_translate_high_rect_384x288_fp16.tflite

This is a binary file and will not be displayed.

sample/composeApp/src/androidMain/assets/yolo26n_v11_rect_384x288_fp16_20260411_075952__best_float16.tflite

This is a binary file and will not be displayed.

+3
sample/composeApp/src/androidMain/kotlin/com/nate/posedetection/ScreenshotHelper.android.kt
package com.nate.posedetection

/**
 * Android implementation of the test-harness screenshot hook.
 *
 * Takes no screenshot and always reports that nothing was saved.
 *
 * @param filename ignored on this platform.
 * @return always `null`.
 */
actual fun saveAppScreenshot(filename: String): String? {
    return null
}
+34 -1
sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt
··· 420 420 var poseFocusMode by remember { mutableStateOf(PoseFocusMode.MASK) } 421 421 val controller = remember { CameraViewControllerImpl() } 422 422 423 + // TEST HARNESS: cycle MASK → CROP → CROP_FOLLOW once, save a camera- 424 + // framebuffer + skeleton composite PNG per mode to app Documents/. Host 425 + // pulls the PNGs via `devicectl device copy from` and feeds them to a 426 + // visual-alignment check. 427 + LaunchedEffect(Unit) { 428 + val modes = listOf(PoseFocusMode.MASK, PoseFocusMode.CROP, PoseFocusMode.CROP_FOLLOW) 429 + // Initial wait for camera preview + MLKit detector warmup. 430 + kotlinx.coroutines.delay(4000) 431 + for (mode in modes) { 432 + poseFocusMode = mode 433 + // Give the detector a few frames to stabilize in the new mode. 434 + kotlinx.coroutines.delay(3000) 435 + controller.captureComposite("ab_${mode.name}.png") { _ -> } 436 + } 437 + } 438 + 423 439 // Experiment Mode state — buffers per-detection events into a JSON log 424 440 // that the Mac orchestrator pulls back for the model comparison report. 425 441 val experimentLogger = rememberExperimentLogger() ··· 604 620 }, 605 621 objectModel = generalModel, 606 622 modifier = Modifier.weight(1f), 607 - focusArea = Rect(0f, 0f, 0.5f, 1f), 623 + focusArea = Rect(0.1f, 0.1f, 0.9f, 0.9f), 608 624 poseFocusMode = poseFocusMode, 609 625 frontCamera = frontCamera, 610 626 useUltraWide = ultrawide, ··· 614 630 onVideoSaved = { id, url -> path = url }, 615 631 ) 616 632 } 633 + } 634 + // TEST HARNESS: mode badge (top-center) so screenshots are 635 + // self-labeling for the alignment check. 636 + Box( 637 + modifier = Modifier 638 + .padding(12.dp) 639 + .align(Alignment.TopCenter) 640 + ) { 641 + Text( 642 + text = "MODE=" + when (poseFocusMode) { 643 + PoseFocusMode.MASK -> "MASK" 644 + PoseFocusMode.CROP -> "CROP" 645 + PoseFocusMode.CROP_FOLLOW -> "CROP_FOLLOW" 646 + }, 647 + color = Color.Yellow, 648 + fontSize = 18.sp, 649 + ) 617 650 } 618 651 Box( 619 652 modifier = Modifier
+6
sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/ScreenshotHelper.kt
package com.nate.posedetection

/**
 * Test-harness screenshot: grabs the current app window as a PNG and writes
 * it to the app's Documents/ directory, for the host to pull back via
 * `devicectl device copy from`.
 *
 * No-op on Android (we only screenshot iOS device runs).
 *
 * @param filename name of the PNG file to create inside Documents/.
 * @return the path of the written file, or `null` when nothing was saved.
 */
expect fun saveAppScreenshot(filename: String): String?
+39
sample/composeApp/src/iosMain/kotlin/com/nate/posedetection/ScreenshotHelper.ios.kt
package com.nate.posedetection

import kotlinx.cinterop.BetaInteropApi
import kotlinx.cinterop.ExperimentalForeignApi
import kotlinx.cinterop.useContents
import platform.CoreGraphics.CGSizeMake
import platform.Foundation.NSDocumentDirectory
import platform.Foundation.NSFileManager
import platform.Foundation.NSURL
import platform.Foundation.NSUserDomainMask
import platform.Foundation.writeToURL
import platform.UIKit.UIApplication
import platform.UIKit.UIGraphicsImageRenderer
import platform.UIKit.UIImage
import platform.UIKit.UIImagePNGRepresentation
import platform.UIKit.drawViewHierarchyInRect

/**
 * iOS implementation of the test-harness screenshot hook.
 *
 * Renders the key window's view hierarchy into an image the size of the
 * window bounds, encodes it as PNG and writes it atomically to [filename]
 * inside the app's Documents directory.
 *
 * @param filename file name (path component) to create under Documents/.
 * @return the path of the written file, or `null` when there is no key
 *   window, PNG encoding fails, the Documents directory or target URL cannot
 *   be resolved, or the write itself fails.
 */
@OptIn(ExperimentalForeignApi::class, BetaInteropApi::class)
actual fun saveAppScreenshot(filename: String): String? {
    val window = UIApplication.sharedApplication.keyWindow ?: return null
    val bounds = window.bounds
    // Read both dimensions out of the CGRect in one pass and return them from
    // the lambda, rather than assigning outer `val`s from inside it.
    val (width, height) = bounds.useContents { size.width to size.height }

    val renderer = UIGraphicsImageRenderer(size = CGSizeMake(width, height))
    val image: UIImage = renderer.imageWithActions { _ ->
        window.drawViewHierarchyInRect(bounds, afterScreenUpdates = false)
    }
    val png = UIImagePNGRepresentation(image) ?: return null

    val docsDir: NSURL = NSFileManager.defaultManager
        .URLsForDirectory(NSDocumentDirectory, NSUserDomainMask)
        .firstOrNull() as? NSURL ?: return null
    val fileUrl = docsDir.URLByAppendingPathComponent(filename) ?: return null

    // writeToURL reports failure through its Boolean result; do not hand the
    // caller a path to a file that was never written.
    if (!png.writeToURL(fileUrl, atomically = true)) return null
    return fileUrl.path
}
+29 -2
sample/iosApp/iosApp.xcodeproj/project.pbxproj
··· 88 88 A93A953329CC810C00F8E227 /* Sources */, 89 89 A93A953429CC810C00F8E227 /* Frameworks */, 90 90 A93A953529CC810C00F8E227 /* Resources */, 91 + ABCDEF0123456789ABC00003 /* Copy MLKit Resource Bundles */, 91 92 ); 92 93 buildRules = ( 93 94 ); ··· 163 164 runOnlyForDeploymentPostprocessing = 0; 164 165 shellPath = /bin/sh; 165 166 shellScript = "export JAVA_HOME=/Applications/Android\\ Studio.app/Contents/jbr/Contents/Home/\nexport JDK_HOME=/Applications/Android\\ Studio.app/Contents/jbr/Contents/\ncd \"$SRCROOT/..\"\n./gradlew :sample:composeApp:embedAndSignAppleFrameworkForXcode\n"; 167 + }; 168 + ABCDEF0123456789ABC00003 /* Copy MLKit Resource Bundles */ = { 169 + isa = PBXShellScriptBuildPhase; 170 + buildActionMask = 2147483647; 171 + name = "Copy MLKit Resource Bundles"; 172 + files = ( 173 + ); 174 + inputFileListPaths = ( 175 + ); 176 + inputPaths = ( 177 + ); 178 + outputFileListPaths = ( 179 + ); 180 + outputPaths = ( 181 + ); 182 + runOnlyForDeploymentPostprocessing = 0; 183 + shellPath = /bin/sh; 184 + shellScript = "set -e\nMLKIT_PODS=\"$SRCROOT/../../posedetection/build/mlkit-staging/Pods\"\nDEST=\"$TARGET_BUILD_DIR/$PRODUCT_NAME.app\"\ncopy_bundle() {\n # $1 = pod resource dir, $2 = bundle name\n local src=\"$1\"\n local bundle_name=\"$2\"\n if [ ! 
-d \"$src\" ]; then\n echo \"warning: MLKit resources not at $src\"\n return\n fi\n local dst=\"$DEST/$bundle_name.bundle\"\n rm -rf \"$dst\"\n mkdir -p \"$dst\"\n cp -R \"$src/\" \"$dst/\"\n # Codesign the bundle if signing is enabled\n if [ \"${CODE_SIGNING_REQUIRED:-YES}\" = \"YES\" ] && [ -n \"${EXPANDED_CODE_SIGN_IDENTITY:-}\" ]; then\n /usr/bin/codesign --force --sign \"$EXPANDED_CODE_SIGN_IDENTITY\" --preserve-metadata=identifier,entitlements --timestamp=none \"$dst\" 2>/dev/null || true\n fi\n}\n\ncopy_bundle \"$MLKIT_PODS/MLKitPoseDetectionAccurate/Resources/MLKitPoseDetectionAccurateResources\" \"MLKitPoseDetectionAccurateResources\"\ncopy_bundle \"$MLKIT_PODS/MLKitPoseDetectionCommon/Resources/MLKitPoseDetectionCommonResources\" \"MLKitPoseDetectionCommonResources\"\ncopy_bundle \"$MLKIT_PODS/MLKitXenoCommon/Frameworks/MLKitXenoCommon.framework/MLKitXenoResources.bundle\" \"MLKitXenoResources\"\n"; 166 185 }; 167 186 /* End PBXShellScriptBuildPhase section */ 168 187 ··· 302 321 CODE_SIGN_STYLE = Automatic; 303 322 CURRENT_PROJECT_VERSION = 1; 304 323 DEVELOPMENT_ASSET_PATHS = "\"iosApp/Preview Content\""; 305 - DEVELOPMENT_TEAM = FAGG2XS28P; 324 + DEVELOPMENT_TEAM = R633H24ZDK; 306 325 ENABLE_PREVIEWS = YES; 307 326 GENERATE_INFOPLIST_FILE = YES; 308 327 INFOPLIST_FILE = iosApp/Info.plist; ··· 312 331 "@executable_path/Frameworks", 313 332 ); 314 333 MARKETING_VERSION = 1.0; 334 + OTHER_LDFLAGS = ( 335 + "$(inherited)", 336 + "-ObjC", 337 + ); 315 338 PRODUCT_BUNDLE_IDENTIFIER = com.nate.posedetection.iosApp; 316 339 PRODUCT_NAME = PoseDetection; 317 340 SWIFT_EMIT_LOC_STRINGS = YES; ··· 328 351 CODE_SIGN_STYLE = Automatic; 329 352 CURRENT_PROJECT_VERSION = 1; 330 353 DEVELOPMENT_ASSET_PATHS = "\"iosApp/Preview Content\""; 331 - DEVELOPMENT_TEAM = FAGG2XS28P; 354 + DEVELOPMENT_TEAM = R633H24ZDK; 332 355 ENABLE_PREVIEWS = YES; 333 356 GENERATE_INFOPLIST_FILE = YES; 334 357 INFOPLIST_FILE = iosApp/Info.plist; ··· 338 361 "@executable_path/Frameworks", 339 
362 ); 340 363 MARKETING_VERSION = 1.0; 364 + OTHER_LDFLAGS = ( 365 + "$(inherited)", 366 + "-ObjC", 367 + ); 341 368 PRODUCT_BUNDLE_IDENTIFIER = com.nate.posedetection.iosApp; 342 369 PRODUCT_NAME = PoseDetection; 343 370 SWIFT_EMIT_LOC_STRINGS = YES;
sample/iosApp/iosApp/models/yolo11n_better_data.mlpackage/Data/com.apple.CoreML/model.mlmodel

This is a binary file and will not be displayed.

sample/iosApp/iosApp/models/yolo11n_better_data.mlpackage/Data/com.apple.CoreML/weights/weight.bin

This is a binary file and will not be displayed.

+18
sample/iosApp/iosApp/models/yolo11n_better_data.mlpackage/Manifest.json
··· 1 + { 2 + "fileFormatVersion": "1.0.0", 3 + "itemInfoEntries": { 4 + "16e6b489-696b-41c9-9753-07f4481a1efd": { 5 + "author": "com.apple.CoreML", 6 + "description": "CoreML Model Weights", 7 + "name": "weights", 8 + "path": "com.apple.CoreML/weights" 9 + }, 10 + "17c86ae4-7308-46b1-9423-7a4ce47adeba": { 11 + "author": "com.apple.CoreML", 12 + "description": "CoreML Model Specification", 13 + "name": "model.mlmodel", 14 + "path": "com.apple.CoreML/model.mlmodel" 15 + } 16 + }, 17 + "rootModelIdentifier": "17c86ae4-7308-46b1-9423-7a4ce47adeba" 18 + }
sample/iosApp/iosApp/models/yolo26n_v13_rect_384x288.mlpackage/Data/com.apple.CoreML/model.mlmodel

This is a binary file and will not be displayed.

sample/iosApp/iosApp/models/yolo26n_v13_rect_384x288.mlpackage/Data/com.apple.CoreML/weights/weight.bin

This is a binary file and will not be displayed.

+18
sample/iosApp/iosApp/models/yolo26n_v13_rect_384x288.mlpackage/Manifest.json
··· 1 + { 2 + "fileFormatVersion": "1.0.0", 3 + "itemInfoEntries": { 4 + "57b81d8b-f832-43d6-9c66-c7c960921694": { 5 + "author": "com.apple.CoreML", 6 + "description": "CoreML Model Specification", 7 + "name": "model.mlmodel", 8 + "path": "com.apple.CoreML/model.mlmodel" 9 + }, 10 + "67714b2f-a270-45ff-9231-dda13ed32954": { 11 + "author": "com.apple.CoreML", 12 + "description": "CoreML Model Weights", 13 + "name": "weights", 14 + "path": "com.apple.CoreML/weights" 15 + } 16 + }, 17 + "rootModelIdentifier": "57b81d8b-f832-43d6-9c66-c7c960921694" 18 + }
sample/iosApp/iosApp/models/yolo26n_v13_rect_640x480.mlpackage/Data/com.apple.CoreML/model.mlmodel

This is a binary file and will not be displayed.

sample/iosApp/iosApp/models/yolo26n_v13_rect_640x480.mlpackage/Data/com.apple.CoreML/weights/weight.bin

This is a binary file and will not be displayed.

+18
sample/iosApp/iosApp/models/yolo26n_v13_rect_640x480.mlpackage/Manifest.json
··· 1 + { 2 + "fileFormatVersion": "1.0.0", 3 + "itemInfoEntries": { 4 + "2fb85f7e-613f-499c-80fa-3acf12af91d5": { 5 + "author": "com.apple.CoreML", 6 + "description": "CoreML Model Specification", 7 + "name": "model.mlmodel", 8 + "path": "com.apple.CoreML/model.mlmodel" 9 + }, 10 + "a097795c-1871-4760-aef7-888eaaf4cbc4": { 11 + "author": "com.apple.CoreML", 12 + "description": "CoreML Model Weights", 13 + "name": "weights", 14 + "path": "com.apple.CoreML/weights" 15 + } 16 + }, 17 + "rootModelIdentifier": "2fb85f7e-613f-499c-80fa-3acf12af91d5" 18 + }
sample/iosApp/iosApp/models/yolo26n_v13_rect_960x736.mlpackage/Data/com.apple.CoreML/model.mlmodel

This is a binary file and will not be displayed.

sample/iosApp/iosApp/models/yolo26n_v13_rect_960x736.mlpackage/Data/com.apple.CoreML/weights/weight.bin

This is a binary file and will not be displayed.

+18
sample/iosApp/iosApp/models/yolo26n_v13_rect_960x736.mlpackage/Manifest.json
··· 1 + { 2 + "fileFormatVersion": "1.0.0", 3 + "itemInfoEntries": { 4 + "57f1cae0-6698-4051-a0c7-9ac0b9e9a321": { 5 + "author": "com.apple.CoreML", 6 + "description": "CoreML Model Weights", 7 + "name": "weights", 8 + "path": "com.apple.CoreML/weights" 9 + }, 10 + "c8eb1b96-f90f-4be1-9370-1128c3bec33c": { 11 + "author": "com.apple.CoreML", 12 + "description": "CoreML Model Specification", 13 + "name": "model.mlmodel", 14 + "path": "com.apple.CoreML/model.mlmodel" 15 + } 16 + }, 17 + "rootModelIdentifier": "c8eb1b96-f90f-4be1-9370-1128c3bec33c" 18 + }