fix: io object detection in non natural orientations · nateholland.bsky.social/PoseDetection@ccf4dc8

+21 -7

README.md

··· 1 - An realtime pose detection library for [Android](https://www.android.com/) and [Compose Multiplatform](https://www.jetbrains.com/lp/compose-multiplatform/). 2 - Android version uses CameraX and GoogleML Kit, while iOS version uses AVFoundation with VisionKit and CoreML. 1 + A realtime pose detection library for [Android](https://www.android.com/) 2 + and [Compose Multiplatform](https://www.jetbrains.com/lp/compose-multiplatform/). 3 + Android version uses CameraX and Google ML Kit, while iOS version uses AVFoundation with VisionKit 4 + and CoreML. 3 5 We also support analysing pre-recorded video files. 4 - We now also support adding custom object detection models to the library, allowing you to detect custom objects in your camera feed or video files along side body poses. 6 + We now also support adding custom object detection models to the library, allowing you to detect 7 + custom objects in your camera feed or video files alongside body poses. 5 8 6 9 ## Quick Start 7 10 8 11 Import the Compose library 9 12 10 13 ```kotlin 11 - implementation("com.performancecoachlab.posedetection:posedetection-compose:4.7.1") 14 + implementation("com.performancecoachlab.posedetection:posedetection-compose:4.7.2") 12 15 ``` 13 16 14 17 Add camera use to your android manifest ··· 30 33 ## Usage 31 34 32 35 Request camera permissions 36 + 33 37 ```kotlin 34 38 var permissionGranted by remember { mutableStateOf(false) } 35 39 PermissionProvider().apply { ··· 40 44 ``` 41 45 42 46 Create a Skeleton Repository 47 + 43 48 ```kotlin 44 49 val skeletonRepository = remember { SkeletonRepository() } 45 50 val customObjectRepository = remember { CustomObjectRespository() } 46 51 ``` 47 52 48 53 Initialise the camera feed 54 + 49 55 ```kotlin 50 56 if (permissionGranted) { 51 57 CameraView( ··· 58 64 ``` 59 65 60 66 Create a Pose to detect 67 + 61 68 ```kotlin 62 69 val upRightPose = Pose( 63 70 leftShoulder = Pose.PoseRange(0.0, 40.0), ··· 70 77 ``` 71 78 72 79 Listen for skeleton 
updates and detect specific poses 80 + 73 81 ```kotlin 74 82 val skeleton by skeletonRepository.skeletonFlow.collectAsState() 75 83 val poseDetected = skeleton?.let { ··· 79 87 80 88 Analyse pre-recorded video files. 81 89 Initialise the video extraction for Android with your application context. 90 + 82 91 ```kotlin 83 92 VideoExtractionContext.setUp(applicationContext) 84 93 ``` 94 + 85 95 Extract frames from the video and request analysis 96 + 86 97 ```kotlin 87 98 rememberCoroutineScope().launch { 88 99 try { ··· 99 110 100 111 Add a custom object detection model 101 112 Initialise the custom models for iOS and Android respectively. 102 - For android you need to add a .tflite model file to your assets folder, then set androidModelPath to the name of the model file, including the .tflite extension. 103 - For iOS you need to add a .mlmodel model file to your Xcode project, then set iosModelPath to the name of the model file without the .mlmodel extension. 113 + For Android you need to add a .tflite model file to your assets folder, then set androidModelPath to 114 + the name of the model file, including the .tflite extension. 115 + For iOS you need to add a .mlmodel model file to your Xcode project, then set iosModelPath to the 116 + name of the model file without the .mlmodel extension. 117 + 104 118 ```kotlin 105 119 val generalModel = ObjectModelProvider.get( 106 120 ModelPath( ··· 109 123 ) 110 124 ) 111 125 ``` 112 - Once this is done, you can use the model to detect objects in the camera feed or video frames. 113 126 127 + Once this is done, you can use the model to detect objects in the camera feed or video frames. 114 128 115 129 Check out the sample app for a full example of how to use the library. 116 130

+1 -3

posedetection/build.gradle.kts

··· 1 - import com.android.build.api.dsl.AaptOptions 2 - import com.android.build.api.dsl.AndroidResources 3 1 import com.vanniktech.maven.publish.SonatypeHost 4 2 import org.jetbrains.compose.ExperimentalComposeLibrary 5 3 import org.jetbrains.kotlin.gradle.plugin.KotlinSourceSetTree 6 4 7 5 mavenPublishing { 8 6 publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL) 9 - coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "4.7.1") 7 + coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "4.7.2") 10 8 11 9 pom { 12 10 name.set("Pose Detection")

+60 -52

posedetection/src/androidMain/kotlin/com.performancecoachlab/posedetection/camera/CameraView.android.kt

··· 1 1 package com.performancecoachlab.posedetection.camera 2 2 3 3 import android.graphics.Bitmap 4 - import android.graphics.Matrix 4 + import android.hardware.camera2.CameraCharacteristics 5 5 import androidx.annotation.OptIn 6 + import androidx.camera.camera2.interop.Camera2CameraInfo 7 + import androidx.camera.camera2.interop.ExperimentalCamera2Interop 8 + import androidx.camera.core.CameraInfo 9 + import androidx.camera.core.CameraSelector 6 10 import androidx.camera.core.CameraSelector.DEFAULT_BACK_CAMERA 7 11 import androidx.camera.core.CameraSelector.DEFAULT_FRONT_CAMERA 8 - import androidx.camera.core.ExperimentalGetImage 12 + import androidx.camera.core.ImageAnalysis 13 + import androidx.camera.core.Preview 14 + import androidx.camera.lifecycle.ProcessCameraProvider 9 15 import androidx.camera.view.PreviewView 10 16 import androidx.compose.foundation.Canvas 11 - import androidx.compose.foundation.Image 12 17 import androidx.compose.foundation.layout.Box 13 18 import androidx.compose.foundation.layout.fillMaxSize 14 - import androidx.compose.foundation.layout.height 15 - import androidx.compose.foundation.layout.padding 16 - import androidx.compose.foundation.layout.width 17 - import androidx.compose.material3.Surface 18 19 import androidx.compose.runtime.Composable 19 20 import androidx.compose.runtime.LaunchedEffect 20 21 import androidx.compose.runtime.getValue 21 22 import androidx.compose.runtime.mutableStateOf 22 23 import androidx.compose.runtime.remember 24 + import androidx.compose.runtime.rememberCoroutineScope 23 25 import androidx.compose.runtime.setValue 24 26 import androidx.compose.ui.Modifier 25 27 import androidx.compose.ui.draw.clipToBounds ··· 31 33 import androidx.compose.ui.graphics.ImageBitmap 32 34 import androidx.compose.ui.graphics.asAndroidBitmap 33 35 import androidx.compose.ui.graphics.asImageBitmap 34 - import androidx.compose.ui.layout.ContentScale 36 + import androidx.compose.ui.graphics.drawscope.Stroke 35 37 import 
androidx.compose.ui.platform.LocalContext 36 - import androidx.compose.ui.unit.dp 38 + import androidx.compose.ui.text.rememberTextMeasurer 37 39 import androidx.compose.ui.viewinterop.AndroidView 38 40 import androidx.lifecycle.LifecycleOwner 39 41 import androidx.lifecycle.compose.LocalLifecycleOwner 40 - import com.google.android.gms.tasks.Tasks 41 - import com.google.mlkit.vision.common.InputImage 42 + import co.touchlab.kermit.Logger 42 43 import com.google.mlkit.vision.pose.Pose 43 44 import com.google.mlkit.vision.pose.PoseDetection 44 45 import com.google.mlkit.vision.pose.PoseLandmark 45 46 import com.google.mlkit.vision.pose.defaults.PoseDetectorOptions 46 - import com.performancecoachlab.posedetection.skeleton.Skeleton 47 - import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 48 - import java.util.concurrent.Executors 49 - import java.io.File 50 - import androidx.camera.lifecycle.ProcessCameraProvider 51 - import androidx.camera.core.Preview 52 - import androidx.camera.core.ImageAnalysis 53 - import androidx.camera.core.ImageProxy 54 - import androidx.compose.runtime.DisposableEffect 55 - import androidx.compose.runtime.rememberCoroutineScope 56 - import androidx.compose.ui.graphics.drawscope.Stroke 57 - import com.google.mlkit.vision.pose.PoseDetector 58 47 import com.performancecoachlab.posedetection.custom.CustomObjectRespository 59 48 import com.performancecoachlab.posedetection.custom.ObjectModel 60 49 import com.performancecoachlab.posedetection.recording.AnalysisObject 61 - import com.performancecoachlab.posedetection.recording.AnalysisResult 50 + import com.performancecoachlab.posedetection.skeleton.Skeleton 51 + import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 62 52 import kotlinx.coroutines.launch 63 - import org.tensorflow.lite.support.image.TensorImage 64 - import android.hardware.camera2.CameraCharacteristics 65 - import androidx.camera.camera2.interop.Camera2CameraInfo 66 - import 
androidx.camera.camera2.interop.ExperimentalCamera2Interop 67 - import androidx.camera.core.CameraInfo 68 - import androidx.camera.core.CameraSelector 69 - import androidx.compose.ui.text.rememberTextMeasurer 70 - import co.touchlab.kermit.Logger 53 + import java.io.File 54 + import java.util.concurrent.Executors 71 55 import java.util.concurrent.atomic.AtomicBoolean 72 56 import java.util.concurrent.atomic.AtomicLong 73 57 ··· 103 87 // Defensive: ensure we ONLY ever return a back-facing camera. 104 88 cameraInfos.filter { candidate -> 105 89 candidate == selectedInfo && 106 - Camera2CameraInfo.from(candidate) 107 - .getCameraCharacteristic(CameraCharacteristics.LENS_FACING) == CameraCharacteristics.LENS_FACING_BACK 90 + Camera2CameraInfo.from(candidate) 91 + .getCameraCharacteristic(CameraCharacteristics.LENS_FACING) == CameraCharacteristics.LENS_FACING_BACK 108 92 } 109 93 } 110 94 .build() ··· 179 163 180 164 // Restored helpers (were accidentally removed during refactor) 181 165 suspend fun startRecording(id: String, width: Int, height: Int) { 182 - val outputPath = File(context.cacheDir, "camera_recording_${id}_${System.currentTimeMillis()}.mp4").absolutePath 166 + val outputPath = File( 167 + context.cacheDir, 168 + "camera_recording_${id}_${System.currentTimeMillis()}.mp4" 169 + ).absolutePath 183 170 val slot = RecordingSlot( 184 171 builder = null, // Lazily created on first frame 185 172 firstTimestampMs = null, ··· 210 197 try { 211 198 // Lazily create builder on first frame to lock width/height 212 199 if (slot.builder == null) { 213 - slot.builder = com.performancecoachlab.posedetection.encoding.createVideoBuilder( 214 - outputPath = slot.outputPath, 215 - fps = 30, 216 - width = frame.width, 217 - height = frame.height 218 - ) 200 + slot.builder = 201 + com.performancecoachlab.posedetection.encoding.createVideoBuilder( 202 + outputPath = slot.outputPath, 203 + fps = 30, 204 + width = frame.width, 205 + height = frame.height 206 + ) 219 207 
slot.firstTimestampMs = timestampMs 220 208 } 221 209 222 210 slot.builder?.let { builder -> 223 - val relativeTimestamp = timestampMs - (slot.firstTimestampMs ?: timestampMs) 211 + val relativeTimestamp = 212 + timestampMs - (slot.firstTimestampMs ?: timestampMs) 224 213 builder.addFrame(frame, relativeTimestamp) 225 214 } 226 215 } catch (e: Exception) { ··· 247 236 startRecording(id, size.width.toInt(), size.height.toInt()) 248 237 } else { 249 238 // If no frame yet, create empty slot that will be initialized on first frame 250 - val outputPath = File(context.cacheDir, "camera_recording_${id}_${System.currentTimeMillis()}.mp4").absolutePath 239 + val outputPath = File( 240 + context.cacheDir, 241 + "camera_recording_${id}_${System.currentTimeMillis()}.mp4" 242 + ).absolutePath 251 243 val slot = RecordingSlot( 252 244 builder = null, 253 245 firstTimestampMs = null, ··· 296 288 val area = focus 297 289 298 290 val shouldRunObject = currentDetectMode.doObject() && 299 - (now - lastObjectRunAtMs.get() >= objectIntervalMs) 291 + (now - lastObjectRunAtMs.get() >= objectIntervalMs) 300 292 val shouldRunPose = currentDetectMode.doPose() && 301 - (now - lastPoseRunAtMs.get() >= poseIntervalMs) 293 + (now - lastPoseRunAtMs.get() >= poseIntervalMs) 302 294 303 295 // If neither detector is scheduled to run, just close quickly and reuse last results. 304 296 if (!shouldRunObject && !shouldRunPose) { ··· 315 307 val objectClient = if (shouldRunObject) objectDetector?.getDetector() else null 316 308 val rotationDegrees = imageProxy.imageInfo.rotationDegrees 317 309 318 - imageProxy.process(objectClient, poseClient, now, area) { analysisResult, frameBitmap -> 310 + imageProxy.process( 311 + objectClient, 312 + poseClient, 313 + now, 314 + area 315 + ) { analysisResult, frameBitmap -> 319 316 try { 320 317 // Only update repositories/results for detectors that actually ran. 
321 318 if (shouldRunObject) { ··· 441 438 color = d.colour, 442 439 topLeft = Offset(left, top), 443 440 size = Size(w, h), 444 - style = if (d.style is androidx.compose.ui.graphics.drawscope.Fill) androidx.compose.ui.graphics.drawscope.Fill else Stroke((d.style as? Stroke)?.width ?: 3f) 441 + style = if (d.style is androidx.compose.ui.graphics.drawscope.Fill) androidx.compose.ui.graphics.drawscope.Fill else Stroke( 442 + (d.style as? Stroke)?.width ?: 3f 443 + ) 445 444 ) 446 445 447 446 DrawableShape.OVAL -> drawOval( 448 447 color = d.colour, 449 448 topLeft = Offset(left, top), 450 449 size = Size(w, h), 451 - style = if (d.style is androidx.compose.ui.graphics.drawscope.Fill) androidx.compose.ui.graphics.drawscope.Fill else Stroke((d.style as? Stroke)?.width ?: 3f) 450 + style = if (d.style is androidx.compose.ui.graphics.drawscope.Fill) androidx.compose.ui.graphics.drawscope.Fill else Stroke( 451 + (d.style as? Stroke)?.width ?: 3f 452 + ) 452 453 ) 453 454 454 455 DrawableShape.LABEL -> { 455 456 drawLabelTextPlatform( 456 - drawableObject = d.copy(obj = d.obj.copy(boundingBox = Rect(offset = Offset(left, top), 457 - size = Size(w, h),))), 457 + drawableObject = d.copy( 458 + obj = d.obj.copy( 459 + boundingBox = Rect( 460 + offset = Offset(left, top), 461 + size = Size(w, h), 462 + ) 463 + ) 464 + ), 458 465 textMeasurer = textMeasurer 459 466 ) 460 467 } ··· 490 497 491 498 // ...existing code... 492 499 } 493 - 494 500 495 501 496 502 private fun PoseLandmark?.toSkeletonCoords(): Skeleton.SkeletonCoordinate? { ··· 535 541 private class FrameGate { 536 542 private val busy = AtomicBoolean(false) 537 543 fun tryEnter(): Boolean = busy.compareAndSet(false, true) 538 - fun exit() { busy.set(false) } 544 + fun exit() { 545 + busy.set(false) 546 + } 539 547 }

+114 -42

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.android.kt

··· 8 8 import androidx.camera.core.ExperimentalGetImage 9 9 import androidx.camera.core.ImageProxy 10 10 import androidx.compose.ui.geometry.Rect 11 + import androidx.core.graphics.createBitmap 12 + import co.touchlab.kermit.Logger 11 13 import com.google.android.gms.tasks.Tasks 12 14 import com.google.mlkit.vision.common.InputImage 15 + import com.google.mlkit.vision.pose.Pose 13 16 import com.google.mlkit.vision.pose.PoseDetector 17 + import com.google.mlkit.vision.pose.PoseLandmark 18 + import com.performancecoachlab.posedetection.custom.AndroidDetector 19 + import com.performancecoachlab.posedetection.custom.ModelInfo 14 20 import com.performancecoachlab.posedetection.recording.AnalysisObject 15 21 import com.performancecoachlab.posedetection.recording.AnalysisResult 16 22 import com.performancecoachlab.posedetection.recording.FrameSize ··· 21 27 import org.tensorflow.lite.support.image.ImageProcessor 22 28 import org.tensorflow.lite.support.image.TensorImage 23 29 import org.tensorflow.lite.support.tensorbuffer.TensorBuffer 30 + import java.nio.ByteBuffer 24 31 import kotlin.math.absoluteValue 25 - import co.touchlab.kermit.Logger 26 32 import kotlin.math.max 27 33 import kotlin.math.min 28 - import androidx.core.graphics.createBitmap 29 - import com.performancecoachlab.posedetection.custom.AndroidDetector 30 - import com.performancecoachlab.posedetection.custom.ModelInfo 31 - import java.nio.ByteBuffer 32 - import com.google.mlkit.vision.pose.Pose 33 - import com.google.mlkit.vision.pose.PoseLandmark 34 34 35 35 actual enum class PlatformType { 36 36 ANDROID, IOS; ··· 82 82 val bmp = cached 83 83 return if ( 84 84 bmp != null && 85 - !bmp.isRecycled && 86 - cachedW == width && 87 - cachedH == height && 88 - cachedConfig == config 85 + !bmp.isRecycled && 86 + cachedW == width && 87 + cachedH == height && 88 + cachedConfig == config 89 89 ) { 90 90 // Clear the bitmap; canvas draw will overwrite but masking may not cover entire area. 
91 91 bmp.eraseColor(android.graphics.Color.TRANSPARENT) ··· 290 290 channels = dim2 291 291 isElementsFirst = true 292 292 } 293 + 293 294 dim1 == 6 -> { 294 295 channels = dim1 295 296 elements = dim2 296 297 isElementsFirst = false 297 298 } 298 - else -> return onComplete(AnalysisResult(skeleton = null, objects = emptyList()), bitmap) 299 + 300 + else -> return onComplete( 301 + AnalysisResult(skeleton = null, objects = emptyList()), 302 + bitmap 303 + ) 299 304 } 300 305 301 306 fun valueAt(elementIndex: Int, channelIndex: Int): Float { ··· 328 333 val label = objectDetector.modelInfo.label(cls) 329 334 330 335 AnalysisObject( 331 - boundingBox = Rect(left = leftPx, top = topPx, right = rightPx, bottom = bottomPx), 336 + boundingBox = Rect( 337 + left = leftPx, 338 + top = topPx, 339 + right = rightPx, 340 + bottom = bottomPx 341 + ), 332 342 trackingId = 0, 333 - labels = listOf(com.performancecoachlab.posedetection.recording.Label(label, cnf)), 334 - frameSize = FrameSize(width = width.absoluteValue, height = height.absoluteValue) 343 + labels = listOf( 344 + com.performancecoachlab.posedetection.recording.Label( 345 + label, 346 + cnf 347 + ) 348 + ), 349 + frameSize = FrameSize( 350 + width = width.absoluteValue, 351 + height = height.absoluteValue 352 + ) 335 353 ) 336 354 } else null 337 355 } ··· 349 367 scaleX = mlKitScaleX, 350 368 scaleY = mlKitScaleY, 351 369 ) 352 - }.onFailure { t -> Logger.e(t) { "MLKit poseDetector.process failed" } 370 + }.onFailure { t -> 371 + Logger.e(t) { "MLKit poseDetector.process failed" } 353 372 }.getOrNull() 354 373 } else null 355 374 ··· 362 381 ) 363 382 } 364 383 365 - private fun Rect?.toGraphicsRect(width: Int, height: Int):android.graphics.Rect { 384 + private fun Rect?.toGraphicsRect(width: Int, height: Int): android.graphics.Rect { 366 385 return this?.let { 367 - android.graphics.Rect((it.left*width).toInt(), 368 - (it.top*height).toInt(), 369 - (it.right*width).toInt(), 370 - (it.bottom*height).toInt() 386 
+ android.graphics.Rect( 387 + (it.left * width).toInt(), 388 + (it.top * height).toInt(), 389 + (it.right * width).toInt(), 390 + (it.bottom * height).toInt() 371 391 ) 372 - }?: android.graphics.Rect(0, 0, width, height) 392 + } ?: android.graphics.Rect(0, 0, width, height) 373 393 } 394 + 374 395 private val imageProcessor = ImageProcessor.Builder() 375 396 .add(NormalizeOp(0f, 255f)) 376 397 .add(CastOp(DataType.FLOAT32)) ··· 438 459 439 460 // Black out bottom area 440 461 if (focusRect.bottom < height) { 441 - canvas.drawRect(0f, focusRect.bottom.toFloat(), width.toFloat(), height.toFloat(), paint) 462 + canvas.drawRect( 463 + 0f, 464 + focusRect.bottom.toFloat(), 465 + width.toFloat(), 466 + height.toFloat(), 467 + paint 468 + ) 442 469 } 443 470 444 471 // Black out left area 445 472 if (focusRect.left > 0) { 446 - canvas.drawRect(0f, focusRect.top.toFloat(), focusRect.left.toFloat(), focusRect.bottom.toFloat(), paint) 473 + canvas.drawRect( 474 + 0f, 475 + focusRect.top.toFloat(), 476 + focusRect.left.toFloat(), 477 + focusRect.bottom.toFloat(), 478 + paint 479 + ) 447 480 } 448 481 449 482 // Black out right area 450 483 if (focusRect.right < width) { 451 - canvas.drawRect(focusRect.right.toFloat(), focusRect.top.toFloat(), width.toFloat(), focusRect.bottom.toFloat(), paint) 484 + canvas.drawRect( 485 + focusRect.right.toFloat(), 486 + focusRect.top.toFloat(), 487 + width.toFloat(), 488 + focusRect.bottom.toFloat(), 489 + paint 490 + ) 452 491 } 453 492 454 493 result ··· 521 560 val focusRect = transformedRect.toGraphicsRect(width, height) 522 561 523 562 if (focusRect.top > 0) canvas.drawRect(0f, 0f, width.toFloat(), focusRect.top.toFloat(), paint) 524 - if (focusRect.bottom < height) canvas.drawRect(0f, focusRect.bottom.toFloat(), width.toFloat(), height.toFloat(), paint) 525 - if (focusRect.left > 0) canvas.drawRect(0f, focusRect.top.toFloat(), focusRect.left.toFloat(), focusRect.bottom.toFloat(), paint) 526 - if (focusRect.right < width) 
canvas.drawRect(focusRect.right.toFloat(), focusRect.top.toFloat(), width.toFloat(), focusRect.bottom.toFloat(), paint) 563 + if (focusRect.bottom < height) canvas.drawRect( 564 + 0f, 565 + focusRect.bottom.toFloat(), 566 + width.toFloat(), 567 + height.toFloat(), 568 + paint 569 + ) 570 + if (focusRect.left > 0) canvas.drawRect( 571 + 0f, 572 + focusRect.top.toFloat(), 573 + focusRect.left.toFloat(), 574 + focusRect.bottom.toFloat(), 575 + paint 576 + ) 577 + if (focusRect.right < width) canvas.drawRect( 578 + focusRect.right.toFloat(), 579 + focusRect.top.toFloat(), 580 + width.toFloat(), 581 + focusRect.bottom.toFloat(), 582 + paint 583 + ) 527 584 } 528 585 529 586 // Keep applyFocusAreaMaskPooled for other call sites, but implement it via the in-place helper. ··· 548 605 val clsName: String 549 606 ) 550 607 551 - fun ModelInfo.label(cls: Int): String{ 608 + fun ModelInfo.label(cls: Int): String { 552 609 this.labels.let { labelsList -> 553 610 if (cls in labelsList.indices) { 554 611 return labelsList[cls] ··· 650 707 ) 651 708 } 652 709 653 - private fun PoseLandmark?.toSkeletonCoordsScaled(scaleX: Float, scaleY: Float): Skeleton.SkeletonCoordinate? { 710 + private fun PoseLandmark?.toSkeletonCoordsScaled( 711 + scaleX: Float, 712 + scaleY: Float 713 + ): Skeleton.SkeletonCoordinate? 
{ 654 714 val pos = this?.position ?: return null 655 715 return Skeleton.SkeletonCoordinate( 656 716 x = pos.x * scaleX, ··· 668 728 ): Skeleton { 669 729 return Skeleton( 670 730 timestamp = timestamp, 671 - leftShoulder = pose?.getPoseLandmark(PoseLandmark.LEFT_SHOULDER)?.toSkeletonCoordsScaled(scaleX, scaleY), 672 - rightShoulder = pose?.getPoseLandmark(PoseLandmark.RIGHT_SHOULDER)?.toSkeletonCoordsScaled(scaleX, scaleY), 673 - leftElbow = pose?.getPoseLandmark(PoseLandmark.LEFT_ELBOW)?.toSkeletonCoordsScaled(scaleX, scaleY), 674 - rightElbow = pose?.getPoseLandmark(PoseLandmark.RIGHT_ELBOW)?.toSkeletonCoordsScaled(scaleX, scaleY), 675 - leftWrist = pose?.getPoseLandmark(PoseLandmark.LEFT_WRIST)?.toSkeletonCoordsScaled(scaleX, scaleY), 676 - rightWrist = pose?.getPoseLandmark(PoseLandmark.RIGHT_WRIST)?.toSkeletonCoordsScaled(scaleX, scaleY), 677 - leftHip = pose?.getPoseLandmark(PoseLandmark.LEFT_HIP)?.toSkeletonCoordsScaled(scaleX, scaleY), 678 - rightHip = pose?.getPoseLandmark(PoseLandmark.RIGHT_HIP)?.toSkeletonCoordsScaled(scaleX, scaleY), 679 - leftKnee = pose?.getPoseLandmark(PoseLandmark.LEFT_KNEE)?.toSkeletonCoordsScaled(scaleX, scaleY), 680 - rightKnee = pose?.getPoseLandmark(PoseLandmark.RIGHT_KNEE)?.toSkeletonCoordsScaled(scaleX, scaleY), 681 - leftAnkle = pose?.getPoseLandmark(PoseLandmark.LEFT_ANKLE)?.toSkeletonCoordsScaled(scaleX, scaleY), 682 - rightAnkle = pose?.getPoseLandmark(PoseLandmark.RIGHT_ANKLE)?.toSkeletonCoordsScaled(scaleX, scaleY), 731 + leftShoulder = pose?.getPoseLandmark(PoseLandmark.LEFT_SHOULDER) 732 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 733 + rightShoulder = pose?.getPoseLandmark(PoseLandmark.RIGHT_SHOULDER) 734 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 735 + leftElbow = pose?.getPoseLandmark(PoseLandmark.LEFT_ELBOW) 736 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 737 + rightElbow = pose?.getPoseLandmark(PoseLandmark.RIGHT_ELBOW) 738 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 739 + leftWrist = 
pose?.getPoseLandmark(PoseLandmark.LEFT_WRIST) 740 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 741 + rightWrist = pose?.getPoseLandmark(PoseLandmark.RIGHT_WRIST) 742 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 743 + leftHip = pose?.getPoseLandmark(PoseLandmark.LEFT_HIP) 744 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 745 + rightHip = pose?.getPoseLandmark(PoseLandmark.RIGHT_HIP) 746 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 747 + leftKnee = pose?.getPoseLandmark(PoseLandmark.LEFT_KNEE) 748 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 749 + rightKnee = pose?.getPoseLandmark(PoseLandmark.RIGHT_KNEE) 750 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 751 + leftAnkle = pose?.getPoseLandmark(PoseLandmark.LEFT_ANKLE) 752 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 753 + rightAnkle = pose?.getPoseLandmark(PoseLandmark.RIGHT_ANKLE) 754 + ?.toSkeletonCoordsScaled(scaleX, scaleY), 683 755 width = width.toFloat(), 684 756 height = height.toFloat(), 685 757 )

+15 -9

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/custom/CustomObjectModel.android.kt

··· 5 5 import co.touchlab.kermit.Logger 6 6 import org.json.JSONObject 7 7 import org.tensorflow.lite.Interpreter 8 - import org.tensorflow.lite.gpu.CompatibilityList 9 8 import org.tensorflow.lite.gpu.GpuDelegate 10 - import org.tensorflow.lite.gpu.GpuDelegateFactory 11 9 import org.tensorflow.lite.support.common.FileUtil 12 10 import org.tensorflow.lite.support.metadata.MetadataExtractor 13 11 import java.io.ByteArrayInputStream 14 12 import java.nio.MappedByteBuffer 15 13 import java.nio.charset.StandardCharsets 16 14 import java.util.zip.GZIPInputStream 17 - import java.util.zip.ZipInputStream 18 15 import java.util.zip.Inflater 19 16 import java.util.zip.InflaterInputStream 17 + import java.util.zip.ZipInputStream 20 18 21 19 @Composable 22 20 actual fun initialiseObjectModel(modelPath: ModelPath): ObjectModel { ··· 31 29 // Threads often don’t matter with GPU delegate; keep small to reduce contention. 32 30 setNumThreads(2) 33 31 } 34 - Logger.d{ "TFLite GPU delegate available" } 32 + Logger.d { "TFLite GPU delegate available" } 35 33 opts to delegate 36 34 }.onFailure { t -> 37 35 Logger.w(t) { "TFLite GPU delegate not available; falling back to CPU" } ··· 76 74 77 75 // Try every associated file. Pick the first that decodes to JSON with a `names` object. 78 76 for (name in files) { 79 - val rawBytes = runCatching { extractor.getAssociatedFile(name).readBytes() }.getOrNull() ?: continue 77 + val rawBytes = runCatching { extractor.getAssociatedFile(name).readBytes() }.getOrNull() 78 + ?: continue 80 79 81 80 val decoded = rawBytes.decodeUtf8PossiblyCompressed() 82 81 val trimmed = decoded.trimStart() ··· 101 100 take(minOf(size, n)).joinToString(" ") { b -> "%02x".format(b) } 102 101 103 102 // 0) Scan for embedded magic headers / common compressed stream signatures. 
104 - val zipOffset = indexOfSubsequence(byteArrayOf('P'.code.toByte(), 'K'.code.toByte(), 0x03, 0x04)) 103 + val zipOffset = 104 + indexOfSubsequence(byteArrayOf('P'.code.toByte(), 'K'.code.toByte(), 0x03, 0x04)) 105 105 val gzipOffset = indexOfSubsequence(byteArrayOf(0x1F.toByte(), 0x8B.toByte())) 106 106 107 107 // Common zlib headers (CMF/FLG). Most common are 0x78 0x9C (default), 0x78 0xDA (best), 0x78 0x01 (no compression). ··· 218 218 // End of archive: two consecutive 512-byte blocks of zero 219 219 if (header.all { it == 0.toByte() }) break 220 220 221 - val name = header.copyOfRange(0, 100).toString(StandardCharsets.US_ASCII).trimEnd { it == '\u0000' } 222 - val sizeOctal = header.copyOfRange(124, 136).toString(StandardCharsets.US_ASCII).trim().trimEnd { it == '\u0000' } 221 + val name = header.copyOfRange(0, 100).toString(StandardCharsets.US_ASCII) 222 + .trimEnd { it == '\u0000' } 223 + val sizeOctal = header.copyOfRange(124, 136).toString(StandardCharsets.US_ASCII).trim() 224 + .trimEnd { it == '\u0000' } 223 225 val typeFlag = header[156] 224 226 225 227 val fileSize = sizeOctal.toLongOrNull(8) ?: 0L ··· 318 320 val labels: List<String>, 319 321 ) { 320 322 companion object { 321 - fun fromShapes(inputShape: IntArray, outputShape: IntArray, labels: List<String>): ModelInfo { 323 + fun fromShapes( 324 + inputShape: IntArray, 325 + outputShape: IntArray, 326 + labels: List<String> 327 + ): ModelInfo { 322 328 // Common TFLite image shapes: 323 329 // NHWC: [1, H, W, C] 324 330 // NCHW: [1, C, H, W] (less common on Android)

+46 -25

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/encoding/VideoBuilder.android.kt

··· 1 1 package com.performancecoachlab.posedetection.encoding 2 2 3 - import androidx.compose.ui.graphics.ImageBitmap 4 - import androidx.compose.ui.graphics.asAndroidBitmap 5 3 import android.graphics.Bitmap 6 4 import android.media.MediaCodec 7 5 import android.media.MediaCodecInfo 8 6 import android.media.MediaFormat 9 7 import android.media.MediaMuxer 10 - import com.performancecoachlab.posedetection.recording.InputFrame 11 - import kotlinx.coroutines.Dispatchers 12 - import kotlinx.coroutines.withContext 13 - import java.io.File 14 - import java.io.FileOutputStream 15 - import java.nio.ByteBuffer 16 - import kotlinx.coroutines.sync.Mutex 17 - import kotlinx.coroutines.sync.withLock 8 + import androidx.compose.ui.graphics.ImageBitmap 9 + import androidx.compose.ui.graphics.asAndroidBitmap 18 10 import kotlinx.coroutines.CoroutineScope 11 + import kotlinx.coroutines.Dispatchers 19 12 import kotlinx.coroutines.SupervisorJob 20 13 import kotlinx.coroutines.launch 14 + import kotlinx.coroutines.sync.Mutex 15 + import kotlinx.coroutines.sync.withLock 16 + import kotlinx.coroutines.withContext 21 17 import java.util.concurrent.ConcurrentLinkedQueue 22 18 23 19 actual fun createVideoBuilder( ··· 35 31 ) : VideoBuilder { 36 32 private var initialised = false 37 33 private val encoder = MediaCodec.createEncoderByType(MediaFormat.MIMETYPE_VIDEO_AVC) 38 - private val format = MediaFormat.createVideoFormat(MediaFormat.MIMETYPE_VIDEO_AVC, width, height).apply { 39 - setInteger(MediaFormat.KEY_BIT_RATE, width * height * 4) 40 - setInteger(MediaFormat.KEY_FRAME_RATE, fps) 41 - setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, 1) 42 - // Use a specific format instead of Flexible 43 - setInteger(MediaFormat.KEY_COLOR_FORMAT, MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420SemiPlanar) 44 - } 34 + private val format = 35 + MediaFormat.createVideoFormat(MediaFormat.MIMETYPE_VIDEO_AVC, width, height).apply { 36 + setInteger(MediaFormat.KEY_BIT_RATE, width * height * 4) 37 + 
setInteger(MediaFormat.KEY_FRAME_RATE, fps) 38 + setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, 1) 39 + // Use a specific format instead of Flexible 40 + setInteger( 41 + MediaFormat.KEY_COLOR_FORMAT, 42 + MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420SemiPlanar 43 + ) 44 + } 45 45 private lateinit var muxer: MediaMuxer 46 46 private var trackIndex = -1 47 47 private var muxerStarted = false ··· 56 56 private var isReleased = false 57 57 58 58 private var endOfStreamSignaled = false 59 - private var finalizeContinuation: (()->Unit)? = null 59 + private var finalizeContinuation: (() -> Unit)? = null 60 60 61 61 private var lastTimestampUs: Long = 0L 62 62 63 63 override suspend fun addFrame(frame: ImageBitmap, timestampms: Long) { 64 64 if (isReleased) return 65 - frameQueue.add(Pair(frame, timestampms*1000L)) 65 + frameQueue.add(Pair(frame, timestampms * 1000L)) 66 66 processQueue() 67 67 } 68 68 ··· 104 104 } 105 105 } 106 106 } 107 + 107 108 private fun convertBitmapToYuv420(bitmap: Bitmap): ByteArray { 108 109 val width = bitmap.width 109 110 val height = bitmap.height ··· 162 163 val avgB = sumB / count 163 164 164 165 // U and V conversion 165 - val u = (-0.169 * avgR - 0.331 * avgG + 0.5 * avgB + 128).toInt().coerceIn(0, 255) 166 - val v = (0.5 * avgR - 0.419 * avgG - 0.081 * avgB + 128).toInt().coerceIn(0, 255) 166 + val u = 167 + (-0.169 * avgR - 0.331 * avgG + 0.5 * avgB + 128).toInt().coerceIn(0, 255) 168 + val v = 169 + (0.5 * avgR - 0.419 * avgG - 0.081 * avgB + 128).toInt().coerceIn(0, 255) 167 170 168 171 // NV12 has interleaved U and V values 169 172 yuv[uvIndex++] = u.toByte() ··· 186 189 } 187 190 val bitmap = frame.asAndroidBitmap() 188 191 val yuvData = convertBitmapToYuv420(bitmap) 189 - val inputBufferIndex = try { encoder.dequeueInputBuffer(timeoutUs) } catch (e: IllegalStateException) { isReleased = true; frameQueue.clear(); return } 192 + val inputBufferIndex = try { 193 + encoder.dequeueInputBuffer(timeoutUs) 194 + } catch (e: 
IllegalStateException) { 195 + isReleased = true; frameQueue.clear(); return 196 + } 190 197 if (inputBufferIndex >= 0) { 191 - val inputBuffer = try { encoder.getInputBuffer(inputBufferIndex) } catch (e: IllegalStateException) { isReleased = true; frameQueue.clear(); return } 198 + val inputBuffer = try { 199 + encoder.getInputBuffer(inputBufferIndex) 200 + } catch (e: IllegalStateException) { 201 + isReleased = true; frameQueue.clear(); return 202 + } 192 203 inputBuffer?.clear() 193 204 if (inputBuffer != null) { 194 205 val bufferCapacity = inputBuffer.capacity() ··· 219 230 private fun drainEncoderOutput() { 220 231 var outputDone = false 221 232 while (!outputDone) { 222 - val outputBufferIndex = try { encoder.dequeueOutputBuffer(bufferInfo, timeoutUs) } catch (e: IllegalStateException) { isReleased = true; frameQueue.clear(); return } 233 + val outputBufferIndex = try { 234 + encoder.dequeueOutputBuffer(bufferInfo, timeoutUs) 235 + } catch (e: IllegalStateException) { 236 + isReleased = true; frameQueue.clear(); return 237 + } 223 238 when { 224 239 outputBufferIndex == MediaCodec.INFO_TRY_AGAIN_LATER -> { 225 240 outputDone = true 226 241 } 242 + 227 243 outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> { 228 244 // Set up muxer when format is ready 229 245 try { ··· 234 250 isReleased = true; frameQueue.clear(); return 235 251 } 236 252 } 253 + 237 254 outputBufferIndex >= 0 -> { 238 - val outputBuffer = try { encoder.getOutputBuffer(outputBufferIndex) } catch (e: IllegalStateException) { isReleased = true; frameQueue.clear(); return } 255 + val outputBuffer = try { 256 + encoder.getOutputBuffer(outputBufferIndex) 257 + } catch (e: IllegalStateException) { 258 + isReleased = true; frameQueue.clear(); return 259 + } 239 260 if (bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG != 0) { 240 261 // Config info, not actual frame data 241 262 bufferInfo.size = 0

+12 -7

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.android.kt

··· 6 6 import androidx.compose.ui.geometry.Rect 7 7 import androidx.compose.ui.graphics.ImageBitmap 8 8 import androidx.compose.ui.graphics.asImageBitmap 9 + import androidx.core.graphics.createBitmap 10 + import co.touchlab.kermit.Logger 9 11 import com.google.mlkit.vision.common.InputImage 10 12 import com.google.mlkit.vision.pose.PoseDetection 11 13 import com.google.mlkit.vision.pose.defaults.PoseDetectorOptions 12 14 import com.performancecoachlab.posedetection.camera.applyFocusAreaMaskPooled 13 15 import com.performancecoachlab.posedetection.camera.drawAnalysisResults 14 16 import com.performancecoachlab.posedetection.camera.drawSkeleton 17 + import com.performancecoachlab.posedetection.camera.process 15 18 import com.performancecoachlab.posedetection.custom.ObjectModel 16 19 import com.performancecoachlab.posedetection.skeleton.Skeleton 20 + import kotlinx.coroutines.Dispatchers 21 + import kotlinx.coroutines.launch 17 22 import kotlinx.coroutines.suspendCancellableCoroutine 18 23 import org.tensorflow.lite.DataType 19 24 import org.tensorflow.lite.support.common.ops.CastOp ··· 22 27 import org.tensorflow.lite.support.image.TensorImage 23 28 import kotlin.coroutines.resume 24 29 import kotlin.math.max 25 - import androidx.core.graphics.createBitmap 26 - import co.touchlab.kermit.Logger 27 - import com.performancecoachlab.posedetection.camera.process 28 - import kotlinx.coroutines.Dispatchers 29 - import kotlinx.coroutines.launch 30 30 31 31 actual class InputFrame(val bitmap: Bitmap, actual val timestamp: Long) { 32 32 actual fun toImageBitmap(): ImageBitmap { ··· 88 88 } 89 89 } 90 90 } 91 + 91 92 private fun toPoseBitmap(src: Bitmap): Bitmap { 92 - val argb8888 = if (src.config == Bitmap.Config.ARGB_8888) src else src.copy(Bitmap.Config.ARGB_8888, false) 93 + val argb8888 = if (src.config == Bitmap.Config.ARGB_8888) src else src.copy( 94 + Bitmap.Config.ARGB_8888, 95 + false 96 + ) 93 97 94 98 val minSide = 480 95 99 val w = argb8888.width ··· 130 134 
android.graphics.Rect(0, 0, w, h), 131 135 android.graphics.Paint(android.graphics.Paint.FILTER_BITMAP_FLAG) 132 136 ) 133 - TensorImage(DataType.FLOAT32).also { ti -> ti.load(dst) }.let(imageProcessor::process) 137 + TensorImage(DataType.FLOAT32).also { ti -> ti.load(dst) } 138 + .let(imageProcessor::process) 134 139 } 135 140 } 136 141 continuation.context.let { ctx ->

+3 -3

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.android.kt

··· 5 5 import android.media.MediaExtractor 6 6 import android.media.MediaFormat 7 7 import android.media.MediaMetadataRetriever 8 + import androidx.core.net.toUri 9 + import co.touchlab.kermit.Logger 8 10 import kotlinx.coroutines.Dispatchers 9 11 import kotlinx.coroutines.withContext 10 - import androidx.core.net.toUri 11 - import co.touchlab.kermit.Logger 12 12 13 13 actual suspend fun extractFrame( 14 14 videoPath: String, frameTimestamp: Long ··· 19 19 // Attempt to set the data source and handle any exceptions gracefully 20 20 val uri = videoPath.toUri() 21 21 22 - retriever.setDataSource(VideoExtractionContext.get(),uri) 22 + retriever.setDataSource(VideoExtractionContext.get(), uri) 23 23 try { 24 24 val bitmap = retriever.getFrameAtTime( 25 25 frameTimestamp * 1000L, // microseconds

+7 -3

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.kt

··· 13 13 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 14 14 15 15 enum class DetectMode { NONE, POSE, OBJECT, BOTH } 16 + 16 17 fun DetectMode.doPose(): Boolean { 17 - return this == DetectMode.POSE || this == DetectMode.BOTH 18 + return this == DetectMode.POSE || this == DetectMode.BOTH 18 19 } 20 + 19 21 fun DetectMode.doObject(): Boolean { 20 - return this == DetectMode.OBJECT || this == DetectMode.BOTH 22 + return this == DetectMode.OBJECT || this == DetectMode.BOTH 21 23 } 22 24 23 25 enum class PreviewFillMode { 24 26 /** Show the full camera frame inside the view (letterbox/pillarbox as needed). */ 25 27 FIT, 28 + 26 29 /** Fill the view and crop excess (center-crop). */ 27 30 FILL 28 31 } ··· 56 59 ) 57 60 58 61 enum class DrawableShape { 59 - OVAL,RECTANGLE,LABEL 62 + OVAL, RECTANGLE, LABEL 60 63 } 61 64 62 65 data class CameraViewData( ··· 80 83 override fun requestData(onResult: (CameraViewData) -> Unit) { 81 84 dataProvider?.let { onResult(it()) } 82 85 } 86 + 83 87 override fun setRequestDataProvider(provider: (() -> CameraViewData)?) { 84 88 dataProvider = provider 85 89 }

-1

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.kt

··· 17 17 import androidx.compose.ui.text.TextMeasurer 18 18 import androidx.compose.ui.unit.Density 19 19 import androidx.compose.ui.unit.LayoutDirection 20 - import co.touchlab.kermit.Logger 21 20 import com.performancecoachlab.posedetection.recording.AnalysisResult 22 21 import com.performancecoachlab.posedetection.skeleton.Skeleton 23 22

+2 -1

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/custom/CustomObjectModel.kt

··· 29 29 internal expect fun platformRememberObjectModel(modelPath: ModelPath): ObjectModel 30 30 31 31 object ObjectModelProvider { 32 - @Volatile private var cached: ObjectModel? = null 32 + @Volatile 33 + private var cached: ObjectModel? = null 33 34 34 35 // Kotlin/Native requires a SynchronizedObject as the lock 35 36 @OptIn(InternalCoroutinesApi::class)

-1

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/encoding/VideoBuilder.kt

··· 1 1 package com.performancecoachlab.posedetection.encoding 2 2 3 3 import androidx.compose.ui.graphics.ImageBitmap 4 - import com.performancecoachlab.posedetection.recording.InputFrame 5 4 6 5 /** 7 6 * Builder for creating videos from a sequence of ImageBitmap frames

+2 -1

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.kt

··· 27 27 28 28 data class FrameSize( 29 29 val width: Int, 30 - val height: Int) 30 + val height: Int 31 + ) 31 32 32 33 data class Label( 33 34 val text: String,

+198 -97

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraEngine.kt

··· 14 14 import com.performancecoachlab.posedetection.custom.CustomObjectRespository 15 15 import com.performancecoachlab.posedetection.custom.ObjectModel 16 16 import com.performancecoachlab.posedetection.recording.AnalysisObject 17 - import com.performancecoachlab.posedetection.recording.FrameSize 18 17 import com.performancecoachlab.posedetection.skeleton.Skeleton 19 18 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 20 19 import kotlinx.cinterop.CValue ··· 36 35 import platform.AVFoundation.AVCaptureDevicePositionBack 37 36 import platform.AVFoundation.AVCaptureDevicePositionFront 38 37 import platform.AVFoundation.AVCaptureDevicePositionUnspecified 39 - import platform.AVFoundation.AVCaptureDeviceType 40 38 import platform.AVFoundation.AVCaptureDeviceTypeBuiltInUltraWideCamera 41 39 import platform.AVFoundation.AVCaptureDeviceTypeBuiltInWideAngleCamera 42 40 import platform.AVFoundation.AVCaptureFileOutput ··· 53 51 import platform.AVFoundation.AVCaptureVideoOrientationPortrait 54 52 import platform.AVFoundation.AVCaptureVideoOrientationPortraitUpsideDown 55 53 import platform.AVFoundation.AVCaptureVideoPreviewLayer 54 + import platform.AVFoundation.AVLayerVideoGravityResizeAspect 56 55 import platform.AVFoundation.AVLayerVideoGravityResizeAspectFill 57 - import platform.AVFoundation.AVLayerVideoGravityResizeAspect 58 56 import platform.AVFoundation.AVMediaTypeVideo 59 57 import platform.AVFoundation.AVVideoCodecJPEG 60 58 import platform.AVFoundation.AVVideoCodecKey ··· 82 80 import platform.Foundation.NSError 83 81 import platform.Foundation.NSURL 84 82 import platform.Foundation.stringByAppendingPathComponent 83 + import platform.UIKit.UIApplication 85 84 import platform.UIKit.UIDevice 86 85 import platform.UIKit.UIDeviceOrientation 87 86 import platform.UIKit.UIImage 87 + import platform.UIKit.UIInterfaceOrientationLandscapeLeft 88 + import platform.UIKit.UIInterfaceOrientationLandscapeRight 89 + import 
platform.UIKit.UIInterfaceOrientationPortrait 90 + import platform.UIKit.UIInterfaceOrientationPortraitUpsideDown 88 91 import platform.UIKit.UIView 89 92 import platform.UIKit.UIViewController 90 - import platform.darwin.DISPATCH_QUEUE_PRIORITY_HIGH 93 + import platform.UIKit.UIViewControllerTransitionCoordinatorProtocol 91 94 import platform.darwin.NSObject 92 95 import platform.darwin.dispatch_async 93 - import platform.darwin.dispatch_get_global_queue 94 96 import platform.darwin.dispatch_get_main_queue 95 97 import platform.darwin.dispatch_queue_create 96 98 import platform.darwin.dispatch_sync 97 - import platform.darwin.dispatch_time 98 99 import platform.posix.memcpy 99 100 import kotlin.math.abs 100 101 import kotlin.native.runtime.NativeRuntimeApi 101 - import platform.UIKit.UIApplication 102 - import platform.UIKit.UIInterfaceOrientationLandscapeLeft 103 - import platform.UIKit.UIInterfaceOrientationLandscapeRight 104 - import platform.UIKit.UIInterfaceOrientationPortrait 105 - import platform.UIKit.UIInterfaceOrientationPortraitUpsideDown 106 - import platform.UIKit.UIViewControllerTransitionCoordinatorProtocol 107 - import kotlin.math.absoluteValue 108 102 109 103 fun interfaceOrientationToVideoOrientation(): AVCaptureVideoOrientation { 110 104 val orientation = UIApplication.sharedApplication.keyWindow?.windowScene?.interfaceOrientation ··· 151 145 withTransitionCoordinator: UIViewControllerTransitionCoordinatorProtocol 152 146 ) { 153 147 super.viewWillTransitionToSize(size, withTransitionCoordinator) 154 - cameraController.cameraPreviewLayer?.connection?.videoOrientation = interfaceOrientationToVideoOrientation() 148 + val orientation = interfaceOrientationToVideoOrientation() 149 + cameraController.cameraPreviewLayer?.connection?.videoOrientation = orientation 150 + // Also update the video-data output connection (used for Vision/CoreML) 151 + cameraController.updateVideoOutputOrientation(orientation) 155 152 } 156 153 157 154 fun 
getCameraPreviewLayer() = cameraController.cameraPreviewLayer ··· 203 200 super.viewDidLayoutSubviews() 204 201 cameraController.cameraPreviewLayer?.setFrame(view.bounds) 205 202 cameraController.cameraPreviewLayer?.contentsGravity = AVLayerVideoGravityResizeAspectFill 206 - } 207 - 208 - fun normaliseImageRotation(image: NSData): ImageBitmap { 209 - val uiImage = UIImage(image).toImageBitmap() 210 - return when (UIDevice.currentDevice.orientation) { 211 - UIDeviceOrientation.UIDeviceOrientationPortrait -> uiImage.rotateLeft() 212 - UIDeviceOrientation.UIDeviceOrientationPortraitUpsideDown -> uiImage.rotateRight() 213 - UIDeviceOrientation.UIDeviceOrientationLandscapeLeft -> if (cameraController.isUsingFrontCamera) uiImage.rotate180() else uiImage 214 - else -> if (cameraController.isUsingFrontCamera) uiImage else uiImage.rotate180() 215 - } 216 203 } 217 204 218 205 fun setDetectMode(detectMode: DetectMode) { ··· 331 318 var textMeasurer: androidx.compose.ui.text.TextMeasurer? = null 332 319 333 320 // Serial queue to serialize session configuration and start/stop calls 334 - private val sessionQueue = dispatch_queue_create("com.performancecoachlab.captureSessionQueue", null) 321 + private val sessionQueue = 322 + dispatch_queue_create("com.performancecoachlab.captureSessionQueue", null) 335 323 336 324 // Prevent overlapping switch operations (which can deadlock/jank the UI). 337 325 private var isSwitchingCamera: Boolean = false 338 326 339 327 // Reuse a single queue for frame processing work. 340 - private val frameProcessingQueue = dispatch_queue_create("com.performancecoachlab.frameProcessing", null) 328 + private val frameProcessingQueue = 329 + dispatch_queue_create("com.performancecoachlab.frameProcessing", null) 341 330 342 331 // iOS 14+ introduces a new way to access the ultra-wide camera, which we can use conditionally. 343 332 private var backUltraWideCamera: AVCaptureDevice? 
= null ··· 521 510 } 522 511 523 512 // Keep existing backCamera/frontCamera fields used elsewhere. 524 - backCamera = if (useUltraWideBack) backUltraWideCamera ?: backWideCamera else backWideCamera ?: backUltraWideCamera 513 + backCamera = if (useUltraWideBack) backUltraWideCamera ?: backWideCamera else backWideCamera 514 + ?: backUltraWideCamera 525 515 526 516 currentCamera = if (isUsingFrontCamera) { 527 517 frontCamera ?: backCamera ?: return false ··· 530 520 } 531 521 532 522 try { 533 - val input = AVCaptureDeviceInput.deviceInputWithDevice(currentCamera!!, null) ?: return false 523 + val input = 524 + AVCaptureDeviceInput.deviceInputWithDevice(currentCamera!!, null) ?: return false 534 525 535 526 if (captureSession?.canAddInput(input) == true) { 536 527 captureSession?.addInput(input) ··· 588 579 589 580 view.layer.addSublayer(newPreviewLayer) 590 581 cameraPreviewLayer = newPreviewLayer 582 + 583 + cameraPreviewLayer?.connection?.let { connection -> 584 + connection.videoOrientation = interfaceOrientationToVideoOrientation() 585 + // Ensure video-data output connection matches the same orientation/mirroring 586 + updateVideoOutputOrientation(interfaceOrientationToVideoOrientation()) 587 + } 591 588 } 592 589 } 593 590 ··· 630 627 } 631 628 632 629 // Recompute which back camera we want. 633 - backCamera = if (useUltraWideBack) backUltraWideCamera ?: backWideCamera else backWideCamera ?: backUltraWideCamera 630 + backCamera = 631 + if (useUltraWideBack) backUltraWideCamera ?: backWideCamera else backWideCamera 632 + ?: backUltraWideCamera 634 633 635 634 currentCamera = if (isUsingFrontCamera) { 636 635 frontCamera ?: backCamera ··· 660 659 connection.setVideoMirrored(isUsingFrontCamera) 661 660 } 662 661 } 662 + updateVideoOutputOrientation(interfaceOrientationToVideoOrientation()) 663 + 664 + cameraPreviewLayer?.apply { 665 + // Use a tiny fade-in to avoid a harsh blink. 
666 + opacity = 0f 667 + hidden = false 668 + val anim = 669 + platform.QuartzCore.CABasicAnimation.animationWithKeyPath("opacity") 670 + anim.fromValue = 0.0 671 + anim.toValue = 1.0 672 + anim.duration = 0.12 673 + addAnimation(anim, forKey = "fadeIn") 674 + opacity = 1f 675 + } 663 676 } 664 677 } catch (e: CameraException) { 665 678 onError?.invoke(e) ··· 684 697 // Use a tiny fade-in to avoid a harsh blink. 685 698 opacity = 0f 686 699 hidden = false 687 - val anim = platform.QuartzCore.CABasicAnimation.animationWithKeyPath("opacity") 700 + val anim = 701 + platform.QuartzCore.CABasicAnimation.animationWithKeyPath("opacity") 688 702 anim.fromValue = 0.0 689 703 anim.toValue = 1.0 690 704 anim.duration = 0.12 ··· 735 749 mapSkeletonToPreview( 736 750 skeleton = it, 737 751 previewLayer = preview, 738 - width = 480f, 739 - height = 360f 752 + width = it.width, 753 + height = it.height 740 754 ) 741 755 } 742 756 ··· 744 758 skeletonRepository?.updateSkeleton(it) 745 759 } 746 760 747 - val previewObjects = detectedObjects.map { 748 - it.copy( 761 + val previewObjects = detectedObjects.map { obj -> 762 + obj.copy( 749 763 boundingBox = mapBoxToPreview( 750 - it.boundingBox, 764 + obj.boundingBox, 751 765 preview, 752 - width = 480f, 753 - height = 360f 766 + width = obj.frameSize.width.toFloat(), 767 + height = obj.frameSize.height.toFloat() 754 768 ), 755 - frameSize = it.frameSize.let { 756 - mapBoxToPreview( 757 - Rect(Offset.Zero, Size(480f, 360f)), 758 - preview, 759 - width = 480f, 760 - height = 360f 761 - ).let { r -> 762 - FrameSize(r.width.toInt().absoluteValue, r.height.toInt().absoluteValue) 763 - } 764 - } 769 + // Keep the original analysis frame size; don't try to derive it in preview space. 770 + frameSize = obj.frameSize 765 771 ) 766 772 } 767 773 ··· 807 813 kotlin.native.runtime.GC.collect() 808 814 } 809 815 816 + /** 817 + * Keeps the AVCaptureVideoDataOutput connection (used for Vision/CoreML) aligned with UI orientation. 
818 + * This must be updated separately from the preview layer connection. 819 + */ 820 + @OptIn(ExperimentalForeignApi::class) 821 + fun updateVideoOutputOrientation(orientation: AVCaptureVideoOrientation) { 822 + val output = videoOutput ?: return 823 + val connection = 824 + output.connectionWithMediaType(AVMediaTypeVideo) as? AVCaptureConnection ?: return 825 + if (connection.isVideoOrientationSupported()) { 826 + connection.videoOrientation = orientation 827 + } 828 + if (connection.isVideoMirroringSupported()) { 829 + connection.automaticallyAdjustsVideoMirroring = false 830 + connection.videoMirrored = isUsingFrontCamera 831 + } 832 + } 833 + 810 834 fun ImageBitmap.copy(): ImageBitmap { 811 835 val original = this 812 836 val copied = ImageBitmap(width, height, config, hasAlpha, colorSpace) ··· 835 859 } 836 860 } 837 861 862 + // Replace mapBoxToPreview implementation to map oriented analysis pixels -> raw buffer normalized -> preview space. 863 + @OptIn(ExperimentalForeignApi::class) 864 + private fun orientedNormalizedToCaptureDeviceNormalized( 865 + uTopLeft: Double, 866 + vTopLeft: Double, 867 + previewLayer: AVCaptureVideoPreviewLayer, 868 + ): Pair<Double, Double> { 869 + // Clamp first to avoid ever returning out-of-range points. 870 + val u = uTopLeft.coerceIn(0.0, 1.0) 871 + val v = vTopLeft.coerceIn(0.0, 1.0) 872 + 873 + val orientation = 874 + previewLayer.connection?.videoOrientation ?: AVCaptureVideoOrientationLandscapeRight 875 + val mirrored = previewLayer.connection?.videoMirrored ?: false 876 + 877 + // Convert from oriented (top-left origin) to capture-device normalized expected by pointForCaptureDevicePointOfInterest. 
878 + // Empirically, in portrait the old mapping appears 90° clockwise, so we apply a 90° counter-clockwise fix: 879 + // (u,v) -> (x=v, y=1-u) 880 + var (x, y) = when (orientation) { 881 + AVCaptureVideoOrientationPortrait -> Pair(v, 1.0 - u) 882 + AVCaptureVideoOrientationPortraitUpsideDown -> Pair(1.0 - v, u) 883 + AVCaptureVideoOrientationLandscapeRight -> Pair(u, v) 884 + AVCaptureVideoOrientationLandscapeLeft -> Pair(1.0 - u, 1.0 - v) 885 + else -> Pair(u, v) 886 + } 887 + 888 + if (mirrored) { 889 + x = 1.0 - x 890 + } 891 + 892 + return Pair(x.coerceIn(0.0, 1.0), y.coerceIn(0.0, 1.0)) 893 + } 894 + 838 895 @OptIn(ExperimentalForeignApi::class) 839 896 fun mapBoxToPreview( 840 897 box: Rect, ··· 843 900 height: Float, 844 901 ): Rect { 845 902 fun mapPoint(point: Offset): Offset { 846 - val normalizedPoint = 847 - CGPointMake(point.x.toDouble() / width, point.y.toDouble() / height) 903 + val u = (point.x.toDouble() / width.toDouble()) 904 + val v = (point.y.toDouble() / height.toDouble()) 905 + val (cx, cy) = orientedNormalizedToCaptureDeviceNormalized(u, v, previewLayer) 906 + 907 + val normalizedPoint = CGPointMake(cx, cy) 848 908 val screenPoint = previewLayer.pointForCaptureDevicePointOfInterest(normalizedPoint) 849 909 return Offset( 850 910 screenPoint.useContents { x.toFloat() }, 851 - screenPoint.useContents { y.toFloat() }) 911 + screenPoint.useContents { y.toFloat() } 912 + ) 852 913 } 853 914 854 - val topLeft = mapPoint(box.topLeft) 855 - val bottomRight = mapPoint(box.bottomRight) 856 - return Rect(topLeft = topLeft, bottomRight = bottomRight).normalize() 915 + // Map all 4 corners to ensure correct rect under rotations. 
916 + val p1 = mapPoint(Offset(box.left, box.top)) 917 + val p2 = mapPoint(Offset(box.right, box.top)) 918 + val p3 = mapPoint(Offset(box.right, box.bottom)) 919 + val p4 = mapPoint(Offset(box.left, box.bottom)) 920 + 921 + val left = minOf(p1.x, p2.x, p3.x, p4.x) 922 + val top = minOf(p1.y, p2.y, p3.y, p4.y) 923 + val right = maxOf(p1.x, p2.x, p3.x, p4.x) 924 + val bottom = maxOf(p1.y, p2.y, p3.y, p4.y) 925 + 926 + return Rect(left = left, top = top, right = right, bottom = bottom).normalize() 857 927 } 858 928 929 + // Re-add the FileOutput delegate callback (was accidentally removed during edits) 859 930 override fun captureOutput( 860 931 output: AVCaptureFileOutput, 861 932 didFinishRecordingToOutputFileAtURL: NSURL, ··· 864 935 ) { 865 936 onVideoSaved?.invoke(didFinishRecordingToOutputFileAtURL.path ?: "") 866 937 } 867 - 868 938 } 869 - 870 - @OptIn(ExperimentalForeignApi::class) 871 - fun mapSkeletonToPreview( 872 - skeleton: Skeleton, previewLayer: AVCaptureVideoPreviewLayer, width: Float, height: Float 873 - ): Skeleton { 874 - fun mapPoint(point: Skeleton.SkeletonCoordinate?): Skeleton.SkeletonCoordinate? 
{ 875 - if (point == null) return null 876 - 877 - // Normalize the point 878 - val normalizedPoint = CGPointMake(point.x.toDouble() / width, point.y.toDouble() / height) 879 - val screenPoint = previewLayer.pointForCaptureDevicePointOfInterest(normalizedPoint) 880 - return Skeleton.SkeletonCoordinate( 881 - screenPoint.useContents { x.toFloat() }, 882 - screenPoint.useContents { y.toFloat() }) 883 - } 884 - 885 - val minbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(0.0, 0.0)) 886 - .useContents { Pair(x.toFloat(), y.toFloat()) } 887 - val maxbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(1.0, 1.0)) 888 - .useContents { Pair(x.toFloat(), y.toFloat()) } 889 - val bounds = Pair( 890 - abs(maxbounds.first - minbounds.first), abs(maxbounds.second - minbounds.second) 891 - ) 892 - 893 - return Skeleton( 894 - timestamp = skeleton.timestamp, 895 - leftShoulder = mapPoint(skeleton.leftShoulder), 896 - rightShoulder = mapPoint(skeleton.rightShoulder), 897 - leftElbow = mapPoint(skeleton.leftElbow), 898 - rightElbow = mapPoint(skeleton.rightElbow), 899 - leftWrist = mapPoint(skeleton.leftWrist), 900 - rightWrist = mapPoint(skeleton.rightWrist), 901 - leftHip = mapPoint(skeleton.leftHip), 902 - rightHip = mapPoint(skeleton.rightHip), 903 - leftKnee = mapPoint(skeleton.leftKnee), 904 - rightKnee = mapPoint(skeleton.rightKnee), 905 - leftAnkle = mapPoint(skeleton.leftAnkle), 906 - rightAnkle = mapPoint(skeleton.rightAnkle), 907 - width = bounds.first, 908 - height = bounds.second, 909 - ) 910 - } 911 - 912 939 913 940 @OptIn(ExperimentalForeignApi::class) 914 941 internal fun UIImage.toSkiaImage(): Image? 
{ ··· 1049 1076 val cgImage = CGBitmapContextCreateImage(context) 1050 1077 return cgImage?.let { UIImage.imageWithCGImage(it) } 1051 1078 } 1079 + 1080 + @OptIn(ExperimentalForeignApi::class) 1081 + fun mapSkeletonToPreview( 1082 + skeleton: Skeleton, 1083 + previewLayer: AVCaptureVideoPreviewLayer, 1084 + width: Float, 1085 + height: Float 1086 + ): Skeleton { 1087 + fun orientedNormalizedToCaptureDeviceNormalized( 1088 + uTopLeft: Double, 1089 + vTopLeft: Double 1090 + ): Pair<Double, Double> { 1091 + val u = uTopLeft.coerceIn(0.0, 1.0) 1092 + val v = vTopLeft.coerceIn(0.0, 1.0) 1093 + 1094 + val orientation = 1095 + previewLayer.connection?.videoOrientation ?: AVCaptureVideoOrientationLandscapeRight 1096 + val mirrored = previewLayer.connection?.videoMirrored ?: false 1097 + 1098 + var (x, y) = when (orientation) { 1099 + AVCaptureVideoOrientationPortrait -> Pair(v, 1.0 - u) 1100 + AVCaptureVideoOrientationPortraitUpsideDown -> Pair(1.0 - v, u) 1101 + AVCaptureVideoOrientationLandscapeRight -> Pair(u, v) 1102 + AVCaptureVideoOrientationLandscapeLeft -> Pair(1.0 - u, 1.0 - v) 1103 + else -> Pair(u, v) 1104 + } 1105 + 1106 + if (mirrored) x = 1.0 - x 1107 + return Pair(x.coerceIn(0.0, 1.0), y.coerceIn(0.0, 1.0)) 1108 + } 1109 + 1110 + fun mapPoint(point: Skeleton.SkeletonCoordinate?): Skeleton.SkeletonCoordinate? 
{ 1111 + if (point == null) return null 1112 + 1113 + val u = (point.x.toDouble() / width.toDouble()) 1114 + val v = (point.y.toDouble() / height.toDouble()) 1115 + val (cx, cy) = orientedNormalizedToCaptureDeviceNormalized(u, v) 1116 + 1117 + val normalizedPoint = CGPointMake(cx, cy) 1118 + val screenPoint = previewLayer.pointForCaptureDevicePointOfInterest(normalizedPoint) 1119 + 1120 + return Skeleton.SkeletonCoordinate( 1121 + screenPoint.useContents { x.toFloat() }, 1122 + screenPoint.useContents { y.toFloat() } 1123 + ) 1124 + } 1125 + 1126 + val minbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(0.0, 0.0)) 1127 + .useContents { Pair(x.toFloat(), y.toFloat()) } 1128 + val maxbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(1.0, 1.0)) 1129 + .useContents { Pair(x.toFloat(), y.toFloat()) } 1130 + val bounds = Pair( 1131 + abs(maxbounds.first - minbounds.first), 1132 + abs(maxbounds.second - minbounds.second) 1133 + ) 1134 + 1135 + return Skeleton( 1136 + timestamp = skeleton.timestamp, 1137 + leftShoulder = mapPoint(skeleton.leftShoulder), 1138 + rightShoulder = mapPoint(skeleton.rightShoulder), 1139 + leftElbow = mapPoint(skeleton.leftElbow), 1140 + rightElbow = mapPoint(skeleton.rightElbow), 1141 + leftWrist = mapPoint(skeleton.leftWrist), 1142 + rightWrist = mapPoint(skeleton.rightWrist), 1143 + leftHip = mapPoint(skeleton.leftHip), 1144 + rightHip = mapPoint(skeleton.rightHip), 1145 + leftKnee = mapPoint(skeleton.leftKnee), 1146 + rightKnee = mapPoint(skeleton.rightKnee), 1147 + leftAnkle = mapPoint(skeleton.leftAnkle), 1148 + rightAnkle = mapPoint(skeleton.rightAnkle), 1149 + width = bounds.first, 1150 + height = bounds.second, 1151 + ) 1152 + }

+9 -9

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.ios.kt

··· 26 26 @Composable 27 27 actual fun CameraView( 28 28 skeletonRepository: SkeletonRepository, 29 - customObjectRepository:CustomObjectRespository, 29 + customObjectRepository: CustomObjectRespository, 30 30 detectMode: DetectMode, 31 31 drawSkeleton: Boolean, 32 32 objectModel: ObjectModel?, ··· 40 40 controller: CameraViewController?, 41 41 onRecordToggled: (Boolean) -> Unit, 42 42 onVideoSaved: (String, String) -> Unit, 43 - ) { 43 + ) { 44 44 val cameraEngine = remember { mutableStateOf<CameraEngine?>(null) } 45 45 val frameListener = remember { FrameRepository() } 46 46 val frameBitmap by frameListener.frameFlow.collectAsState() ··· 57 57 cameraEngine.value?.setUseUltraWideSafe(useUltraWide) 58 58 } 59 59 60 - val recordingDone = {path: String -> 60 + val recordingDone = { path: String -> 61 61 val id = idMap.entries.firstOrNull { it.value == path }?.key 62 62 if (id != null) { 63 63 onVideoSaved(id, path) ··· 78 78 setTextMeasurer(textMeasurer) 79 79 80 80 if (recordingId != null) { 81 - (if(lastRecordingState) splitRecording() 82 - else startRecording())?.also{ 81 + (if (lastRecordingState) splitRecording() 82 + else startRecording())?.also { 83 83 idMap = idMap + (recordingId to it) 84 84 lastRecordingState = true 85 85 } 86 86 } else { 87 - if(lastRecordingState){ 87 + if (lastRecordingState) { 88 88 stopRecording() 89 89 } 90 90 lastRecordingState = false 91 91 } 92 92 } 93 93 } 94 - LaunchedEffect(focusArea){ 94 + LaunchedEffect(focusArea) { 95 95 cameraEngine.value?.setFocusArea(focusArea) 96 96 } 97 97 LaunchedEffect(objectModel) { 98 98 cameraEngine.value?.setObjectModel(objectModel) 99 99 } 100 - LaunchedEffect(Unit){ 100 + LaunchedEffect(Unit) { 101 101 delay(1000L) 102 102 cameraEngine.value?.setObjectModel(objectModel) 103 103 cameraEngine.value?.setFocusArea(focusArea) ··· 117 117 engine.setPreviewFillMode(previewFillMode) 118 118 }, 119 119 ) 120 - if (drawSkeleton || drawObjects!= null) { 120 + if (drawSkeleton || drawObjects != null) { 121 
121 // IMPORTANT: Do not force GC from composition; it can cause visible flicker/jitter. 122 122 // (GC.collect() is still performed in the capture pipeline.) 123 123 frameBitmap?.also {

+4 -4

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/Conversions.kt

··· 74 74 ) 75 75 ) 76 76 return videoImage 77 - }catch (e: Exception) { 77 + } catch (e: Exception) { 78 78 //println("Error: Failed to create CGImage from CVImageBufferRef: ${e.message}") 79 79 return null 80 80 } ··· 98 98 ) 99 99 ) 100 100 return videoImage 101 - }catch (e: Exception) { 101 + } catch (e: Exception) { 102 102 //println("Error: Failed to create CGImage from CVImageBufferRef: ${e.message}") 103 103 return null 104 104 } ··· 130 130 131 131 // Call a function to process the UIImage 132 132 return image.toImageBitmap() 133 - }catch (e: Exception) { 133 + } catch (e: Exception) { 134 134 //println("Error: Failed to create CGImage from CVImageBufferRef: ${e.message}") 135 135 return null 136 136 137 - }finally { 137 + } finally { 138 138 // Release the CGImage 139 139 CGImageRelease(videoImage) 140 140 }

+246 -117

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/FrameProcessor.kt

··· 68 68 import platform.Vision.VNRecognizedPoint 69 69 import platform.Vision.VNRequest 70 70 import kotlin.math.abs 71 + import kotlin.math.absoluteValue 71 72 import kotlin.math.max 72 73 import kotlin.math.min 73 74 74 - private const val VN_IMAGE_OPTION_CG_IMAGE_PROPERTY_ORIENTATION = "VNImageOptionCGImagePropertyOrientation" 75 + private const val VN_IMAGE_OPTION_CG_IMAGE_PROPERTY_ORIENTATION = 76 + "VNImageOptionCGImagePropertyOrientation" 77 + 78 + // Vision returns points/rects in normalized image coordinates with origin at bottom-left. 79 + // We convert everything into *oriented pixel coordinates* with origin at top-left. 80 + private data class OrientedFrameSize(val width: Float, val height: Float) 81 + 82 + private fun orientedFrameSize( 83 + rawWidth: ULong, 84 + rawHeight: ULong, 85 + exifOrientation: Int 86 + ): OrientedFrameSize { 87 + val w = rawWidth.toFloat() 88 + val h = rawHeight.toFloat() 89 + // Left/Right orientations swap dimensions. 90 + return when (exifOrientation.toUInt()) { 91 + kCGImagePropertyOrientationLeft, 92 + kCGImagePropertyOrientationLeftMirrored, 93 + kCGImagePropertyOrientationRight, 94 + kCGImagePropertyOrientationRightMirrored -> OrientedFrameSize(width = h, height = w) 95 + 96 + else -> OrientedFrameSize(width = w, height = h) 97 + } 98 + } 99 + 100 + @OptIn(ExperimentalForeignApi::class) 101 + private fun CValue<CGPoint>.toOrientedPixelPoint( 102 + rawWidth: ULong, 103 + rawHeight: ULong, 104 + exifOrientation: Int 105 + ): Skeleton.SkeletonCoordinate { 106 + val oriented = orientedFrameSize(rawWidth, rawHeight, exifOrientation) 107 + // Convert from Vision normalized (origin bottom-left) to oriented pixel (origin top-left). 108 + // This assumes the EXIF orientation is already applied by VNImageRequestHandler. 
109 + val xNorm: Float 110 + val yNorm: Float 111 + this.useContents { 112 + xNorm = x.toFloat() 113 + yNorm = y.toFloat() 114 + } 115 + 116 + val xPx = (xNorm * oriented.width).coerceIn(0f, oriented.width) 117 + val yPx = ((1f - yNorm) * oriented.height).coerceIn(0f, oriented.height) 118 + return Skeleton.SkeletonCoordinate(xPx, yPx) 119 + } 120 + 121 + @OptIn(ExperimentalForeignApi::class) 122 + private fun CValue<CGRect>.toOrientedPixelRect( 123 + rawWidth: ULong, 124 + rawHeight: ULong, 125 + exifOrientation: Int 126 + ): Rect { 127 + val oriented = orientedFrameSize(rawWidth, rawHeight, exifOrientation) 128 + // Vision normalized bounding boxes use origin bottom-left. 129 + return this.useContents { 130 + val left = (origin.x.toFloat() * oriented.width) 131 + val bottom = (origin.y.toFloat() * oriented.height) 132 + val right = ((origin.x + size.width).toFloat() * oriented.width) 133 + val top = ((origin.y + size.height).toFloat() * oriented.height) 134 + 135 + // Convert bottom-left origin to top-left origin. 136 + val topPx = oriented.height - top 137 + val bottomPx = oriented.height - bottom 138 + 139 + Rect( 140 + left = min(left, right), 141 + top = min(topPx, bottomPx), 142 + right = max(left, right), 143 + bottom = max(topPx, bottomPx) 144 + ).normalize() 145 + } 146 + } 75 147 76 148 fun bodyPoseHandler(request: VNRequest): List<MutableMap<VNHumanBodyPoseObservationJointName, VNRecognizedPoint>>? { 77 149 try { ··· 184 256 this.detectMode = detectMode 185 257 } 186 258 187 - fun setObjectModel(objectModel: ObjectModel?){ 259 + fun setObjectModel(objectModel: ObjectModel?) 
{ 188 260 modelObj = objectModel?.getModel() 189 261 setUpRecognition() 190 262 } ··· 257 329 val right = (origin.x + size.width) * w 258 330 val bottom = (1.0 - (origin.y + size.height)) * h 259 331 Rect( 260 - left = min(left,right).toFloat(), 261 - top = min(top,bottom).toFloat(), 262 - right = max(left,right).toFloat(), 263 - bottom = max(top,bottom).toFloat() 332 + left = min(left, right).toFloat(), 333 + top = min(top, bottom).toFloat(), 334 + right = max(left, right).toFloat(), 335 + bottom = max(top, bottom).toFloat() 264 336 ) 265 337 } 266 338 val labels = observation.labels.mapNotNull { ··· 284 356 onObjectsProcessed(analysisObjects) 285 357 }.apply { 286 358 // Make preprocessing consistent across orientations. 287 - imageCropAndScaleOption = platform.Vision.VNImageCropAndScaleOptionCenterCrop 359 + imageCropAndScaleOption = 360 + platform.Vision.VNImageCropAndScaleOptionCenterCrop 288 361 } 289 362 } 290 363 // For CGImage path we don't have an AVCaptureConnection; assume the CGImage ··· 298 371 onSkeletonProcessed(null) 299 372 } else { 300 373 request?.also { vnRequest -> 301 - val recognizedPoints = bodyPoseHandler(vnRequest)?.firstOrNull{ 374 + val recognizedPoints = bodyPoseHandler(vnRequest)?.firstOrNull { 302 375 isInFocusArea(it, focusArea, width, height) 303 376 } 304 377 regionOfInterest = ··· 372 445 focusArea: Rect?, 373 446 width: ULong, 374 447 height: ULong, 375 - mapPoint: (Skeleton.SkeletonCoordinate) -> Skeleton.SkeletonCoordinate = {it} 448 + mapPoint: (Skeleton.SkeletonCoordinate) -> Skeleton.SkeletonCoordinate = { it } 376 449 ): Boolean { 377 450 if (focusArea == null || recognizedPoints.isNullOrEmpty()) return true 378 451 val focusRect = Rect( ··· 418 491 return 419 492 } 420 493 val retainedBuffer = CFRetain(buffer) 421 - val width = CVPixelBufferGetWidth(buffer).toULong() 422 - val height = CVPixelBufferGetHeight(buffer).toULong() 494 + val rawWidth = CVPixelBufferGetWidth(buffer).toULong() 495 + val rawHeight = 
CVPixelBufferGetHeight(buffer).toULong() 496 + val exifOrientation = captureConnection.visionExifOrientation() 497 + val orientedSize = orientedFrameSize(rawWidth, rawHeight, exifOrientation) 498 + 423 499 memScoped { 424 500 val errorPtr = alloc<ObjCObjectVar<NSError?>>() 425 501 try { 426 502 val requestForObjects = if (detectMode.doObject()) { 427 - modelObj?.let { 428 - VNCoreMLRequest(it) { request, error -> 503 + modelObj?.let { model -> 504 + VNCoreMLRequest(model) { request, error -> 429 505 if (error != null) { 430 506 onObjectsProcessed(emptyList()) 431 507 return@VNCoreMLRequest ··· 433 509 val results = request?.results as? List<*> ?: emptyList<Any>() 434 510 val recognized = 435 511 results.filterIsInstance<VNRecognizedObjectObservation>() 512 + 436 513 val analysisObjects = recognized.map { observation -> 437 - val confidence = observation.confidence 438 - val boundingBox = observation.boundingBox.useContents { 439 - val w = width.toFloat() 440 - val h = height.toFloat() 441 - val left = origin.x * w 442 - val top = (1.0 - origin.y) * h 443 - val right = (origin.x + size.width) * w 444 - val bottom = (1.0 - (origin.y + size.height)) * h 445 - Rect( 446 - left = min(left,right).toFloat(), 447 - top = min(top,bottom).toFloat(), 448 - right = max(left,right).toFloat(), 449 - bottom = max(top,bottom).toFloat() 450 - ) 451 - } 514 + val boundingBox = observation.boundingBox.toOrientedPixelRect( 515 + rawWidth = rawWidth, 516 + rawHeight = rawHeight, 517 + exifOrientation = exifOrientation 518 + ) 519 + 452 520 val labels = observation.labels.mapNotNull { 453 521 (it as VNClassificationObservation).let { ca -> 454 522 if (ca.confidence > 0.0) Label( ··· 457 525 ) else null 458 526 } 459 527 } 528 + 460 529 AnalysisObject( 461 530 trackingId = stableTrackingId(observation), 462 531 labels = labels, 463 - boundingBox = boundingBox.normalize(), 532 + boundingBox = boundingBox, 464 533 frameSize = FrameSize( 465 - width = abs(width.toInt()), 466 - height = 
abs(height.toInt()) 534 + width = orientedSize.width.toInt().absoluteValue, 535 + height = orientedSize.height.toInt().absoluteValue 467 536 ) 468 537 ) 469 538 } 539 + 470 540 onObjectsProcessed(analysisObjects) 471 541 }.apply { 472 - imageCropAndScaleOption = platform.Vision.VNImageCropAndScaleOptionCenterCrop 542 + imageCropAndScaleOption = 543 + platform.Vision.VNImageCropAndScaleOptionCenterCrop 473 544 } 474 545 } 475 546 } else null 476 547 477 548 val options: Map<Any?, Any?> = mapOf( 478 - VN_IMAGE_OPTION_CG_IMAGE_PROPERTY_ORIENTATION to captureConnection.visionExifOrientation() 549 + VN_IMAGE_OPTION_CG_IMAGE_PROPERTY_ORIENTATION to exifOrientation 479 550 ) 480 551 481 552 val requestForSkeleton = if (detectMode.doPose()) { ··· 484 555 onSkeletonProcessed(null) 485 556 } else { 486 557 request?.also { vnRequest -> 487 - val recognizedPoints = bodyPoseHandler(vnRequest)?.firstOrNull { pointsMap -> 488 - Skeleton( 489 - timestamp = timestamp, 490 - leftShoulder = pointsMap[VNHumanBodyPoseObservationJointNameLeftShoulder]?.location?.toSkeletonPoint( 491 - width, 492 - height 493 - ), 494 - rightShoulder = pointsMap[VNHumanBodyPoseObservationJointNameRightShoulder]?.location?.toSkeletonPoint( 495 - width, 496 - height 497 - ), 498 - leftElbow = pointsMap[VNHumanBodyPoseObservationJointNameLeftElbow]?.location?.toSkeletonPoint( 499 - width, 500 - height 501 - ), 502 - rightElbow = pointsMap[VNHumanBodyPoseObservationJointNameRightElbow]?.location?.toSkeletonPoint( 503 - width, 504 - height 505 - ), 506 - leftWrist = pointsMap[VNHumanBodyPoseObservationJointNameLeftWrist]?.location?.toSkeletonPoint( 507 - width, 508 - height 509 - ), 510 - rightWrist = pointsMap[VNHumanBodyPoseObservationJointNameRightWrist]?.location?.toSkeletonPoint( 511 - width, 512 - height 513 - ), 514 - leftHip = pointsMap[VNHumanBodyPoseObservationJointNameLeftHip]?.location?.toSkeletonPoint( 515 - width, 516 - height 517 - ), 518 - rightHip = 
pointsMap[VNHumanBodyPoseObservationJointNameRightHip]?.location?.toSkeletonPoint( 519 - width, 520 - height 521 - ), 522 - leftKnee = pointsMap[VNHumanBodyPoseObservationJointNameLeftKnee]?.location?.toSkeletonPoint( 523 - width, 524 - height 525 - ), 526 - rightKnee = pointsMap[VNHumanBodyPoseObservationJointNameRightKnee]?.location?.toSkeletonPoint( 527 - width, 528 - height 529 - ), 530 - leftAnkle = pointsMap[VNHumanBodyPoseObservationJointNameLeftAnkle]?.location?.toSkeletonPoint( 531 - width, 532 - height 533 - ), 534 - rightAnkle = pointsMap[VNHumanBodyPoseObservationJointNameRightAnkle]?.location?.toSkeletonPoint( 535 - width, 536 - height 537 - ), 538 - height = height.toFloat(), 539 - width = width.toFloat() 540 - ).let { skel -> 541 - preview?.let { layer -> 542 - mapSkeletonToPreview( 543 - skeleton = skel, 544 - previewLayer = layer, 545 - width = 480f, 546 - height = 360f 547 - ) 548 - } 549 - }?.isInFocusArea(focusArea) ?: false 550 - } 558 + val recognizedPoints = 559 + bodyPoseHandler(vnRequest)?.firstOrNull { pointsMap -> 560 + // Build skeleton in oriented pixel space and check focus area there. 
561 + val candidate = Skeleton( 562 + timestamp = timestamp, 563 + leftShoulder = pointsMap[VNHumanBodyPoseObservationJointNameLeftShoulder]?.location?.toOrientedPixelPoint( 564 + rawWidth, 565 + rawHeight, 566 + exifOrientation 567 + ), 568 + rightShoulder = pointsMap[VNHumanBodyPoseObservationJointNameRightShoulder]?.location?.toOrientedPixelPoint( 569 + rawWidth, 570 + rawHeight, 571 + exifOrientation 572 + ), 573 + leftElbow = pointsMap[VNHumanBodyPoseObservationJointNameLeftElbow]?.location?.toOrientedPixelPoint( 574 + rawWidth, 575 + rawHeight, 576 + exifOrientation 577 + ), 578 + rightElbow = pointsMap[VNHumanBodyPoseObservationJointNameRightElbow]?.location?.toOrientedPixelPoint( 579 + rawWidth, 580 + rawHeight, 581 + exifOrientation 582 + ), 583 + leftWrist = pointsMap[VNHumanBodyPoseObservationJointNameLeftWrist]?.location?.toOrientedPixelPoint( 584 + rawWidth, 585 + rawHeight, 586 + exifOrientation 587 + ), 588 + rightWrist = pointsMap[VNHumanBodyPoseObservationJointNameRightWrist]?.location?.toOrientedPixelPoint( 589 + rawWidth, 590 + rawHeight, 591 + exifOrientation 592 + ), 593 + leftHip = pointsMap[VNHumanBodyPoseObservationJointNameLeftHip]?.location?.toOrientedPixelPoint( 594 + rawWidth, 595 + rawHeight, 596 + exifOrientation 597 + ), 598 + rightHip = pointsMap[VNHumanBodyPoseObservationJointNameRightHip]?.location?.toOrientedPixelPoint( 599 + rawWidth, 600 + rawHeight, 601 + exifOrientation 602 + ), 603 + leftKnee = pointsMap[VNHumanBodyPoseObservationJointNameLeftKnee]?.location?.toOrientedPixelPoint( 604 + rawWidth, 605 + rawHeight, 606 + exifOrientation 607 + ), 608 + rightKnee = pointsMap[VNHumanBodyPoseObservationJointNameRightKnee]?.location?.toOrientedPixelPoint( 609 + rawWidth, 610 + rawHeight, 611 + exifOrientation 612 + ), 613 + leftAnkle = pointsMap[VNHumanBodyPoseObservationJointNameLeftAnkle]?.location?.toOrientedPixelPoint( 614 + rawWidth, 615 + rawHeight, 616 + exifOrientation 617 + ), 618 + rightAnkle = 
pointsMap[VNHumanBodyPoseObservationJointNameRightAnkle]?.location?.toOrientedPixelPoint( 619 + rawWidth, 620 + rawHeight, 621 + exifOrientation 622 + ), 623 + height = orientedSize.height, 624 + width = orientedSize.width 625 + ) 626 + 627 + candidate.isInFocusArea(focusArea) 628 + } 551 629 552 630 regionOfInterest = calculateRegionOfInterest(recognizedPoints) 553 631 ··· 555 633 timestamp = timestamp, 556 634 leftShoulder = recognizedPoints?.get( 557 635 VNHumanBodyPoseObservationJointNameLeftShoulder 558 - )?.location?.toSkeletonPoint(width, height), 636 + )?.location?.toOrientedPixelPoint( 637 + rawWidth, 638 + rawHeight, 639 + exifOrientation 640 + ), 559 641 rightShoulder = recognizedPoints?.get( 560 642 VNHumanBodyPoseObservationJointNameRightShoulder 561 - )?.location?.toSkeletonPoint(width, height), 643 + )?.location?.toOrientedPixelPoint( 644 + rawWidth, 645 + rawHeight, 646 + exifOrientation 647 + ), 562 648 leftElbow = recognizedPoints?.get( 563 649 VNHumanBodyPoseObservationJointNameLeftElbow 564 - )?.location?.toSkeletonPoint(width, height), 650 + )?.location?.toOrientedPixelPoint( 651 + rawWidth, 652 + rawHeight, 653 + exifOrientation 654 + ), 565 655 rightElbow = recognizedPoints?.get( 566 656 VNHumanBodyPoseObservationJointNameRightElbow 567 - )?.location?.toSkeletonPoint(width, height), 657 + )?.location?.toOrientedPixelPoint( 658 + rawWidth, 659 + rawHeight, 660 + exifOrientation 661 + ), 568 662 leftWrist = recognizedPoints?.get( 569 663 VNHumanBodyPoseObservationJointNameLeftWrist 570 - )?.location?.toSkeletonPoint(width, height), 664 + )?.location?.toOrientedPixelPoint( 665 + rawWidth, 666 + rawHeight, 667 + exifOrientation 668 + ), 571 669 rightWrist = recognizedPoints?.get( 572 670 VNHumanBodyPoseObservationJointNameRightWrist 573 - )?.location?.toSkeletonPoint(width, height), 671 + )?.location?.toOrientedPixelPoint( 672 + rawWidth, 673 + rawHeight, 674 + exifOrientation 675 + ), 574 676 leftHip = recognizedPoints?.get( 575 677 
VNHumanBodyPoseObservationJointNameLeftHip 576 - )?.location?.toSkeletonPoint(width, height), 678 + )?.location?.toOrientedPixelPoint( 679 + rawWidth, 680 + rawHeight, 681 + exifOrientation 682 + ), 577 683 rightHip = recognizedPoints?.get( 578 684 VNHumanBodyPoseObservationJointNameRightHip 579 - )?.location?.toSkeletonPoint(width, height), 685 + )?.location?.toOrientedPixelPoint( 686 + rawWidth, 687 + rawHeight, 688 + exifOrientation 689 + ), 580 690 leftKnee = recognizedPoints?.get( 581 691 VNHumanBodyPoseObservationJointNameLeftKnee 582 - )?.location?.toSkeletonPoint(width, height), 692 + )?.location?.toOrientedPixelPoint( 693 + rawWidth, 694 + rawHeight, 695 + exifOrientation 696 + ), 583 697 rightKnee = recognizedPoints?.get( 584 698 VNHumanBodyPoseObservationJointNameRightKnee 585 - )?.location?.toSkeletonPoint(width, height), 699 + )?.location?.toOrientedPixelPoint( 700 + rawWidth, 701 + rawHeight, 702 + exifOrientation 703 + ), 586 704 leftAnkle = recognizedPoints?.get( 587 705 VNHumanBodyPoseObservationJointNameLeftAnkle 588 - )?.location?.toSkeletonPoint(width, height), 706 + )?.location?.toOrientedPixelPoint( 707 + rawWidth, 708 + rawHeight, 709 + exifOrientation 710 + ), 589 711 rightAnkle = recognizedPoints?.get( 590 712 VNHumanBodyPoseObservationJointNameRightAnkle 591 - )?.location?.toSkeletonPoint(width, height), 592 - height = height.toFloat(), 593 - width = width.toFloat() 713 + )?.location?.toOrientedPixelPoint( 714 + rawWidth, 715 + rawHeight, 716 + exifOrientation 717 + ), 718 + height = orientedSize.height, 719 + width = orientedSize.width 594 720 ) 595 721 596 722 onSkeletonProcessed(skelBuffer.smooth(updatedSkeleton)) 597 723 } 598 724 } 599 725 } 600 - }else null 726 + } else null 727 + 601 728 requestForSkeleton?.regionOfInterest = regionOfInterest 602 729 val handler = VNImageRequestHandler(buffer, options) 603 730 handler.performRequests( 604 - listOfNotNull(requestForObjects, requestForSkeleton), errorPtr.ptr 731 + 
listOfNotNull(requestForObjects, requestForSkeleton), 732 + errorPtr.ptr 605 733 ) 734 + 606 735 CFRelease(retainedBuffer) 736 + 607 737 if (errorPtr.value != null) { 608 - //println("Error performing object detection request: ${errorPtr.value}") 609 738 onObjectsProcessed(emptyList()) 610 739 onSkeletonProcessed(null) 611 740 } 612 - } catch (e: Throwable) { 613 - //println("Unable to perform the object detection request: ${e.message}") 741 + } catch (_: Throwable) { 614 742 onObjectsProcessed(emptyList()) 615 743 onSkeletonProcessed(null) 616 744 } ··· 680 808 } 681 809 } 682 810 683 - private fun stableTrackingId(observation: VNRecognizedObjectObservation): Int = observation.hashCode() 811 + private fun stableTrackingId(observation: VNRecognizedObjectObservation): Int = 812 + observation.hashCode() 684 813 685 814 data class DetectedObject @OptIn(ExperimentalForeignApi::class) constructor( 686 815 val id: String = NSUUID().UUIDString(),

+1

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/FrameRepository.kt

··· 8 8 class FrameRepository { 9 9 @OptIn(ExperimentalForeignApi::class) 10 10 private val _frameFlow = MutableStateFlow<ImageBitmap?>(null) 11 + 11 12 @OptIn(ExperimentalForeignApi::class) 12 13 val frameFlow: StateFlow<ImageBitmap?> get() = _frameFlow 13 14

+2 -4

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/LabelTextIos.kt

··· 1 1 package com.performancecoachlab.posedetection.camera 2 2 3 3 import androidx.compose.ui.geometry.Offset 4 - import androidx.compose.ui.geometry.Size 5 - import androidx.compose.ui.graphics.Color 6 4 import androidx.compose.ui.graphics.drawscope.DrawScope 7 - import androidx.compose.ui.graphics.drawscope.clipRect 8 5 import androidx.compose.ui.graphics.drawscope.withTransform 9 6 import androidx.compose.ui.text.TextLayoutResult 10 7 import androidx.compose.ui.text.TextMeasurer ··· 15 12 drawableObject: DrawableObject, 16 13 textMeasurer: TextMeasurer? 17 14 ) { 18 - val label = drawableObject.obj.labels.firstOrNull()?.text.let { if (it.isNullOrBlank()) "Object" else it } 15 + val label = 16 + drawableObject.obj.labels.firstOrNull()?.text.let { if (it.isNullOrBlank()) "Object" else it } 19 17 if (label.isEmpty() || textMeasurer == null) return 20 18 21 19 val box = drawableObject.obj.boundingBox

+8 -7

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/custom/CustomObjectModel.ios.kt

··· 1 1 package com.performancecoachlab.posedetection.custom 2 2 3 3 import androidx.compose.runtime.Composable 4 - import androidx.compose.runtime.remember 5 4 import kotlinx.cinterop.ExperimentalForeignApi 6 5 import platform.CoreML.MLModel 7 6 import platform.Foundation.NSBundle ··· 16 15 17 16 @OptIn(ExperimentalForeignApi::class) 18 17 fun createObjectDetector(model: String?): VNCoreMLModel? { 19 - if(model == null) { 18 + if (model == null) { 20 19 return null 21 20 } 22 21 //println("Model input: $model") ··· 24 23 //println("Model path: $path") 25 24 val url = path?.let { NSURL.fileURLWithPath(it) } 26 25 //println("Model URL: $url") 27 - val modelCont = url?.let {MLModel.modelWithContentsOfURL(it, null)} 26 + val modelCont = url?.let { MLModel.modelWithContentsOfURL(it, null) } 28 27 //println("Model content: $modelCont") 29 - val modelObj = modelCont?.let {VNCoreMLModel.modelForMLModel(it,null)} 28 + val modelObj = modelCont?.let { VNCoreMLModel.modelForMLModel(it, null) } 30 29 //println("Model: $modelObj") 31 30 return modelObj 32 31 } 33 32 34 - actual class ObjectModel{ 33 + actual class ObjectModel { 35 34 private var model: VNCoreMLModel? = null 36 - constructor(model: VNCoreMLModel?){ 35 + 36 + constructor(model: VNCoreMLModel?) { 37 37 this.model = model 38 38 } 39 + 39 40 fun getModel(): VNCoreMLModel? { 40 41 return model 41 42 } ··· 44 45 @Composable 45 46 internal actual fun platformRememberObjectModel(modelPath: ModelPath): ObjectModel { 46 47 // iOS: create once for this composition; avoids global caching issues 47 - return initialiseObjectModel(modelPath) 48 + return initialiseObjectModel(modelPath) 48 49 }

+17 -23

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/encoding/VideoBuilder.ios.kt

··· 3 3 import androidx.compose.ui.graphics.ImageBitmap 4 4 import androidx.compose.ui.graphics.PixelMap 5 5 import androidx.compose.ui.graphics.toPixelMap 6 - import com.performancecoachlab.posedetection.recording.InputFrame 7 6 import kotlinx.cinterop.ByteVar 8 - import kotlinx.cinterop.CValue 9 7 import kotlinx.cinterop.ExperimentalForeignApi 10 8 import kotlinx.cinterop.addressOf 11 9 import kotlinx.cinterop.alloc ··· 14 12 import kotlinx.cinterop.plus 15 13 import kotlinx.cinterop.ptr 16 14 import kotlinx.cinterop.reinterpret 17 - import kotlinx.cinterop.useContents 18 15 import kotlinx.cinterop.usePinned 19 16 import kotlinx.cinterop.value 20 17 import kotlinx.coroutines.Dispatchers ··· 38 35 import platform.CoreGraphics.CGColorSpaceCreateDeviceRGB 39 36 import platform.CoreGraphics.CGColorSpaceRelease 40 37 import platform.CoreGraphics.CGContextClearRect 41 - import platform.CoreGraphics.CGContextDrawImage 42 38 import platform.CoreGraphics.CGContextRelease 43 - import platform.CoreGraphics.CGContextScaleCTM 44 - import platform.CoreGraphics.CGContextTranslateCTM 45 39 import platform.CoreGraphics.CGImageAlphaInfo 46 - import platform.CoreGraphics.CGImageRef 47 40 import platform.CoreGraphics.CGRectMake 48 - import platform.CoreGraphics.CGSize 49 - import platform.CoreGraphics.CGSizeMake 50 - import platform.CoreGraphics.kCGBitmapByteOrder32Little 51 - import platform.CoreMedia.CMTimeAdd 52 41 import platform.CoreMedia.CMTimeMake 53 42 import platform.CoreVideo.CVPixelBufferGetBaseAddress 54 43 import platform.CoreVideo.CVPixelBufferGetBytesPerRow 55 44 import platform.CoreVideo.CVPixelBufferLockBaseAddress 56 45 import platform.CoreVideo.CVPixelBufferPoolCreatePixelBuffer 57 - import platform.CoreVideo.CVPixelBufferPoolRef 58 - import platform.CoreVideo.CVPixelBufferRef 59 46 import platform.CoreVideo.CVPixelBufferRefVar 60 47 import platform.CoreVideo.CVPixelBufferRelease 61 48 import platform.CoreVideo.CVPixelBufferUnlockBaseAddress ··· 67 54 import 
platform.CoreVideo.kCVPixelFormatType_32BGRA 68 55 import platform.CoreVideo.kCVReturnSuccess 69 56 import platform.Foundation.CFBridgingRelease 70 - import platform.Foundation.NSDocumentDirectory 71 57 import platform.Foundation.NSFileManager 72 58 import platform.Foundation.NSURL 73 - import platform.Foundation.NSUserDomainMask 74 - import platform.darwin.dispatch_queue_create 75 - import kotlin.native.runtime.GC 76 59 import kotlin.native.runtime.NativeRuntimeApi 77 60 78 61 private class IOSVideoBuilder( ··· 131 114 started = true 132 115 frameCount = 0L 133 116 } 117 + 134 118 @OptIn(ExperimentalForeignApi::class, NativeRuntimeApi::class) 135 119 override suspend fun addFrame(frame: ImageBitmap, timestampms: Long) { 136 120 if (!started) initWriter() ··· 138 122 if (frame.width == 0 || frame.height == 0) return 139 123 val videoWriterInput = input!! 140 124 val pixelBufferAdaptor = adaptor!! 141 - val pool = pixelBufferAdaptor.pixelBufferPool ?: throw IllegalStateException("Pixel buffer pool is null") 125 + val pool = pixelBufferAdaptor.pixelBufferPool 126 + ?: throw IllegalStateException("Pixel buffer pool is null") 142 127 val presentationTime = CMTimeMake(timestampms, 1_000) 143 128 memScoped { 144 129 val pixelBufferPtr = alloc<CVPixelBufferRefVar>() 145 - val status = CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, pixelBufferPtr.ptr) 130 + val status = 131 + CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool, pixelBufferPtr.ptr) 146 132 if (status == kCVReturnSuccess) { 147 133 val pixelBuffer = pixelBufferPtr.value 148 134 CVPixelBufferLockBaseAddress(pixelBuffer, 0u) ··· 160 146 bitmapInfo 161 147 ) 162 148 if (context != null) { 163 - CGContextClearRect(context, CGRectMake(0.0, 0.0, width.toDouble(), height.toDouble())) 149 + CGContextClearRect( 150 + context, 151 + CGRectMake(0.0, 0.0, width.toDouble(), height.toDouble()) 152 + ) 164 153 val pixelMap = frame.toPixelMap() 165 154 val buffer = ensureFourChannelBuffer(pixelMap) 
166 155 val contextData = baseAddress?.reinterpret<ByteVar>() ··· 185 174 CGColorSpaceRelease(colorSpace) 186 175 CVPixelBufferUnlockBaseAddress(pixelBuffer, 0u) 187 176 // Wait for input to be ready 188 - while (!videoWriterInput.readyForMoreMediaData) {} 189 - pixelBufferAdaptor.appendPixelBuffer(pixelBuffer, withPresentationTime = presentationTime) 177 + while (!videoWriterInput.readyForMoreMediaData) { 178 + } 179 + pixelBufferAdaptor.appendPixelBuffer( 180 + pixelBuffer, 181 + withPresentationTime = presentationTime 182 + ) 190 183 CVPixelBufferRelease(pixelBuffer) 191 184 } else { 192 185 //println("Failed to allocate pixel buffer: $status") ··· 203 196 //println("[VideoBuilder] Finished writing video!") 204 197 } 205 198 // Wait for writing to finish 206 - while (writer?.status == AVAssetWriterStatusWriting) {} 199 + while (writer?.status == AVAssetWriterStatusWriting) { 200 + } 207 201 cleanup() 208 202 } 209 203 outputPath

+2 -8

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.ios.kt

··· 15 15 import platform.CoreGraphics.CGImageGetWidth 16 16 import platform.CoreGraphics.CGImageRef 17 17 import platform.CoreGraphics.CGRectMake 18 - import platform.CoreML.MLModel 19 - import platform.Foundation.NSBundle 20 - import platform.Foundation.NSURL 21 18 import platform.UIKit.UIImage 22 - import platform.Vision.VNCoreMLModel 23 19 import kotlin.coroutines.resume 24 20 import kotlin.native.runtime.NativeRuntimeApi 25 - import kotlin.text.toDouble 26 - import kotlin.text.toFloat 27 21 28 22 actual class InputFrame @OptIn(ExperimentalForeignApi::class) constructor( 29 23 val cgImage: CGImageRef, actual val timestamp: Long ··· 60 54 61 55 val cgRect = CGRectMake(cropX, cropY, cropW, cropH) 62 56 kotlin.native.runtime.GC.collect() 63 - return InputFrame(CGImageCreateWithImageInRect(cgImage, cgRect)?:cgImage, timestamp) 57 + return InputFrame(CGImageCreateWithImageInRect(cgImage, cgRect) ?: cgImage, timestamp) 64 58 } 65 59 } 66 60 ··· 69 63 private val frameProcessor = FrameProcessor(modelObj) 70 64 71 65 @OptIn(ExperimentalForeignApi::class) 72 - actual suspend fun analyseFrame(inputFrame: InputFrame,focusArea: Rect?): AnalysisResult { 66 + actual suspend fun analyseFrame(inputFrame: InputFrame, focusArea: Rect?): AnalysisResult { 73 67 frameProcessor.setFocusArea(focusArea) 74 68 val img = inputFrame.cgImage 75 69 return suspendCancellableCoroutine { continuation ->

+6 -6

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.ios.kt

··· 1 1 package com.performancecoachlab.posedetection.recording 2 2 3 + import co.touchlab.kermit.Logger 3 4 import kotlinx.cinterop.BetaInteropApi 4 5 import kotlinx.cinterop.ExperimentalForeignApi 5 6 import kotlinx.cinterop.ObjCObjectVar ··· 19 20 import platform.AVFoundation.CMTimeValue 20 21 import platform.AVFoundation.tracksWithMediaType 21 22 import platform.AVFoundation.valueWithCMTime 23 + import platform.CoreFoundation.CFRelease 24 + import platform.CoreMedia.CMSampleBufferGetPresentationTimeStamp 25 + import platform.CoreMedia.CMTimeGetSeconds 22 26 import platform.CoreMedia.CMTimeMake 23 27 import platform.CoreMedia.CMTimeMakeWithSeconds 24 - import platform.CoreMedia.CMTimeGetSeconds 25 - import platform.CoreMedia.CMSampleBufferGetPresentationTimeStamp 26 - import platform.CoreFoundation.CFRelease 27 28 import platform.Foundation.NSError 29 + import platform.Foundation.NSFileManager 28 30 import platform.Foundation.NSURL 29 31 import platform.Foundation.NSValue 30 - import platform.Foundation.NSFileManager 31 - import co.touchlab.kermit.Logger 32 32 33 33 @OptIn(ExperimentalForeignApi::class) 34 34 actual suspend fun extractFrame( ··· 41 41 generator.appliesPreferredTrackTransform = true 42 42 generator.requestedTimeToleranceBefore = CMTimeMake(0, 600) 43 43 generator.requestedTimeToleranceAfter = CMTimeMake(0, 600) 44 - val t = CMTimeMakeWithSeconds(frameTimestamp.toDouble()/1000.0, preferredTimescale = 600) 44 + val t = CMTimeMakeWithSeconds(frameTimestamp.toDouble() / 1000.0, preferredTimescale = 600) 45 45 val time = NSValue.valueWithCMTime(t) 46 46 try { 47 47 val cgImage =

+12 -4

sample/composeApp/src/androidMain/res/xml/file_paths.xml

··· 1 1 <?xml version="1.0" encoding="utf-8"?> 2 2 <paths> 3 - <files-path name="files" path="/" /> 4 - <cache-path name="cache" path="/" /> 5 - <external-files-path name="external_files" path="/" /> 6 - <external-cache-path name="external_cache" path="/" /> 3 + <files-path 4 + name="files" 5 + path="/" /> 6 + <cache-path 7 + name="cache" 8 + path="/" /> 9 + <external-files-path 10 + name="external_files" 11 + path="/" /> 12 + <external-cache-path 13 + name="external_cache" 14 + path="/" /> 7 15 </paths>

+64 -28

sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt

··· 60 60 import com.performancecoachlab.posedetection.camera.PreviewFillMode 61 61 import com.performancecoachlab.posedetection.custom.CustomObjectRespository 62 62 import com.performancecoachlab.posedetection.custom.ModelPath 63 - import com.performancecoachlab.posedetection.custom.ObjectModelProvider 64 - import com.performancecoachlab.posedetection.custom.initialiseObjectModel 65 63 import com.performancecoachlab.posedetection.custom.rememberObjectModel 66 64 import com.performancecoachlab.posedetection.encoding.VideoBuilder 67 65 import com.performancecoachlab.posedetection.encoding.createVideoBuilder ··· 340 338 } 341 339 342 340 if (permissionGranted) { 343 - DetectOrientation{ orientation -> 341 + DetectOrientation { orientation -> 344 342 Box(modifier = Modifier.fillMaxSize()) { 345 343 Column(modifier = Modifier.fillMaxSize()) { 346 344 androidx.compose.animation.AnimatedVisibility(path.isNotBlank()) { ··· 369 367 ) 370 368 ) 371 369 } 372 - key(orientation){ 370 + key(orientation) { 373 371 CameraView( 374 372 skeletonRepository = skeletonRepository, 375 373 customObjectRepository = customObjectRespository, ··· 394 392 }, 395 393 objectModel = generalModel, 396 394 modifier = Modifier.weight(1f), 397 - focusArea = Rect(0f,0f,1f,1f), 395 + focusArea = Rect(0f, 0f, 1f, 1f), 398 396 frontCamera = frontCamera, 399 397 useUltraWide = ultrawide, 400 398 previewFillMode = previewFillMode, ··· 421 419 // Zoom toggle 422 420 DropdownMenuItem( 423 421 text = { 424 - Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 425 - Text(text = if(ultrawide) "0.5x zoom" else "1.0x zoom") 422 + Row( 423 + horizontalArrangement = Arrangement.SpaceBetween, 424 + verticalAlignment = Alignment.CenterVertically, 425 + modifier = Modifier.fillMaxWidth() 426 + ) { 427 + Text(text = if (ultrawide) "0.5x zoom" else "1.0x zoom") 426 428 Spacer(Modifier.width(12.dp)) 427 429 Switch(checked = 
ultrawide, onCheckedChange = null) 428 430 } ··· 433 435 // Preview fill/crop toggle 434 436 DropdownMenuItem( 435 437 text = { 436 - Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 437 - Text(text = if(previewFillMode==PreviewFillMode.FIT)"Fill Preview" else "Fit Preview") 438 + Row( 439 + horizontalArrangement = Arrangement.SpaceBetween, 440 + verticalAlignment = Alignment.CenterVertically, 441 + modifier = Modifier.fillMaxWidth() 442 + ) { 443 + Text(text = if (previewFillMode == PreviewFillMode.FIT) "Fill Preview" else "Fit Preview") 438 444 Spacer(Modifier.width(12.dp)) 439 - Switch(checked = previewFillMode==PreviewFillMode.FIT, onCheckedChange = null) 445 + Switch( 446 + checked = previewFillMode == PreviewFillMode.FIT, 447 + onCheckedChange = null 448 + ) 440 449 } 441 450 }, 442 451 onClick = { 443 - previewFillMode = if(previewFillMode==PreviewFillMode.FIT) PreviewFillMode.FILL else PreviewFillMode.FIT 452 + previewFillMode = 453 + if (previewFillMode == PreviewFillMode.FIT) PreviewFillMode.FILL else PreviewFillMode.FIT 444 454 } 445 455 ) 446 456 ··· 448 458 449 459 DropdownMenuItem(text = { 450 460 Column { 451 - Text("Detection Mode",modifier = Modifier.fillMaxWidth(), textAlign = TextAlign.Center) 452 - Row(horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 461 + Text( 462 + "Detection Mode", 463 + modifier = Modifier.fillMaxWidth(), 464 + textAlign = TextAlign.Center 465 + ) 466 + Row( 467 + horizontalArrangement = Arrangement.SpaceBetween, 468 + verticalAlignment = Alignment.CenterVertically, 469 + modifier = Modifier.fillMaxWidth() 470 + ) { 453 471 Text(text = "Pose") 454 472 Spacer(Modifier.width(12.dp)) 455 473 RadioButton( ··· 458 476 detectMode = DetectMode.POSE 459 477 }) 460 478 } 461 - Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = 
Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 479 + Row( 480 + horizontalArrangement = Arrangement.SpaceBetween, 481 + verticalAlignment = Alignment.CenterVertically, 482 + modifier = Modifier.fillMaxWidth() 483 + ) { 462 484 Text(text = "Objects") 463 485 Spacer(Modifier.width(12.dp)) 464 - RadioButton(selected = detectMode == DetectMode.OBJECT, onClick = { 465 - detectMode = DetectMode.OBJECT 466 - }) 486 + RadioButton( 487 + selected = detectMode == DetectMode.OBJECT, 488 + onClick = { 489 + detectMode = DetectMode.OBJECT 490 + }) 467 491 } 468 - Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 492 + Row( 493 + horizontalArrangement = Arrangement.SpaceBetween, 494 + verticalAlignment = Alignment.CenterVertically, 495 + modifier = Modifier.fillMaxWidth() 496 + ) { 469 497 Text(text = "Both") 470 498 Spacer(Modifier.width(12.dp)) 471 - RadioButton(selected = detectMode == DetectMode.BOTH, onClick = { 472 - detectMode = DetectMode.BOTH 473 - }) 499 + RadioButton( 500 + selected = detectMode == DetectMode.BOTH, 501 + onClick = { 502 + detectMode = DetectMode.BOTH 503 + }) 474 504 } 475 - Row(horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 505 + Row( 506 + horizontalArrangement = Arrangement.SpaceBetween, 507 + verticalAlignment = Alignment.CenterVertically, 508 + modifier = Modifier.fillMaxWidth() 509 + ) { 476 510 Text(text = "None") 477 511 Spacer(Modifier.width(12.dp)) 478 - RadioButton(selected = detectMode == DetectMode.NONE, onClick = { 479 - detectMode = DetectMode.NONE 480 - }) 512 + RadioButton( 513 + selected = detectMode == DetectMode.NONE, 514 + onClick = { 515 + detectMode = DetectMode.NONE 516 + }) 481 517 } 482 518 } 483 519 }, onClick = {}) ··· 486 522 487 523 // Start/End recording button 488 524 DropdownMenuItem( 489 - text = { Text(if 
(recordingId!=null) "End recording" else "Start recording") }, 525 + text = { Text(if (recordingId != null) "End recording" else "Start recording") }, 490 526 onClick = { 491 - if(recordingId == null){ 527 + if (recordingId == null) { 492 528 recordingId = "${Clock.System.now().epochSeconds}" 493 - }else{ 529 + } else { 494 530 recordingId = null 495 531 } 496 532 // Keep menu open or close? Close feels better.

+10 -10

settings.gradle.kts

··· 3 3 pluginManagement { 4 4 repositories { 5 5 google { 6 - content { 7 - includeGroupByRegex("com\\.android.*") 8 - includeGroupByRegex("com\\.google.*") 9 - includeGroupByRegex("androidx.*") 10 - includeGroupByRegex("android.*") 6 + content { 7 + includeGroupByRegex("com\\.android.*") 8 + includeGroupByRegex("com\\.google.*") 9 + includeGroupByRegex("androidx.*") 10 + includeGroupByRegex("android.*") 11 11 } 12 12 } 13 13 gradlePluginPortal() ··· 21 21 dependencyResolutionManagement { 22 22 repositories { 23 23 google { 24 - content { 25 - includeGroupByRegex("com\\.android.*") 26 - includeGroupByRegex("com\\.google.*") 27 - includeGroupByRegex("androidx.*") 28 - includeGroupByRegex("android.*") 24 + content { 25 + includeGroupByRegex("com\\.android.*") 26 + includeGroupByRegex("com\\.google.*") 27 + includeGroupByRegex("androidx.*") 28 + includeGroupByRegex("android.*") 29 29 } 30 30 } 31 31 mavenCentral()

Configure Feed

Configure Feed