feat: allow specifying a focus area for pose detection, ignoring skeletons outside that region · nateholland.bsky.social/PoseDetection@1c61763

nateholland.bsky.social / PoseDetection

Fork 0

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

This repository has no description

Fork 0

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

feat: allow specifying a focus area for pose detection, ignoring skeletons outside that region

author

nathan holland date 11 months ago (Jul 27, 2025, 9:56 PM +0100) commit 1c61763f 1c61763f5f29f55ecf22d5612bdcaea10283abc5 parent 82dc6f23 82dc6f2320f2234b850bf268cad7cca01d7d3d4f

+377 -117

11 changed files

Expand all Collapse all

posedetection

src

androidMain

kotlin

com

performancecoachlab

posedetection

camera

Utils.android.kt

recording

InputFrame.android.kt

com.performancecoachlab

posedetection

camera

CameraView.android.kt

commonMain

kotlin

com

performancecoachlab

posedetection

camera

CameraView.kt

recording

InputFrame.kt

iosMain

kotlin

com

performancecoachlab

posedetection

camera

CameraEngine.kt

CameraPreview.kt

CameraView.ios.kt

FrameProcessor.kt

recording

InputFrame.ios.kt

sample

composeApp

src

commonMain

kotlin

com

nate

posedetection

App.kt

+10 -2

posedetection/src/androidMain/kotlin/com.performancecoachlab/posedetection/camera/CameraView.android.kt

Reviewed

··· 65 65 modifier: Modifier, 66 66 frontCamera: Boolean, 67 67 isRecording: Boolean, 68 68 + focusArea: Rect?, 68 69 onRecordToggled: (Boolean) -> Unit, 69 70 onVideoSaved: (String) -> Unit, 70 71 ) { ··· 79 80 val objDetector = CustomObjectDetectorModels.getInstance().model 80 81 val scope = rememberCoroutineScope() 81 82 var firstFrameTimestamp: Long? = null 83 83 + var focus by remember { mutableStateOf<Rect?>(focusArea) } 84 84 + 85 85 + // Update focus when focusArea changes 86 86 + LaunchedEffect(focusArea) { 87 87 + focus = focusArea 88 88 + } 82 89 83 90 // Video recording state 84 91 var videoBuilder by remember { mutableStateOf<com.performancecoachlab.posedetection.encoding.VideoBuilder?>(null) } ··· 136 143 val imageAnalysis = ImageAnalysis.Builder().build().also { analysis -> 137 144 analysis.setAnalyzer(executor) { imageProxy -> 138 145 val timestamp = System.currentTimeMillis() 146 146 + val area = focus 139 147 imageProxy.process( 140 140 - objDetector?.getDetector(), poseDetector, timestamp 148 148 + objDetector?.getDetector(), poseDetector, timestamp, area 141 149 ){ 142 150 customObjectRepository.updateCustomObject(it.objects) 143 151 it.skeleton?.let { skel -> ··· 188 196 modifier = Modifier 189 197 .fillMaxSize() 190 198 .scale(if (frontCamera) -1f else 1f, 1f), 191 191 - contentScale = ContentScale.Crop 199 199 + contentScale = ContentScale.Fit//.Crop 192 200 ) 193 201 } 194 202 }

+103 -9

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.android.kt

Reviewed

··· 19 19 actual companion object { 20 20 actual fun getCurrentPlatform(): PlatformType = ANDROID 21 21 } 22 22 - 23 22 } 24 24 - 25 23 26 24 @OptIn(ExperimentalGetImage::class) 27 25 fun ImageProxy.process( 28 26 objectDetector: org.tensorflow.lite.task.vision.detector.ObjectDetector?, 29 27 poseDetector: PoseDetector, 30 28 timestamp: Long, 29 29 + focusArea: Rect?, 31 30 onComplete: (AnalysisResult) -> Unit 32 31 ) { 33 33 - val tensorImage = TensorImage.fromBitmap(toBitmap()) 34 34 - val mlKitImage = image?.let { 35 35 - InputImage.fromMediaImage( 36 36 - it, imageInfo.rotationDegrees 37 37 - ) 38 38 - } 32 32 + val bitmap = toBitmap() 33 33 + val tensorImage = TensorImage.fromBitmap(bitmap) 34 34 + val mlKitImage = InputImage.fromBitmap(bitmap.applyFocusAreaMask(focusArea,imageInfo.rotationDegrees 35 35 + ), imageInfo.rotationDegrees) 39 36 process( 40 37 tensorImage = tensorImage, 41 38 mlKitImage = mlKitImage, ··· 48 45 ) 49 46 } 50 47 48 48 + private fun Rect?.toGraphicsRect(width: Int, height: Int):android.graphics.Rect { 49 49 + return this?.let { 50 50 + android.graphics.Rect((it.left*width).toInt(), 51 51 + (it.top*height).toInt(), 52 52 + (it.right*width).toInt(), 53 53 + (it.bottom*height).toInt() 54 54 + ) 55 55 + }?: android.graphics.Rect(0, 0, width, height) 56 56 + } 57 57 + 51 58 fun Bitmap.process( 52 59 objectDetector: org.tensorflow.lite.task.vision.detector.ObjectDetector?, 53 60 poseDetector: PoseDetector, 54 61 timestamp: Long, 62 62 + focusArea: Rect?, 55 63 onComplete: (AnalysisResult) -> Unit 56 64 ) { 57 65 val tensorImage = TensorImage.fromBitmap(this) 58 58 - val mlKitImage = InputImage.fromBitmap(this, 0) 66 66 + val mlKitImage = InputImage.fromBitmap(this.applyFocusAreaMask(focusArea), 0) 59 67 process( 60 68 tensorImage = tensorImage, 61 69 mlKitImage = mlKitImage, ··· 66 74 height = height, 67 75 onComplete = onComplete 68 76 ) 77 77 + } 78 78 + 79 79 + /** 80 80 + * Crops the bitmap to the specified focus area rectangle 81 81 + * @param focusArea The rectangle area to crop to (in normalized coordinates 0.0-1.0) 82 82 + * @return A new bitmap cropped to the focus area, or the original bitmap if focusArea is null 83 83 + */ 84 84 + fun Bitmap.cropToFocusArea(focusArea: Rect?): Bitmap { 85 85 + return focusArea?.let { rect -> 86 86 + val left = (rect.left * width.toFloat()).toInt().coerceIn(0, width) 87 87 + val top = (rect.top * height.toFloat()).toInt().coerceIn(0, height) 88 88 + val right = (rect.right * width.toFloat()).toInt().coerceIn(left, width) 89 89 + val bottom = (rect.bottom * height.toFloat()).toInt().coerceIn(top, height) 90 90 + 91 91 + val cropWidth = right - left 92 92 + val cropHeight = bottom - top 93 93 + 94 94 + if (cropWidth > 0 && cropHeight > 0) { 95 95 + Bitmap.createBitmap(this, left, top, cropWidth, cropHeight) 96 96 + } else { 97 97 + this 98 98 + } 99 99 + } ?: this 100 100 + } 101 101 + 102 102 + /** 103 103 + * Creates a copy of the bitmap with everything outside the focus area blacked out 104 104 + * @param focusArea The rectangle area to keep visible (in normalized coordinates 0.0-1.0) 105 105 + * @param angle The rotation angle in degrees (must be a multiple of 90) to apply to the focus area rectangle 106 106 + * @return A new bitmap with areas outside the focus area blacked out, or the original bitmap if focusArea is null 107 107 + */ 108 108 + fun Bitmap.applyFocusAreaMask(focusArea: Rect?, angle: Int = 0): Bitmap { 109 109 + return focusArea?.let { rect -> 110 110 + val result = this.copy(this.config ?: Bitmap.Config.ARGB_8888, true) 111 111 + val canvas = android.graphics.Canvas(result) 112 112 + val paint = android.graphics.Paint().apply { 113 113 + color = android.graphics.Color.BLACK 114 114 + } 115 115 + 116 116 + // Transform the rectangle coordinates based on the angle 117 117 + val transformedRect = when (angle % 360) { 118 118 + 90 -> Rect( 119 119 + left = rect.top, 120 120 + top = 1f - rect.right, 121 121 + right = rect.bottom, 122 122 + bottom = 1f - rect.left 123 123 + ) 124 124 + 180 -> Rect( 125 125 + left = 1f - rect.right, 126 126 + top = 1f - rect.bottom, 127 127 + right = 1f - rect.left, 128 128 + bottom = 1f - rect.top 129 129 + ) 130 130 + 270 -> Rect( 131 131 + left = 1f - rect.bottom, 132 132 + top = rect.left, 133 133 + right = 1f - rect.top, 134 134 + bottom = rect.right 135 135 + ) 136 136 + else -> rect // 0 degrees or any other angle 137 137 + } 138 138 + 139 139 + val focusRect = transformedRect.toGraphicsRect(width, height) 140 140 + 141 141 + // Black out top area 142 142 + if (focusRect.top > 0) { 143 143 + canvas.drawRect(0f, 0f, width.toFloat(), focusRect.top.toFloat(), paint) 144 144 + } 145 145 + 146 146 + // Black out bottom area 147 147 + if (focusRect.bottom < height) { 148 148 + canvas.drawRect(0f, focusRect.bottom.toFloat(), width.toFloat(),height.toFloat(), paint) 149 149 + } 150 150 + 151 151 + // Black out left area 152 152 + if (focusRect.left > 0) { 153 153 + canvas.drawRect(0f, focusRect.top.toFloat(), focusRect.left.toFloat(), focusRect.bottom.toFloat(), paint) 154 154 + } 155 155 + 156 156 + // Black out right area 157 157 + if (focusRect.right < width) { 158 158 + canvas.drawRect(focusRect.right.toFloat(), focusRect.top.toFloat(), width.toFloat(), focusRect.bottom.toFloat(), paint) 159 159 + } 160 160 + 161 161 + result 162 162 + } ?: this 69 163 } 70 164 71 165 private fun process(

+3 -2

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.android.kt

Reviewed

··· 1 1 package com.performancecoachlab.posedetection.recording 2 2 3 3 import android.graphics.Bitmap 4 4 + import androidx.compose.ui.geometry.Rect 4 5 import androidx.compose.ui.graphics.ImageBitmap 5 6 import androidx.compose.ui.graphics.asImageBitmap 6 7 import com.google.mlkit.vision.pose.PoseDetection ··· 32 33 PoseDetectorOptions.Builder().setDetectorMode(PoseDetectorOptions.STREAM_MODE).build() 33 34 private val poseDetector = PoseDetection.getClient(options) 34 35 private val objDetector = CustomObjectDetectorModels.getInstance().model?.getDetector() 35 35 - actual suspend fun analyseFrame(inputFrame: InputFrame): AnalysisResult = 36 36 + actual suspend fun analyseFrame(inputFrame: InputFrame, focusArea: Rect?): AnalysisResult = 36 37 suspendCancellableCoroutine { continuation -> 37 38 inputFrame.bitmap.process( 38 38 - objDetector, poseDetector, inputFrame.timestamp 39 39 + objDetector, poseDetector, inputFrame.timestamp,focusArea 39 40 ) { result -> 40 41 continuation.resume(result) 41 42 }

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.kt

Reviewed

··· 3 3 import androidx.compose.foundation.layout.fillMaxSize 4 4 import androidx.compose.runtime.Composable 5 5 import androidx.compose.ui.Modifier 6 6 + import androidx.compose.ui.geometry.Rect 6 7 import com.performancecoachlab.posedetection.custom.CustomObjectRespository 7 8 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 8 9 ··· 15 16 modifier: Modifier = Modifier.fillMaxSize(), 16 17 frontCamera: Boolean = true, 17 18 isRecording: Boolean = false, 19 19 + focusArea: Rect? = null, 18 20 onRecordToggled: (Boolean) -> Unit = {}, 19 21 onVideoSaved: (String) -> Unit = {}, 20 22 )

+1 -1

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.kt

Reviewed

··· 13 13 } 14 14 15 15 expect class FrameAnalyser() { 16 16 - suspend fun analyseFrame(inputFrame: InputFrame): AnalysisResult 16 16 + suspend fun analyseFrame(inputFrame: InputFrame, focusArea: Rect? = null): AnalysisResult 17 17 } 18 18 19 19 data class AnalysisObject(

+67 -35

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraEngine.kt

Reviewed

··· 86 86 import platform.darwin.dispatch_get_global_queue 87 87 import platform.darwin.dispatch_get_main_queue 88 88 import platform.darwin.dispatch_queue_create 89 89 + import platform.posix.abs 89 90 import platform.posix.memcpy 91 91 + import kotlin.math.abs 90 92 import kotlin.native.runtime.NativeRuntimeApi 91 93 92 94 class CameraEngine : UIViewController(null, null) { ··· 221 223 cameraController.drawSkeleton = drawSkeleton 222 224 cameraController.drawObjects = drawObjects 223 225 } 226 226 + 227 227 + fun setFocusArea(focusArea: Rect?) { 228 228 + cameraController.setFocusArea(focusArea) 229 229 + } 224 230 } 225 231 226 232 class CameraController : NSObject(), AVCaptureVideoDataOutputSampleBufferDelegateProtocol, ··· 283 289 return platform.Foundation.NSURL.fileURLWithPath(filePath) 284 290 } 285 291 292 292 + fun setFocusArea(focusArea: Rect?) { 293 293 + frameProcessor.setFocusArea(focusArea) 294 294 + } 295 295 + 286 296 fun setupSession() { 287 297 try { 288 298 captureSession = AVCaptureSession() ··· 471 481 frameProcessor.analyseBufferForAll( 472 482 CMSampleBufferGetImageBuffer(didOutputSampleBuffer), 473 483 timestamp, 484 484 + mapPoint = { point: Skeleton.SkeletonCoordinate -> 485 485 + val normalizedPoint = CGPointMake( 486 486 + point.x.toDouble() / 480f, point.y.toDouble() / 360f 487 487 + ) 488 488 + cameraPreviewLayer?.let { preview -> 489 489 + val screenPoint = 490 490 + preview.pointForCaptureDevicePointOfInterest( 491 491 + normalizedPoint 492 492 + ) 493 493 + Skeleton.SkeletonCoordinate( 494 494 + screenPoint.useContents { x.toFloat() }, 495 495 + screenPoint.useContents { y.toFloat() }) 496 496 + }?:point 497 497 + }, 498 498 + preview = cameraPreviewLayer, 474 499 onSkeletonProcessed = { skeleton -> 475 500 skeleton?.also { 476 501 skeletonRepository?.updateSkeleton( ··· 578 603 return Rect(topLeft = topLeft, bottomRight = bottomRight) 579 604 } 580 605 581 581 - @OptIn(ExperimentalForeignApi::class) 582 582 - fun mapSkeletonToPreview( 583 583 - skeleton: Skeleton, previewLayer: AVCaptureVideoPreviewLayer, width: Float, height: Float 584 584 - ): Skeleton { 585 585 - fun mapPoint(point: Skeleton.SkeletonCoordinate?): Skeleton.SkeletonCoordinate? { 586 586 - if (point == null) return null 587 587 - 588 588 - // Normalize the point 589 589 - val normalizedPoint = 590 590 - CGPointMake(point.x.toDouble() / width, point.y.toDouble() / height) 591 591 - val screenPoint = previewLayer.pointForCaptureDevicePointOfInterest(normalizedPoint) 592 592 - return Skeleton.SkeletonCoordinate( 593 593 - screenPoint.useContents { x.toFloat() }, 594 594 - screenPoint.useContents { y.toFloat() }) 595 595 - } 596 596 - 597 597 - return Skeleton( 598 598 - timestamp = skeleton.timestamp, 599 599 - leftShoulder = mapPoint(skeleton.leftShoulder), 600 600 - rightShoulder = mapPoint(skeleton.rightShoulder), 601 601 - leftElbow = mapPoint(skeleton.leftElbow), 602 602 - rightElbow = mapPoint(skeleton.rightElbow), 603 603 - leftWrist = mapPoint(skeleton.leftWrist), 604 604 - rightWrist = mapPoint(skeleton.rightWrist), 605 605 - leftHip = mapPoint(skeleton.leftHip), 606 606 - rightHip = mapPoint(skeleton.rightHip), 607 607 - leftKnee = mapPoint(skeleton.leftKnee), 608 608 - rightKnee = mapPoint(skeleton.rightKnee), 609 609 - leftAnkle = mapPoint(skeleton.leftAnkle), 610 610 - rightAnkle = mapPoint(skeleton.rightAnkle), 611 611 - width = width, 612 612 - height = height, 613 613 - ) 614 614 - } 615 615 - 616 606 override fun captureOutput( 617 607 output: AVCaptureFileOutput, 618 608 didFinishRecordingToOutputFileAtURL: NSURL, ··· 622 612 onVideoSaved?.invoke(didFinishRecordingToOutputFileAtURL.path ?: "") 623 613 } 624 614 615 615 + } 616 616 + 617 617 + @OptIn(ExperimentalForeignApi::class) 618 618 + fun mapSkeletonToPreview( 619 619 + skeleton: Skeleton, previewLayer: AVCaptureVideoPreviewLayer, width: Float, height: Float 620 620 + ): Skeleton { 621 621 + fun mapPoint(point: Skeleton.SkeletonCoordinate?): Skeleton.SkeletonCoordinate? { 622 622 + if (point == null) return null 623 623 + 624 624 + // Normalize the point 625 625 + val normalizedPoint = 626 626 + CGPointMake(point.x.toDouble() / width, point.y.toDouble() / height) 627 627 + val screenPoint = previewLayer.pointForCaptureDevicePointOfInterest(normalizedPoint) 628 628 + return Skeleton.SkeletonCoordinate( 629 629 + screenPoint.useContents { x.toFloat() }, 630 630 + screenPoint.useContents { y.toFloat() }) 631 631 + } 632 632 + 633 633 + val minbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(0.0,0.0)).useContents { Pair(x.toFloat(), y.toFloat()) } 634 634 + val maxbounds = previewLayer.pointForCaptureDevicePointOfInterest(CGPointMake(1.0,1.0)).useContents { Pair(x.toFloat(), y.toFloat()) } 635 635 + val bounds = Pair( 636 636 + abs( maxbounds.first - minbounds.first), 637 637 + abs(maxbounds.second - minbounds.second) 638 638 + ) 639 639 + 640 640 + return Skeleton( 641 641 + timestamp = skeleton.timestamp, 642 642 + leftShoulder = mapPoint(skeleton.leftShoulder), 643 643 + rightShoulder = mapPoint(skeleton.rightShoulder), 644 644 + leftElbow = mapPoint(skeleton.leftElbow), 645 645 + rightElbow = mapPoint(skeleton.rightElbow), 646 646 + leftWrist = mapPoint(skeleton.leftWrist), 647 647 + rightWrist = mapPoint(skeleton.rightWrist), 648 648 + leftHip = mapPoint(skeleton.leftHip), 649 649 + rightHip = mapPoint(skeleton.rightHip), 650 650 + leftKnee = mapPoint(skeleton.leftKnee), 651 651 + rightKnee = mapPoint(skeleton.rightKnee), 652 652 + leftAnkle = mapPoint(skeleton.leftAnkle), 653 653 + rightAnkle = mapPoint(skeleton.rightAnkle), 654 654 + width = bounds.first, 655 655 + height = bounds.second, 656 656 + ) 625 657 } 626 658 627 659

-1

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraPreview.kt

Reviewed

··· 1 1 package com.performancecoachlab.posedetection.camera 2 2 3 3 - 4 3 import androidx.compose.runtime.Composable 5 4 import androidx.compose.runtime.DisposableEffect 6 5 import androidx.compose.runtime.LaunchedEffect

+6 -1

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.ios.kt

Reviewed

··· 11 11 import androidx.compose.runtime.remember 12 12 import androidx.compose.runtime.setValue 13 13 import androidx.compose.ui.Modifier 14 14 + import androidx.compose.ui.geometry.Rect 14 15 import androidx.compose.ui.layout.ContentScale 15 16 import com.performancecoachlab.posedetection.custom.CustomObjectRespository 16 17 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository ··· 28 29 modifier: Modifier, 29 30 frontCamera: Boolean, 30 31 isRecording: Boolean, 32 32 + focusArea: Rect?, 31 33 onRecordToggled: (Boolean) -> Unit, 32 34 onVideoSaved: (String) -> Unit, 33 35 ) { ··· 50 52 lastRecordingState = isRecording 51 53 } 52 54 } 55 55 + LaunchedEffect(focusArea){ 56 56 + cameraEngine.value?.setFocusArea(focusArea) 57 57 + } 53 58 Box(modifier = Modifier.fillMaxSize()) { 54 59 CameraPreview( 55 60 modifier = Modifier.fillMaxSize(), onCameraControllerReady = { engine -> 56 61 cameraEngine.value = engine.also { if (!frontCamera) it.toggleCameraLens() } 57 57 - }) 62 62 + },) 58 63 if (drawSkeleton || drawObjects) { 59 64 kotlin.native.runtime.GC.collect() 60 65 frameBitmap?.also {

+134 -16

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/FrameProcessor.kt

Reviewed

··· 1 1 package com.performancecoachlab.posedetection.camera 2 2 3 3 + import androidx.compose.ui.geometry.Offset 3 4 import androidx.compose.ui.geometry.Rect 4 5 import com.performancecoachlab.posedetection.recording.AnalysisObject 5 6 import com.performancecoachlab.posedetection.recording.Label ··· 15 16 import kotlinx.cinterop.useContents 16 17 import kotlinx.cinterop.value 17 18 import platform.AVFoundation.AVCaptureVideoOrientationLandscapeRight 19 19 + import platform.AVFoundation.AVCaptureVideoPreviewLayer 18 20 import platform.CoreGraphics.CGImageGetHeight 19 21 import platform.CoreGraphics.CGImageGetWidth 20 22 import platform.CoreGraphics.CGImageRef ··· 22 24 import platform.CoreGraphics.CGRect 23 25 import platform.CoreGraphics.CGRectMake 24 26 import platform.CoreVideo.CVImageBufferRef 25 25 - import platform.CoreVideo.CVPixelBufferRef 26 27 import platform.Foundation.NSError 27 28 import platform.Foundation.NSUUID 28 29 import platform.Vision.VNClassificationObservation ··· 51 52 import kotlin.experimental.ExperimentalNativeApi 52 53 import kotlin.native.identityHashCode 53 54 54 54 - fun bodyPoseHandler(request: VNRequest): MutableMap<VNHumanBodyPoseObservationJointName, VNRecognizedPoint>? { 55 55 + fun bodyPoseHandler(request: VNRequest): List<MutableMap<VNHumanBodyPoseObservationJointName, VNRecognizedPoint>>? { 55 56 try { 56 57 val observations = request.results as List<VNHumanBodyPoseObservation> 57 58 // Process each observation to find the recognized body pose points. 58 58 - return observations.lastOrNull()?.let { processObservation(it) } 59 59 + return observations.map { processObservation(it) } 59 60 } catch (e: Exception) { 60 61 println("Error processing observations: ${e.message}") 61 62 return null ··· 68 69 observation.availableJointNames.forEach { 69 70 observation.recognizedPointForJointName(it as VNHumanBodyPoseObservationJointName, null) 70 71 ?.also { point -> 71 71 - if (point.confidence > 0f) { 72 72 + if (point.confidence > 0.2f) { 72 73 points[it] = point 73 74 } 74 75 } ··· 131 132 private var regionOfInterest = CGRectMake(0.0, 0.0, 1.0, 1.0) 132 133 private var requests = mutableListOf<VNRequest>() 133 134 val objectRecognition = setUpRecognition() 135 135 + private var focusArea: Rect? = null 136 136 + 137 137 + fun setFocusArea(focusArea: Rect?) { 138 138 + this.focusArea = focusArea 139 139 + } 134 140 135 141 private fun setUpRecognition() { 136 142 if (modelObj == null) { ··· 165 171 timestamp: Long, 166 172 onObjectsProcessed: (List<AnalysisObject>) -> Unit, 167 173 onSkeletonProcessed: (Skeleton?) -> Unit 168 168 - ){ 174 174 + ) { 169 175 autoreleasepool { 170 176 if (cgImage == null) { 171 177 onObjectsProcessed(emptyList()) ··· 189 195 return@VNCoreMLRequest 190 196 } 191 197 val results = request?.results as? List<*> ?: emptyList<Any>() 192 192 - val recognized = results.filterIsInstance<VNRecognizedObjectObservation>() 198 198 + val recognized = 199 199 + results.filterIsInstance<VNRecognizedObjectObservation>() 193 200 val analysisObjects = recognized.map { observation -> 194 201 val confidence = observation.confidence 195 202 val boundingBox = observation.boundingBox.useContents { ··· 208 215 } 209 216 val labels = observation.labels.mapNotNull { 210 217 (it as VNClassificationObservation).let { ca -> 211 211 - if (ca.confidence > 0.0) Label(ca.identifier,ca.confidence) else null 218 218 + if (ca.confidence > 0.0) Label( 219 219 + ca.identifier, 220 220 + ca.confidence 221 221 + ) else null 212 222 } 213 223 } 214 224 AnalysisObject( ··· 228 238 onSkeletonProcessed(null) 229 239 } else { 230 240 request?.also { vnRequest -> 231 231 - val recognizedPoints = bodyPoseHandler(vnRequest) 232 232 - regionOfInterest = calculateRegionOfInterest(recognizedPoints) 241 241 + val recognizedPoints = bodyPoseHandler(vnRequest)?.firstOrNull{ 242 242 + isInFocusArea(it, focusArea, width, height) 243 243 + } 244 244 + regionOfInterest = 245 245 + calculateRegionOfInterest(recognizedPoints) 233 246 val updatedSkeleton = Skeleton( 234 247 timestamp = timestamp, 235 248 leftShoulder = recognizedPoints?.get( ··· 294 307 } 295 308 } 296 309 310 310 + fun isInFocusArea( 311 311 + recognizedPoints: MutableMap<VNHumanBodyPoseObservationJointName, VNRecognizedPoint>?, 312 312 + focusArea: Rect?, 313 313 + width: ULong, 314 314 + height: ULong, 315 315 + mapPoint: (Skeleton.SkeletonCoordinate) -> Skeleton.SkeletonCoordinate = {it} 316 316 + ): Boolean { 317 317 + if (focusArea == null || recognizedPoints.isNullOrEmpty()) return true 318 318 + val focusRect = Rect( 319 319 + left = focusArea.left * width.toFloat(), 320 320 + top = focusArea.top * height.toFloat(), 321 321 + right = focusArea.right * width.toFloat(), 322 322 + bottom = focusArea.bottom * height.toFloat() 323 323 + ) 324 324 + return recognizedPoints.values.all { point -> 325 325 + val pointInFocus = mapPoint(point.location.toSkeletonPoint(width, height)) 326 326 + focusRect.contains( 327 327 + Offset(pointInFocus.x, pointInFocus.y) 328 328 + ) 329 329 + } 330 330 + } 331 331 + 332 332 + fun Skeleton.isInFocusArea(focusArea: Rect?): Boolean { 333 333 + if (focusArea == null || joints().isEmpty()) return true 334 334 + val focusRect = Rect( 335 335 + left = focusArea.left * width, 336 336 + top = focusArea.top * height, 337 337 + right = focusArea.right * width, 338 338 + bottom = focusArea.bottom * height 339 339 + ) 340 340 + return joints().all { point -> 341 341 + focusRect.contains(Offset(point.x, point.y)) 342 342 + } 343 343 + } 344 344 + 297 345 @OptIn(BetaInteropApi::class, ExperimentalNativeApi::class) 298 346 fun analyseBufferForAll( 299 347 buffer: CVImageBufferRef?, 300 348 timestamp: Long, 349 349 + mapPoint: (skeleton: Skeleton.SkeletonCoordinate) -> Skeleton.SkeletonCoordinate, 350 350 + preview: AVCaptureVideoPreviewLayer?, 301 351 onObjectsProcessed: (List<AnalysisObject>) -> Unit, 302 352 onSkeletonProcessed: (Skeleton?) -> Unit 303 353 ) { ··· 319 369 return@VNCoreMLRequest 320 370 } 321 371 val results = request?.results as? List<*> ?: emptyList<Any>() 322 322 - val recognized = results.filterIsInstance<VNRecognizedObjectObservation>() 372 372 + val recognized = 373 373 + results.filterIsInstance<VNRecognizedObjectObservation>() 323 374 val analysisObjects = recognized.map { observation -> 324 375 val confidence = observation.confidence 325 376 val boundingBox = observation.boundingBox.useContents { ··· 338 389 } 339 390 val labels = observation.labels.mapNotNull { 340 391 (it as VNClassificationObservation).let { ca -> 341 341 - if (ca.confidence > 0.0) Label(ca.identifier,ca.confidence) else null 392 392 + if (ca.confidence > 0.0) Label( 393 393 + ca.identifier, 394 394 + ca.confidence 395 395 + ) else null 342 396 } 343 397 } 344 398 AnalysisObject( ··· 359 413 onSkeletonProcessed(null) 360 414 } else { 361 415 request?.also { vnRequest -> 362 362 - val recognizedPoints = bodyPoseHandler(vnRequest) 363 363 - regionOfInterest = calculateRegionOfInterest(recognizedPoints) 416 416 + val recognizedPoints = bodyPoseHandler(vnRequest)?.firstOrNull{ 417 417 + Skeleton( 418 418 + timestamp = timestamp, 419 419 + leftShoulder = it[VNHumanBodyPoseObservationJointNameLeftShoulder]?.location?.toSkeletonPoint(width, height), 420 420 + rightShoulder = it[VNHumanBodyPoseObservationJointNameRightShoulder]?.location?.toSkeletonPoint(width, height), 421 421 + leftElbow = it[VNHumanBodyPoseObservationJointNameLeftElbow]?.location?.toSkeletonPoint(width, height), 422 422 + rightElbow = it[VNHumanBodyPoseObservationJointNameRightElbow]?.location?.toSkeletonPoint(width, height), 423 423 + leftWrist = it[VNHumanBodyPoseObservationJointNameLeftWrist]?.location?.toSkeletonPoint(width, height), 424 424 + rightWrist = it[VNHumanBodyPoseObservationJointNameRightWrist]?.location?.toSkeletonPoint(width, height), 425 425 + leftHip = it[VNHumanBodyPoseObservationJointNameLeftHip]?.location?.toSkeletonPoint(width, height), 426 426 + rightHip = it[VNHumanBodyPoseObservationJointNameRightHip]?.location?.toSkeletonPoint(width, height), 427 427 + leftKnee = it[VNHumanBodyPoseObservationJointNameLeftKnee]?.location?.toSkeletonPoint(width, height), 428 428 + rightKnee = it[VNHumanBodyPoseObservationJointNameRightKnee]?.location?.toSkeletonPoint(width, height), 429 429 + leftAnkle = it[VNHumanBodyPoseObservationJointNameLeftAnkle]?.location?.toSkeletonPoint(width, height), 430 430 + rightAnkle = it[VNHumanBodyPoseObservationJointNameRightAnkle]?.location?.toSkeletonPoint(width, height), 431 431 + height = height.toFloat(), 432 432 + width = width.toFloat() 433 433 + ).let { skel-> 434 434 + preview?.let{ layer -> 435 435 + mapSkeletonToPreview( 436 436 + skeleton = skel, 437 437 + previewLayer = layer, 438 438 + width = 480f, 439 439 + height = 360f 440 440 + ) 441 441 + } 442 442 + }?.isInFocusArea(focusArea)?:false 443 443 + } 444 444 + /*firstOrNull{ 445 445 + isInFocusArea(it,focusArea,width, height, mapPoint) 446 446 + }*/ 447 447 + regionOfInterest = 448 448 + calculateRegionOfInterest(recognizedPoints) 364 449 val updatedSkeleton = Skeleton( 365 450 timestamp = timestamp, 366 451 leftShoulder = recognizedPoints?.get( ··· 432 517 ): CValue<CGRect> { 433 518 val margin = 0.0 434 519 if (recognizedPoints.isNullOrEmpty()) { 435 435 - return CGRectMake(0.0, 0.0, 1.0, 1.0) // Return full image area if no points are recognized 520 520 + return CGRectMake(0.0, 0.0, 1.0, 1.0) 436 521 } 437 522 recognizedPoints.values.let { allPoints -> 438 438 - val maxX = allPoints.minOf { it.x } 523 523 + val maxX = allPoints.maxOf { it.x } 439 524 val minX = allPoints.minOf { it.x } 440 440 - val maxY = allPoints.minOf { it.y } 525 525 + val maxY = allPoints.maxOf { it.y } 441 526 val minY = allPoints.minOf { it.y } 442 527 443 528 val x = (minX - margin).coerceIn(0.0, 1.0) ··· 452 537 height = height, 453 538 ) 454 539 } 540 540 + } 455 541 542 542 + @OptIn(ExperimentalForeignApi::class) 543 543 + private fun Rect?.toCGRect(): CValue<CGRect> { 544 544 + return if (this == null) { 545 545 + CGRectMake(0.0, 0.0, 1.0, 1.0) 546 546 + } else { 547 547 + val uiX = left 548 548 + val uiY = top 549 549 + val uiWidth = width 550 550 + val uiHeight = height 551 551 + 552 552 + // Transform to camera coordinates (90° clockwise rotation) 553 553 + val cameraX = (1.0 - uiY - uiHeight) 554 554 + val cameraY = uiX 555 555 + val cameraWidth = uiHeight 556 556 + val cameraHeight = uiWidth 557 557 + 558 558 + val xHun = (cameraX * 1000f).toInt().coerceIn(0, 1000) 559 559 + val yHun = (cameraY * 1000f).toInt().coerceIn(0, 1000) 560 560 + val widthHun = (cameraWidth * 1000f).toInt().coerceIn(1, 999 - xHun) 561 561 + val heightHun = (cameraHeight * 1000f).toInt().coerceIn(1, 999 - yHun) 562 562 + val x = xHun / 1000.0 563 563 + val y = yHun / 1000.0 564 564 + val width = widthHun / 1000.0 565 565 + val height = heightHun / 1000.0 566 566 + CGRectMake( 567 567 + x = x, 568 568 + y = y, 569 569 + width = width, 570 570 + height = height, 571 571 + ) 572 572 + } 456 573 } 457 574 458 575 data class DetectedObject @OptIn(ExperimentalForeignApi::class) constructor( ··· 461 578 val confidence: Float, 462 579 val boundingBox: CValue<CGRect> 463 580 ) 581 581 +

+3 -2

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.ios.kt

Reviewed

··· 1 1 package com.performancecoachlab.posedetection.recording 2 2 3 3 + import androidx.compose.ui.geometry.Rect 3 4 import androidx.compose.ui.graphics.ImageBitmap 4 5 import com.performancecoachlab.posedetection.camera.FrameProcessor 5 6 import com.performancecoachlab.posedetection.camera.drawAnalysisResults ··· 39 40 private val frameProcessor = FrameProcessor(modelObj) 40 41 41 42 @OptIn(ExperimentalForeignApi::class) 42 42 - actual suspend fun analyseFrame(inputFrame: InputFrame): AnalysisResult { 43 43 + actual suspend fun analyseFrame(inputFrame: InputFrame,focusArea: Rect?): AnalysisResult { 44 44 + frameProcessor.setFocusArea(focusArea) 43 45 val img = inputFrame.cgImage 44 46 return suspendCancellableCoroutine { continuation -> 45 47 var poseResult: Skeleton? = null 46 48 var objectResults: List<AnalysisObject> = emptyList() 47 47 - 48 49 frameProcessor.analyseFrameForAll(img, inputFrame.timestamp, onSkeletonProcessed = { 49 50 poseResult = it 50 51 }, onObjectsProcessed = {

+48 -48

sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt

Reviewed

··· 6 6 import androidx.compose.foundation.layout.Box 7 7 import androidx.compose.foundation.layout.Column 8 8 import androidx.compose.foundation.layout.fillMaxSize 9 9 + import androidx.compose.foundation.layout.imePadding 9 10 import androidx.compose.foundation.layout.padding 10 11 import androidx.compose.foundation.layout.requiredHeight 11 12 import androidx.compose.foundation.layout.requiredWidth 13 13 + import androidx.compose.foundation.layout.safeDrawing 12 14 import androidx.compose.material3.Button 13 15 import androidx.compose.material3.CircularProgressIndicator 14 16 import androidx.compose.material3.Tab ··· 25 27 import androidx.compose.runtime.setValue 26 28 import androidx.compose.ui.Alignment 27 29 import androidx.compose.ui.Modifier 30 30 + import androidx.compose.ui.geometry.Rect 28 31 import androidx.compose.ui.graphics.ImageBitmap 29 32 import androidx.compose.ui.layout.ContentScale 30 33 import androidx.compose.ui.unit.dp ··· 59 62 import io.github.vinceglb.filekit.filesDir 60 63 import io.github.vinceglb.filekit.path 61 64 import kotlinx.coroutines.Job 65 65 + import kotlinx.coroutines.delay 62 66 import kotlinx.coroutines.launch 63 67 import kotlin.math.roundToLong 64 68 65 69 @Composable 66 70 internal fun App() = AppTheme { 67 67 - var selectedTabIndex by remember { mutableStateOf(1) } 71 71 + var selectedTabIndex by remember { mutableStateOf(0) } 68 72 val tabs = listOf("Camera Feed", "Recorded Video") 69 69 - 70 73 CustomObjectDetectorModels.init( 71 74 ModelPath("lite-model_efficientdet_lite2_detection_metadata_1.tflite", "YOLOv3FP16") 72 75 ) 73 73 - 74 74 - 75 76 Column { 76 77 TabRow(selectedTabIndex = selectedTabIndex) { 77 78 tabs.forEachIndexed { index, title -> ··· 284 285 var permissionGranted by remember { mutableStateOf(false) } 285 286 var isRecording by remember { mutableStateOf(false) } 286 287 var path by remember { mutableStateOf("") } 287 287 - var allDetected = remember { mutableListOf<String>() } 288 288 PermissionProvider().apply { 289 289 if (!hasCameraPermission()) RequestCameraPermission(onGranted = { 290 290 permissionGranted = true 291 291 }, onDenied = { permissionGranted = false }) else permissionGranted = true 292 292 } 293 293 if (permissionGranted) { 294 294 - Column(modifier = Modifier.fillMaxSize()) { 295 295 - Button( 296 296 - onClick = { isRecording = !isRecording }, 297 297 - modifier = Modifier.padding(16.dp) 298 298 - ) { 299 299 - Text(if (isRecording) "Stop Recording" else "Start Recording") 300 300 - } 301 301 - androidx.compose.animation.AnimatedVisibility(path.isNotBlank()) { 302 302 - val playerHost = remember(path) { 303 303 - MediaPlayerHost( 304 304 - mediaUrl = path, 305 305 - isLooping = true, 306 306 - isPaused = false, 307 307 - isMuted = false, 308 308 - initialVideoFitMode = ScreenResize.FIT, 294 294 + Box(modifier = Modifier.fillMaxSize()) { 295 295 + Column (modifier = Modifier.fillMaxSize()){ 296 296 + androidx.compose.animation.AnimatedVisibility(path.isNotBlank()) { 297 297 + val playerHost = remember(path) { 298 298 + MediaPlayerHost( 299 299 + mediaUrl = path, 300 300 + isLooping = true, 301 301 + isPaused = false, 302 302 + isMuted = false, 303 303 + initialVideoFitMode = ScreenResize.FIT, 304 304 + ) 305 305 + } 306 306 + VideoPlayerComposable( 307 307 + modifier = Modifier.weight(1f), 308 308 + playerHost = playerHost, 309 309 + playerConfig = VideoPlayerConfig( 310 310 + isSeekBarVisible = true, 311 311 + isDurationVisible = true, 312 312 + isFastForwardBackwardEnabled = true, 313 313 + isMuteControlEnabled = true, 314 314 + isSpeedControlEnabled = true, 315 315 + isScreenLockEnabled = false, 316 316 + isScreenResizeEnabled = true, 317 317 + isFullScreenEnabled = true, 318 318 + isPauseResumeEnabled = true, 319 319 + ) 309 320 ) 310 321 } 311 311 - VideoPlayerComposable( 322 322 + CameraView( 323 323 + skeletonRepository = skeletonRepository, 324 324 + customObjectRepository = customObjectRespository, 325 325 + drawSkeleton = true, 326 326 + drawObjects = true, 312 327 modifier = Modifier.weight(1f), 313 313 - playerHost = playerHost, 314 314 - playerConfig = VideoPlayerConfig( 315 315 - isSeekBarVisible = true, 316 316 - isDurationVisible = true, 317 317 - isFastForwardBackwardEnabled = true, 318 318 - isMuteControlEnabled = true, 319 319 - isSpeedControlEnabled = true, 320 320 - isScreenLockEnabled = false, 321 321 - isScreenResizeEnabled = true, 322 322 - isFullScreenEnabled = true, 323 323 - isPauseResumeEnabled = true, 324 324 - ) 328 328 + frontCamera = true, 329 329 + isRecording = isRecording, 330 330 + onRecordToggled = { isRecording = it }, 331 331 + onVideoSaved = { path = it }, 325 332 ) 326 333 } 327 327 - CameraView( 328 328 - skeletonRepository = skeletonRepository, 329 329 - customObjectRepository = customObjectRespository, 330 330 - drawSkeleton = false, 331 331 - drawObjects = true, 332 332 - modifier = Modifier.weight(1f), 333 333 - frontCamera = false, 334 334 - isRecording = isRecording, 335 335 - onRecordToggled = { isRecording = it }, 336 336 - onVideoSaved = { path = it }, 337 337 - ) 334 334 + Button( 335 335 + onClick = { 336 336 + isRecording = !isRecording 337 337 + }, 338 338 + modifier = Modifier.imePadding().padding(16.dp).align(Alignment.TopStart) 339 339 + ) { 340 340 + Text(if (isRecording) "Stop Recording" else "Start Recording") 341 341 + } 338 342 } 339 343 } else Text("Camera permission not granted") 340 344 val upRightPose = Pose( ··· 363 367 it.forEach { obj -> 364 368 val l = "${obj.labels.maxByOrNull { it.confidence }?.text}" 365 369 println("Detected Objects: ${obj.labels}") 366 366 - if(!allDetected.contains(l)){ 367 367 - allDetected.add(l) 368 368 - println("Detected Objects: $allDetected") 369 369 - } 370 370 } 371 371 } 372 372 }