feat: PoseFocusMode.CROP_FOLLOW — dynamic follow-the-player crop · nateholland.bsky.social/PoseDetection@4a9b017

+127 -10

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.android.kt

··· 248 248 // 2) Pose input: build MLKit image (optionally masked) and DOWNscale it. 249 249 // Rotation is now 0 because analysisBitmap is upright. 250 250 val mlKitPoseInput: MlKitPoseInput? = poseDetector?.let { 251 - // In CROP mode we feed MLKit only the focus-area crop, which is already 252 - // smaller than the full frame — so raise the downscale target to give 253 - // the model more effective pixels on the person without ballooning work. 254 - val downscale = if (focusArea != null && poseFocusMode == PoseFocusMode.CROP) 384 else 256 251 + // In CROP / CROP_FOLLOW we feed MLKit only the focus-area crop, which 252 + // is already smaller than the full frame — so raise the downscale 253 + // target to land more pixels on the person without ballooning work. 254 + val isCroppingMode = poseFocusMode == PoseFocusMode.CROP || 255 + poseFocusMode == PoseFocusMode.CROP_FOLLOW 256 + val downscale = if (focusArea != null && isCroppingMode) 384 else 256 255 257 buildMlKitPoseInput( 256 258 analysisBitmap = analysisBitmap, 257 259 focusArea = focusArea, ··· 285 287 width = analysisBitmap.width, 286 288 height = analysisBitmap.height, 287 289 bitmap = analysisBitmap, 288 - onComplete = onComplete, 290 + onComplete = { result, bmp -> 291 + // Update the follow-crop state so the next frame can tighten around 292 + // the current skeleton. No-op in MASK/CROP (nothing reads it). 
293 + if (poseFocusMode == PoseFocusMode.CROP_FOLLOW && focusArea != null) { 294 + FollowCropState.updateFromSkeleton( 295 + skeleton = result.skeleton, 296 + analysisW = analysisBitmap.width, 297 + analysisH = analysisBitmap.height, 298 + clampTo = focusArea, 299 + nowMs = System.currentTimeMillis(), 300 + ) 301 + } 302 + onComplete(result, bmp) 303 + }, 289 304 mlKitScaleX = mlKitPoseInput?.scaleX ?: 1f, 290 305 mlKitScaleY = mlKitPoseInput?.scaleY ?: 1f, 291 306 mlKitOffsetX = mlKitPoseInput?.offsetX ?: 0f, ··· 295 310 296 311 // Shared executor for running pose detection in parallel with object detection. 297 312 private val poseExecutor = Executors.newSingleThreadExecutor() 313 +
/**
 * Dynamic follow-the-player crop state used by `PoseFocusMode.CROP_FOLLOW`.
 *
 * Once a sufficiently complete skeleton is detected, its padded bounding box is
 * remembered (in normalized 0..1 coordinates) so the next frame can crop
 * tightly around it. The remembered rect is dropped when:
 *  - [MISS_TOLERANCE] consecutive inferences return a missing/partial skeleton
 *    (implying the player moved out of the tight crop), or
 *  - no successful update happened within [TIMEOUT_MS] of the `nowMs` passed to
 *    [current].
 *
 * All public entry points are `@Synchronized` on this object, so the mutable
 * fields below are only touched while holding its monitor.
 */
private object FollowCropState {
    private const val TIMEOUT_MS = 500L

    // Pad generously — basketball shots have fast lateral hand/foot motion,
    // and a clipped joint drops the whole frame via the conf filter.
    private const val PAD_FRACTION = 0.5f

    // Floor on normalized crop size (either dim) so we never hand MLKit an
    // extremely narrow/tall image. Tall-narrow aspect hurts recall.
    private const val MIN_NORMALIZED_SIDE = 0.25f

    // Hysteresis: a single bad frame doesn't bounce us back to the wide crop.
    private const val MISS_TOLERANCE = 2

    // Fewer joints than this counts as a missing/partial skeleton.
    private const val MIN_JOINTS_FOR_FOLLOW = 12

    // A final rect must be strictly larger than this on both sides to be kept.
    private const val MIN_USABLE_SIDE = 0.01f

    private var tightRect: Rect? = null
    private var lastUpdatedMs: Long = 0L
    private var consecutiveMisses: Int = 0

    /**
     * Returns the remembered tight crop, or `null` when none is available.
     * If more than [TIMEOUT_MS] elapsed between the last successful update and
     * [nowMs], the state is cleared first and `null` is returned.
     */
    @Synchronized
    fun current(nowMs: Long): Rect? {
        if (nowMs - lastUpdatedMs > TIMEOUT_MS) clear()
        return tightRect
    }

    /** Forgets the tight crop and the miss counter. */
    @Synchronized
    fun reset() {
        clear()
    }

    /**
     * Called after each pose inference to refresh the tight crop.
     *
     * Per the original author's note, skeleton coords are expected in
     * analysis-bitmap pixel space and [clampTo] is the static user-configured
     * focus area in 0..1.
     *
     * @param skeleton latest inference result; `null` or fewer than
     *   [MIN_JOINTS_FOR_FOLLOW] joints counts as a miss.
     * @param analysisW width used to normalize joint x coordinates.
     * @param analysisH height used to normalize joint y coordinates.
     * @param clampTo bounds the tight crop must stay inside.
     * @param nowMs timestamp recorded on a successful update; compared against
     *   the value later passed to [current].
     */
    @Synchronized
    fun updateFromSkeleton(
        skeleton: Skeleton?,
        analysisW: Int,
        analysisH: Int,
        clampTo: Rect,
        nowMs: Long,
    ) {
        val joints = skeleton?.joints().orEmpty()
        if (joints.size < MIN_JOINTS_FOR_FOLLOW) {
            consecutiveMisses += 1
            if (consecutiveMisses >= MISS_TOLERANCE) clear()
            return
        }
        consecutiveMisses = 0

        val w = analysisW.toFloat()
        val h = analysisH.toFloat()
        if (w <= 0f || h <= 0f) {
            tightRect = null
            return
        }

        val minX = joints.minOf { it.x }
        val minY = joints.minOf { it.y }
        val maxX = joints.maxOf { it.x }
        val maxY = joints.maxOf { it.y }
        val padW = (maxX - minX) * PAD_FRACTION
        val padH = (maxY - minY) * PAD_FRACTION

        // Normalize the padded bbox, then fit each axis inside clampTo while
        // honouring the minimum-side floor. A null span means clampTo is
        // inverted/invalid on that axis, so no tight crop can be produced.
        val horizontal = fitSpan((minX - padW) / w, (maxX + padW) / w, clampTo.left, clampTo.right)
        val vertical = fitSpan((minY - padH) / h, (maxY + padH) / h, clampTo.top, clampTo.bottom)
        if (horizontal == null || vertical == null) {
            tightRect = null
            return
        }
        val (l, r) = horizontal
        val (t, b) = vertical

        if (r - l > MIN_USABLE_SIDE && b - t > MIN_USABLE_SIDE) {
            tightRect = Rect(l, t, r, b)
            lastUpdatedMs = nowMs
        } else {
            // Also reached when any coordinate is NaN (comparisons are false).
            tightRect = null
        }
    }

    /** Drops the tight crop and miss counter; callers already hold the lock. */
    private fun clear() {
        tightRect = null
        consecutiveMisses = 0
    }

    /**
     * Clips the span [lo]..[hi] to [boundLo]..[boundHi]. If the clipped span is
     * shorter than [MIN_NORMALIZED_SIDE] (or than the bounds, when those are
     * smaller), it is widened around its centre and slid back inside the
     * bounds, so clipping at an edge can no longer shrink the crop below the
     * floor. Returns `null` when the bounds are inverted or NaN, which would
     * otherwise make `coerceIn` throw.
     */
    private fun fitSpan(lo: Float, hi: Float, boundLo: Float, boundHi: Float): Pair<Float, Float>? {
        if (!(boundLo <= boundHi)) return null
        val start = lo.coerceIn(boundLo, boundHi)
        val end = hi.coerceIn(boundLo, boundHi)
        val floor = minOf(MIN_NORMALIZED_SIDE, boundHi - boundLo)
        // Negated '<' so a NaN length falls through unchanged and is rejected
        // by the caller's usable-size check.
        if (!(end - start < floor)) return start to end
        // maxOf guards against float rounding pushing the upper limit below boundLo.
        val latestStart = maxOf(boundLo, boundHi - floor)
        val slidStart = ((start + end) / 2f - floor / 2f).coerceIn(boundLo, latestStart)
        return slidStart to minOf(slidStart + floor, boundHi)
    }
}
298 403 299 404 // Drop landmarks whose MLKit inFrameLikelihood is below this threshold — keeps 300 405 // obvious phantoms (occluded joints guessed with low conf) out of downstream. ··· 769 874 // 770 875 // Crop-mode also records offsetX/offsetY in full-bitmap pixel coords so 771 876 // landmarks (which come back in crop-local coords) can be remapped. 772 - val useCrop = focusArea != null && poseFocusMode == PoseFocusMode.CROP 877 + // 878 + // CROP_FOLLOW: if we have a recent tight bbox from the last skeleton, 879 + // use it as the effective crop. Otherwise fall back to the static focus 880 + // area so a new detection can re-acquire. 881 + val effectiveFocus: Rect? 
= when { 882 + focusArea == null -> null 883 + poseFocusMode == PoseFocusMode.CROP_FOLLOW -> 884 + FollowCropState.current(System.currentTimeMillis()) ?: focusArea 885 + else -> focusArea 886 + } 887 + val useCrop = effectiveFocus != null && 888 + (poseFocusMode == PoseFocusMode.CROP || poseFocusMode == PoseFocusMode.CROP_FOLLOW) 773 889 774 890 val offsetX: Float 775 891 val offsetY: Float ··· 782 898 useCrop -> { 783 899 val w = analysisBitmap.width 784 900 val h = analysisBitmap.height 785 - val leftPx = (focusArea.left * w).toInt().coerceIn(0, w) 786 - val topPx = (focusArea.top * h).toInt().coerceIn(0, h) 787 - val rightPx = (focusArea.right * w).toInt().coerceIn(leftPx, w) 788 - val bottomPx = (focusArea.bottom * h).toInt().coerceIn(topPx, h) 901 + val rect = effectiveFocus!! 902 + val leftPx = (rect.left * w).toInt().coerceIn(0, w) 903 + val topPx = (rect.top * h).toInt().coerceIn(0, h) 904 + val rightPx = (rect.right * w).toInt().coerceIn(leftPx, w) 905 + val bottomPx = (rect.bottom * h).toInt().coerceIn(topPx, h) 789 906 val cropW = rightPx - leftPx 790 907 val cropH = bottomPx - topPx 791 908 if (cropW <= 0 || cropH <= 0) {

+7 -1

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.kt

··· 39 39 * CROP — crop the frame to the focus area, then downscale only the crop. Gives 40 40 * the pose model higher effective resolution of the focus region at the same 41 41 * downscaled side length. Landmarks are remapped back to full-frame coords. 42 + * 43 + * CROP_FOLLOW — starts in the static focus area; once a confident full 44 + * skeleton lands, tightens the next crop to the skeleton's bbox (plus generous 45 + * padding) so the model sees the player at even higher effective resolution. 46 + * Reverts to the static focus area when the skeleton stays lost or clipped for 47 + * consecutive frames or goes stale — so a jogger leaving the frame doesn't strand the tracker. 42 48 */ 43 - enum class PoseFocusMode { MASK, CROP } 49 + enum class PoseFocusMode { MASK, CROP, CROP_FOLLOW } 44 50 45 51 @Composable 46 52 expect fun CameraView(

+14 -9

sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt

··· 654 654 } 655 655 ) 656 656 657 - // Pose focus mode toggle (mask vs crop) — only affects 658 - // the pose input; object detection always sees the full frame. 657 + // Pose focus mode cycle — only affects the pose input; 658 + // object detection always sees the full frame. Tap 659 + // advances Mask → Crop → Crop+follow → Mask. 659 660 DropdownMenuItem( 660 661 text = { 661 662 Row( ··· 663 664 verticalAlignment = Alignment.CenterVertically, 664 665 modifier = Modifier.fillMaxWidth() 665 666 ) { 666 - Text(text = if (poseFocusMode == PoseFocusMode.CROP) "Pose: Crop focus" else "Pose: Mask focus") 667 - Spacer(Modifier.width(12.dp)) 668 - Switch( 669 - checked = poseFocusMode == PoseFocusMode.CROP, 670 - onCheckedChange = null 667 + Text( 668 + text = when (poseFocusMode) { 669 + PoseFocusMode.MASK -> "Pose: Mask focus" 670 + PoseFocusMode.CROP -> "Pose: Crop focus" 671 + PoseFocusMode.CROP_FOLLOW -> "Pose: Crop+follow" 672 + } 671 673 ) 672 674 } 673 675 }, 674 676 onClick = { 675 - poseFocusMode = 676 - if (poseFocusMode == PoseFocusMode.MASK) PoseFocusMode.CROP else PoseFocusMode.MASK 677 + poseFocusMode = when (poseFocusMode) { 678 + PoseFocusMode.MASK -> PoseFocusMode.CROP 679 + PoseFocusMode.CROP -> PoseFocusMode.CROP_FOLLOW 680 + PoseFocusMode.CROP_FOLLOW -> PoseFocusMode.MASK 681 + } 677 682 } 678 683 ) 679 684

Configure Feed

Configure Feed