feat: sample app change detect type · nateholland.bsky.social/PoseDetection@1add031

+181 -37

2 changed files

Expand all

posedetection

src

androidMain

kotlin

com

performancecoachlab

posedetection

camera

Utils.android.kt

sample

composeApp

src

commonMain

kotlin

com

nate

posedetection

App.kt

+137 -29

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.android.kt

··· 29 29 import com.performancecoachlab.posedetection.custom.AndroidDetector 30 30 import com.performancecoachlab.posedetection.custom.ModelInfo 31 31 import java.nio.ByteBuffer 32 + import com.google.mlkit.vision.pose.Pose 33 + import com.google.mlkit.vision.pose.PoseLandmark 32 34 33 35 actual enum class PlatformType { 34 36 ANDROID, IOS; ··· 212 214 // Rotate into a pooled bitmap if needed. 213 215 val analysisBitmap: Bitmap = baseBitmap.rotateIntoPooled(rotationDegrees) 214 216 215 - // 2) MLKit image must match analysisBitmap coordinate space. Rotation is now 0. 216 - // IMPORTANT: focusArea is defined in normalized coordinates of the *displayed* (upright) frame, 217 - // so apply it with angle=0 in this already-rotated bitmap coordinate space. 218 - val mlKitImage: InputImage? = poseDetector?.let { 219 - if (focusArea == null) { 220 - InputImage.fromBitmap(analysisBitmap, 0) 221 - } else { 222 - // Create the masked bitmap into a dedicated pool so we never overwrite analysisBitmap 223 - val out = PoseMaskBitmapPool.obtain( 224 - analysisBitmap.width, 225 - analysisBitmap.height, 226 - analysisBitmap.config ?: Bitmap.Config.ARGB_8888 227 - ) 228 - val canvas = Canvas(out) 229 - canvas.drawBitmap(analysisBitmap, 0f, 0f, null) 230 - 231 - // Apply mask in upright coordinates. 232 - out.applyFocusAreaMaskInPlace( 233 - focusArea = focusArea, 234 - angle = 0, 235 - ) 236 - 237 - InputImage.fromBitmap(out, 0) 238 - } 217 + // 2) Pose input: build MLKit image (optionally masked) and DOWNscale it. 218 + // Rotation is now 0 because analysisBitmap is upright. 219 + val mlKitPoseInput: MlKitPoseInput? = poseDetector?.let { 220 + buildMlKitPoseInput( 221 + analysisBitmap = analysisBitmap, 222 + focusArea = focusArea, 223 + downscaleMaxSidePx = 256 224 + ) 239 225 } 240 226 241 227 // 3) Tensor input: resize into pooled bitmap (avoid allocating each frame). 
··· 256 242 257 243 process( 258 244 tensorImage = processedTensorImage, 259 - mlKitImage = mlKitImage, 245 + mlKitImage = mlKitPoseInput?.image, 260 246 objectDetector = objectDetector, 261 247 poseDetector = poseDetector, 262 248 timestamp = timestamp, 263 249 width = analysisBitmap.width, 264 250 height = analysisBitmap.height, 265 251 bitmap = analysisBitmap, 266 - onComplete = onComplete 252 + onComplete = onComplete, 253 + mlKitScaleX = mlKitPoseInput?.scaleX ?: 1f, 254 + mlKitScaleY = mlKitPoseInput?.scaleY ?: 1f, 267 255 ) 268 256 } 269 257 ··· 277 265 width: Int, 278 266 height: Int, 279 267 bitmap: Bitmap, 280 - onComplete: (AnalysisResult, Bitmap) -> Unit 268 + onComplete: (AnalysisResult, Bitmap) -> Unit, 269 + mlKitScaleX: Float = 1f, 270 + mlKitScaleY: Float = 1f, 281 271 ) { 282 272 val objectsDetected = if (objectDetector != null && tensorImage != null) { 283 273 val outputShape = objectDetector.modelInfo.outputShape ··· 351 341 val skeleton: Skeleton? = if (poseDetector != null && mlKitImage != null) { 352 342 runCatching { 353 343 val pose = Tasks.await(poseDetector.process(mlKitImage)) 354 - val landmarks = pose.allPoseLandmarks.size 355 - skeleton(pose, timestamp, width, height) 344 + skeletonFromPoseScaled( 345 + pose = pose, 346 + timestamp = timestamp, 347 + width = width, 348 + height = height, 349 + scaleX = mlKitScaleX, 350 + scaleY = mlKitScaleY, 351 + ) 356 352 }.onFailure { t -> Logger.e(t) { "MLKit poseDetector.process failed" } 357 353 }.getOrNull() 358 354 } else null ··· 576 572 val bottomN = max(t, b) 577 573 return Rect(left = leftN, top = topN, right = rightN, bottom = bottomN) 578 574 } 575 + 576 + /** Single-slot pool for the (smaller) MLKit pose input bitmap: obtain() hands back the cached bitmap, cleared to transparent, when it is not recycled and the cached width/height match the request; otherwise it creates a new ARGB_8888 bitmap and caches that one instead. */ 577 + private object PoseInputBitmapPool { 578 + private var cached: Bitmap? 
= null 579 + private var cachedW: Int = 0 580 + private var cachedH: Int = 0 581 + 582 + fun obtain(width: Int, height: Int): Bitmap { 583 + val bmp = cached 584 + return if (bmp != null && !bmp.isRecycled && cachedW == width && cachedH == height) { 585 + bmp.eraseColor(android.graphics.Color.TRANSPARENT) 586 + bmp 587 + } else { 588 + createBitmap(width, height, Bitmap.Config.ARGB_8888).also { 589 + cached = it 590 + cachedW = width 591 + cachedH = height 592 + } 593 + } 594 + } 595 + } 596 + 597 + /** MLKit pose input image together with the X/Y factors that landmark coordinates are multiplied by to map them back to the analysis bitmap's pixel space (both 1f when the image was not downscaled). */ private data class MlKitPoseInput( 598 + val image: InputImage, 599 + val scaleX: Float, 600 + val scaleY: Float, 601 + ) 602 + 603 + /** Builds the MLKit pose input from [analysisBitmap]. When [focusArea] is non-null the bitmap is first copied into a PoseMaskBitmapPool bitmap and masked with angle 0; when the longer side exceeds a positive [downscaleMaxSidePx] (default 360) the result is resized into a PoseInputBitmapPool bitmap. Returns the image plus source/destination size ratios as scale-back factors. */ private fun buildMlKitPoseInput( 604 + analysisBitmap: Bitmap, 605 + focusArea: Rect?, 606 + downscaleMaxSidePx: Int = 360, 607 + ): MlKitPoseInput { 608 + // 1) Apply optional focus mask in full-resolution upright coordinates. 609 + val poseBitmapFull = if (focusArea == null) { 610 + analysisBitmap 611 + } else { 612 + val out = PoseMaskBitmapPool.obtain( 613 + analysisBitmap.width, 614 + analysisBitmap.height, 615 + analysisBitmap.config ?: Bitmap.Config.ARGB_8888 616 + ) 617 + Canvas(out).drawBitmap(analysisBitmap, 0f, 0f, null) 618 + out.applyFocusAreaMaskInPlace(focusArea = focusArea, angle = 0) 619 + out 620 + } 621 + 622 + // 2) Downscale for MLKit (pose is generally robust at lower res). 
623 + val srcW = poseBitmapFull.width 624 + val srcH = poseBitmapFull.height 625 + val srcMax = max(srcW, srcH) 626 + 627 + /* No positive limit, or the longer side already fits: pass the bitmap through unscaled with factors of 1. */ if (downscaleMaxSidePx <= 0 || srcMax <= downscaleMaxSidePx) { 628 + return MlKitPoseInput( 629 + image = InputImage.fromBitmap(poseBitmapFull, 0), 630 + scaleX = 1f, 631 + scaleY = 1f, 632 + ) 633 + } 634 + 635 + val scale = downscaleMaxSidePx.toFloat() / srcMax.toFloat() 636 + /* toInt() truncates; max(1, ...) keeps each side at least 1 px. */ val dstW = max(1, (srcW * scale).toInt()) 637 + val dstH = max(1, (srcH * scale).toInt()) 638 + 639 + val downscaled = PoseInputBitmapPool.obtain(dstW, dstH) 640 + resizeInto(poseBitmapFull, downscaled) 641 + 642 + // If we scaled down, x/y returned by MLKit are in downscaled pixel coords. 643 + val scaleBackX = srcW.toFloat() / dstW.toFloat() 644 + val scaleBackY = srcH.toFloat() / dstH.toFloat() 645 + 646 + return MlKitPoseInput( 647 + image = InputImage.fromBitmap(downscaled, 0), 648 + scaleX = scaleBackX, 649 + scaleY = scaleBackY, 650 + ) 651 + } 652 + 653 + /** Converts this landmark's position into a skeleton coordinate with x multiplied by [scaleX] and y by [scaleY]; returns null when the landmark or its position is null. */ private fun PoseLandmark?.toSkeletonCoordsScaled(scaleX: Float, scaleY: Float): Skeleton.SkeletonCoordinate? 
{ 654 + val pos = this?.position ?: return null 655 + return Skeleton.SkeletonCoordinate( 656 + x = pos.x * scaleX, 657 + y = pos.y * scaleY, 658 + ) 659 + } 660 + 661 + /** Builds a [Skeleton] from the twelve shoulder, elbow, wrist, hip, knee and ankle landmarks of [pose], multiplying each coordinate by [scaleX]/[scaleY]; a null pose or a missing landmark yields a null joint. [timestamp] is passed through and [width]/[height] are stored as floats. */ private fun skeletonFromPoseScaled( 662 + pose: Pose?, 663 + timestamp: Long, 664 + width: Int, 665 + height: Int, 666 + scaleX: Float, 667 + scaleY: Float, 668 + ): Skeleton { 669 + return Skeleton( 670 + timestamp = timestamp, 671 + leftShoulder = pose?.getPoseLandmark(PoseLandmark.LEFT_SHOULDER)?.toSkeletonCoordsScaled(scaleX, scaleY), 672 + rightShoulder = pose?.getPoseLandmark(PoseLandmark.RIGHT_SHOULDER)?.toSkeletonCoordsScaled(scaleX, scaleY), 673 + leftElbow = pose?.getPoseLandmark(PoseLandmark.LEFT_ELBOW)?.toSkeletonCoordsScaled(scaleX, scaleY), 674 + rightElbow = pose?.getPoseLandmark(PoseLandmark.RIGHT_ELBOW)?.toSkeletonCoordsScaled(scaleX, scaleY), 675 + leftWrist = pose?.getPoseLandmark(PoseLandmark.LEFT_WRIST)?.toSkeletonCoordsScaled(scaleX, scaleY), 676 + rightWrist = pose?.getPoseLandmark(PoseLandmark.RIGHT_WRIST)?.toSkeletonCoordsScaled(scaleX, scaleY), 677 + leftHip = pose?.getPoseLandmark(PoseLandmark.LEFT_HIP)?.toSkeletonCoordsScaled(scaleX, scaleY), 678 + rightHip = pose?.getPoseLandmark(PoseLandmark.RIGHT_HIP)?.toSkeletonCoordsScaled(scaleX, scaleY), 679 + leftKnee = pose?.getPoseLandmark(PoseLandmark.LEFT_KNEE)?.toSkeletonCoordsScaled(scaleX, scaleY), 680 + rightKnee = pose?.getPoseLandmark(PoseLandmark.RIGHT_KNEE)?.toSkeletonCoordsScaled(scaleX, scaleY), 681 + leftAnkle = pose?.getPoseLandmark(PoseLandmark.LEFT_ANKLE)?.toSkeletonCoordsScaled(scaleX, scaleY), 682 + rightAnkle = pose?.getPoseLandmark(PoseLandmark.RIGHT_ANKLE)?.toSkeletonCoordsScaled(scaleX, scaleY), 683 + width = width.toFloat(), 684 + height = height.toFloat(), 685 + ) 686 + }

+44 -8

sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt

··· 29 29 import androidx.compose.material3.IconButton 30 30 import androidx.compose.material3.Switch 31 31 import androidx.compose.material3.HorizontalDivider 32 + import androidx.compose.material3.RadioButton 32 33 import androidx.compose.runtime.Composable 33 34 import androidx.compose.runtime.DisposableEffect 34 35 import androidx.compose.runtime.LaunchedEffect ··· 47 48 import androidx.compose.ui.graphics.drawscope.Stroke 48 49 import androidx.compose.ui.layout.ContentScale 49 50 import androidx.compose.ui.text.font.FontWeight 51 + import androidx.compose.ui.text.style.TextAlign 50 52 import androidx.compose.ui.unit.dp 51 53 import androidx.compose.ui.unit.sp 52 54 import chaintech.videoplayer.host.MediaPlayerHost ··· 319 321 } 320 322 } 321 323 322 - private enum class ZoomChoice(val label: String) { 323 - ZOOM_1X("1.0x"), 324 - ZOOM_0_5X("0.5x"), 325 - } 326 - 327 324 @OptIn(ExperimentalTime::class) 328 325 @Composable 329 326 fun CameraSample() { ··· 341 338 var ultrawide by remember { mutableStateOf(false) } 342 339 var previewFillMode by remember { mutableStateOf(PreviewFillMode.FIT) } 343 340 var menuExpanded by remember { mutableStateOf(false) } 341 + var detectMode by remember { mutableStateOf(DetectMode.BOTH) } 344 342 345 343 val controller = remember { CameraViewControllerImpl() } 346 344 PermissionProvider().apply { ··· 383 381 CameraView( 384 382 skeletonRepository = skeletonRepository, 385 383 customObjectRepository = customObjectRespository, 386 - detectMode = DetectMode.BOTH, 384 + detectMode = detectMode, 387 385 drawSkeleton = true, 388 386 drawObjects = { obj -> 389 387 obj.flatMap { ··· 431 429 // Zoom toggle 432 430 DropdownMenuItem( 433 431 text = { 434 - Row { 432 + Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 435 433 Text(text = if(ultrawide) "0.5x zoom" else "1.0x zoom") 436 434 Spacer(Modifier.width(12.dp)) 437 435 Switch(checked = 
ultrawide, onCheckedChange = null) ··· 443 441 // Preview fill/crop toggle 444 442 DropdownMenuItem( 445 443 text = { 446 - Row { 444 + Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 447 445 Text(text = if(previewFillMode==PreviewFillMode.FIT)"Fill Preview" else "Fit Preview") 448 446 Spacer(Modifier.width(12.dp)) 449 447 Switch(checked = previewFillMode==PreviewFillMode.FIT, onCheckedChange = null) ··· 453 451 previewFillMode = if(previewFillMode==PreviewFillMode.FIT) PreviewFillMode.FILL else PreviewFillMode.FIT 454 452 } 455 453 ) 454 + 455 + HorizontalDivider() 456 + 457 + DropdownMenuItem(text = { 458 + Column { 459 + Text("Detection Mode",modifier = Modifier.fillMaxWidth(), textAlign = TextAlign.Center) 460 + Row(horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 461 + Text(text = "Pose") 462 + Spacer(Modifier.width(12.dp)) 463 + RadioButton( 464 + selected = detectMode == DetectMode.POSE, 465 + onClick = { 466 + detectMode = DetectMode.POSE 467 + }) 468 + } 469 + Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 470 + Text(text = "Objects") 471 + Spacer(Modifier.width(12.dp)) 472 + RadioButton(selected = detectMode == DetectMode.OBJECT, onClick = { 473 + detectMode = DetectMode.OBJECT 474 + }) 475 + } 476 + Row (horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 477 + Text(text = "Both") 478 + Spacer(Modifier.width(12.dp)) 479 + RadioButton(selected = detectMode == DetectMode.BOTH, onClick = { 480 + detectMode = DetectMode.BOTH 481 + }) 482 + } 483 + Row(horizontalArrangement = Arrangement.SpaceBetween, verticalAlignment = Alignment.CenterVertically, modifier = Modifier.fillMaxWidth()) { 484 + Text(text = 
"None") 485 + Spacer(Modifier.width(12.dp)) 486 + RadioButton(selected = detectMode == DetectMode.NONE, onClick = { 487 + detectMode = DetectMode.NONE 488 + }) 489 + } 490 + } 491 + }, onClick = {}) 456 492 457 493 HorizontalDivider() 458 494

Configure Feed

Configure Feed