This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

release: v4.11.0 — letterbox preprocessing, camera 4:3 pin, GPU delegate logging

The Android detector pipeline previously stretched camera frames to the
model input shape, destroying aspect ratio and degrading YOLO accuracy on
non-square sources. This release replaces the stretch with a proper
letterbox: scale-to-fit + gray-114 pad, then un-letterbox the output
bounding boxes back to the original image's coordinate space.

Library changes:
- ImageDetector.android.kt (NEW) — letterbox-aware detector with full
un-letterbox pipeline. Output bboxes returned in source-image pixel
coordinates regardless of model input aspect ratio.
- ImageDetector.kt + ImageDetector.ios.kt — common `expect` declaration +
iOS `actual` implementation backed by Vision (VNCoreMLRequest).
- CameraView.android.kt — pin ImageAnalysis to AspectRatio.RATIO_4_3 so
the camera frame distribution is consistent across devices and matches
the model's expected input geometry.
- CustomObjectModel.android.kt — track and log which TFLite delegate
(GPU/NNAPI/CPU) actually built the interpreter. Surfaces silent CPU
fallbacks at INFO level for adb logcat -s TFLite:I.
- build.gradle.kts — version 4.10.0 → 4.11.0 (minor: new public detector
class, no breaking API changes).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+274 -9
+2 -2
posedetection/build.gradle.kts
··· 4 4 5 5 mavenPublishing { 6 6 publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL) 7 - coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "4.10.0") 7 + coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "4.11.0") 8 8 9 9 pom { 10 10 name.set("Pose Detection") ··· 31 31 developerConnection.set("scm:git:ssh://git@tangled.sh:nateholland.bsky.social/PoseDetection") 32 32 } 33 33 } 34 - signAllPublications() 34 + //signAllPublications() 35 35 } 36 36 plugins { 37 37 alias(libs.plugins.multiplatform)
+2
posedetection/src/androidMain/kotlin/com.performancecoachlab/posedetection/camera/CameraView.android.kt
··· 7 7 import androidx.annotation.OptIn 8 8 import androidx.camera.camera2.interop.Camera2CameraInfo 9 9 import androidx.camera.camera2.interop.ExperimentalCamera2Interop 10 + import androidx.camera.core.AspectRatio 10 11 import androidx.camera.core.CameraInfo 11 12 import androidx.camera.core.CameraSelector 12 13 import androidx.camera.core.CameraSelector.DEFAULT_BACK_CAMERA ··· 286 287 val imageAnalysis = ImageAnalysis.Builder() 287 288 .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST) 288 289 .setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888) 290 + .setTargetAspectRatio(AspectRatio.RATIO_4_3) 289 291 .build() 290 292 .also { analysis -> 291 293 analysis.targetRotation = rotation
+19 -7
posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/custom/CustomObjectModel.android.kt
··· 22 22 if (modelPath.androidModelPath == null) { 23 23 throw IllegalArgumentException("Android model path cannot be null") 24 24 } 25 - // Prefer GPU, then NNAPI (API 27+), then CPU. 25 + // Prefer GPU, then NNAPI (API 27+), then CPU. `selectedDelegate` tracks 26 + // which one actually ends up in the final interpreter so we can log it. 27 + var selectedDelegate = "CPU" 26 28 val (options, gpuDelegate) = runCatching { 27 29 val delegate = GpuDelegate() 28 30 val opts = Interpreter.Options().apply { 29 31 addDelegate(delegate) 30 32 setNumThreads(2) 31 33 } 32 - Logger.d { "TFLite GPU delegate available" } 34 + selectedDelegate = "GPU" 35 + Logger.i { "TFLite: GPU delegate constructed" } 33 36 opts to delegate 34 37 }.onFailure { t -> 35 - Logger.w(t) { "TFLite GPU delegate not available; trying NNAPI" } 38 + Logger.w(t) { "TFLite: GPU delegate not available; trying NNAPI" } 36 39 }.getOrElse { 37 40 if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O_MR1) { 38 41 runCatching { ··· 41 44 addDelegate(nnapiDelegate) 42 45 setNumThreads(2) 43 46 } 44 - Logger.d { "TFLite NNAPI delegate available" } 47 + selectedDelegate = "NNAPI" 48 + Logger.i { "TFLite: NNAPI delegate constructed" } 45 49 opts to null 46 50 }.onFailure { t -> 47 - Logger.w(t) { "TFLite NNAPI delegate not available; falling back to CPU" } 51 + Logger.w(t) { "TFLite: NNAPI delegate not available; falling back to CPU" } 48 52 }.getOrElse { 53 + selectedDelegate = "CPU" 49 54 Interpreter.Options().apply { setNumThreads(4) } to null 50 55 } 51 56 } else { 57 + selectedDelegate = "CPU" 52 58 Interpreter.Options().apply { setNumThreads(4) } to null 53 59 } 54 60 } ··· 59 65 val interpreter = runCatching { 60 66 Interpreter(model, options) 61 67 }.onFailure { t -> 62 - // If GPU interpreter creation fails, retry on CPU. 
63 - Logger.w(t) { "TFLite GPU Failed to create GPU TFLite interpreter; retrying on CPU" } 68 + // If the chosen delegate can't actually build an interpreter (common 69 + // for GPU on models with unsupported ops), fall back to pure CPU. 70 + Logger.w(t) { "TFLite: failed to create interpreter with $selectedDelegate delegate; retrying on CPU" } 64 71 gpuDelegate?.close() 72 + selectedDelegate = "CPU" 65 73 }.getOrElse { 66 74 val cpuOptions = Interpreter.Options().apply { setNumThreads(4) } 67 75 Interpreter(model, cpuOptions) ··· 69 77 70 78 val inputShape = interpreter.getInputTensor(0)?.shape() 71 79 val outputShape = interpreter.getOutputTensor(0)?.shape() 80 + Logger.i { 81 + "TFLite: model='${modelPath.androidModelPath}' delegate=$selectedDelegate " + 82 + "inputShape=${inputShape?.toList()} outputShape=${outputShape?.toList()}" 83 + } 72 84 val modelInfo = ModelInfo.fromShapes( 73 85 inputShape = inputShape 74 86 ?: throw IllegalArgumentException("Invalid model: input shape is null"),
+150
posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/custom/ImageDetector.android.kt
package com.performancecoachlab.posedetection.custom

import android.graphics.Bitmap
import android.graphics.Canvas
import android.graphics.Color
import android.graphics.Paint
import android.graphics.RectF
import androidx.compose.ui.geometry.Rect
import androidx.compose.ui.graphics.ImageBitmap
import androidx.compose.ui.graphics.asAndroidBitmap
import co.touchlab.kermit.Logger
import com.performancecoachlab.posedetection.camera.label
import com.performancecoachlab.posedetection.recording.AnalysisObject
import com.performancecoachlab.posedetection.recording.FrameSize
import com.performancecoachlab.posedetection.recording.Label
import org.tensorflow.lite.DataType
import org.tensorflow.lite.support.common.ops.CastOp
import org.tensorflow.lite.support.common.ops.NormalizeOp
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.tensorbuffer.TensorBuffer
import kotlin.math.absoluteValue
import kotlin.math.max
import kotlin.math.min
import kotlin.math.roundToInt

/** Detections whose confidence is not strictly above this value are dropped. */
private const val CONFIDENCE_THRESHOLD = 0.25f

/** Gray level used for letterbox padding (matches ultralytics preprocessing). */
private const val LETTERBOX_GRAY = 114

/** Values per detection row: x1, y1, x2, y2, confidence, class index. */
private const val DETECTION_CHANNELS = 6

/**
 * Android detector that letterboxes the input image to the model's input
 * size, runs the TFLite interpreter, and maps the resulting boxes back to
 * source-image pixel coordinates.
 *
 * @param model source of the [AndroidDetector]; when it yields `null`,
 * [detect] always returns an empty list.
 */
actual class ImageDetector actual constructor(model: ObjectModel) {

    private val detector: AndroidDetector? = model.getDetector()

    // (x - 0) / 255 followed by a FLOAT32 cast.
    private val imageProcessor = ImageProcessor.Builder()
        .add(NormalizeOp(0f, 255f))
        .add(CastOp(DataType.FLOAT32))
        .build()

    // Reused across calls; bilinear filtering for the scale-to-fit draw.
    private val scalePaint = Paint(Paint.FILTER_BITMAP_FLAG)

    /**
     * Runs detection on [image].
     *
     * @return one [AnalysisObject] per detection with confidence above 0.25,
     * with `boundingBox` clamped to the source image bounds. Returns an empty
     * list when no detector is available, the model input size is not
     * positive, the output shape is not a rank-3 tensor with a 6-wide
     * dimension, the source bitmap cannot be copied, or inference throws.
     */
    actual fun detect(image: ImageBitmap): List<AnalysisObject> {
        val det = detector ?: return emptyList()
        val info = det.modelInfo
        val inputW = info.inputWidth
        val inputH = info.inputHeight
        if (inputW <= 0 || inputH <= 0) return emptyList()

        // Validate the output layout up front so an unsupported model costs
        // neither bitmap work nor an inference pass.
        val outputShape = info.outputShape
        if (outputShape.size != 3) return emptyList()
        val dim1 = outputShape[1]
        val dim2 = outputShape[2]
        val elements: Int
        val isElementsFirst: Boolean
        when {
            dim2 == DETECTION_CHANNELS -> { elements = dim1; isElementsFirst = true }
            dim1 == DETECTION_CHANNELS -> { elements = dim2; isElementsFirst = false }
            else -> return emptyList()
        }

        val original = image.asAndroidBitmap()
        // Bitmap.copy is documented to return null when the copy cannot be made.
        val srcBitmap = if (original.config == Bitmap.Config.ARGB_8888) original
        else original.copy(Bitmap.Config.ARGB_8888, false) ?: return emptyList()
        val ownsSrcBitmap = srcBitmap !== original
        val imgW = srcBitmap.width
        val imgH = srcBitmap.height

        // Letterbox: scale the source to fit inside (inputW, inputH) while
        // preserving aspect ratio, center it, and pad the remainder with
        // gray 114 — matching ultralytics training-time preprocessing.
        val scale = min(
            inputW.toFloat() / imgW.toFloat(),
            inputH.toFloat() / imgH.toFloat()
        )
        val scaledW = (imgW * scale).roundToInt().coerceAtLeast(1)
        val scaledH = (imgH * scale).roundToInt().coerceAtLeast(1)
        val padX = (inputW - scaledW) / 2f
        val padY = (inputH - scaledH) / 2f

        val letterboxed = Bitmap.createBitmap(inputW, inputH, Bitmap.Config.ARGB_8888)
        val tensorImage = try {
            Canvas(letterboxed).apply {
                drawColor(Color.rgb(LETTERBOX_GRAY, LETTERBOX_GRAY, LETTERBOX_GRAY))
                drawBitmap(
                    srcBitmap, null,
                    RectF(padX, padY, padX + scaledW, padY + scaledH),
                    scalePaint
                )
            }
            // Normalize and convert to a float tensor; the processed image is
            // backed by a tensor buffer, not by `letterboxed`.
            imageProcessor.process(TensorImage(DataType.FLOAT32).also { it.load(letterboxed) })
        } finally {
            // Free the temporaries eagerly; this runs per frame.
            letterboxed.recycle()
            if (ownsSrcBitmap) srcBitmap.recycle()
        }

        // Run inference
        val output = TensorBuffer.createFixedSize(outputShape, DataType.FLOAT32)
        try {
            det.interpreter.run(tensorImage.buffer, output.buffer)
        } catch (e: Exception) {
            Logger.e(e) { "ImageDetector: interpreter.run failed" }
            return emptyList()
        }

        val array = output.floatArray

        // Reads one value regardless of whether the tensor is laid out as
        // [elements, 6] or [6, elements].
        fun valueAt(elementIndex: Int, channelIndex: Int): Float {
            return if (isElementsFirst) array[elementIndex * DETECTION_CHANNELS + channelIndex]
            else array[channelIndex * elements + elementIndex]
        }

        val imgWF = imgW.toFloat()
        val imgHF = imgH.toFloat()

        return (0 until elements).mapNotNull { i ->
            val cnf = valueAt(i, 4)
            if (cnf <= CONFIDENCE_THRESHOLD) return@mapNotNull null

            val x1 = valueAt(i, 0)
            val y1 = valueAt(i, 1)
            val x2 = valueAt(i, 2)
            val y2 = valueAt(i, 3)
            val cls = valueAt(i, 5).toInt()

            // Model outputs are treated as normalized [0,1] over the
            // letterboxed input (inputW × inputH). Un-letterbox: convert to
            // letterboxed pixel coords, subtract padding, divide by the fit
            // ratio to get original-image pixel coords.
            val x1pLb = min(x1, x2) * inputW
            val y1pLb = min(y1, y2) * inputH
            val x2pLb = max(x1, x2) * inputW
            val y2pLb = max(y1, y2) * inputH

            AnalysisObject(
                boundingBox = Rect(
                    left = ((x1pLb - padX) / scale).coerceIn(0f, imgWF),
                    top = ((y1pLb - padY) / scale).coerceIn(0f, imgHF),
                    right = ((x2pLb - padX) / scale).coerceIn(0f, imgWF),
                    bottom = ((y2pLb - padY) / scale).coerceIn(0f, imgHF)
                ),
                trackingId = 0,
                labels = listOf(Label(info.label(cls), cnf)),
                frameSize = FrameSize(width = imgW.absoluteValue, height = imgH.absoluteValue),
                timestamp = 0L
            )
        }
    }
}
+8
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/custom/ImageDetector.kt
package com.performancecoachlab.posedetection.custom

import androidx.compose.ui.graphics.ImageBitmap
import com.performancecoachlab.posedetection.recording.AnalysisObject

/**
 * Platform-specific single-image object detector.
 *
 * Each target supplies its own `actual` implementation built from [model].
 */
expect class ImageDetector(model: ObjectModel) {

    /**
     * Runs detection on [image] and returns the detected objects; the list
     * is empty when nothing is detected.
     */
    fun detect(image: ImageBitmap): List<AnalysisObject>
}
+93
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/custom/ImageDetector.ios.kt
package com.performancecoachlab.posedetection.custom

import androidx.compose.ui.geometry.Rect
import androidx.compose.ui.graphics.ImageBitmap
import androidx.compose.ui.graphics.toPixelMap
import com.performancecoachlab.posedetection.recording.AnalysisObject
import com.performancecoachlab.posedetection.recording.FrameSize
import com.performancecoachlab.posedetection.recording.Label
import kotlinx.cinterop.ExperimentalForeignApi
import kotlinx.cinterop.addressOf
import kotlinx.cinterop.usePinned
import platform.CoreGraphics.*
import platform.Vision.VNClassificationObservation
import platform.Vision.VNCoreMLRequest
import platform.Vision.VNImageRequestHandler
import platform.Vision.VNRecognizedObjectObservation

/**
 * iOS detector that converts an [ImageBitmap] to a `CGImage` and runs it
 * through a Vision `VNCoreMLRequest`.
 *
 * @param model source of the Vision Core ML model; when it yields `null`,
 * [detect] always returns an empty list.
 */
@OptIn(ExperimentalForeignApi::class)
actual class ImageDetector actual constructor(model: ObjectModel) {

    private val vncoreModel = model.getModel()

    /**
     * Runs detection on [image].
     *
     * @return one [AnalysisObject] per recognized-object observation, with
     * `boundingBox` in image pixel coordinates (top-left origin). Returns an
     * empty list when no model is available, the image is empty, or the
     * CoreGraphics context/image cannot be created.
     */
    actual fun detect(image: ImageBitmap): List<AnalysisObject> {
        val model = vncoreModel ?: return emptyList()
        val w = image.width
        val h = image.height
        if (w <= 0 || h <= 0) return emptyList()

        val rgba = toRgbaBytes(image, w, h)

        // Nested try/finally so every CoreGraphics object is released on
        // every exit path, including the early returns below.
        val colorSpace = CGColorSpaceCreateDeviceRGB()
        try {
            // NOTE(review): the context is declared premultiplied, but the
            // bytes come from straight-alpha colors; identical for opaque
            // frames — confirm translucent inputs are not expected.
            val bitmapInfo = CGImageAlphaInfo.kCGImageAlphaPremultipliedLast.value
            val context = CGBitmapContextCreate(
                null, w.toULong(), h.toULong(), 8u,
                (w * 4).toULong(), colorSpace, bitmapInfo
            ) ?: return emptyList()
            try {
                rgba.usePinned { pinned ->
                    val data = CGBitmapContextGetData(context)
                    if (data != null) {
                        platform.posix.memcpy(data, pinned.addressOf(0), (w * h * 4).toULong())
                    }
                }

                val cgImage = CGBitmapContextCreateImage(context) ?: return emptyList()
                try {
                    val results = mutableListOf<AnalysisObject>()
                    val request = VNCoreMLRequest(model) { req, _ ->
                        val observations = req?.results as? List<*> ?: return@VNCoreMLRequest
                        for (obs in observations.filterIsInstance<VNRecognizedObjectObservation>()) {
                            // Use the classification identifier; toString()
                            // would yield the object's debug description.
                            val label = (obs.labels.firstOrNull() as? VNClassificationObservation)
                                ?.identifier ?: "Unknown"
                            val confidence = obs.confidence
                            val bb = obs.boundingBox
                            // Vision coordinates: origin bottom-left, normalized 0-1
                            val left = CGRectGetMinX(bb).toFloat() * w
                            val top = (1f - CGRectGetMaxY(bb).toFloat()) * h
                            val right = CGRectGetMaxX(bb).toFloat() * w
                            val bottom = (1f - CGRectGetMinY(bb).toFloat()) * h
                            results.add(
                                AnalysisObject(
                                    boundingBox = Rect(left, top, right, bottom),
                                    trackingId = 0,
                                    labels = listOf(Label(label, confidence)),
                                    frameSize = FrameSize(w, h),
                                    timestamp = 0L
                                )
                            )
                        }
                    }

                    val handler = VNImageRequestHandler(cgImage, mapOf<Any?, Any?>())
                    handler.performRequests(listOf(request), null)

                    return results
                } finally {
                    CGImageRelease(cgImage)
                }
            } finally {
                CGContextRelease(context)
            }
        } finally {
            CGColorSpaceRelease(colorSpace)
        }
    }

    /** Copies [image] into a tightly packed RGBA8888 byte array, row by row. */
    private fun toRgbaBytes(image: ImageBitmap, w: Int, h: Int): ByteArray {
        val pixelMap = image.toPixelMap()
        val bytes = ByteArray(w * h * 4)
        var idx = 0
        for (y in 0 until h) {
            for (x in 0 until w) {
                val color = pixelMap[x, y]
                bytes[idx++] = channelByte(color.red)
                bytes[idx++] = channelByte(color.green)
                bytes[idx++] = channelByte(color.blue)
                bytes[idx++] = channelByte(color.alpha)
            }
        }
        return bytes
    }

    /**
     * Converts a float channel to an 8-bit value, rounding to nearest so
     * float error cannot truncate a value one step low.
     */
    private fun channelByte(value: Float): Byte =
        (value * 255f + 0.5f).toInt().coerceIn(0, 255).toByte()
}