perf: fast batch frame extraction with extractFrames Flow API · nateholland.bsky.social/PoseDetection@31ee873

nateholland.bsky.social / PoseDetection

Fork 0

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

This repository has no description

Fork 0

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

perf: fast batch frame extraction with extractFrames Flow API

- Replace per-pixel floating-point YUV conversion with bulk buffer copy +
fixed-point integer math (~5x faster decode)
- Feed multiple input buffers per decode loop iteration to keep the
hardware decoder pipeline full
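- Add extractFrames(videoPath, frameTimestamps): Flow<InputFrame> for
sequential multi-frame extraction (timestamps must be ascending); the
Android implementation uses one dedicated decoder and avoids per-call
mutex overhead, while iOS currently calls extractFrame per timestamp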
- Add Skeleton.lerp() for smooth interpolation between analysis keyframes
- Use imageProxy.imageInfo.timestamp for frame-accurate overlay timing
- Add coordinate scaling in drawAnalysisResults for cross-resolution mapping
- Fix TFLite interpreter crash with try-catch in live object detection
- Fix iOS VideoBuilder: monotonic timestamps, pool-null resilience
- Add debug test harness in sample app (auto-record, extract, annotate,
encode, display with timing)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

author

virtualintern co-author

Claude Opus 4.6 (1M context) date 2 months ago (Apr 3, 2026, 12:54 AM +0300) commit 31ee8739 31ee8739c3c466db2b92d99af17ea271e3b45f23 parent 6d06b5ba 6d06b5ba04d96db6eea2969730aa0c90c50fc161

+511 -60

13 changed files

Expand all Collapse all

posedetection

src

androidMain

kotlin

com

performancecoachlab

posedetection

camera

Utils.android.kt

recording

VideoUtils.android.kt

commonMain

kotlin

com

performancecoachlab

posedetection

camera

Utils.kt

recording

VideoUtils.kt

skeleton

Skeleton.kt

iosMain

kotlin

com

performancecoachlab

posedetection

encoding

VideoBuilder.ios.kt

recording

VideoUtils.ios.kt

sample

composeApp

src

commonMain

kotlin

com

nate

posedetection

App.kt

iosApp

FastViTT8F16.mlpackage

Data

com.apple.CoreML

model.mlmodel

weights

weight.bin

Manifest.json

iosApp

models

YOLOv3FP16.mlmodel

iosApp.xcodeproj

project.pbxproj

+8 -1

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.android.kt

Reviewed

··· 326 326 val outputShape = objectDetector.modelInfo.outputShape 327 327 val output = TensorBuffer.createFixedSize(outputShape, DataType.FLOAT32) 328 328 329 329 - objectDetector.interpreter.run(tensorImage.buffer, output.buffer) 329 329 + try { 330 330 + objectDetector.interpreter.run(tensorImage.buffer, output.buffer) 331 331 + } catch (e: Exception) { 332 332 + Logger.e(e) { "TFLite interpreter.run failed" } 333 333 + val skeleton = poseFuture?.get() 334 334 + onComplete(AnalysisResult(skeleton, emptyList()), bitmap) 335 335 + return 336 336 + } 330 337 331 338 val array = output.floatArray 332 339 if (outputShape.size != 3) emptyList() else {

+60 -15

posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.android.kt

Reviewed

··· 11 11 import androidx.core.net.toUri 12 12 import co.touchlab.kermit.Logger 13 13 import kotlinx.coroutines.Dispatchers 14 14 + import kotlinx.coroutines.flow.Flow 15 15 + import kotlinx.coroutines.flow.flow 16 16 + import kotlinx.coroutines.flow.flowOn 14 17 import kotlinx.coroutines.sync.Mutex 15 18 import kotlinx.coroutines.sync.withLock 16 19 import kotlinx.coroutines.withContext ··· 81 84 return lastBitmap 82 85 } 83 86 84 84 - val timeoutUs = 10_000L 85 87 val info = MediaCodec.BufferInfo() 86 88 87 89 try { 88 90 while (!outputEos) { 89 89 - // Feed input packets. 91 91 + // Feed as many input packets as possible to keep the decoder pipeline full. 90 92 if (!inputEos) { 91 91 - val inIdx = decoder.dequeueInputBuffer(0) 92 92 - if (inIdx >= 0) { 93 93 + while (true) { 94 94 + val inIdx = decoder.dequeueInputBuffer(10_000L) 95 95 + if (inIdx < 0) break 93 96 val buf = decoder.getInputBuffer(inIdx)!! 94 97 val size = ext.readSampleData(buf, 0) 95 98 if (size < 0) { ··· 97 100 inIdx, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM 98 101 ) 99 102 inputEos = true 103 103 + break 100 104 } else { 101 105 decoder.queueInputBuffer(inIdx, 0, size, ext.sampleTime, 0) 102 106 ext.advance() ··· 105 109 } 106 110 107 111 // Drain output. 108 108 - val outIdx = decoder.dequeueOutputBuffer(info, timeoutUs) 112 112 + val outIdx = decoder.dequeueOutputBuffer(info, 10_000L) 109 113 if (outIdx >= 0) { 110 114 val ptsMs = info.presentationTimeUs / 1000L 111 115 ··· 162 166 return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true) 163 167 } 164 168 169 169 + /** 170 170 + * Fast YUV Image to ARGB Bitmap conversion using bulk buffer copies 171 171 + * and fixed-point integer arithmetic (no floating-point per pixel). 
172 172 + */ 165 173 private fun yuvImageToBitmap(image: Image): Bitmap { 166 174 val w = image.width 167 175 val h = image.height ··· 169 177 val uPlane = image.planes[1] 170 178 val vPlane = image.planes[2] 171 179 180 180 + val yRowStride = yPlane.rowStride 181 181 + val uvRowStride = uPlane.rowStride 182 182 + val uvPixelStride = uPlane.pixelStride 183 183 + 184 184 + // Bulk-copy ByteBuffers to ByteArrays to avoid per-pixel ByteBuffer.get() overhead. 172 185 val yBuf = yPlane.buffer 173 186 val uBuf = uPlane.buffer 174 187 val vBuf = vPlane.buffer 175 188 176 176 - val yRowStride = yPlane.rowStride 177 177 - val uvRowStride = uPlane.rowStride 178 178 - val uvPixelStride = uPlane.pixelStride 189 189 + val yBytes = ByteArray(yBuf.remaining()) 190 190 + yBuf.get(yBytes) 191 191 + val uBytes = ByteArray(uBuf.remaining()) 192 192 + uBuf.get(uBytes) 193 193 + val vBytes = ByteArray(vBuf.remaining()) 194 194 + vBuf.get(vBytes) 179 195 180 196 val argb = IntArray(w * h) 181 197 182 198 for (j in 0 until h) { 199 199 + val yRowOffset = j * yRowStride 200 200 + val uvRowOffset = (j shr 1) * uvRowStride 183 201 for (i in 0 until w) { 184 184 - val y = (yBuf.get(j * yRowStride + i).toInt() and 0xFF) 185 185 - val uvIdx = (j / 2) * uvRowStride + (i / 2) * uvPixelStride 186 186 - val u = (uBuf.get(uvIdx).toInt() and 0xFF) - 128 187 187 - val v = (vBuf.get(uvIdx).toInt() and 0xFF) - 128 202 202 + val y = yBytes[yRowOffset + i].toInt() and 0xFF 203 203 + val uvIdx = uvRowOffset + (i shr 1) * uvPixelStride 204 204 + val u = (uBytes[uvIdx].toInt() and 0xFF) - 128 205 205 + val v = (vBytes[uvIdx].toInt() and 0xFF) - 128 188 206 189 189 - val r = (y + 1.370705 * v).toInt().coerceIn(0, 255) 190 190 - val g = (y - 0.337633 * u - 0.698001 * v).toInt().coerceIn(0, 255) 191 191 - val b = (y + 1.732446 * u).toInt().coerceIn(0, 255) 207 207 + // Fixed-point: multiply by scaled constant, shift right by 10. 
208 208 + // 1.402 * 1024 = 1436, 0.344 * 1024 = 352, 0.714 * 1024 = 731, 1.772 * 1024 = 1815 209 209 + var r = y + ((v * 1436) shr 10) 210 210 + var g = y - ((u * 352 + v * 731) shr 10) 211 211 + var b = y + ((u * 1815) shr 10) 212 212 + if (r < 0) r = 0 else if (r > 255) r = 255 213 213 + if (g < 0) g = 0 else if (g > 255) g = 255 214 214 + if (b < 0) b = 0 else if (b > 255) b = 255 192 215 193 216 argb[j * w + i] = (0xFF shl 24) or (r shl 16) or (g shl 8) or b 194 217 } ··· 229 252 } 230 253 } 231 254 } 255 255 + 256 256 + /** 257 257 + * Batch sequential frame extraction as a Flow. Opens a dedicated decoder, 258 258 + * decodes frames in order, and emits each as it's ready. More efficient than 259 259 + * repeated [extractFrame] calls because there's no per-call mutex/coroutine 260 260 + * overhead and the decoder runs continuously. 261 261 + */ 262 262 + actual fun extractFrames( 263 263 + videoPath: String, frameTimestamps: List<Long> 264 264 + ): Flow<InputFrame> = flow { 265 265 + val decoder = VideoFrameDecoder(videoPath) 266 266 + try { 267 267 + for (ts in frameTimestamps) { 268 268 + val bitmap = decoder.decodeUpTo(ts) 269 269 + if (bitmap != null) { 270 270 + emit(InputFrame(bitmap = bitmap, timestamp = ts)) 271 271 + } 272 272 + } 273 273 + } finally { 274 274 + decoder.release() 275 275 + } 276 276 + }.flowOn(Dispatchers.IO) 232 277 233 278 actual suspend fun listVideoFrameTimestamps( 234 279 videoPath: String,

+32 -13

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.kt

Reviewed

··· 234 234 val drawScope = CanvasDrawScope() 235 235 val size = Size(it.width.toFloat(), it.height.toFloat()) 236 236 237 237 - // Calculate scaling factor based on skeleton size 237 237 + // Compute scale factors to map detection coordinates to target bitmap. 238 238 + // Use independent X/Y scaling — handles the case where the skeleton's 239 239 + // coordinate space has a different aspect ratio than the target frame 240 240 + // (e.g., iOS portrait analysis vs landscape extracted video). 241 241 + val skelScaleX = if (skeleton != null && skeleton.width > 0f) 242 242 + it.width.toFloat() / skeleton.width else 1f 243 243 + val skelScaleY = if (skeleton != null && skeleton.height > 0f) 244 244 + it.height.toFloat() / skeleton.height else 1f 245 245 + val skelOffsetX = 0f 246 246 + val skelOffsetY = 0f 247 247 + 248 248 + // Calculate scaling factor based on skeleton size (after mapping to target) 238 249 val skeletonSize = skeleton?.joints()?.let { joints -> 239 239 - val minX = joints.minOfOrNull { joint -> joint.x } ?: 0f 240 240 - val maxX = joints.maxOfOrNull { joint -> joint.x } ?: 0f 241 241 - val minY = joints.minOfOrNull { joint -> joint.y } ?: 0f 242 242 - val maxY = joints.maxOfOrNull { joint -> joint.y } ?: 0f 250 250 + val minX = joints.minOfOrNull { joint -> joint.x * skelScaleX } ?: 0f 251 251 + val maxX = joints.maxOfOrNull { joint -> joint.x * skelScaleX } ?: 0f 252 252 + val minY = joints.minOfOrNull { joint -> joint.y * skelScaleY } ?: 0f 253 253 + val maxY = joints.maxOfOrNull { joint -> joint.y * skelScaleY } ?: 0f 243 254 kotlin.math.max(maxX - minX, maxY - minY) 244 255 } ?: 1f 245 256 val minDime = kotlin.math.min(it.width, it.height).toFloat() ··· 255 266 ) { 256 267 drawImage(it) 257 268 analysisResults.objects.forEach { analysisObject -> 269 269 + val oScaleX = if (analysisObject.frameSize.width > 0) 270 270 + it.width.toFloat() / analysisObject.frameSize.width else 1f 271 271 + val oScaleY = if (analysisObject.frameSize.height > 0) 272 
272 + it.height.toFloat() / analysisObject.frameSize.height else 1f 258 273 drawRect( 259 274 color = Color.Red, topLeft = androidx.compose.ui.geometry.Offset( 260 260 - analysisObject.boundingBox.left, analysisObject.boundingBox.top 275 275 + analysisObject.boundingBox.left * oScaleX, 276 276 + analysisObject.boundingBox.top * oScaleY 261 277 ), size = Size( 262 262 - analysisObject.boundingBox.width, analysisObject.boundingBox.height 278 278 + analysisObject.boundingBox.width * oScaleX, 279 279 + analysisObject.boundingBox.height * oScaleY 263 280 ), style = Stroke(scaledStrokeWidth) 264 281 ) 265 282 } ··· 278 295 bones().forEach { line -> 279 296 drawLine( 280 297 color = paintWhite.color, start = androidx.compose.ui.geometry.Offset( 281 281 - line.first.x, line.first.y 298 298 + line.first.x * skelScaleX + skelOffsetX, line.first.y * skelScaleY + skelOffsetY 282 299 ), end = androidx.compose.ui.geometry.Offset( 283 283 - line.second.x, line.second.y 300 300 + line.second.x * skelScaleX + skelOffsetX, line.second.y * skelScaleY + skelOffsetY 284 301 ), strokeWidth = paintWhite.strokeWidth, blendMode = BlendMode.Softlight 285 302 ) 286 303 drawLine( 287 304 color = paintBlue.color, start = androidx.compose.ui.geometry.Offset( 288 288 - line.first.x, line.first.y 305 305 + line.first.x * skelScaleX + skelOffsetX, line.first.y * skelScaleY + skelOffsetY 289 306 ), end = androidx.compose.ui.geometry.Offset( 290 290 - line.second.x, line.second.y 307 307 + line.second.x * skelScaleX + skelOffsetX, line.second.y * skelScaleY + skelOffsetY 291 308 ), strokeWidth = paintBlue.strokeWidth, blendMode = BlendMode.Color 292 309 ) 293 310 } 294 311 295 312 joints().forEach { joint -> 313 313 + val jx = joint.x * skelScaleX + skelOffsetX 314 314 + val jy = joint.y * skelScaleY + skelOffsetY 296 315 drawCircle( 297 316 brush = Brush.radialGradient( 298 317 colors = listOf(Color.Blue, Color.Transparent), 299 299 - center = androidx.compose.ui.geometry.Offset(joint.x, joint.y), 
318 318 + center = androidx.compose.ui.geometry.Offset(jx, jy), 300 319 radius = 1.2f * scaledStrokeWidth 301 320 ), 302 321 radius = 1.2f * scaledStrokeWidth, 303 303 - center = androidx.compose.ui.geometry.Offset(joint.x, joint.y) 322 322 + center = androidx.compose.ui.geometry.Offset(jx, jy) 304 323 ) 305 324 } 306 325 }

+12 -1

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.kt

Reviewed

··· 1 1 package com.performancecoachlab.posedetection.recording 2 2 3 3 + import kotlinx.coroutines.flow.Flow 4 4 + 3 5 expect suspend fun extractFrame( 4 6 videoPath: String, frameTimestamp: Long 5 7 ): InputFrame? 6 8 9 9 + /** 10 10 + * Batch sequential frame extraction as a Flow. More efficient than repeated 11 11 + * [extractFrame] calls for processing many frames from the same video. 12 12 + * Frames are emitted in the order of [frameTimestamps] (must be ascending). 13 13 + */ 14 14 + expect fun extractFrames( 15 15 + videoPath: String, frameTimestamps: List<Long> 16 16 + ): Flow<InputFrame> 17 17 + 7 18 expect suspend fun listVideoFrameTimestamps( 8 19 videoPath: String, 9 20 ): List<Long> 10 21 11 22 @Suppress("EXPECT_ACTUAL_CLASSIFIERS_ARE_IN_BETA_WARNING") 12 12 - expect object VideoExtractionContext 23 23 + expect object VideoExtractionContext

+34

posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/skeleton/Skeleton.kt

Reviewed

··· 37 37 val rightKnee: Double?, 38 38 ) 39 39 40 40 + companion object { 41 41 + /** Linearly interpolate between two skeletons. [alpha] in 0..1. */ 42 42 + fun lerp(a: Skeleton, b: Skeleton, alpha: Float): Skeleton { 43 43 + fun lerpCoord( 44 44 + c1: SkeletonCoordinate?, c2: SkeletonCoordinate?, t: Float 45 45 + ): SkeletonCoordinate? { 46 46 + if (c1 == null) return c2 47 47 + if (c2 == null) return c1 48 48 + return SkeletonCoordinate( 49 49 + x = c1.x + (c2.x - c1.x) * t, 50 50 + y = c1.y + (c2.y - c1.y) * t 51 51 + ) 52 52 + } 53 53 + val t = alpha.coerceIn(0f, 1f) 54 54 + return Skeleton( 55 55 + timestamp = (a.timestamp + ((b.timestamp - a.timestamp) * t).toLong()), 56 56 + leftShoulder = lerpCoord(a.leftShoulder, b.leftShoulder, t), 57 57 + rightShoulder = lerpCoord(a.rightShoulder, b.rightShoulder, t), 58 58 + leftElbow = lerpCoord(a.leftElbow, b.leftElbow, t), 59 59 + rightElbow = lerpCoord(a.rightElbow, b.rightElbow, t), 60 60 + leftWrist = lerpCoord(a.leftWrist, b.leftWrist, t), 61 61 + rightWrist = lerpCoord(a.rightWrist, b.rightWrist, t), 62 62 + leftHip = lerpCoord(a.leftHip, b.leftHip, t), 63 63 + rightHip = lerpCoord(a.rightHip, b.rightHip, t), 64 64 + leftKnee = lerpCoord(a.leftKnee, b.leftKnee, t), 65 65 + rightKnee = lerpCoord(a.rightKnee, b.rightKnee, t), 66 66 + leftAnkle = lerpCoord(a.leftAnkle, b.leftAnkle, t), 67 67 + rightAnkle = lerpCoord(a.rightAnkle, b.rightAnkle, t), 68 68 + width = a.width, 69 69 + height = a.height, 70 70 + ) 71 71 + } 72 72 + } 73 73 + 40 74 fun bones(): List<Pair<SkeletonCoordinate, SkeletonCoordinate>> { 41 75 val lines = emptyList<Pair<SkeletonCoordinate, SkeletonCoordinate>>().toMutableList() 42 76 if (leftShoulder != null && rightShoulder != null) lines += Pair(

+10 -4

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/encoding/VideoBuilder.ios.kt

Reviewed

··· 115 115 frameCount = 0L 116 116 } 117 117 118 118 + private var lastTimestampMs = -1L 119 119 + 118 120 @OptIn(ExperimentalForeignApi::class, NativeRuntimeApi::class) 119 121 override suspend fun addFrame(frame: ImageBitmap, timestampms: Long) { 120 122 if (!started) initWriter() 121 121 - // Check for empty or invalid frame (skip if width or height is 0) 122 123 if (frame.width == 0 || frame.height == 0) return 124 124 + // Ensure strictly monotonic timestamps. 125 125 + val ts = if (timestampms <= lastTimestampMs) lastTimestampMs + 1 else timestampms 126 126 + lastTimestampMs = ts 123 127 val videoWriterInput = input!! 124 128 val pixelBufferAdaptor = adaptor!! 125 129 val pool = pixelBufferAdaptor.pixelBufferPool 126 126 - ?: throw IllegalStateException("Pixel buffer pool is null") 127 127 - val presentationTime = CMTimeMake(timestampms, 1_000) 130 130 + if (pool == null) { 131 131 + println("DEBUG: pool null at frame $frameCount, writerStatus=${writer?.status}, error=${writer?.error}") 132 132 + return 133 133 + } 134 134 + val presentationTime = CMTimeMake(ts, 1_000) 128 135 memScoped { 129 136 val pixelBufferPtr = alloc<CVPixelBufferRefVar>() 130 137 val status = ··· 193 200 if (started) { 194 201 input?.markAsFinished() 195 202 writer?.finishWritingWithCompletionHandler { 196 196 - //println("[VideoBuilder] Finished writing video!") 197 203 } 198 204 // Wait for writing to finish 199 205 while (writer?.status == AVAssetWriterStatusWriting) {

+11

posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.ios.kt

Reviewed

··· 10 10 import kotlinx.cinterop.value 11 11 import kotlinx.coroutines.Dispatchers 12 12 import kotlinx.coroutines.IO 13 13 + import kotlinx.coroutines.flow.Flow 14 14 + import kotlinx.coroutines.flow.flow 15 15 + import kotlinx.coroutines.flow.flowOn 13 16 import kotlinx.coroutines.withContext 14 17 import platform.AVFoundation.AVAsset 15 18 import platform.AVFoundation.AVAssetImageGenerator ··· 55 58 return@withContext null 56 59 } 57 60 } 61 61 + 62 62 + actual fun extractFrames( 63 63 + videoPath: String, frameTimestamps: List<Long> 64 64 + ): Flow<InputFrame> = flow { 65 65 + for (ts in frameTimestamps) { 66 66 + extractFrame(videoPath, ts)?.let { emit(it) } 67 67 + } 68 68 + }.flowOn(Dispatchers.IO) 58 69 59 70 private fun NSURL.safeDescription(): String { 60 71 val scheme = this.scheme ?: "unknown"

+342 -2

sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt

Reviewed

··· 39 39 import androidx.compose.ui.Alignment 40 40 import androidx.compose.ui.Modifier 41 41 import androidx.compose.ui.geometry.Rect 42 42 + import androidx.compose.ui.geometry.Size 43 43 + import androidx.compose.ui.graphics.Canvas 42 44 import androidx.compose.ui.graphics.Color 43 45 import androidx.compose.ui.graphics.ImageBitmap 46 46 + import androidx.compose.ui.graphics.drawscope.CanvasDrawScope 44 47 import androidx.compose.ui.graphics.drawscope.Stroke 48 48 + import androidx.compose.ui.unit.Density 49 49 + import androidx.compose.ui.unit.LayoutDirection 45 50 import androidx.compose.ui.layout.ContentScale 46 51 import androidx.compose.ui.text.style.TextAlign 47 52 import androidx.compose.ui.unit.dp ··· 66 71 import com.performancecoachlab.posedetection.permissions.PermissionProvider 67 72 import com.performancecoachlab.posedetection.recording.FrameAnalyser 68 73 import com.performancecoachlab.posedetection.recording.InputFrame 74 74 + import com.performancecoachlab.posedetection.recording.AnalysisResult 75 75 + import com.performancecoachlab.posedetection.recording.FrameSize 69 76 import com.performancecoachlab.posedetection.recording.extractFrame 77 77 + import com.performancecoachlab.posedetection.recording.extractFrames 70 78 import com.performancecoachlab.posedetection.recording.listVideoFrameTimestamps 71 79 import com.performancecoachlab.posedetection.skeleton.Pose 80 80 + import com.performancecoachlab.posedetection.skeleton.Skeleton 72 81 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 73 82 import io.github.vinceglb.filekit.FileKit 74 83 import io.github.vinceglb.filekit.PlatformFile ··· 79 88 import io.github.vinceglb.filekit.extension 80 89 import io.github.vinceglb.filekit.filesDir 81 90 import io.github.vinceglb.filekit.path 91 91 + import kotlinx.coroutines.CompletableDeferred 82 92 import kotlinx.coroutines.Job 93 93 + import kotlinx.coroutines.delay 83 94 import kotlinx.coroutines.launch 84 95 import 
kotlin.math.roundToLong 85 96 import kotlin.time.Clock ··· 88 99 89 100 @Composable 90 101 internal fun App() = AppTheme { 91 91 - var selectedTabIndex by remember { mutableStateOf(0) } 92 92 - val tabs = listOf("Camera Feed", "Recorded Video") 102 102 + var selectedTabIndex by remember { mutableStateOf(2) } 103 103 + val tabs = listOf("Camera Feed", "Recorded Video", "Debug Test") 93 104 94 105 // Apply safe-area insets once to the whole screen. 95 106 Column( ··· 112 123 when (selectedTabIndex) { 113 124 0 -> CameraSample() 114 125 1 -> RecordedSample() 126 126 + 2 -> DebugTestScreen() 115 127 } 116 128 } 117 129 } ··· 376 388 CircularProgressIndicator( 377 389 modifier = Modifier.align(Alignment.Center), 378 390 ) 391 391 + } 392 392 + } 393 393 + } 394 394 + 395 395 + /** Find the keys before and after [target] in a sorted list. Returns (before, after). */ 396 396 + private fun findBracket(sortedKeys: List<Long>, target: Long): Pair<Long?, Long?> { 397 397 + if (sortedKeys.isEmpty()) return null to null 398 398 + var idx = sortedKeys.binarySearch(target) 399 399 + if (idx >= 0) return sortedKeys[idx] to sortedKeys[idx] 400 400 + idx = -(idx + 1) 401 401 + val before = if (idx > 0) sortedKeys[idx - 1] else null 402 402 + val after = if (idx < sortedKeys.size) sortedKeys[idx] else null 403 403 + return before to after 404 404 + } 405 405 + 406 406 + /** 407 407 + * Debug test screen: auto-records 1s of video with pose+object detection, 408 408 + * then extracts all frames via the batch Flow API, draws analysis results, 409 409 + * encodes to a new video, and shows both side-by-side with timing info. 
410 410 + */ 411 411 + @OptIn(ExperimentalTime::class) 412 412 + @Composable 413 413 + fun DebugTestScreen() { 414 414 + val availableModels = discoverModels() 415 415 + val selectedModel = availableModels.firstOrNull() 416 416 + val modelPath = selectedModel?.path ?: ModelPath() 417 417 + val generalModel = rememberObjectModel(modelPath) 418 418 + 419 419 + val skeletonRepository = remember { SkeletonRepository() } 420 420 + val customObjectRepository = remember { CustomObjectRespository() } 421 421 + val skeleton by skeletonRepository.skeletonFlow.collectAsState() 422 422 + val customObjects by customObjectRepository.customObjectFlow.collectAsState() 423 423 + var permissionGranted by remember { mutableStateOf(false) } 424 424 + 425 425 + // Test phases 426 426 + var phase by remember { mutableStateOf("WAITING_PERMISSION") } 427 427 + var recordingId: String? by remember { mutableStateOf(null) } 428 428 + var originalVideoPath by remember { mutableStateOf("") } 429 429 + var processedVideoPath: String? by remember { mutableStateOf(null) } 430 430 + var statusText by remember { mutableStateOf("Waiting for camera permission...") } 431 431 + 432 432 + // Timing 433 433 + var recordTimeMs by remember { mutableStateOf(0L) } 434 434 + var extractTimeMs by remember { mutableStateOf(0L) } 435 435 + var frameCount by remember { mutableStateOf(0) } 436 436 + 437 437 + // Controller to get feed dimensions from CameraView. 438 438 + val controller = remember { CameraViewControllerImpl() } 439 439 + var feedWidth by remember { mutableStateOf(0f) } 440 440 + var feedHeight by remember { mutableStateOf(0f) } 441 441 + 442 442 + // Saved analysis results during recording, keyed by wall-clock ms 443 443 + // (System.currentTimeMillis() captured before ML processing starts). 444 444 + val analysisResults = remember { mutableMapOf<Long, AnalysisResult>() } 445 445 + // Wall-clock time when CameraX actually starts recording (from onRecordToggled). 
446 446 + var videoStartWallClock by remember { mutableStateOf(0L) } 447 447 + 448 448 + // Deferred to wait for video save callback 449 449 + val videoSavedDeferred = remember { mutableStateOf<CompletableDeferred<String>?>(null) } 450 450 + 451 451 + val coroutineScope = rememberCoroutineScope() 452 452 + 453 453 + PermissionProvider().apply { 454 454 + if (!hasCameraPermission()) RequestCameraPermission(onGranted = { 455 455 + permissionGranted = true 456 456 + }, onDenied = { permissionGranted = false }) else permissionGranted = true 457 457 + } 458 458 + 459 459 + // Only save analysis results after recording has actually started 460 460 + // (videoStartWallClock > 0), keyed by wall-clock ms. 461 461 + LaunchedEffect(skeleton, recordingId, videoStartWallClock) { 462 462 + val skel = skeleton 463 463 + if (recordingId != null && videoStartWallClock > 0 && skel != null) { 464 464 + val ts = skel.timestamp 465 465 + val current = analysisResults[ts] ?: AnalysisResult(null, emptyList()) 466 466 + analysisResults[ts] = current.copy(skeleton = skeleton) 467 467 + } 468 468 + } 469 469 + LaunchedEffect(customObjects, recordingId, videoStartWallClock) { 470 470 + if (recordingId != null && videoStartWallClock > 0 && customObjects != null) { 471 471 + val lastKey = analysisResults.keys.maxOrNull() 472 472 + if (lastKey != null) { 473 473 + val current = analysisResults[lastKey] ?: AnalysisResult(null, emptyList()) 474 474 + analysisResults[lastKey] = current.copy(objects = customObjects ?: emptyList()) 475 475 + } 476 476 + } 477 477 + } 478 478 + 479 479 + // Auto-start recording once permission is granted 480 480 + LaunchedEffect(permissionGranted) { 481 481 + if (permissionGranted && phase == "WAITING_PERMISSION") { 482 482 + phase = "RECORDING" 483 483 + statusText = "Recording 1 second..." 
484 484 + // Small delay for camera to initialize 485 485 + delay(1500) 486 486 + 487 487 + val deferred = CompletableDeferred<String>() 488 488 + videoSavedDeferred.value = deferred 489 489 + val recordStart = Clock.System.now().toEpochMilliseconds() 490 490 + recordingId = "${Clock.System.now().epochSeconds}" 491 491 + // Fallback video start if onRecordToggled never fires (iOS). 492 492 + if (videoStartWallClock == 0L) videoStartWallClock = recordStart 493 493 + 494 494 + // Wait for recording duration 495 495 + delay(1000) 496 496 + 497 497 + // Stop recording 498 498 + recordingId = null 499 499 + statusText = "Waiting for video to save..." 500 500 + 501 501 + // Wait for the video to be saved 502 502 + val savedPath = deferred.await() 503 503 + recordTimeMs = Clock.System.now().toEpochMilliseconds() - recordStart 504 504 + originalVideoPath = savedPath 505 505 + phase = "EXTRACTING" 506 506 + statusText = "Extracting and re-encoding frames..." 507 507 + 508 508 + val extractStart = Clock.System.now().toEpochMilliseconds() 509 509 + val timestamps = listVideoFrameTimestamps(savedPath) 510 510 + frameCount = timestamps.size 511 511 + 512 512 + val savedKeys = analysisResults.keys.sorted() 513 513 + println("DEBUG: ${timestamps.size} frames, ${savedKeys.size} analysis results") 514 514 + 515 515 + val file = PlatformFile(FileKit.filesDir, "debug_output.mp4") 516 516 + var videoBuilder: VideoBuilder? = null 517 517 + var firstFrameTs: Long? = null 518 518 + var processedCount = 0 519 519 + 520 520 + // Log feed vs preview dimensions for debugging coordinate mapping. 
521 521 + println("DEBUG: feedSize=${feedWidth}x${feedHeight}") 522 522 + 523 523 + extractFrames(savedPath, timestamps).collect { inputFrame -> 524 524 + val firstTs = firstFrameTs ?: inputFrame.timestamp.also { firstFrameTs = it } 525 525 + val videoElapsedMs = inputFrame.timestamp - firstTs 526 526 + val wallClockOfFrame = videoStartWallClock + videoElapsedMs 527 527 + 528 528 + val (prevKey, nextKey) = findBracket(savedKeys, wallClockOfFrame) 529 529 + val prevResult = prevKey?.let { analysisResults[it] } 530 530 + val nextResult = nextKey?.let { analysisResults[it] } 531 531 + 532 532 + val result = if (prevResult?.skeleton != null && nextResult?.skeleton != null 533 533 + && prevKey != null && nextKey != null && nextKey > prevKey 534 534 + ) { 535 535 + val alpha = (wallClockOfFrame - prevKey).toFloat() / (nextKey - prevKey) 536 536 + val interpSkeleton = Skeleton.lerp(prevResult.skeleton!!, nextResult.skeleton!!, alpha) 537 537 + AnalysisResult(interpSkeleton, nextResult.objects) 538 538 + } else { 539 539 + nextResult ?: prevResult ?: AnalysisResult(null, emptyList()) 540 540 + } 541 541 + 542 542 + // Remap skeleton from preview space → video frame space. 543 543 + // Use drawAnalysisResults first (unscaled), then get frame dims from result. 544 544 + val unscaled = inputFrame.drawAnalysisResults(result) 545 545 + val frameW = unscaled.width.toFloat() 546 546 + val frameH = unscaled.height.toFloat() 547 547 + val remappedResult = result.skeleton?.let { skel -> 548 548 + val sx = frameW / skel.width 549 549 + val sy = frameH / skel.height 550 550 + fun remap(c: Skeleton.SkeletonCoordinate?) 
= 551 551 + c?.let { Skeleton.SkeletonCoordinate(it.x * sx, it.y * sy) } 552 552 + val remapped = Skeleton( 553 553 + timestamp = skel.timestamp, 554 554 + leftShoulder = remap(skel.leftShoulder), 555 555 + rightShoulder = remap(skel.rightShoulder), 556 556 + leftElbow = remap(skel.leftElbow), 557 557 + rightElbow = remap(skel.rightElbow), 558 558 + leftWrist = remap(skel.leftWrist), 559 559 + rightWrist = remap(skel.rightWrist), 560 560 + leftHip = remap(skel.leftHip), 561 561 + rightHip = remap(skel.rightHip), 562 562 + leftKnee = remap(skel.leftKnee), 563 563 + rightKnee = remap(skel.rightKnee), 564 564 + leftAnkle = remap(skel.leftAnkle), 565 565 + rightAnkle = remap(skel.rightAnkle), 566 566 + width = frameW, 567 567 + height = frameH, 568 568 + ) 569 569 + // Also remap object bounding boxes. 570 570 + val remappedObjects = result.objects.map { obj -> 571 571 + val osx = frameW / obj.frameSize.width 572 572 + val osy = frameH / obj.frameSize.height 573 573 + obj.copy( 574 574 + boundingBox = Rect( 575 575 + obj.boundingBox.left * osx, 576 576 + obj.boundingBox.top * osy, 577 577 + obj.boundingBox.right * osx, 578 578 + obj.boundingBox.bottom * osy 579 579 + ), 580 580 + frameSize = FrameSize(frameW.toInt(), frameH.toInt()) 581 581 + ) 582 582 + } 583 583 + AnalysisResult(remapped, remappedObjects) 584 584 + } ?: result 585 585 + 586 586 + val annotated = inputFrame.drawAnalysisResults(remappedResult) 587 587 + 588 588 + if (videoBuilder == null) { 589 589 + println("DEBUG: creating VideoBuilder ${annotated.width}x${annotated.height}") 590 590 + videoBuilder = createVideoBuilder( 591 591 + outputPath = file.path, 592 592 + fps = 30, 593 593 + width = annotated.width, 594 594 + height = annotated.height 595 595 + ) 596 596 + } 597 597 + 598 598 + val relativeTs = inputFrame.timestamp - firstTs 599 599 + videoBuilder?.addFrame(annotated, relativeTs) 600 600 + processedCount++ 601 601 + statusText = "Processing frame $processedCount / ${timestamps.size}..." 
602 602 + } 603 603 + 604 604 + println("DEBUG: all frames collected, finalizing...") 605 605 + val finalPath = try { 606 606 + videoBuilder?.finalize() 607 607 + } catch (e: Exception) { 608 608 + println("DEBUG: finalize error: ${e.message}") 609 609 + null 610 610 + } 611 611 + println("DEBUG: finalized, path=$finalPath") 612 612 + processedVideoPath = finalPath 613 613 + extractTimeMs = Clock.System.now().toEpochMilliseconds() - extractStart 614 614 + phase = "DONE" 615 615 + statusText = "Done!" 616 616 + } 617 617 + } 618 618 + 619 619 + Column(modifier = Modifier.fillMaxSize()) { 620 620 + // Status bar 621 621 + Text( 622 622 + text = statusText, 623 623 + modifier = Modifier.fillMaxWidth().padding(8.dp), 624 624 + textAlign = TextAlign.Center, 625 625 + style = androidx.compose.material3.MaterialTheme.typography.titleSmall, 626 626 + ) 627 627 + 628 628 + // Timing info 629 629 + if (phase == "DONE") { 630 630 + val recordSec = (recordTimeMs / 100.0).roundToLong() / 10.0 631 631 + val extractSec = (extractTimeMs / 100.0).roundToLong() / 10.0 632 632 + val fps = if (extractTimeMs > 0) (frameCount * 1000.0 / extractTimeMs).roundToLong() else 0 633 633 + Text( 634 634 + text = "Record: ${recordSec}s | Extract+encode: ${extractSec}s ($frameCount frames, ~${fps} fps)", 635 635 + modifier = Modifier.fillMaxWidth().padding(horizontal = 8.dp), 636 636 + textAlign = TextAlign.Center, 637 637 + fontSize = 12.sp, 638 638 + ) 639 639 + } 640 640 + 641 641 + when (phase) { 642 642 + "WAITING_PERMISSION" -> { 643 643 + Box(modifier = Modifier.weight(1f).fillMaxWidth(), contentAlignment = Alignment.Center) { 644 644 + CircularProgressIndicator() 645 645 + } 646 646 + } 647 647 + "RECORDING" -> { 648 648 + // Show camera preview during recording 649 649 + if (permissionGranted) { 650 650 + DetectOrientation { orientation -> 651 651 + key(orientation) { 652 652 + CameraView( 653 653 + skeletonRepository = skeletonRepository, 654 654 + customObjectRepository = 
customObjectRepository, 655 655 + detectMode = DetectMode.BOTH, 656 656 + drawSkeleton = true, 657 657 + objectModel = generalModel, 658 658 + modifier = Modifier.weight(1f).fillMaxWidth(), 659 659 + frontCamera = false, 660 660 + recordingId = recordingId, 661 661 + controller = controller, 662 662 + onRecordToggled = { recording -> 663 663 + if (recording) { 664 664 + videoStartWallClock = Clock.System.now().toEpochMilliseconds() 665 665 + // Capture feed dimensions for coordinate remapping. 666 666 + controller.requestData { data -> 667 667 + feedWidth = data.width 668 668 + feedHeight = data.height 669 669 + } 670 670 + } 671 671 + }, 672 672 + onVideoSaved = { id, url -> 673 673 + videoSavedDeferred.value?.complete(url) 674 674 + }, 675 675 + ) 676 676 + } 677 677 + } 678 678 + } 679 679 + } 680 680 + "EXTRACTING" -> { 681 681 + Box(modifier = Modifier.weight(1f).fillMaxWidth(), contentAlignment = Alignment.Center) { 682 682 + CircularProgressIndicator() 683 683 + } 684 684 + } 685 685 + "DONE" -> { 686 686 + // Processed video only 687 687 + Box(modifier = Modifier.weight(1f).fillMaxWidth()) { 688 688 + processedVideoPath?.let { path -> 689 689 + val processedHost = remember(path) { 690 690 + MediaPlayerHost( 691 691 + mediaUrl = path, 692 692 + isLooping = true, 693 693 + isPaused = false, 694 694 + isMuted = true, 695 695 + initialVideoFitMode = ScreenResize.FIT, 696 696 + ) 697 697 + } 698 698 + VideoPlayerComposable( 699 699 + modifier = Modifier.fillMaxSize(), 700 700 + playerHost = processedHost, 701 701 + playerConfig = VideoPlayerConfig( 702 702 + isSeekBarVisible = true, 703 703 + isDurationVisible = true, 704 704 + isFastForwardBackwardEnabled = false, 705 705 + isMuteControlEnabled = false, 706 706 + isSpeedControlEnabled = false, 707 707 + isScreenLockEnabled = false, 708 708 + isScreenResizeEnabled = false, 709 709 + isFullScreenEnabled = false, 710 710 + isPauseResumeEnabled = true, 711 711 + ) 712 712 + ) 713 713 + } ?: Text( 714 714 + "No 
video produced", 715 715 + modifier = Modifier.align(Alignment.Center) 716 716 + ) 717 717 + } 718 718 + } 379 719 } 380 720 } 381 721 }

sample/iosApp/FastViTT8F16.mlpackage/Data/com.apple.CoreML/model.mlmodel

Reviewed

This is a binary file and will not be displayed.

sample/iosApp/FastViTT8F16.mlpackage/Data/com.apple.CoreML/weights/weight.bin

Reviewed

This is a binary file and will not be displayed.

-18

sample/iosApp/FastViTT8F16.mlpackage/Manifest.json

Reviewed

··· 1 1 - { 2 2 - "fileFormatVersion": "1.0.0", 3 3 - "itemInfoEntries": { 4 4 - "76187EC5-87E5-4263-B6E2-1CF5E747A0EE": { 5 5 - "author": "com.apple.CoreML", 6 6 - "description": "CoreML Model Weights", 7 7 - "name": "weights", 8 8 - "path": "com.apple.CoreML/weights" 9 9 - }, 10 10 - "D3756FF7-6CCB-4582-AB58-B91896E60AE4": { 11 11 - "author": "com.apple.CoreML", 12 12 - "description": "CoreML Model Specification", 13 13 - "name": "model.mlmodel", 14 14 - "path": "com.apple.CoreML/model.mlmodel" 15 15 - } 16 16 - }, 17 17 - "rootModelIdentifier": "D3756FF7-6CCB-4582-AB58-B91896E60AE4" 18 18 - }

+2 -6

sample/iosApp/iosApp.xcodeproj/project.pbxproj

Reviewed

··· 7 7 objects = { 8 8 9 9 /* Begin PBXBuildFile section */ 10 10 - 438D2B632DF4C5AC00625680 /* FastViTT8F16.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 438D2B622DF4C5AC00625680 /* FastViTT8F16.mlpackage */; }; 11 10 A93A953B29CC810C00F8E227 /* iosApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = A93A953A29CC810C00F8E227 /* iosApp.swift */; }; 12 11 A93A953F29CC810D00F8E227 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = A93A953E29CC810D00F8E227 /* Assets.xcassets */; }; 13 12 A93A954229CC810D00F8E227 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = A93A954129CC810D00F8E227 /* Preview Assets.xcassets */; }; 14 13 /* End PBXBuildFile section */ 15 14 16 15 /* Begin PBXFileReference section */ 17 17 - 438D2B622DF4C5AC00625680 /* FastViTT8F16.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = FastViTT8F16.mlpackage; sourceTree = "<group>"; }; 18 16 A93A953729CC810C00F8E227 /* PoseDetection.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PoseDetection.app; sourceTree = BUILT_PRODUCTS_DIR; }; 19 17 A93A953A29CC810C00F8E227 /* iosApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = iosApp.swift; sourceTree = "<group>"; }; 20 18 A93A953E29CC810D00F8E227 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; }; ··· 75 73 C4127409AE3703430489E7BC /* Frameworks */ = { 76 74 isa = PBXGroup; 77 75 children = ( 78 78 - 438D2B622DF4C5AC00625680 /* FastViTT8F16.mlpackage */, 79 76 ); 80 77 name = Frameworks; 81 78 sourceTree = "<group>"; ··· 175 172 buildActionMask = 2147483647; 176 173 files = ( 177 174 A93A953B29CC810C00F8E227 /* iosApp.swift in Sources */, 178 178 - 438D2B632DF4C5AC00625680 /* FastViTT8F16.mlpackage in Sources */, 179 175 ); 180 176 runOnlyForDeploymentPostprocessing = 0; 181 177 }; ··· 306 302 
CODE_SIGN_STYLE = Automatic; 307 303 CURRENT_PROJECT_VERSION = 1; 308 304 DEVELOPMENT_ASSET_PATHS = "\"iosApp/Preview Content\""; 309 309 - DEVELOPMENT_TEAM = FAGG2XS28P; 305 305 + DEVELOPMENT_TEAM = 6H9FHG23L3; 310 306 ENABLE_PREVIEWS = YES; 311 307 GENERATE_INFOPLIST_FILE = YES; 312 308 INFOPLIST_FILE = iosApp/Info.plist; ··· 332 328 CODE_SIGN_STYLE = Automatic; 333 329 CURRENT_PROJECT_VERSION = 1; 334 330 DEVELOPMENT_ASSET_PATHS = "\"iosApp/Preview Content\""; 335 335 - DEVELOPMENT_TEAM = FAGG2XS28P; 331 331 + DEVELOPMENT_TEAM = 6H9FHG23L3; 336 332 ENABLE_PREVIEWS = YES; 337 333 GENERATE_INFOPLIST_FILE = YES; 338 334 INFOPLIST_FILE = iosApp/Info.plist;

sample/iosApp/iosApp/models/YOLOv3FP16.mlmodel

Reviewed

This is a binary file and will not be displayed.