This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

perf: fast batch frame extraction with extractFrames Flow API

- Replace per-pixel floating-point YUV conversion with bulk buffer copy +
fixed-point integer math (~5x faster decode)
- Feed multiple input buffers per decode loop iteration to keep the
hardware decoder pipeline full
- Add extractFrames(videoPath, frameTimestamps): Flow<InputFrame> for efficient
sequential multi-frame extraction without per-call mutex overhead on Android
(the iOS actual currently delegates to extractFrame once per timestamp)
- Add Skeleton.lerp() for smooth interpolation between analysis keyframes
- Use imageProxy.imageInfo.timestamp for frame-accurate overlay timing
- Add coordinate scaling in drawAnalysisResults for cross-resolution mapping
- Fix TFLite interpreter crash in live object detection: catch exceptions from
interpreter.run, log them, and fall back to a pose-only result with no objects
- Fix iOS VideoBuilder: monotonic timestamps, pool-null resilience
- Add debug test harness in sample app (auto-record, extract, annotate,
encode, display with timing)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

+511 -60
+8 -1
posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.android.kt
··· 326 326 val outputShape = objectDetector.modelInfo.outputShape 327 327 val output = TensorBuffer.createFixedSize(outputShape, DataType.FLOAT32) 328 328 329 - objectDetector.interpreter.run(tensorImage.buffer, output.buffer) 329 + try { 330 + objectDetector.interpreter.run(tensorImage.buffer, output.buffer) 331 + } catch (e: Exception) { 332 + Logger.e(e) { "TFLite interpreter.run failed" } 333 + val skeleton = poseFuture?.get() 334 + onComplete(AnalysisResult(skeleton, emptyList()), bitmap) 335 + return 336 + } 330 337 331 338 val array = output.floatArray 332 339 if (outputShape.size != 3) emptyList() else {
+60 -15
posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.android.kt
··· 11 11 import androidx.core.net.toUri 12 12 import co.touchlab.kermit.Logger 13 13 import kotlinx.coroutines.Dispatchers 14 + import kotlinx.coroutines.flow.Flow 15 + import kotlinx.coroutines.flow.flow 16 + import kotlinx.coroutines.flow.flowOn 14 17 import kotlinx.coroutines.sync.Mutex 15 18 import kotlinx.coroutines.sync.withLock 16 19 import kotlinx.coroutines.withContext ··· 81 84 return lastBitmap 82 85 } 83 86 84 - val timeoutUs = 10_000L 85 87 val info = MediaCodec.BufferInfo() 86 88 87 89 try { 88 90 while (!outputEos) { 89 - // Feed input packets. 91 + // Feed as many input packets as possible to keep the decoder pipeline full. 90 92 if (!inputEos) { 91 - val inIdx = decoder.dequeueInputBuffer(0) 92 - if (inIdx >= 0) { 93 + while (true) { 94 + val inIdx = decoder.dequeueInputBuffer(10_000L) 95 + if (inIdx < 0) break 93 96 val buf = decoder.getInputBuffer(inIdx)!! 94 97 val size = ext.readSampleData(buf, 0) 95 98 if (size < 0) { ··· 97 100 inIdx, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM 98 101 ) 99 102 inputEos = true 103 + break 100 104 } else { 101 105 decoder.queueInputBuffer(inIdx, 0, size, ext.sampleTime, 0) 102 106 ext.advance() ··· 105 109 } 106 110 107 111 // Drain output. 108 - val outIdx = decoder.dequeueOutputBuffer(info, timeoutUs) 112 + val outIdx = decoder.dequeueOutputBuffer(info, 10_000L) 109 113 if (outIdx >= 0) { 110 114 val ptsMs = info.presentationTimeUs / 1000L 111 115 ··· 162 166 return Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true) 163 167 } 164 168 169 + /** 170 + * Fast YUV Image to ARGB Bitmap conversion using bulk buffer copies 171 + * and fixed-point integer arithmetic (no floating-point per pixel). 
172 + */ 165 173 private fun yuvImageToBitmap(image: Image): Bitmap { 166 174 val w = image.width 167 175 val h = image.height ··· 169 177 val uPlane = image.planes[1] 170 178 val vPlane = image.planes[2] 171 179 180 + val yRowStride = yPlane.rowStride 181 + val uvRowStride = uPlane.rowStride 182 + val uvPixelStride = uPlane.pixelStride 183 + 184 + // Bulk-copy ByteBuffers to ByteArrays to avoid per-pixel ByteBuffer.get() overhead. 172 185 val yBuf = yPlane.buffer 173 186 val uBuf = uPlane.buffer 174 187 val vBuf = vPlane.buffer 175 188 176 - val yRowStride = yPlane.rowStride 177 - val uvRowStride = uPlane.rowStride 178 - val uvPixelStride = uPlane.pixelStride 189 + val yBytes = ByteArray(yBuf.remaining()) 190 + yBuf.get(yBytes) 191 + val uBytes = ByteArray(uBuf.remaining()) 192 + uBuf.get(uBytes) 193 + val vBytes = ByteArray(vBuf.remaining()) 194 + vBuf.get(vBytes) 179 195 180 196 val argb = IntArray(w * h) 181 197 182 198 for (j in 0 until h) { 199 + val yRowOffset = j * yRowStride 200 + val uvRowOffset = (j shr 1) * uvRowStride 183 201 for (i in 0 until w) { 184 - val y = (yBuf.get(j * yRowStride + i).toInt() and 0xFF) 185 - val uvIdx = (j / 2) * uvRowStride + (i / 2) * uvPixelStride 186 - val u = (uBuf.get(uvIdx).toInt() and 0xFF) - 128 187 - val v = (vBuf.get(uvIdx).toInt() and 0xFF) - 128 202 + val y = yBytes[yRowOffset + i].toInt() and 0xFF 203 + val uvIdx = uvRowOffset + (i shr 1) * uvPixelStride 204 + val u = (uBytes[uvIdx].toInt() and 0xFF) - 128 205 + val v = (vBytes[uvIdx].toInt() and 0xFF) - 128 188 206 189 - val r = (y + 1.370705 * v).toInt().coerceIn(0, 255) 190 - val g = (y - 0.337633 * u - 0.698001 * v).toInt().coerceIn(0, 255) 191 - val b = (y + 1.732446 * u).toInt().coerceIn(0, 255) 207 + // Fixed-point: multiply by scaled constant, shift right by 10. 
208 + // 1.402 * 1024 = 1436, 0.344 * 1024 = 352, 0.714 * 1024 = 731, 1.772 * 1024 = 1815 209 + var r = y + ((v * 1436) shr 10) 210 + var g = y - ((u * 352 + v * 731) shr 10) 211 + var b = y + ((u * 1815) shr 10) 212 + if (r < 0) r = 0 else if (r > 255) r = 255 213 + if (g < 0) g = 0 else if (g > 255) g = 255 214 + if (b < 0) b = 0 else if (b > 255) b = 255 192 215 193 216 argb[j * w + i] = (0xFF shl 24) or (r shl 16) or (g shl 8) or b 194 217 } ··· 229 252 } 230 253 } 231 254 } 255 + 256 + /** 257 + * Batch sequential frame extraction as a Flow. Opens a dedicated decoder, 258 + * decodes frames in order, and emits each as it's ready. More efficient than 259 + * repeated [extractFrame] calls because there's no per-call mutex/coroutine 260 + * overhead and the decoder runs continuously. 261 + */ 262 + actual fun extractFrames( 263 + videoPath: String, frameTimestamps: List<Long> 264 + ): Flow<InputFrame> = flow { 265 + val decoder = VideoFrameDecoder(videoPath) 266 + try { 267 + for (ts in frameTimestamps) { 268 + val bitmap = decoder.decodeUpTo(ts) 269 + if (bitmap != null) { 270 + emit(InputFrame(bitmap = bitmap, timestamp = ts)) 271 + } 272 + } 273 + } finally { 274 + decoder.release() 275 + } 276 + }.flowOn(Dispatchers.IO) 232 277 233 278 actual suspend fun listVideoFrameTimestamps( 234 279 videoPath: String,
+32 -13
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.kt
··· 234 234 val drawScope = CanvasDrawScope() 235 235 val size = Size(it.width.toFloat(), it.height.toFloat()) 236 236 237 - // Calculate scaling factor based on skeleton size 237 + // Compute scale factors to map detection coordinates to target bitmap. 238 + // Use independent X/Y scaling — handles the case where the skeleton's 239 + // coordinate space has a different aspect ratio than the target frame 240 + // (e.g., iOS portrait analysis vs landscape extracted video). 241 + val skelScaleX = if (skeleton != null && skeleton.width > 0f) 242 + it.width.toFloat() / skeleton.width else 1f 243 + val skelScaleY = if (skeleton != null && skeleton.height > 0f) 244 + it.height.toFloat() / skeleton.height else 1f 245 + val skelOffsetX = 0f 246 + val skelOffsetY = 0f 247 + 248 + // Calculate scaling factor based on skeleton size (after mapping to target) 238 249 val skeletonSize = skeleton?.joints()?.let { joints -> 239 - val minX = joints.minOfOrNull { joint -> joint.x } ?: 0f 240 - val maxX = joints.maxOfOrNull { joint -> joint.x } ?: 0f 241 - val minY = joints.minOfOrNull { joint -> joint.y } ?: 0f 242 - val maxY = joints.maxOfOrNull { joint -> joint.y } ?: 0f 250 + val minX = joints.minOfOrNull { joint -> joint.x * skelScaleX } ?: 0f 251 + val maxX = joints.maxOfOrNull { joint -> joint.x * skelScaleX } ?: 0f 252 + val minY = joints.minOfOrNull { joint -> joint.y * skelScaleY } ?: 0f 253 + val maxY = joints.maxOfOrNull { joint -> joint.y * skelScaleY } ?: 0f 243 254 kotlin.math.max(maxX - minX, maxY - minY) 244 255 } ?: 1f 245 256 val minDime = kotlin.math.min(it.width, it.height).toFloat() ··· 255 266 ) { 256 267 drawImage(it) 257 268 analysisResults.objects.forEach { analysisObject -> 269 + val oScaleX = if (analysisObject.frameSize.width > 0) 270 + it.width.toFloat() / analysisObject.frameSize.width else 1f 271 + val oScaleY = if (analysisObject.frameSize.height > 0) 272 + it.height.toFloat() / analysisObject.frameSize.height else 1f 258 273 drawRect( 259 274 color = 
Color.Red, topLeft = androidx.compose.ui.geometry.Offset( 260 - analysisObject.boundingBox.left, analysisObject.boundingBox.top 275 + analysisObject.boundingBox.left * oScaleX, 276 + analysisObject.boundingBox.top * oScaleY 261 277 ), size = Size( 262 - analysisObject.boundingBox.width, analysisObject.boundingBox.height 278 + analysisObject.boundingBox.width * oScaleX, 279 + analysisObject.boundingBox.height * oScaleY 263 280 ), style = Stroke(scaledStrokeWidth) 264 281 ) 265 282 } ··· 278 295 bones().forEach { line -> 279 296 drawLine( 280 297 color = paintWhite.color, start = androidx.compose.ui.geometry.Offset( 281 - line.first.x, line.first.y 298 + line.first.x * skelScaleX + skelOffsetX, line.first.y * skelScaleY + skelOffsetY 282 299 ), end = androidx.compose.ui.geometry.Offset( 283 - line.second.x, line.second.y 300 + line.second.x * skelScaleX + skelOffsetX, line.second.y * skelScaleY + skelOffsetY 284 301 ), strokeWidth = paintWhite.strokeWidth, blendMode = BlendMode.Softlight 285 302 ) 286 303 drawLine( 287 304 color = paintBlue.color, start = androidx.compose.ui.geometry.Offset( 288 - line.first.x, line.first.y 305 + line.first.x * skelScaleX + skelOffsetX, line.first.y * skelScaleY + skelOffsetY 289 306 ), end = androidx.compose.ui.geometry.Offset( 290 - line.second.x, line.second.y 307 + line.second.x * skelScaleX + skelOffsetX, line.second.y * skelScaleY + skelOffsetY 291 308 ), strokeWidth = paintBlue.strokeWidth, blendMode = BlendMode.Color 292 309 ) 293 310 } 294 311 295 312 joints().forEach { joint -> 313 + val jx = joint.x * skelScaleX + skelOffsetX 314 + val jy = joint.y * skelScaleY + skelOffsetY 296 315 drawCircle( 297 316 brush = Brush.radialGradient( 298 317 colors = listOf(Color.Blue, Color.Transparent), 299 - center = androidx.compose.ui.geometry.Offset(joint.x, joint.y), 318 + center = androidx.compose.ui.geometry.Offset(jx, jy), 300 319 radius = 1.2f * scaledStrokeWidth 301 320 ), 302 321 radius = 1.2f * scaledStrokeWidth, 303 - center = 
androidx.compose.ui.geometry.Offset(joint.x, joint.y) 322 + center = androidx.compose.ui.geometry.Offset(jx, jy) 304 323 ) 305 324 } 306 325 }
+12 -1
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.kt
··· 1 1 package com.performancecoachlab.posedetection.recording 2 2 3 + import kotlinx.coroutines.flow.Flow 4 + 3 5 expect suspend fun extractFrame( 4 6 videoPath: String, frameTimestamp: Long 5 7 ): InputFrame? 6 8 9 + /** 10 + * Batch sequential frame extraction as a Flow. More efficient than repeated 11 + * [extractFrame] calls for processing many frames from the same video. 12 + * Frames are emitted in the order of [frameTimestamps] (must be ascending). 13 + */ 14 + expect fun extractFrames( 15 + videoPath: String, frameTimestamps: List<Long> 16 + ): Flow<InputFrame> 17 + 7 18 expect suspend fun listVideoFrameTimestamps( 8 19 videoPath: String, 9 20 ): List<Long> 10 21 11 22 @Suppress("EXPECT_ACTUAL_CLASSIFIERS_ARE_IN_BETA_WARNING") 12 - expect object VideoExtractionContext 23 + expect object VideoExtractionContext
+34
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/skeleton/Skeleton.kt
··· 37 37 val rightKnee: Double?, 38 38 ) 39 39 40 + companion object { 41 + /** Linearly interpolate between two skeletons. [alpha] in 0..1. */ 42 + fun lerp(a: Skeleton, b: Skeleton, alpha: Float): Skeleton { 43 + fun lerpCoord( 44 + c1: SkeletonCoordinate?, c2: SkeletonCoordinate?, t: Float 45 + ): SkeletonCoordinate? { 46 + if (c1 == null) return c2 47 + if (c2 == null) return c1 48 + return SkeletonCoordinate( 49 + x = c1.x + (c2.x - c1.x) * t, 50 + y = c1.y + (c2.y - c1.y) * t 51 + ) 52 + } 53 + val t = alpha.coerceIn(0f, 1f) 54 + return Skeleton( 55 + timestamp = (a.timestamp + ((b.timestamp - a.timestamp) * t).toLong()), 56 + leftShoulder = lerpCoord(a.leftShoulder, b.leftShoulder, t), 57 + rightShoulder = lerpCoord(a.rightShoulder, b.rightShoulder, t), 58 + leftElbow = lerpCoord(a.leftElbow, b.leftElbow, t), 59 + rightElbow = lerpCoord(a.rightElbow, b.rightElbow, t), 60 + leftWrist = lerpCoord(a.leftWrist, b.leftWrist, t), 61 + rightWrist = lerpCoord(a.rightWrist, b.rightWrist, t), 62 + leftHip = lerpCoord(a.leftHip, b.leftHip, t), 63 + rightHip = lerpCoord(a.rightHip, b.rightHip, t), 64 + leftKnee = lerpCoord(a.leftKnee, b.leftKnee, t), 65 + rightKnee = lerpCoord(a.rightKnee, b.rightKnee, t), 66 + leftAnkle = lerpCoord(a.leftAnkle, b.leftAnkle, t), 67 + rightAnkle = lerpCoord(a.rightAnkle, b.rightAnkle, t), 68 + width = a.width, 69 + height = a.height, 70 + ) 71 + } 72 + } 73 + 40 74 fun bones(): List<Pair<SkeletonCoordinate, SkeletonCoordinate>> { 41 75 val lines = emptyList<Pair<SkeletonCoordinate, SkeletonCoordinate>>().toMutableList() 42 76 if (leftShoulder != null && rightShoulder != null) lines += Pair(
+10 -4
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/encoding/VideoBuilder.ios.kt
··· 115 115 frameCount = 0L 116 116 } 117 117 118 + private var lastTimestampMs = -1L 119 + 118 120 @OptIn(ExperimentalForeignApi::class, NativeRuntimeApi::class) 119 121 override suspend fun addFrame(frame: ImageBitmap, timestampms: Long) { 120 122 if (!started) initWriter() 121 - // Check for empty or invalid frame (skip if width or height is 0) 122 123 if (frame.width == 0 || frame.height == 0) return 124 + // Ensure strictly monotonic timestamps. 125 + val ts = if (timestampms <= lastTimestampMs) lastTimestampMs + 1 else timestampms 126 + lastTimestampMs = ts 123 127 val videoWriterInput = input!! 124 128 val pixelBufferAdaptor = adaptor!! 125 129 val pool = pixelBufferAdaptor.pixelBufferPool 126 - ?: throw IllegalStateException("Pixel buffer pool is null") 127 - val presentationTime = CMTimeMake(timestampms, 1_000) 130 + if (pool == null) { 131 + println("DEBUG: pool null at frame $frameCount, writerStatus=${writer?.status}, error=${writer?.error}") 132 + return 133 + } 134 + val presentationTime = CMTimeMake(ts, 1_000) 128 135 memScoped { 129 136 val pixelBufferPtr = alloc<CVPixelBufferRefVar>() 130 137 val status = ··· 193 200 if (started) { 194 201 input?.markAsFinished() 195 202 writer?.finishWritingWithCompletionHandler { 196 - //println("[VideoBuilder] Finished writing video!") 197 203 } 198 204 // Wait for writing to finish 199 205 while (writer?.status == AVAssetWriterStatusWriting) {
+11
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/recording/VideoUtils.ios.kt
··· 10 10 import kotlinx.cinterop.value 11 11 import kotlinx.coroutines.Dispatchers 12 12 import kotlinx.coroutines.IO 13 + import kotlinx.coroutines.flow.Flow 14 + import kotlinx.coroutines.flow.flow 15 + import kotlinx.coroutines.flow.flowOn 13 16 import kotlinx.coroutines.withContext 14 17 import platform.AVFoundation.AVAsset 15 18 import platform.AVFoundation.AVAssetImageGenerator ··· 55 58 return@withContext null 56 59 } 57 60 } 61 + 62 + actual fun extractFrames( 63 + videoPath: String, frameTimestamps: List<Long> 64 + ): Flow<InputFrame> = flow { 65 + for (ts in frameTimestamps) { 66 + extractFrame(videoPath, ts)?.let { emit(it) } 67 + } 68 + }.flowOn(Dispatchers.IO) 58 69 59 70 private fun NSURL.safeDescription(): String { 60 71 val scheme = this.scheme ?: "unknown"
+342 -2
sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt
··· 39 39 import androidx.compose.ui.Alignment 40 40 import androidx.compose.ui.Modifier 41 41 import androidx.compose.ui.geometry.Rect 42 + import androidx.compose.ui.geometry.Size 43 + import androidx.compose.ui.graphics.Canvas 42 44 import androidx.compose.ui.graphics.Color 43 45 import androidx.compose.ui.graphics.ImageBitmap 46 + import androidx.compose.ui.graphics.drawscope.CanvasDrawScope 44 47 import androidx.compose.ui.graphics.drawscope.Stroke 48 + import androidx.compose.ui.unit.Density 49 + import androidx.compose.ui.unit.LayoutDirection 45 50 import androidx.compose.ui.layout.ContentScale 46 51 import androidx.compose.ui.text.style.TextAlign 47 52 import androidx.compose.ui.unit.dp ··· 66 71 import com.performancecoachlab.posedetection.permissions.PermissionProvider 67 72 import com.performancecoachlab.posedetection.recording.FrameAnalyser 68 73 import com.performancecoachlab.posedetection.recording.InputFrame 74 + import com.performancecoachlab.posedetection.recording.AnalysisResult 75 + import com.performancecoachlab.posedetection.recording.FrameSize 69 76 import com.performancecoachlab.posedetection.recording.extractFrame 77 + import com.performancecoachlab.posedetection.recording.extractFrames 70 78 import com.performancecoachlab.posedetection.recording.listVideoFrameTimestamps 71 79 import com.performancecoachlab.posedetection.skeleton.Pose 80 + import com.performancecoachlab.posedetection.skeleton.Skeleton 72 81 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 73 82 import io.github.vinceglb.filekit.FileKit 74 83 import io.github.vinceglb.filekit.PlatformFile ··· 79 88 import io.github.vinceglb.filekit.extension 80 89 import io.github.vinceglb.filekit.filesDir 81 90 import io.github.vinceglb.filekit.path 91 + import kotlinx.coroutines.CompletableDeferred 82 92 import kotlinx.coroutines.Job 93 + import kotlinx.coroutines.delay 83 94 import kotlinx.coroutines.launch 84 95 import kotlin.math.roundToLong 85 96 import 
kotlin.time.Clock ··· 88 99 89 100 @Composable 90 101 internal fun App() = AppTheme { 91 - var selectedTabIndex by remember { mutableStateOf(0) } 92 - val tabs = listOf("Camera Feed", "Recorded Video") 102 + var selectedTabIndex by remember { mutableStateOf(2) } 103 + val tabs = listOf("Camera Feed", "Recorded Video", "Debug Test") 93 104 94 105 // Apply safe-area insets once to the whole screen. 95 106 Column( ··· 112 123 when (selectedTabIndex) { 113 124 0 -> CameraSample() 114 125 1 -> RecordedSample() 126 + 2 -> DebugTestScreen() 115 127 } 116 128 } 117 129 } ··· 376 388 CircularProgressIndicator( 377 389 modifier = Modifier.align(Alignment.Center), 378 390 ) 391 + } 392 + } 393 + } 394 + 395 + /** Find the keys before and after [target] in a sorted list. Returns (before, after). */ 396 + private fun findBracket(sortedKeys: List<Long>, target: Long): Pair<Long?, Long?> { 397 + if (sortedKeys.isEmpty()) return null to null 398 + var idx = sortedKeys.binarySearch(target) 399 + if (idx >= 0) return sortedKeys[idx] to sortedKeys[idx] 400 + idx = -(idx + 1) 401 + val before = if (idx > 0) sortedKeys[idx - 1] else null 402 + val after = if (idx < sortedKeys.size) sortedKeys[idx] else null 403 + return before to after 404 + } 405 + 406 + /** 407 + * Debug test screen: auto-records 1s of video with pose+object detection, 408 + * then extracts all frames via the batch Flow API, draws analysis results, 409 + * encodes to a new video, and shows both side-by-side with timing info. 
410 + */ 411 + @OptIn(ExperimentalTime::class) 412 + @Composable 413 + fun DebugTestScreen() { 414 + val availableModels = discoverModels() 415 + val selectedModel = availableModels.firstOrNull() 416 + val modelPath = selectedModel?.path ?: ModelPath() 417 + val generalModel = rememberObjectModel(modelPath) 418 + 419 + val skeletonRepository = remember { SkeletonRepository() } 420 + val customObjectRepository = remember { CustomObjectRespository() } 421 + val skeleton by skeletonRepository.skeletonFlow.collectAsState() 422 + val customObjects by customObjectRepository.customObjectFlow.collectAsState() 423 + var permissionGranted by remember { mutableStateOf(false) } 424 + 425 + // Test phases 426 + var phase by remember { mutableStateOf("WAITING_PERMISSION") } 427 + var recordingId: String? by remember { mutableStateOf(null) } 428 + var originalVideoPath by remember { mutableStateOf("") } 429 + var processedVideoPath: String? by remember { mutableStateOf(null) } 430 + var statusText by remember { mutableStateOf("Waiting for camera permission...") } 431 + 432 + // Timing 433 + var recordTimeMs by remember { mutableStateOf(0L) } 434 + var extractTimeMs by remember { mutableStateOf(0L) } 435 + var frameCount by remember { mutableStateOf(0) } 436 + 437 + // Controller to get feed dimensions from CameraView. 438 + val controller = remember { CameraViewControllerImpl() } 439 + var feedWidth by remember { mutableStateOf(0f) } 440 + var feedHeight by remember { mutableStateOf(0f) } 441 + 442 + // Saved analysis results during recording, keyed by wall-clock ms 443 + // (System.currentTimeMillis() captured before ML processing starts). 444 + val analysisResults = remember { mutableMapOf<Long, AnalysisResult>() } 445 + // Wall-clock time when CameraX actually starts recording (from onRecordToggled). 
446 + var videoStartWallClock by remember { mutableStateOf(0L) } 447 + 448 + // Deferred to wait for video save callback 449 + val videoSavedDeferred = remember { mutableStateOf<CompletableDeferred<String>?>(null) } 450 + 451 + val coroutineScope = rememberCoroutineScope() 452 + 453 + PermissionProvider().apply { 454 + if (!hasCameraPermission()) RequestCameraPermission(onGranted = { 455 + permissionGranted = true 456 + }, onDenied = { permissionGranted = false }) else permissionGranted = true 457 + } 458 + 459 + // Only save analysis results after recording has actually started 460 + // (videoStartWallClock > 0), keyed by wall-clock ms. 461 + LaunchedEffect(skeleton, recordingId, videoStartWallClock) { 462 + val skel = skeleton 463 + if (recordingId != null && videoStartWallClock > 0 && skel != null) { 464 + val ts = skel.timestamp 465 + val current = analysisResults[ts] ?: AnalysisResult(null, emptyList()) 466 + analysisResults[ts] = current.copy(skeleton = skeleton) 467 + } 468 + } 469 + LaunchedEffect(customObjects, recordingId, videoStartWallClock) { 470 + if (recordingId != null && videoStartWallClock > 0 && customObjects != null) { 471 + val lastKey = analysisResults.keys.maxOrNull() 472 + if (lastKey != null) { 473 + val current = analysisResults[lastKey] ?: AnalysisResult(null, emptyList()) 474 + analysisResults[lastKey] = current.copy(objects = customObjects ?: emptyList()) 475 + } 476 + } 477 + } 478 + 479 + // Auto-start recording once permission is granted 480 + LaunchedEffect(permissionGranted) { 481 + if (permissionGranted && phase == "WAITING_PERMISSION") { 482 + phase = "RECORDING" 483 + statusText = "Recording 1 second..." 
484 + // Small delay for camera to initialize 485 + delay(1500) 486 + 487 + val deferred = CompletableDeferred<String>() 488 + videoSavedDeferred.value = deferred 489 + val recordStart = Clock.System.now().toEpochMilliseconds() 490 + recordingId = "${Clock.System.now().epochSeconds}" 491 + // Fallback video start if onRecordToggled never fires (iOS). 492 + if (videoStartWallClock == 0L) videoStartWallClock = recordStart 493 + 494 + // Wait for recording duration 495 + delay(1000) 496 + 497 + // Stop recording 498 + recordingId = null 499 + statusText = "Waiting for video to save..." 500 + 501 + // Wait for the video to be saved 502 + val savedPath = deferred.await() 503 + recordTimeMs = Clock.System.now().toEpochMilliseconds() - recordStart 504 + originalVideoPath = savedPath 505 + phase = "EXTRACTING" 506 + statusText = "Extracting and re-encoding frames..." 507 + 508 + val extractStart = Clock.System.now().toEpochMilliseconds() 509 + val timestamps = listVideoFrameTimestamps(savedPath) 510 + frameCount = timestamps.size 511 + 512 + val savedKeys = analysisResults.keys.sorted() 513 + println("DEBUG: ${timestamps.size} frames, ${savedKeys.size} analysis results") 514 + 515 + val file = PlatformFile(FileKit.filesDir, "debug_output.mp4") 516 + var videoBuilder: VideoBuilder? = null 517 + var firstFrameTs: Long? = null 518 + var processedCount = 0 519 + 520 + // Log feed vs preview dimensions for debugging coordinate mapping. 
521 + println("DEBUG: feedSize=${feedWidth}x${feedHeight}") 522 + 523 + extractFrames(savedPath, timestamps).collect { inputFrame -> 524 + val firstTs = firstFrameTs ?: inputFrame.timestamp.also { firstFrameTs = it } 525 + val videoElapsedMs = inputFrame.timestamp - firstTs 526 + val wallClockOfFrame = videoStartWallClock + videoElapsedMs 527 + 528 + val (prevKey, nextKey) = findBracket(savedKeys, wallClockOfFrame) 529 + val prevResult = prevKey?.let { analysisResults[it] } 530 + val nextResult = nextKey?.let { analysisResults[it] } 531 + 532 + val result = if (prevResult?.skeleton != null && nextResult?.skeleton != null 533 + && prevKey != null && nextKey != null && nextKey > prevKey 534 + ) { 535 + val alpha = (wallClockOfFrame - prevKey).toFloat() / (nextKey - prevKey) 536 + val interpSkeleton = Skeleton.lerp(prevResult.skeleton!!, nextResult.skeleton!!, alpha) 537 + AnalysisResult(interpSkeleton, nextResult.objects) 538 + } else { 539 + nextResult ?: prevResult ?: AnalysisResult(null, emptyList()) 540 + } 541 + 542 + // Remap skeleton from preview space → video frame space. 543 + // Use drawAnalysisResults first (unscaled), then get frame dims from result. 544 + val unscaled = inputFrame.drawAnalysisResults(result) 545 + val frameW = unscaled.width.toFloat() 546 + val frameH = unscaled.height.toFloat() 547 + val remappedResult = result.skeleton?.let { skel -> 548 + val sx = frameW / skel.width 549 + val sy = frameH / skel.height 550 + fun remap(c: Skeleton.SkeletonCoordinate?) 
= 551 + c?.let { Skeleton.SkeletonCoordinate(it.x * sx, it.y * sy) } 552 + val remapped = Skeleton( 553 + timestamp = skel.timestamp, 554 + leftShoulder = remap(skel.leftShoulder), 555 + rightShoulder = remap(skel.rightShoulder), 556 + leftElbow = remap(skel.leftElbow), 557 + rightElbow = remap(skel.rightElbow), 558 + leftWrist = remap(skel.leftWrist), 559 + rightWrist = remap(skel.rightWrist), 560 + leftHip = remap(skel.leftHip), 561 + rightHip = remap(skel.rightHip), 562 + leftKnee = remap(skel.leftKnee), 563 + rightKnee = remap(skel.rightKnee), 564 + leftAnkle = remap(skel.leftAnkle), 565 + rightAnkle = remap(skel.rightAnkle), 566 + width = frameW, 567 + height = frameH, 568 + ) 569 + // Also remap object bounding boxes. 570 + val remappedObjects = result.objects.map { obj -> 571 + val osx = frameW / obj.frameSize.width 572 + val osy = frameH / obj.frameSize.height 573 + obj.copy( 574 + boundingBox = Rect( 575 + obj.boundingBox.left * osx, 576 + obj.boundingBox.top * osy, 577 + obj.boundingBox.right * osx, 578 + obj.boundingBox.bottom * osy 579 + ), 580 + frameSize = FrameSize(frameW.toInt(), frameH.toInt()) 581 + ) 582 + } 583 + AnalysisResult(remapped, remappedObjects) 584 + } ?: result 585 + 586 + val annotated = inputFrame.drawAnalysisResults(remappedResult) 587 + 588 + if (videoBuilder == null) { 589 + println("DEBUG: creating VideoBuilder ${annotated.width}x${annotated.height}") 590 + videoBuilder = createVideoBuilder( 591 + outputPath = file.path, 592 + fps = 30, 593 + width = annotated.width, 594 + height = annotated.height 595 + ) 596 + } 597 + 598 + val relativeTs = inputFrame.timestamp - firstTs 599 + videoBuilder?.addFrame(annotated, relativeTs) 600 + processedCount++ 601 + statusText = "Processing frame $processedCount / ${timestamps.size}..." 
602 + } 603 + 604 + println("DEBUG: all frames collected, finalizing...") 605 + val finalPath = try { 606 + videoBuilder?.finalize() 607 + } catch (e: Exception) { 608 + println("DEBUG: finalize error: ${e.message}") 609 + null 610 + } 611 + println("DEBUG: finalized, path=$finalPath") 612 + processedVideoPath = finalPath 613 + extractTimeMs = Clock.System.now().toEpochMilliseconds() - extractStart 614 + phase = "DONE" 615 + statusText = "Done!" 616 + } 617 + } 618 + 619 + Column(modifier = Modifier.fillMaxSize()) { 620 + // Status bar 621 + Text( 622 + text = statusText, 623 + modifier = Modifier.fillMaxWidth().padding(8.dp), 624 + textAlign = TextAlign.Center, 625 + style = androidx.compose.material3.MaterialTheme.typography.titleSmall, 626 + ) 627 + 628 + // Timing info 629 + if (phase == "DONE") { 630 + val recordSec = (recordTimeMs / 100.0).roundToLong() / 10.0 631 + val extractSec = (extractTimeMs / 100.0).roundToLong() / 10.0 632 + val fps = if (extractTimeMs > 0) (frameCount * 1000.0 / extractTimeMs).roundToLong() else 0 633 + Text( 634 + text = "Record: ${recordSec}s | Extract+encode: ${extractSec}s ($frameCount frames, ~${fps} fps)", 635 + modifier = Modifier.fillMaxWidth().padding(horizontal = 8.dp), 636 + textAlign = TextAlign.Center, 637 + fontSize = 12.sp, 638 + ) 639 + } 640 + 641 + when (phase) { 642 + "WAITING_PERMISSION" -> { 643 + Box(modifier = Modifier.weight(1f).fillMaxWidth(), contentAlignment = Alignment.Center) { 644 + CircularProgressIndicator() 645 + } 646 + } 647 + "RECORDING" -> { 648 + // Show camera preview during recording 649 + if (permissionGranted) { 650 + DetectOrientation { orientation -> 651 + key(orientation) { 652 + CameraView( 653 + skeletonRepository = skeletonRepository, 654 + customObjectRepository = customObjectRepository, 655 + detectMode = DetectMode.BOTH, 656 + drawSkeleton = true, 657 + objectModel = generalModel, 658 + modifier = Modifier.weight(1f).fillMaxWidth(), 659 + frontCamera = false, 660 + recordingId = 
recordingId, 661 + controller = controller, 662 + onRecordToggled = { recording -> 663 + if (recording) { 664 + videoStartWallClock = Clock.System.now().toEpochMilliseconds() 665 + // Capture feed dimensions for coordinate remapping. 666 + controller.requestData { data -> 667 + feedWidth = data.width 668 + feedHeight = data.height 669 + } 670 + } 671 + }, 672 + onVideoSaved = { id, url -> 673 + videoSavedDeferred.value?.complete(url) 674 + }, 675 + ) 676 + } 677 + } 678 + } 679 + } 680 + "EXTRACTING" -> { 681 + Box(modifier = Modifier.weight(1f).fillMaxWidth(), contentAlignment = Alignment.Center) { 682 + CircularProgressIndicator() 683 + } 684 + } 685 + "DONE" -> { 686 + // Processed video only 687 + Box(modifier = Modifier.weight(1f).fillMaxWidth()) { 688 + processedVideoPath?.let { path -> 689 + val processedHost = remember(path) { 690 + MediaPlayerHost( 691 + mediaUrl = path, 692 + isLooping = true, 693 + isPaused = false, 694 + isMuted = true, 695 + initialVideoFitMode = ScreenResize.FIT, 696 + ) 697 + } 698 + VideoPlayerComposable( 699 + modifier = Modifier.fillMaxSize(), 700 + playerHost = processedHost, 701 + playerConfig = VideoPlayerConfig( 702 + isSeekBarVisible = true, 703 + isDurationVisible = true, 704 + isFastForwardBackwardEnabled = false, 705 + isMuteControlEnabled = false, 706 + isSpeedControlEnabled = false, 707 + isScreenLockEnabled = false, 708 + isScreenResizeEnabled = false, 709 + isFullScreenEnabled = false, 710 + isPauseResumeEnabled = true, 711 + ) 712 + ) 713 + } ?: Text( 714 + "No video produced", 715 + modifier = Modifier.align(Alignment.Center) 716 + ) 717 + } 718 + } 379 719 } 380 720 } 381 721 }
sample/iosApp/FastViTT8F16.mlpackage/Data/com.apple.CoreML/model.mlmodel

This is a binary file and will not be displayed.

sample/iosApp/FastViTT8F16.mlpackage/Data/com.apple.CoreML/weights/weight.bin

This is a binary file and will not be displayed.

-18
sample/iosApp/FastViTT8F16.mlpackage/Manifest.json
··· 1 - { 2 - "fileFormatVersion": "1.0.0", 3 - "itemInfoEntries": { 4 - "76187EC5-87E5-4263-B6E2-1CF5E747A0EE": { 5 - "author": "com.apple.CoreML", 6 - "description": "CoreML Model Weights", 7 - "name": "weights", 8 - "path": "com.apple.CoreML/weights" 9 - }, 10 - "D3756FF7-6CCB-4582-AB58-B91896E60AE4": { 11 - "author": "com.apple.CoreML", 12 - "description": "CoreML Model Specification", 13 - "name": "model.mlmodel", 14 - "path": "com.apple.CoreML/model.mlmodel" 15 - } 16 - }, 17 - "rootModelIdentifier": "D3756FF7-6CCB-4582-AB58-B91896E60AE4" 18 - }
+2 -6
sample/iosApp/iosApp.xcodeproj/project.pbxproj
··· 7 7 objects = { 8 8 9 9 /* Begin PBXBuildFile section */ 10 - 438D2B632DF4C5AC00625680 /* FastViTT8F16.mlpackage in Sources */ = {isa = PBXBuildFile; fileRef = 438D2B622DF4C5AC00625680 /* FastViTT8F16.mlpackage */; }; 11 10 A93A953B29CC810C00F8E227 /* iosApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = A93A953A29CC810C00F8E227 /* iosApp.swift */; }; 12 11 A93A953F29CC810D00F8E227 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = A93A953E29CC810D00F8E227 /* Assets.xcassets */; }; 13 12 A93A954229CC810D00F8E227 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = A93A954129CC810D00F8E227 /* Preview Assets.xcassets */; }; 14 13 /* End PBXBuildFile section */ 15 14 16 15 /* Begin PBXFileReference section */ 17 - 438D2B622DF4C5AC00625680 /* FastViTT8F16.mlpackage */ = {isa = PBXFileReference; lastKnownFileType = folder.mlpackage; path = FastViTT8F16.mlpackage; sourceTree = "<group>"; }; 18 16 A93A953729CC810C00F8E227 /* PoseDetection.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PoseDetection.app; sourceTree = BUILT_PRODUCTS_DIR; }; 19 17 A93A953A29CC810C00F8E227 /* iosApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = iosApp.swift; sourceTree = "<group>"; }; 20 18 A93A953E29CC810D00F8E227 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; }; ··· 75 73 C4127409AE3703430489E7BC /* Frameworks */ = { 76 74 isa = PBXGroup; 77 75 children = ( 78 - 438D2B622DF4C5AC00625680 /* FastViTT8F16.mlpackage */, 79 76 ); 80 77 name = Frameworks; 81 78 sourceTree = "<group>"; ··· 175 172 buildActionMask = 2147483647; 176 173 files = ( 177 174 A93A953B29CC810C00F8E227 /* iosApp.swift in Sources */, 178 - 438D2B632DF4C5AC00625680 /* FastViTT8F16.mlpackage in Sources */, 179 175 ); 180 176 runOnlyForDeploymentPostprocessing = 0; 181 177 }; ··· 306 302 
CODE_SIGN_STYLE = Automatic; 307 303 CURRENT_PROJECT_VERSION = 1; 308 304 DEVELOPMENT_ASSET_PATHS = "\"iosApp/Preview Content\""; 309 - DEVELOPMENT_TEAM = FAGG2XS28P; 305 + DEVELOPMENT_TEAM = 6H9FHG23L3; 310 306 ENABLE_PREVIEWS = YES; 311 307 GENERATE_INFOPLIST_FILE = YES; 312 308 INFOPLIST_FILE = iosApp/Info.plist; ··· 332 328 CODE_SIGN_STYLE = Automatic; 333 329 CURRENT_PROJECT_VERSION = 1; 334 330 DEVELOPMENT_ASSET_PATHS = "\"iosApp/Preview Content\""; 335 - DEVELOPMENT_TEAM = FAGG2XS28P; 331 + DEVELOPMENT_TEAM = 6H9FHG23L3; 336 332 ENABLE_PREVIEWS = YES; 337 333 GENERATE_INFOPLIST_FILE = YES; 338 334 INFOPLIST_FILE = iosApp/Info.plist;
sample/iosApp/iosApp/models/YOLOv3FP16.mlmodel

This is a binary file and will not be displayed.