This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

feat: use custom object detection models

+450 -194
+24 -3
README.md
··· 1 1 A realtime pose detection library for [Android](https://www.android.com/) and [Compose Multiplatform](https://www.jetbrains.com/lp/compose-multiplatform/). 2 2 Android version uses CameraX and Google ML Kit, while iOS version uses AVFoundation with VisionKit and CoreML. 3 + We also support analysing pre-recorded video files. 4 + We now also support adding custom object detection models to the library, allowing you to detect custom objects in your camera feed or video files alongside body poses. 3 5 4 6 ## Quick Start 5 7 ··· 40 42 Create a Skeleton Repository 41 43 ```kotlin 42 44 val skeletonRepository = remember { SkeletonRepository() } 45 + val customObjectRepository = remember { CustomObjectRespository() } 43 46 ``` 44 47 45 48 Initialise the camera feed 46 49 ```kotlin 47 - if (permissionGranted) CameraView(skeletonRepository = skeletonRepository) 50 + if (permissionGranted) { 51 + CameraView( 52 + skeletonRepository = skeletonRepository, 53 + customObjectRepository = customObjectRepository, 54 + ) 55 + } else { 56 + Text("Camera permission not granted") 57 + } 48 58 ``` 49 59 50 60 Create a Pose to detect ··· 87 97 } 88 98 ``` 89 99 100 + Add a custom object detection model 101 + Initialise the custom models for iOS and Android respectively. 102 + For Android you need to add a .tflite model file to your assets folder, then set androidModelPath to the name of the model file, including the .tflite extension. 103 + For iOS you need to add a .mlmodel model file to your Xcode project, then set iosModelPath to the name of the model file without the .mlmodel extension. 104 + ```kotlin 105 + CustomObjectModel.init( 106 + androidModelPath = "4.tflite", 107 + iosModelPath = "YOLOv3FP16" 108 + ) 109 + ``` 110 + Once this is done, you can use the `CustomObjectRepository` to receive detected objects in the camera feed or video frames. 111 + 112 + 90 113 Check out the sample app for a full example of how to use the library. 
91 114 92 115 ## License ··· 102 125 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 103 126 See the License for the specific language governing permissions and 104 127 limitations under the License. 105 - 106 -
+1 -1
posedetection/build.gradle.kts
··· 6 6 7 7 mavenPublishing { 8 8 publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL) 9 - coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "1.5.0") 9 + coordinates("com.performancecoachlab.posedetection", "posedetection-compose", "2.0.0") 10 10 11 11 pom { 12 12 name.set("Pose Detection")
posedetection/src/androidMain/assets/3.tflite sample/composeApp/src/androidMain/assets/4.tflite
+26 -14
posedetection/src/androidMain/kotlin/com.performancecoachlab/posedetection/camera/CameraView.android.kt
··· 43 43 import androidx.camera.core.Preview 44 44 import androidx.camera.core.ImageAnalysis 45 45 import androidx.compose.runtime.rememberCoroutineScope 46 + import androidx.compose.ui.geometry.Rect 47 + import com.performancecoachlab.posedetection.custom.CustomObjectModel 48 + import com.performancecoachlab.posedetection.custom.CustomObjectRespository 49 + import com.performancecoachlab.posedetection.objects.createObjectDetector 50 + import com.performancecoachlab.posedetection.recording.AnalysisObject 51 + import kotlinx.coroutines.delay 46 52 import kotlinx.coroutines.launch 47 53 48 54 @OptIn(ExperimentalGetImage::class) 49 55 @Composable 50 56 actual fun CameraView( 51 57 skeletonRepository: SkeletonRepository, 58 + customObjectRepository: CustomObjectRespository, 52 59 drawSkeleton: Boolean, 53 60 modifier: Modifier, 54 61 frontCamera: Boolean, 55 62 isRecording: Boolean, 56 63 onRecordToggled: (Boolean) -> Unit, 57 - onVideoSaved: (String) -> Unit 64 + onVideoSaved: (String) -> Unit, 58 65 ) { 59 66 var bitmap by remember { mutableStateOf<ImageBitmap?>(null) } 60 67 var skeleton by remember { mutableStateOf(Skeleton(width = 0f, height = 0f)) } ··· 65 72 val lifecycleOwner: LifecycleOwner = LocalLifecycleOwner.current 66 73 val previewView: PreviewView = remember { PreviewView(context) } 67 74 val executor = remember { Executors.newSingleThreadExecutor() } 68 - //val objectDetector = createObjectDetector() 75 + val modelPath = CustomObjectModel.getInstance().androidModelPath 76 + val objectDetector = createObjectDetector(modelPath) 69 77 val scope = rememberCoroutineScope() 70 78 var firstFrameTimestamp: Long? 
= null 71 79 ··· 129 137 val img = InputImage.fromMediaImage( 130 138 image, imageProxy.imageInfo.rotationDegrees 131 139 ) 132 - /*objectDetector.process(img).addOnSuccessListener { 133 - for (detectedObject in it) { 134 - val boundingBox = detectedObject.boundingBox 135 - val trackingId = detectedObject.trackingId 136 - for (label in detectedObject.labels) { 137 - val text = label.text 138 - val index = label.index 139 - val confidence = label.confidence 140 - println("Detected object: $text, Index: $index, Confidence: $confidence, BoundingBox: $boundingBox, TrackingId: $trackingId") 141 - } 140 + objectDetector?.process(img)?.addOnSuccessListener { detectedObjects -> 141 + detectedObjects.map { detectedObject -> 142 + AnalysisObject( 143 + boundingBox = Rect( 144 + left = detectedObject.boundingBox.left.toFloat(), 145 + top = detectedObject.boundingBox.top.toFloat(), 146 + right = detectedObject.boundingBox.right.toFloat(), 147 + bottom = detectedObject.boundingBox.bottom.toFloat() 148 + ), 149 + trackingId = detectedObject.trackingId, 150 + labels = detectedObject.labels.mapNotNull { if(it.confidence>0)it.text else null } 151 + ) 152 + }.also { 153 + customObjectRepository.updateCustomObject(it) 142 154 } 143 - }.addOnFailureListener { e -> 155 + }?.addOnFailureListener { e -> 144 156 println(e) 145 - }*/ 157 + } 146 158 poseDetector.process(img).addOnSuccessListener { pose -> 147 159 skeleton = skeleton(pose, timestamp, img.width, img.height) 148 160 skeletonRepository.updateSkeleton(skeleton.mirror())
+13 -11
posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/objects/createObjectDetector.kt
··· 5 5 import com.google.mlkit.vision.objects.ObjectDetector 6 6 import com.google.mlkit.vision.objects.custom.CustomObjectDetectorOptions 7 7 8 - fun createObjectDetector(): ObjectDetector { 9 - val localModel = LocalModel.Builder() 10 - .setAssetFilePath("mlkit_compatible_model.tflite") 11 - .build() 12 - val customObjectDetectorOptions = 13 - CustomObjectDetectorOptions.Builder(localModel) 14 - .setDetectorMode(CustomObjectDetectorOptions.STREAM_MODE) 15 - .enableClassification() 16 - .setClassificationConfidenceThreshold(0.0f) 17 - .setMaxPerObjectLabelCount(3) 8 + fun createObjectDetector(objectModel: String?): ObjectDetector? { 9 + return objectModel?.let { 10 + val localModel = LocalModel.Builder() 11 + .setAssetFilePath(it) 18 12 .build() 19 - return ObjectDetection.getClient(customObjectDetectorOptions) 13 + val customObjectDetectorOptions = 14 + CustomObjectDetectorOptions.Builder(localModel) 15 + .setDetectorMode(CustomObjectDetectorOptions.STREAM_MODE) 16 + .enableClassification() 17 + .setClassificationConfidenceThreshold(0.0f) 18 + .setMaxPerObjectLabelCount(3) 19 + .build() 20 + ObjectDetection.getClient(customObjectDetectorOptions) 21 + } 20 22 }
+28 -27
posedetection/src/androidMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.android.kt
··· 10 10 import com.performancecoachlab.posedetection.camera.drawAnalysisResults 11 11 import com.performancecoachlab.posedetection.camera.drawSkeleton 12 12 import com.performancecoachlab.posedetection.camera.skeleton 13 + import com.performancecoachlab.posedetection.custom.CustomObjectModel 13 14 import com.performancecoachlab.posedetection.objects.createObjectDetector 14 15 import com.performancecoachlab.posedetection.skeleton.Skeleton 15 16 import kotlinx.coroutines.suspendCancellableCoroutine ··· 29 30 } 30 31 } 31 32 32 - actual class FrameAnalyser { 33 + actual class FrameAnalyser actual constructor() { 34 + private val modelPath = CustomObjectModel.getInstance().androidModelPath 35 + private val objectDetector = createObjectDetector(modelPath) 33 36 private val options = 34 37 PoseDetectorOptions.Builder().setDetectorMode(PoseDetectorOptions.STREAM_MODE).build() 35 38 private val poseDetector = PoseDetection.getClient(options) 36 - private val objectDetector = createObjectDetector() 37 39 actual suspend fun analyseFrame(inputFrame: InputFrame): AnalysisResult = 38 40 suspendCancellableCoroutine { continuation -> 39 41 val img = InputImage.fromBitmap(inputFrame.bitmap, 0) ··· 47 49 continuation.resume(AnalysisResult(poseResult,objectResults?: emptyList())) 48 50 } 49 51 } 50 - 51 - /*objectDetector.process(img).addOnSuccessListener { 52 - objectResults = it.map { detectedObject -> 53 - val boundingBox = detectedObject.boundingBox.let{ bound-> 54 - Rect( 55 - left = bound.left.toFloat(), 56 - top = bound.top.toFloat(), 57 - right = bound.right.toFloat(), 58 - bottom = bound.bottom.toFloat() 52 + objectDetector?.let { detector -> 53 + detector.process(img).addOnSuccessListener { 54 + objectResults = it.map { detectedObject -> 55 + val boundingBox = detectedObject.boundingBox.let{ bound-> 56 + Rect( 57 + left = bound.left.toFloat(), 58 + top = bound.top.toFloat(), 59 + right = bound.right.toFloat(), 60 + bottom = bound.bottom.toFloat() 61 + ) 62 + } 63 + val 
trackingId = detectedObject.trackingId 64 + val labels = detectedObject.labels.map { label -> 65 + label.text 66 + } 67 + AnalysisObject( 68 + boundingBox = boundingBox, 69 + trackingId = trackingId, 70 + labels = labels 59 71 ) 60 72 } 61 - val trackingId = detectedObject.trackingId 62 - val labels = detectedObject.labels.map { label -> 63 - label.text 64 - } 65 - println("Detected object: $labels, bounding box: $boundingBox, trackingId: $trackingId") 66 - AnalysisObject( 67 - boundingBox = boundingBox, 68 - trackingId = trackingId, 69 - labels = labels 70 - ) 73 + tryResume() 74 + }.addOnFailureListener { 75 + println(it.message) 76 + tryResume() 71 77 } 72 - tryResume() 73 - }.addOnFailureListener { 74 - println(it.message) 75 - tryResume() 76 - }*/ 77 - tryResume() 78 + }?:tryResume() 78 79 poseDetector.process(img).addOnSuccessListener { pose -> 79 80 poseResult = skeleton(pose, inputFrame.timestamp, img.width, img.height) 80 81 tryResume()
+2
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.kt
··· 3 3 import androidx.compose.foundation.layout.fillMaxSize 4 4 import androidx.compose.runtime.Composable 5 5 import androidx.compose.ui.Modifier 6 + import com.performancecoachlab.posedetection.custom.CustomObjectRespository 6 7 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 7 8 8 9 @Composable 9 10 expect fun CameraView( 10 11 skeletonRepository: SkeletonRepository, 12 + customObjectRepository: CustomObjectRespository, 11 13 drawSkeleton: Boolean = true, 12 14 modifier: Modifier = Modifier.fillMaxSize(), 13 15 frontCamera: Boolean = true,
+3 -3
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/camera/Utils.kt
··· 134 134 kotlin.math.max(maxX - minX, maxY - minY) 135 135 } ?: 1f 136 136 val scaleFactor = skeletonSize / kotlin.math.min(it.width, it.height) 137 + val baseStrokeWidth = 30f 138 + val scaledStrokeWidth = baseStrokeWidth * scaleFactor 137 139 138 140 drawScope.draw( 139 141 Density(1f), ··· 153 155 analysisObject.boundingBox.width, 154 156 analysisObject.boundingBox.height 155 157 ), 156 - style = Stroke(width = 0.4f) 158 + style = Stroke(scaledStrokeWidth) 157 159 ) 158 160 } 159 161 skeleton?.apply { 160 - val baseStrokeWidth = 30f 161 - val scaledStrokeWidth = baseStrokeWidth * scaleFactor 162 162 163 163 val paintWhite = Paint().apply { 164 164 color = Color.White
+19
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/custom/CustomObjectModel.kt
··· 1 + package com.performancecoachlab.posedetection.custom 2 + 3 + import kotlin.concurrent.Volatile 4 + 5 + class CustomObjectModel private constructor( 6 + val androidModelPath: String?, 7 + val iosModelPath: String? 8 + ) { 9 + companion object { 10 + @Volatile 11 + private var instance = CustomObjectModel(null, null) 12 + 13 + fun init(androidModelPath: String?, iosModelPath: String?) { 14 + instance = CustomObjectModel(androidModelPath, iosModelPath) 15 + } 16 + 17 + fun getInstance(): CustomObjectModel = instance 18 + } 19 + }
+14
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/custom/CustomObjectRespository.kt
··· 1 + package com.performancecoachlab.posedetection.custom 2 + 3 + import com.performancecoachlab.posedetection.recording.AnalysisObject 4 + import kotlinx.coroutines.flow.MutableStateFlow 5 + import kotlinx.coroutines.flow.StateFlow 6 + 7 + class CustomObjectRespository { 8 + private val _customObjectFlow = MutableStateFlow<List<AnalysisObject>?>(null) 9 + val customObjectFlow: StateFlow<List<AnalysisObject>?> get() = _customObjectFlow 10 + 11 + fun updateCustomObject(customObject: List<AnalysisObject>) { 12 + _customObjectFlow.value = customObject 13 + } 14 + }
+1 -1
posedetection/src/commonMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.kt
··· 12 12 val timestamp: Long 13 13 } 14 14 15 - expect class FrameAnalyser(){ 15 + expect class FrameAnalyser() { 16 16 suspend fun analyseFrame(inputFrame: InputFrame): AnalysisResult 17 17 } 18 18
+47 -96
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraEngine.kt
··· 8 8 import androidx.compose.ui.graphics.toComposeImageBitmap 9 9 import androidx.compose.ui.unit.Density 10 10 import androidx.compose.ui.unit.LayoutDirection 11 + import com.performancecoachlab.posedetection.custom.CustomObjectModel 12 + import com.performancecoachlab.posedetection.custom.CustomObjectRespository 13 + import com.performancecoachlab.posedetection.recording.createObjectDetector 11 14 import com.performancecoachlab.posedetection.skeleton.Skeleton 12 15 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 13 16 import kotlinx.cinterop.ExperimentalForeignApi ··· 186 189 cameraController.skeletonRepository = repository 187 190 } 188 191 192 + fun addCustomObjectRepository(repository: CustomObjectRespository) { 193 + cameraController.customObjectRepository = repository 194 + } 195 + 189 196 fun addFrameListener(listener: FrameRepository) { 190 197 cameraController.frameListener = listener 191 198 } ··· 218 225 var cameraPreviewLayer: AVCaptureVideoPreviewLayer? = null 219 226 var isUsingFrontCamera = true 220 227 var skeletonRepository: SkeletonRepository? = null 228 + var customObjectRepository:CustomObjectRespository? = null 221 229 var frameListener: FrameRepository? = null 222 230 var onError: ((CameraException) -> Unit)? = null 223 231 var startTime: Long? 
= null ··· 427 435 onError?.invoke(CameraException.ConfigurationError(e.message ?: "Unknown error")) 428 436 } 429 437 } 430 - private val frameProcessor = FrameProcessor() 438 + private val modelPath = CustomObjectModel.getInstance().iosModelPath 439 + private val objectDetector = createObjectDetector(modelPath) 440 + private val frameProcessor = FrameProcessor(objectDetector) 431 441 432 442 @OptIn(ExperimentalForeignApi::class, NativeRuntimeApi::class) 433 443 override fun captureOutput( ··· 442 452 ).also { processingQueue -> 443 453 dispatch_async(processingQueue) { 444 454 try { 445 - frameProcessor.analyseBuffer( 446 - CMSampleBufferGetImageBuffer(didOutputSampleBuffer), timestamp 447 - ) { skeleton -> 448 - cameraPreviewLayer?.also { preview -> 449 - skeleton?.let { 450 - mapSkeletonToPreview( 451 - skeleton = it, 452 - previewLayer = preview, 453 - isUsingFrontCamera = isUsingFrontCamera, 454 - width = 480f, 455 - height = 360f 456 - ) 457 - }?.also { updatedSkeleton -> 458 - skeletonRepository?.updateSkeleton( 459 - updatedSkeleton 460 - ) 461 - preview.bounds.useContents { 462 - Pair( 463 - size.width.toInt(), 464 - size.height.toInt() 455 + frameProcessor.analyseBufferForAll( 456 + CMSampleBufferGetImageBuffer(didOutputSampleBuffer), timestamp, 457 + onSkeletonProcessed = { skeleton -> 458 + cameraPreviewLayer?.also { preview -> 459 + skeleton?.let { 460 + mapSkeletonToPreview( 461 + skeleton = it, 462 + previewLayer = preview, 463 + isUsingFrontCamera = isUsingFrontCamera, 464 + width = 480f, 465 + height = 360f 465 466 ) 466 - }.also { bo -> 467 - ImageBitmap( 468 - bo.first, bo.second 469 - ).drawSkeleton(inskeleton = updatedSkeleton) 470 - .also { drawn -> 471 - frameListener?.updateFrame( 472 - drawn 473 - ) 474 - } 467 + }?.also { updatedSkeleton -> 468 + skeletonRepository?.updateSkeleton( 469 + updatedSkeleton 470 + ) 471 + preview.bounds.useContents { 472 + Pair( 473 + size.width.toInt(), 474 + size.height.toInt() 475 + ) 476 + }.also { bo -> 
477 + ImageBitmap( 478 + bo.first, bo.second 479 + ).drawSkeleton(inskeleton = updatedSkeleton) 480 + .also { drawn -> 481 + frameListener?.updateFrame( 482 + drawn 483 + ) 484 + } 485 + } 475 486 } 476 487 } 488 + }, 489 + onObjectsProcessed = { objects -> 490 + customObjectRepository?.updateCustomObject(objects) 477 491 } 478 - } 479 - }catch (e: Exception) { 492 + ) 493 + } catch (e: Exception) { 480 494 println(e.message ?: "Unknown error in frame processing") 481 495 } 482 - 483 496 } 484 497 } 485 498 } 486 499 } 487 500 MemoryManager.updateMemoryStatus() 488 501 kotlin.native.runtime.GC.collect() 489 - /* timeStamp().also { timestamp-> 490 - runCatching { 491 - CMSampleBufferGetImageBuffer(didOutputSampleBuffer).also { buffer -> 492 - CVPixelBufferGetWidth(buffer).also { w -> 493 - CVPixelBufferGetHeight(buffer).also { h -> 494 - CIImage.imageWithCVPixelBuffer(buffer).also { imageBuffer -> 495 - imageBuffer.toCGImageRef(w, h).also { cgImage -> 496 - dispatch_queue_create( 497 - "com.performancecoachlab.frameProcessing", null 498 - ).also { processingQueue -> 499 - dispatch_async(processingQueue) { 500 - frameProcessor.analyseFrame( 501 - cgImage, timestamp 502 - ) { skeleton -> 503 - cameraPreviewLayer?.also { preview -> 504 - skeleton?.let { 505 - mapSkeletonToPreview( 506 - skeleton = it, 507 - previewLayer = preview, 508 - isUsingFrontCamera = isUsingFrontCamera, 509 - width = w.toFloat(), 510 - height = h.toFloat() 511 - ) 512 - }?.also { updatedSkeleton -> 513 - skeletonRepository?.updateSkeleton( 514 - updatedSkeleton 515 - ) 516 - preview.bounds.useContents { 517 - Pair( 518 - size.width.toInt(), 519 - size.height.toInt() 520 - ) 521 - }.also { bo -> 522 - ImageBitmap( 523 - bo.first, bo.second 524 - ).drawSkeleton(inskeleton = updatedSkeleton) 525 - .also { drawn -> 526 - frameListener?.updateFrame( 527 - drawn 528 - ) 529 - // Add to image repository: use original skeleton and the raw frame image 530 - *//*imageRepository?.updateImage( 531 - 
com.performancecoachlab.posedetection.recording.SkeletonImage( 532 - skeleton = skeleton, // original, not mapped 533 - image = safeBitmap 534 - ) 535 - )*//* 536 - } 537 - } 538 - } 539 - } 540 - CGImageRelease(cgImage) 541 - } 542 - } 543 - } 544 - } 545 - } 546 - } 547 - } 548 - } 549 - } 550 - }*/ 551 - } 502 + } 552 503 fun ImageBitmap.copy(): ImageBitmap { 553 504 val original = this 554 505 val copied = ImageBitmap(width, height, config, hasAlpha, colorSpace)
+5 -2
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/CameraView.ios.kt
··· 12 12 import androidx.compose.runtime.setValue 13 13 import androidx.compose.ui.Modifier 14 14 import androidx.compose.ui.layout.ContentScale 15 + import com.performancecoachlab.posedetection.custom.CustomObjectRespository 15 16 import com.performancecoachlab.posedetection.skeleton.SkeletonRepository 16 17 import kotlinx.cinterop.BetaInteropApi 17 18 import kotlinx.cinterop.autoreleasepool ··· 21 22 @Composable 22 23 actual fun CameraView( 23 24 skeletonRepository: SkeletonRepository, 25 + customObjectRepository:CustomObjectRespository, 24 26 drawSkeleton: Boolean, 25 27 modifier: Modifier, 26 28 frontCamera: Boolean, 27 29 isRecording: Boolean, 28 30 onRecordToggled: (Boolean) -> Unit, 29 - onVideoSaved: (String) -> Unit 30 - ) { 31 + onVideoSaved: (String) -> Unit, 32 + ) { 31 33 val cameraEngine = remember { mutableStateOf<CameraEngine?>(null) } 32 34 val frameListener = remember { FrameRepository() } 33 35 val frameBitmap by frameListener.frameFlow.collectAsState() ··· 35 37 LaunchedEffect(cameraEngine.value, isRecording) { 36 38 cameraEngine.value?.apply { 37 39 addSkeletonRepository(skeletonRepository) 40 + addCustomObjectRepository(customObjectRepository) 38 41 addFrameListener(frameListener) 39 42 setOnVideoSavedCallback(onVideoSaved) 40 43 if (isRecording && !lastRecordingState) startRecording()
+211 -29
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/camera/FrameProcessor.kt
··· 1 1 package com.performancecoachlab.posedetection.camera 2 2 3 + import androidx.compose.ui.geometry.Rect 3 4 import com.performancecoachlab.posedetection.recording.AnalysisObject 4 5 import com.performancecoachlab.posedetection.skeleton.Skeleton 5 6 import kotlinx.cinterop.BetaInteropApi ··· 19 20 import platform.CoreGraphics.CGPoint 20 21 import platform.CoreGraphics.CGRect 21 22 import platform.CoreGraphics.CGRectMake 22 - import platform.CoreML.MLModel 23 23 import platform.CoreVideo.CVImageBufferRef 24 24 import platform.CoreVideo.CVPixelBufferRef 25 - import platform.Foundation.NSBundle 26 25 import platform.Foundation.NSError 26 + import platform.Foundation.NSUUID 27 + import platform.Vision.VNClassificationObservation 28 + import platform.Vision.VNCoreMLModel 29 + import platform.Vision.VNCoreMLRequest 27 30 import platform.Vision.VNDetectHumanBodyPoseRequest 28 31 import platform.Vision.VNHumanBodyPoseObservation 29 32 import platform.Vision.VNHumanBodyPoseObservationJointName ··· 41 44 import platform.Vision.VNHumanBodyPoseObservationJointNameRightWrist 42 45 import platform.Vision.VNImagePointForNormalizedPoint 43 46 import platform.Vision.VNImageRequestHandler 47 + import platform.Vision.VNRecognizedObjectObservation 44 48 import platform.Vision.VNRecognizedPoint 45 49 import platform.Vision.VNRequest 46 - import platform.Foundation.NSUUID 47 - import platform.Vision.* 50 + import kotlin.experimental.ExperimentalNativeApi 51 + import kotlin.native.identityHashCode 48 52 49 53 fun bodyPoseHandler(request: VNRequest): MutableMap<VNHumanBodyPoseObservationJointName, VNRecognizedPoint>? 
{ 50 54 try { ··· 102 106 return Skeleton( 103 107 timestamp = skeleton.timestamp, 104 108 leftShoulder = skeleton.leftShoulder ?: findMostRecentNonNullPoint { it.leftShoulder }, 105 - rightShoulder = skeleton.rightShoulder ?: findMostRecentNonNullPoint { it.rightShoulder }, 109 + rightShoulder = skeleton.rightShoulder 110 + ?: findMostRecentNonNullPoint { it.rightShoulder }, 106 111 leftElbow = skeleton.leftElbow ?: findMostRecentNonNullPoint { it.leftElbow }, 107 112 rightElbow = skeleton.rightElbow ?: findMostRecentNonNullPoint { it.rightElbow }, 108 113 leftWrist = skeleton.leftWrist ?: findMostRecentNonNullPoint { it.leftWrist }, ··· 120 125 } 121 126 122 127 @OptIn(ExperimentalForeignApi::class) 123 - class FrameProcessor() { 128 + class FrameProcessor(val modelObj: VNCoreMLModel?) { 124 129 private val skelBuffer = SkelBuffer(maxSize = 10) 125 130 private var regionOfInterest = CGRectMake(0.0, 0.0, 1.0, 1.0) 126 131 private var requests = mutableListOf<VNRequest>() 127 - val model = setUpModel() 128 132 val objectRecognition = setUpRecognition() 129 133 130 - private fun setUpModel(): VNCoreMLModel? { 131 - val url = NSBundle.mainBundle.URLForResource("YOLOv3", "mlmodelc") 132 - return url?.let { 133 - MLModel.modelWithContentsOfURL(it, null)?.let { model -> 134 - VNCoreMLModel.modelForMLModel(model,null) 135 - } 136 - } 137 - } 138 134 private fun setUpRecognition() { 139 - if (model == null) { 135 + if (modelObj == null) { 140 136 return 141 137 } 142 138 try { 143 - val objectRecognition = VNCoreMLRequest(model) { request, error -> 139 + val objectRecognition = VNCoreMLRequest(modelObj) { request, error -> 144 140 val results = request?.results as? 
List<*> ?: return@VNCoreMLRequest 145 141 val recognized = results.filterIsInstance<VNRecognizedObjectObservation>() 146 142 processResults(recognized) ··· 162 158 163 159 private fun processResults(results: List<VNRecognizedObjectObservation>) { 164 160 val detectedObjects = results.map { observation -> 165 - val label = observation.labels.firstOrNull()?.toString()?:"Unknown" 161 + val label = observation.labels.firstOrNull()?.toString() ?: "Unknown" 166 162 val confidence = observation.confidence 167 163 val boundingBox = observation.boundingBox 168 164 DetectedObject( 169 - label = label, 170 - confidence = confidence, 171 - boundingBox = boundingBox 165 + label = label, confidence = confidence, boundingBox = boundingBox 172 166 ) 173 167 } 174 168 } 169 + 175 170 @OptIn(BetaInteropApi::class) 176 - fun analyseBuffer(buffer: CVImageBufferRef?, timestamp: Long, onProcessed: (Skeleton?) -> Unit) { 177 - if( buffer == null) { 171 + fun analyseBuffer( 172 + buffer: CVImageBufferRef?, timestamp: Long, onProcessed: (Skeleton?) 
-> Unit 173 + ) { 174 + if (buffer == null) { 178 175 onProcessed(null) 179 176 return 180 177 } ··· 354 351 } 355 352 } 356 353 357 - @OptIn(BetaInteropApi::class) 358 - fun analyseFrameForObjects(cgImage: CGImageRef?, timestamp: Long, onProccessed: (List<AnalysisObject>) -> Unit) { 354 + @OptIn(BetaInteropApi::class, ExperimentalNativeApi::class) 355 + fun analyseFrameForObjects( 356 + cgImage: CGImageRef?, timestamp: Long, onProccessed: (List<AnalysisObject>) -> Unit 357 + ) { 359 358 autoreleasepool { 360 359 if (cgImage == null) { 361 360 onProccessed(emptyList()) ··· 368 367 return 369 368 } 370 369 memScoped { 371 - onProccessed(emptyList()) 370 + val errorPtr = alloc<ObjCObjectVar<NSError?>>() 371 + if (modelObj == null) { 372 + onProccessed(emptyList()) 373 + return@memScoped 374 + } 375 + try { 376 + val request = VNCoreMLRequest(modelObj) { request, error -> 377 + if (error != null) { 378 + onProccessed(emptyList()) 379 + return@VNCoreMLRequest 380 + } 381 + val results = request?.results as? 
List<*> ?: emptyList<Any>() 382 + val recognized = results.filterIsInstance<VNRecognizedObjectObservation>() 383 + val analysisObjects = recognized.map { observation -> 384 + val confidence = observation.confidence 385 + val boundingBox = observation.boundingBox.useContents { 386 + val w = width.toFloat() 387 + val h = height.toFloat() 388 + val left = origin.x * w 389 + val top = (1.0 - origin.y) * h 390 + val right = (origin.x + size.width) * w 391 + val bottom = (1.0 - (origin.y + size.height)) * h 392 + Rect( 393 + left = left.toFloat(), 394 + top = top.toFloat(), 395 + right = right.toFloat(), 396 + bottom = bottom.toFloat() 397 + ) 398 + } 399 + val labels = observation.labels.mapNotNull { 400 + (it as VNClassificationObservation).let { ca -> 401 + if (ca.confidence > 0.0) ca.identifier else null 402 + } 403 + } 404 + AnalysisObject( 405 + trackingId = observation.identityHashCode(), 406 + labels = labels, 407 + boundingBox = boundingBox, 408 + ) 409 + } 410 + onProccessed(analysisObjects) 411 + } 412 + val handler = VNImageRequestHandler(cgImage, mapOf<Any?, Any?>()) 413 + handler.performRequests(listOf(request), errorPtr.ptr) 414 + if (errorPtr.value != null) { 415 + println("Error performing object detection request: ${errorPtr.value}") 416 + onProccessed(emptyList()) 417 + } 418 + } catch (e: Throwable) { 419 + println("Unable to perform the object detection request: ${e.message}") 420 + onProccessed(emptyList()) 421 + } 372 422 } 373 423 } 424 + } 374 425 426 + @OptIn(BetaInteropApi::class, ExperimentalNativeApi::class) 427 + fun analyseBufferForAll( 428 + buffer: CVImageBufferRef?, 429 + timestamp: Long, 430 + onObjectsProcessed: (List<AnalysisObject>) -> Unit, 431 + onSkeletonProcessed: (Skeleton?) 
-> Unit 432 + ) { 433 + autoreleasepool { 434 + if (buffer == null) { 435 + onObjectsProcessed(emptyList()) 436 + onSkeletonProcessed(null) 437 + return 438 + } 439 + val width = 480uL // You may want to get actual width from buffer if needed 440 + val height = 360uL // You may want to get actual height from buffer if needed 441 + memScoped { 442 + val errorPtr = alloc<ObjCObjectVar<NSError?>>() 443 + if (modelObj == null) { 444 + onObjectsProcessed(emptyList()) 445 + onSkeletonProcessed(null) 446 + return@memScoped 447 + } 448 + try { 449 + val requestForObjects = VNCoreMLRequest(modelObj) { request, error -> 450 + if (error != null) { 451 + onObjectsProcessed(emptyList()) 452 + return@VNCoreMLRequest 453 + } 454 + val results = request?.results as? List<*> ?: emptyList<Any>() 455 + val recognized = results.filterIsInstance<VNRecognizedObjectObservation>() 456 + val analysisObjects = recognized.map { observation -> 457 + val confidence = observation.confidence 458 + val boundingBox = observation.boundingBox.useContents { 459 + val w = width.toFloat() 460 + val h = height.toFloat() 461 + val left = origin.x * w 462 + val top = (1.0 - origin.y) * h 463 + val right = (origin.x + size.width) * w 464 + val bottom = (1.0 - (origin.y + size.height)) * h 465 + Rect( 466 + left = left.toFloat(), 467 + top = top.toFloat(), 468 + right = right.toFloat(), 469 + bottom = bottom.toFloat() 470 + ) 471 + } 472 + val labels = observation.labels.mapNotNull { 473 + (it as VNClassificationObservation).let { ca -> 474 + if (ca.confidence > 0.0) ca.identifier else null 475 + } 476 + } 477 + AnalysisObject( 478 + trackingId = observation.identityHashCode(), 479 + labels = labels, 480 + boundingBox = boundingBox, 481 + ) 482 + } 483 + onObjectsProcessed(analysisObjects) 484 + } 485 + val options = mapOf<Any?, Any?>( 486 + "orientation" to AVCaptureVideoOrientationLandscapeRight 487 + ) 488 + val requestForSkeleton = VNDetectHumanBodyPoseRequest { request, error -> 489 + if (error != 
null) { 490 + onSkeletonProcessed(null) 491 + } else { 492 + request?.also { vnRequest -> 493 + val recognizedPoints = bodyPoseHandler(vnRequest) 494 + regionOfInterest = calculateRegionOfInterest(recognizedPoints) 495 + val updatedSkeleton = Skeleton( 496 + timestamp = timestamp, 497 + leftShoulder = recognizedPoints?.get( 498 + VNHumanBodyPoseObservationJointNameLeftShoulder 499 + )?.location?.toSkeletonPoint(width, height), 500 + rightShoulder = recognizedPoints?.get( 501 + VNHumanBodyPoseObservationJointNameRightShoulder 502 + )?.location?.toSkeletonPoint(width, height), 503 + leftElbow = recognizedPoints?.get( 504 + VNHumanBodyPoseObservationJointNameLeftElbow 505 + )?.location?.toSkeletonPoint(width, height), 506 + rightElbow = recognizedPoints?.get( 507 + VNHumanBodyPoseObservationJointNameRightElbow 508 + )?.location?.toSkeletonPoint(width, height), 509 + leftWrist = recognizedPoints?.get( 510 + VNHumanBodyPoseObservationJointNameLeftWrist 511 + )?.location?.toSkeletonPoint(width, height), 512 + rightWrist = recognizedPoints?.get( 513 + VNHumanBodyPoseObservationJointNameRightWrist 514 + )?.location?.toSkeletonPoint(width, height), 515 + leftHip = recognizedPoints?.get( 516 + VNHumanBodyPoseObservationJointNameLeftHip 517 + )?.location?.toSkeletonPoint(width, height), 518 + rightHip = recognizedPoints?.get( 519 + VNHumanBodyPoseObservationJointNameRightHip 520 + )?.location?.toSkeletonPoint(width, height), 521 + leftKnee = recognizedPoints?.get( 522 + VNHumanBodyPoseObservationJointNameLeftKnee 523 + )?.location?.toSkeletonPoint(width, height), 524 + rightKnee = recognizedPoints?.get( 525 + VNHumanBodyPoseObservationJointNameRightKnee 526 + )?.location?.toSkeletonPoint(width, height), 527 + leftAnkle = recognizedPoints?.get( 528 + VNHumanBodyPoseObservationJointNameLeftAnkle 529 + )?.location?.toSkeletonPoint(width, height), 530 + rightAnkle = recognizedPoints?.get( 531 + VNHumanBodyPoseObservationJointNameRightAnkle 532 + 
)?.location?.toSkeletonPoint(width, height), 533 + height = height.toFloat(), 534 + width = width.toFloat() 535 + ) 536 + onSkeletonProcessed(skelBuffer.smooth(updatedSkeleton)) 537 + } 538 + } 539 + } 540 + requestForSkeleton.regionOfInterest = regionOfInterest 541 + val handler = VNImageRequestHandler(buffer, options) 542 + handler.performRequests( 543 + listOf(requestForObjects, requestForSkeleton), errorPtr.ptr 544 + ) 545 + if (errorPtr.value != null) { 546 + println("Error performing object detection request: ${errorPtr.value}") 547 + onObjectsProcessed(emptyList()) 548 + onSkeletonProcessed(null) 549 + } 550 + } catch (e: Throwable) { 551 + println("Unable to perform the object detection request: ${e.message}") 552 + onObjectsProcessed(emptyList()) 553 + onSkeletonProcessed(null) 554 + } 555 + } 556 + } 375 557 } 376 558 } 377 559 ··· 391 573 392 574 val x = (minX - margin).coerceIn(0.0, 1.0) 393 575 val y = (minY - margin).coerceIn(0.0, 1.0) 394 - val width = (maxX - minX + 2 * margin).coerceIn(0.0, 1.0 -x) 395 - val height = (maxY - minY + 2 * margin).coerceIn(0.0, 1.0-y) 576 + val width = (maxX - minX + 2 * margin).coerceIn(0.0, 1.0 - x) 577 + val height = (maxY - minY + 2 * margin).coerceIn(0.0, 1.0 - y) 396 578 397 579 return CGRectMake( 398 580 x = x, ··· 403 585 } 404 586 405 587 } 588 + 406 589 data class DetectedObject @OptIn(ExperimentalForeignApi::class) constructor( 407 590 val id: String = NSUUID().UUIDString(), 408 591 val label: String, ··· 417 600 ) { 418 601 requestHandler.performRequests(listOf(request), errorPtr.ptr) 419 602 } 420 -
+24 -3
posedetection/src/iosMain/kotlin/com/performancecoachlab/posedetection/recording/InputFrame.ios.kt
··· 2 2 3 3 import androidx.compose.ui.graphics.ImageBitmap 4 4 import com.performancecoachlab.posedetection.camera.FrameProcessor 5 + import com.performancecoachlab.posedetection.camera.drawAnalysisResults 5 6 import com.performancecoachlab.posedetection.camera.drawSkeleton 6 7 import com.performancecoachlab.posedetection.camera.toImageBitmap 8 + import com.performancecoachlab.posedetection.custom.CustomObjectModel 7 9 import com.performancecoachlab.posedetection.skeleton.Skeleton 8 10 import kotlinx.cinterop.ExperimentalForeignApi 9 11 import kotlinx.coroutines.suspendCancellableCoroutine 10 12 import platform.CoreGraphics.CGImageRef 13 + import platform.CoreML.MLModel 14 + import platform.Foundation.NSBundle 15 + import platform.Foundation.NSURL 11 16 import platform.UIKit.UIImage 17 + import platform.Vision.VNCoreMLModel 12 18 import kotlin.coroutines.resume 13 19 14 20 actual class InputFrame @OptIn(ExperimentalForeignApi::class) constructor( ··· 24 30 } 25 31 26 32 actual fun drawAnalysisResults(analysisResults: AnalysisResult): ImageBitmap { 27 - return toImageBitmap().drawSkeleton(analysisResults.skeleton) 33 + return toImageBitmap().drawAnalysisResults(analysisResults) 28 34 } 29 35 } 30 36 37 + @OptIn(ExperimentalForeignApi::class) 38 + fun createObjectDetector(model: String?): VNCoreMLModel? 
{ 39 + println("Model input: $model") 40 + val path = NSBundle.mainBundle.pathForResource(model, "mlmodelc") 41 + println("Model path: $path") 42 + val url = path?.let { NSURL.fileURLWithPath(it) } 43 + println("Model URL: $url") 44 + val modelCont = url?.let {MLModel.modelWithContentsOfURL(it, null)} 45 + println("Model content: $modelCont") 46 + val modelObj = modelCont?.let {VNCoreMLModel.modelForMLModel(it,null)} 47 + println("Model: $modelObj") 48 + return modelObj 49 + } 31 50 32 51 actual class FrameAnalyser actual constructor() { 33 - private val frameProcessor = FrameProcessor() 52 + private val modelPath = CustomObjectModel.getInstance().iosModelPath 53 + private val modelObj:VNCoreMLModel? = createObjectDetector(modelPath) 54 + private val frameProcessor = FrameProcessor(modelObj) 34 55 35 56 @OptIn(ExperimentalForeignApi::class) 36 57 actual suspend fun analyseFrame(inputFrame: InputFrame): AnalysisResult { ··· 43 64 fun tryResume() { 44 65 completed++ 45 66 if (completed == 2) { 46 - continuation.resume(AnalysisResult(poseResult, objectResults ?: emptyList())) 67 + continuation.resume(AnalysisResult(poseResult, objectResults)) 47 68 } 48 69 } 49 70
+23 -3
sample/composeApp/src/commonMain/kotlin/com/nate/posedetection/App.kt
··· 38 38 import com.nate.posedetection.theme.AppTheme 39 39 import com.performancecoachlab.posedetection.camera.CameraView 40 40 import com.performancecoachlab.posedetection.camera.drawSkeleton 41 + import com.performancecoachlab.posedetection.custom.CustomObjectModel 42 + import com.performancecoachlab.posedetection.custom.CustomObjectRespository 41 43 import com.performancecoachlab.posedetection.encoding.VideoBuilder 42 44 import com.performancecoachlab.posedetection.encoding.createVideoBuilder 43 45 import com.performancecoachlab.posedetection.permissions.PermissionProvider ··· 63 65 internal fun App() = AppTheme { 64 66 var selectedTabIndex by remember { mutableStateOf(0) } 65 67 val tabs = listOf("Camera Feed", "Recorded Video") 68 + CustomObjectModel.init( 69 + androidModelPath = "4.tflite", 70 + iosModelPath = "YOLOv3FP16" 71 + ) 72 + 66 73 67 74 Column { 68 75 TabRow(selectedTabIndex = selectedTabIndex) { ··· 131 138 var image by remember { mutableStateOf<InputFrame?>(null) } 132 139 val timeRange = Pair(0L, duration) 133 140 var frame by remember { mutableStateOf(timeRange.first) } 134 - val frameAnalyser = FrameAnalyser() 141 + //val frameAnalyser = FrameAnalyser("4.tflite") 142 + val frameAnalyser by remember { mutableStateOf(FrameAnalyser())} 135 143 var bitmap by remember { mutableStateOf<ImageBitmap?>(null) } 136 144 val videoBuilder = remember { mutableStateOf<VideoBuilder?>(null) } 137 145 var isRecording by remember { mutableStateOf(false) } ··· 164 172 try { 165 173 videoBuilder.value?.let { builder -> 166 174 savedPath = builder.finalize() 167 - println(savedPath) 168 175 //launcher.launch(file) 169 176 videoBuilder.value = null 170 177 firstFrameTimestamp = null ··· 263 270 @Composable 264 271 fun CameraSample() { 265 272 val skeletonRepository = remember { SkeletonRepository() } 273 + val customObjectRespository = remember { CustomObjectRespository() } 266 274 val skeleton by skeletonRepository.skeletonFlow.collectAsState() 275 + val 
customObjects by customObjectRespository.customObjectFlow.collectAsState() 267 276 var permissionGranted by remember { mutableStateOf(false) } 268 277 var isRecording by remember { mutableStateOf(false) } 269 278 var path by remember { mutableStateOf("") } ··· 308 317 } 309 318 CameraView( 310 319 skeletonRepository = skeletonRepository, 320 + customObjectRepository = customObjectRespository, 311 321 drawSkeleton = true, 312 322 modifier = Modifier.weight(1f), 313 323 frontCamera = true, 314 324 isRecording = isRecording, 315 325 onRecordToggled = { isRecording = it }, 316 - onVideoSaved = { path = it } 326 + onVideoSaved = { path = it }, 317 327 ) 318 328 } 319 329 } else Text("Camera permission not granted") ··· 337 347 if (upRightPose.matches(it)) Text("Standing", fontSize = 80.sp) 338 348 else if (starPose.matches(it)) Text("Star", fontSize = 80.sp) 339 349 else Text("No Pose Detected", fontSize = 80.sp) 350 + } 351 + customObjects?.let { 352 + if (it.isNotEmpty()) { 353 + println("Detected Objects") 354 + it.forEach { obj -> 355 + println("${obj.labels.firstOrNull()}") 356 + } 357 + } else { 358 + println("No Objects Detected") 359 + } 340 360 } 341 361 }
+9 -1
sample/iosApp/iosApp.xcodeproj/project.pbxproj
··· 3 3 archiveVersion = 1; 4 4 classes = { 5 5 }; 6 - objectVersion = 56; 6 + objectVersion = 70; 7 7 objects = { 8 8 9 9 /* Begin PBXBuildFile section */ ··· 21 21 A93A954129CC810D00F8E227 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; }; 22 22 /* End PBXFileReference section */ 23 23 24 + /* Begin PBXFileSystemSynchronizedRootGroup section */ 25 + 435F0A962E15C2B300951ADC /* models */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); path = models; sourceTree = "<group>"; }; 26 + /* End PBXFileSystemSynchronizedRootGroup section */ 27 + 24 28 /* Begin PBXFrameworksBuildPhase section */ 25 29 A93A953429CC810C00F8E227 /* Frameworks */ = { 26 30 isa = PBXFrameworksBuildPhase; ··· 52 56 A93A953929CC810C00F8E227 /* iosApp */ = { 53 57 isa = PBXGroup; 54 58 children = ( 59 + 435F0A962E15C2B300951ADC /* models */, 55 60 A93A953A29CC810C00F8E227 /* iosApp.swift */, 56 61 A93A953E29CC810D00F8E227 /* Assets.xcassets */, 57 62 A93A954029CC810D00F8E227 /* Preview Content */, ··· 90 95 buildRules = ( 91 96 ); 92 97 dependencies = ( 98 + ); 99 + fileSystemSynchronizedGroups = ( 100 + 435F0A962E15C2B300951ADC /* models */, 93 101 ); 94 102 name = iosApp; 95 103 productName = iosApp;
sample/iosApp/iosApp/models/YOLOv3FP16.mlmodel

This is a binary file and will not be displayed.