rollingscan very clean

likelihood -> find similar
REFRESHCLAUD.MD 20260126
This commit is contained in:
genki
2026-01-26 22:46:38 -05:00
parent cfec2b980a
commit 804f3d5640
11 changed files with 557 additions and 134 deletions

View File

@@ -4,10 +4,10 @@
<selectionStates>
<SelectionState runConfigName="app">
<option name="selectionMode" value="DROPDOWN" />
<DropdownSelection timestamp="2026-01-26T02:23:12.309011764Z">
<DropdownSelection timestamp="2026-01-27T00:21:15.014661014Z">
<Target type="DEFAULT_BOOT">
<handle>
<DeviceId pluginId="LocalEmulator" identifier="path=/home/genki/.android/avd/Medium_Phone.avd" />
<DeviceId pluginId="PhysicalDevice" identifier="serial=R3CX106YYCB" />
</handle>
</Target>
</DropdownSelection>

View File

@@ -10,6 +10,10 @@ import com.placeholder.sherpai2.data.local.entity.*
/**
* AppDatabase - Complete database for SherpAI2
*
* VERSION 12 - Distribution-based rejection stats
* - Added similarityStdDev, similarityMin to FaceModelEntity
* - Enables self-calibrating threshold for face matching
*
* VERSION 10 - User Feedback Loop
* - Added UserFeedbackEntity for storing user corrections
* - Enables cluster refinement before training
@@ -52,7 +56,7 @@ import com.placeholder.sherpai2.data.local.entity.*
CollectionImageEntity::class,
CollectionFilterEntity::class
],
version = 11, // INCREMENTED for person statistics
version = 12, // INCREMENTED for distribution-based rejection stats
exportSchema = false
)
abstract class AppDatabase : RoomDatabase() {
@@ -272,13 +276,32 @@ val MIGRATION_10_11 = object : Migration(10, 11) {
}
}
/**
 * MIGRATION 11 → 12 (Distribution-based Rejection Stats)
 *
 * Adds two `REAL NOT NULL` columns to the `face_models` table:
 * - `similarityStdDev` (default 0.05)
 * - `similarityMin` (default 0.6)
 *
 * Rows that already exist pick up the column defaults. The stats are intended
 * to be computed at training time and used while scanning to derive a
 * self-calibrating floor of roughly (mean - 2 * stdDev) for matching.
 */
val MIGRATION_11_12 = object : Migration(11, 12) {
    override fun migrate(database: SupportSQLiteDatabase) {
        // One ALTER TABLE per new column; the defaults keep existing models valid.
        val addColumnStatements = listOf(
            "ALTER TABLE face_models ADD COLUMN similarityStdDev REAL NOT NULL DEFAULT 0.05",
            "ALTER TABLE face_models ADD COLUMN similarityMin REAL NOT NULL DEFAULT 0.6"
        )
        addColumnStatements.forEach { statement -> database.execSQL(statement) }
    }
}
/**
* PRODUCTION MIGRATION NOTES:
*
* Before shipping to users, update DatabaseModule to use migrations:
*
* Room.databaseBuilder(context, AppDatabase::class.java, "sherpai.db")
* .addMigrations(MIGRATION_7_8, MIGRATION_8_9, MIGRATION_9_10, MIGRATION_10_11) // Add all migrations
* .addMigrations(MIGRATION_7_8, MIGRATION_8_9, MIGRATION_9_10, MIGRATION_10_11, MIGRATION_11_12) // Add all migrations
* // .fallbackToDestructiveMigration() // Remove this
* .build()
*/

View File

@@ -143,6 +143,13 @@ data class FaceModelEntity(
@ColumnInfo(name = "averageConfidence")
val averageConfidence: Float,
// Distribution stats for self-calibrating rejection
@ColumnInfo(name = "similarityStdDev")
val similarityStdDev: Float = 0.05f, // Default for backwards compat
@ColumnInfo(name = "similarityMin")
val similarityMin: Float = 0.6f, // Default for backwards compat
@ColumnInfo(name = "createdAt")
val createdAt: Long,
@@ -157,26 +164,29 @@ data class FaceModelEntity(
) {
companion object {
/**
* Backwards compatible create() method
* Used by existing FaceRecognitionRepository code
* Create with distribution stats for self-calibrating rejection
*/
fun create(
personId: String,
embeddingArray: FloatArray,
trainingImageCount: Int,
averageConfidence: Float
averageConfidence: Float,
similarityStdDev: Float = 0.05f,
similarityMin: Float = 0.6f
): FaceModelEntity {
return createFromEmbedding(personId, embeddingArray, trainingImageCount, averageConfidence)
return createFromEmbedding(personId, embeddingArray, trainingImageCount, averageConfidence, similarityStdDev, similarityMin)
}
/**
* Create from single embedding (backwards compatible)
* Create from single embedding with distribution stats
*/
fun createFromEmbedding(
personId: String,
embeddingArray: FloatArray,
trainingImageCount: Int,
averageConfidence: Float
averageConfidence: Float,
similarityStdDev: Float = 0.05f,
similarityMin: Float = 0.6f
): FaceModelEntity {
val now = System.currentTimeMillis()
val centroid = TemporalCentroid(
@@ -194,6 +204,8 @@ data class FaceModelEntity(
centroidsJson = serializeCentroids(listOf(centroid)),
trainingImageCount = trainingImageCount,
averageConfidence = averageConfidence,
similarityStdDev = similarityStdDev,
similarityMin = similarityMin,
createdAt = now,
updatedAt = now,
lastUsed = null,

View File

@@ -99,11 +99,19 @@ class FaceRecognitionRepository @Inject constructor(
}
val avgConfidence = confidences.average().toFloat()
// Compute distribution stats for self-calibrating rejection
val stdDev = kotlin.math.sqrt(
confidences.map { (it - avgConfidence).toDouble().let { d -> d * d } }.average()
).toFloat()
val minSimilarity = confidences.minOrNull() ?: 0f
val faceModel = FaceModelEntity.create(
personId = personId,
embeddingArray = personEmbedding,
trainingImageCount = validImages.size,
averageConfidence = avgConfidence
averageConfidence = avgConfidence,
similarityStdDev = stdDev,
similarityMin = minSimilarity
)
faceModelDao.insertFaceModel(faceModel)

View File

@@ -29,6 +29,64 @@ import kotlin.math.sqrt
*/
object FaceQualityFilter {
/**
 * Coarse age classification of a detected face, used for child-vs-adult filtering.
 *
 * - CHILD: the proportion heuristics in estimateAgeGroup point strongly to a child.
 * - ADULT: the heuristics did not accumulate any child signal.
 * - UNCERTAIN: no decision could be made (for example an eye landmark is missing,
 *   or the evidence is inconclusive).
 */
enum class AgeGroup { CHILD, ADULT, UNCERTAIN }
/**
 * Estimate whether a face belongs to a child or an adult based on facial proportions.
 *
 * Two heuristics contribute to an integer "child score":
 * 1. Eye position ratio - vertical offset of the eye midpoint from the top of the
 *    bounding box, divided by the box height. Children have proportionally larger
 *    foreheads, so their eyes sit lower (~45% from top) than adults' (~35%).
 *    Scoring: > 0.45 → +2, > 0.42 → +1, < 0.35 → -1, otherwise 0.
 * 2. Face roundness - bounding box width / height. Children: ~0.85-1.0, adults: ~0.7-0.85.
 *    Scoring: > 0.90 → +2, > 0.82 → +1, < 0.75 → -1, otherwise 0.
 *
 * A total of 3 or more yields [AgeGroup.CHILD]; 0 or less yields [AgeGroup.ADULT];
 * anything in between yields [AgeGroup.UNCERTAIN].
 *
 * NOTE(review): a face that triggers neither heuristic scores 0 and is therefore
 * reported as ADULT, not UNCERTAIN - confirm this is the intended bias.
 *
 * @param face detected face; both eye landmarks must be available, otherwise the
 *   result is [AgeGroup.UNCERTAIN]
 * @param imageWidth currently unused; kept so existing callers keep compiling
 * @param imageHeight currently unused; kept so existing callers keep compiling
 * @return AgeGroup.CHILD, AgeGroup.ADULT, or AgeGroup.UNCERTAIN
 */
fun estimateAgeGroup(face: Face, imageWidth: Int, imageHeight: Int): AgeGroup {
    val leftEye = face.getLandmark(FaceLandmark.LEFT_EYE) ?: return AgeGroup.UNCERTAIN
    val rightEye = face.getLandmark(FaceLandmark.RIGHT_EYE) ?: return AgeGroup.UNCERTAIN

    val bounds = face.boundingBox
    val faceHeight = bounds.height().toFloat()
    // A degenerate box would turn both ratios into NaN/Infinity, every comparison
    // below would fail, and the face would silently be classified as ADULT.
    if (faceHeight <= 0f) return AgeGroup.UNCERTAIN

    // Eye-to-face height ratio (where the eyes sit relative to the top of the face)
    val eyeY = (leftEye.position.y + rightEye.position.y) / 2
    val eyePositionRatio = (eyeY - bounds.top.toFloat()) / faceHeight

    // Face roundness (width/height)
    val faceRatio = bounds.width().toFloat() / faceHeight

    // Higher score = more child-like
    val eyeScore = when {
        eyePositionRatio > 0.45f -> 2   // Strong child signal
        eyePositionRatio > 0.42f -> 1   // Mild child signal
        eyePositionRatio < 0.35f -> -1  // Adult signal
        else -> 0
    }
    val roundnessScore = when {
        faceRatio > 0.90f -> 2   // Very round = child
        faceRatio > 0.82f -> 1   // Somewhat round
        faceRatio < 0.75f -> -1  // Long face = adult
        else -> 0
    }
    val childScore = eyeScore + roundnessScore

    return when {
        childScore >= 3 -> AgeGroup.CHILD
        childScore <= 0 -> AgeGroup.ADULT
        else -> AgeGroup.UNCERTAIN
    }
}
/**
* Validate face for Discovery/Clustering
*

View File

@@ -19,6 +19,7 @@ import com.placeholder.sherpai2.data.local.entity.PersonEntity
import com.placeholder.sherpai2.data.local.entity.PhotoFaceTagEntity
import com.placeholder.sherpai2.ml.FaceNetModel
import com.placeholder.sherpai2.ml.ThresholdStrategy
import com.placeholder.sherpai2.domain.clustering.FaceQualityFilter
import dagger.hilt.android.lifecycle.HiltViewModel
import dagger.hilt.android.qualifiers.ApplicationContext
import kotlinx.coroutines.Dispatchers
@@ -142,7 +143,7 @@ class PersonInventoryViewModel @Inject constructor(
val detectorOptions = FaceDetectorOptions.Builder()
.setPerformanceMode(FaceDetectorOptions.PERFORMANCE_MODE_ACCURATE)
.setLandmarkMode(FaceDetectorOptions.LANDMARK_MODE_NONE)
.setLandmarkMode(FaceDetectorOptions.LANDMARK_MODE_ALL) // Needed for age estimation
.setClassificationMode(FaceDetectorOptions.CLASSIFICATION_MODE_NONE)
.setMinFaceSize(0.15f)
.build()
@@ -159,9 +160,23 @@ class PersonInventoryViewModel @Inject constructor(
}
val faceNetModel = FaceNetModel(context)
// Production threshold - balance precision vs recall
val baseThreshold = 0.58f
android.util.Log.d("PersonScan", "Using threshold: $baseThreshold, centroids: ${modelCentroids.size}")
// Production threshold - STRICT to avoid false positives
// Solo face photos: 0.62, Group photos: 0.68
val baseThreshold = 0.62f
val groupPhotoThreshold = 0.68f // Higher bar for multi-face images
// Load ALL other models for "best match wins" comparison
val allModels = faceModelDao.getAllActiveFaceModels()
val otherModelCentroids = allModels
.filter { it.id != faceModel.id }
.map { model -> model.id to model.getCentroids().map { it.getEmbeddingArray() } }
// Distribution-based minimum threshold (self-calibrating)
val distributionMin = (faceModel.averageConfidence - 2 * faceModel.similarityStdDev)
.coerceAtLeast(faceModel.similarityMin - 0.05f)
.coerceAtLeast(0.50f) // Never go below 0.50 absolute floor
android.util.Log.d("PersonScan", "Using threshold: solo=$baseThreshold, group=$groupPhotoThreshold, distributionMin=$distributionMin (avgConf=${faceModel.averageConfidence}, stdDev=${faceModel.similarityStdDev}), centroids: ${modelCentroids.size}, competing models: ${otherModelCentroids.size}, isChild=${person.isChild}")
val completed = AtomicInteger(0)
val facesFound = AtomicInteger(0)
@@ -173,7 +188,7 @@ class PersonInventoryViewModel @Inject constructor(
val jobs = untaggedImages.map { image ->
async {
semaphore.withPermit {
processImage(image, detector, faceNetModel, modelCentroids, trainingCount, baseThreshold, personId, faceModel.id, batchMatches, batchUpdateMutex, completed, facesFound, startTime, totalToScan, person.name)
processImage(image, detector, faceNetModel, modelCentroids, otherModelCentroids, trainingCount, baseThreshold, groupPhotoThreshold, distributionMin, person.isChild, personId, faceModel.id, batchMatches, batchUpdateMutex, completed, facesFound, startTime, totalToScan, person.name)
}
}
}
@@ -200,7 +215,10 @@ class PersonInventoryViewModel @Inject constructor(
private suspend fun processImage(
image: ImageEntity, detector: com.google.mlkit.vision.face.FaceDetector, faceNetModel: FaceNetModel,
modelCentroids: List<FloatArray>, trainingCount: Int, baseThreshold: Float, personId: String, faceModelId: String,
modelCentroids: List<FloatArray>, otherModelCentroids: List<Pair<String, List<FloatArray>>>,
trainingCount: Int, baseThreshold: Float, groupPhotoThreshold: Float,
distributionMin: Float, isChildTarget: Boolean,
personId: String, faceModelId: String,
batchMatches: MutableList<Triple<String, String, Float>>, batchUpdateMutex: Mutex,
completed: AtomicInteger, facesFound: AtomicInteger, startTime: Long, totalToScan: Int, personName: String
) {
@@ -225,9 +243,13 @@ class PersonInventoryViewModel @Inject constructor(
val scaleX = sizeOpts.outWidth.toFloat() / detectionBitmap.width
val scaleY = sizeOpts.outHeight.toFloat() / detectionBitmap.height
val imageQuality = ThresholdStrategy.estimateImageQuality(sizeOpts.outWidth, sizeOpts.outHeight)
val detectionContext = ThresholdStrategy.estimateDetectionContext(faces.size)
val threshold = ThresholdStrategy.getOptimalThreshold(trainingCount, imageQuality, detectionContext).coerceAtMost(baseThreshold)
// CRITICAL: Use higher threshold for group photos (more likely false positives)
val isGroupPhoto = faces.size > 1
val effectiveThreshold = if (isGroupPhoto) groupPhotoThreshold else baseThreshold
// Track best match in this image (only tag ONE face per image)
var bestMatchSimilarity = 0f
var foundMatch = false
for (face in faces) {
val scaledBounds = android.graphics.Rect(
@@ -237,19 +259,62 @@ class PersonInventoryViewModel @Inject constructor(
(face.boundingBox.bottom * scaleY).toInt()
)
// Skip very small faces (less reliable)
val faceArea = scaledBounds.width() * scaledBounds.height()
val imageArea = sizeOpts.outWidth * sizeOpts.outHeight
val faceRatio = faceArea.toFloat() / imageArea
if (faceRatio < 0.02f) continue // Face must be at least 2% of image
// SIGNAL 2: Age plausibility check (if target is a child)
if (isChildTarget) {
val ageGroup = FaceQualityFilter.estimateAgeGroup(face, detectionBitmap.width, detectionBitmap.height)
if (ageGroup == FaceQualityFilter.AgeGroup.ADULT) {
continue // Reject clearly adult faces when searching for a child
}
}
// CRITICAL: Add padding to face crop (same as training)
val faceBitmap = loadFaceRegionWithPadding(uri, scaledBounds, sizeOpts.outWidth, sizeOpts.outHeight) ?: continue
val faceEmbedding = faceNetModel.generateEmbedding(faceBitmap)
faceBitmap.recycle()
// Match against ALL centroids, use best match
val bestSimilarity = modelCentroids.maxOfOrNull { centroid ->
// Match against target person's centroids
val targetSimilarity = modelCentroids.maxOfOrNull { centroid ->
faceNetModel.calculateSimilarity(faceEmbedding, centroid)
} ?: 0f
if (bestSimilarity >= threshold) {
// SIGNAL 1: Distribution-based rejection
// If similarity is below (mean - 2*stdDev) or (min - 0.05), it's a statistical outlier
if (targetSimilarity < distributionMin) {
continue // Too far below training distribution
}
// SIGNAL 3: Basic threshold check
if (targetSimilarity < effectiveThreshold) {
continue
}
// SIGNAL 4: "Best match wins" - check if any OTHER model scores higher
// This prevents tagging siblings/similar people incorrectly
val bestOtherSimilarity = otherModelCentroids.maxOfOrNull { (_, centroids) ->
centroids.maxOfOrNull { centroid ->
faceNetModel.calculateSimilarity(faceEmbedding, centroid)
} ?: 0f
} ?: 0f
val isTargetBestMatch = targetSimilarity > bestOtherSimilarity
// All signals must pass
if (isTargetBestMatch && targetSimilarity > bestMatchSimilarity) {
bestMatchSimilarity = targetSimilarity
foundMatch = true
}
}
// Only add ONE tag per image (the best match)
if (foundMatch) {
batchUpdateMutex.withLock {
batchMatches.add(Triple(personId, image.imageId, bestSimilarity))
batchMatches.add(Triple(personId, image.imageId, bestMatchSimilarity))
facesFound.incrementAndGet()
if (batchMatches.size >= BATCH_DB_SIZE) {
saveBatchMatches(batchMatches.toList(), faceModelId)
@@ -257,7 +322,7 @@ class PersonInventoryViewModel @Inject constructor(
}
}
}
}
detectionBitmap.recycle()
} catch (e: Exception) {
} finally {

View File

@@ -2,7 +2,9 @@ package com.placeholder.sherpai2.ui.rollingscan
import android.net.Uri
import androidx.compose.foundation.BorderStroke
import androidx.compose.foundation.ExperimentalFoundationApi
import androidx.compose.foundation.clickable
import androidx.compose.foundation.combinedClickable
import androidx.compose.foundation.layout.*
import androidx.compose.foundation.lazy.grid.GridCells
import androidx.compose.foundation.lazy.grid.GridItemSpan
@@ -37,7 +39,7 @@ import com.placeholder.sherpai2.domain.similarity.FaceSimilarityScorer
* - Quick action buttons (Select Top N)
* - Submit button with validation
*/
@OptIn(ExperimentalMaterial3Api::class)
@OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class)
@Composable
fun RollingScanScreen(
seedImageIds: List<String>,
@@ -48,6 +50,7 @@ fun RollingScanScreen(
) {
val uiState by viewModel.uiState.collectAsState()
val selectedImageIds by viewModel.selectedImageIds.collectAsState()
val negativeImageIds by viewModel.negativeImageIds.collectAsState()
val rankedPhotos by viewModel.rankedPhotos.collectAsState()
val isScanning by viewModel.isScanning.collectAsState()
@@ -70,6 +73,7 @@ fun RollingScanScreen(
isReadyForTraining = viewModel.isReadyForTraining(),
validationMessage = viewModel.getValidationMessage(),
onSelectTopN = { count -> viewModel.selectTopN(count) },
onSelectAboveThreshold = { threshold -> viewModel.selectAllAboveThreshold(threshold) },
onSubmit = {
val uris = viewModel.getSelectedImageUris()
onSubmitForTraining(uris)
@@ -93,8 +97,10 @@ fun RollingScanScreen(
RollingScanPhotoGrid(
rankedPhotos = rankedPhotos,
selectedImageIds = selectedImageIds,
negativeImageIds = negativeImageIds,
isScanning = isScanning,
onToggleSelection = { imageId -> viewModel.toggleSelection(imageId) },
onToggleNegative = { imageId -> viewModel.toggleNegative(imageId) },
modifier = Modifier.padding(padding)
)
}
@@ -159,19 +165,26 @@ private fun RollingScanTopBar(
}
// ═══════════════════════════════════════════════════════════
// PHOTO GRID
// PHOTO GRID - Similarity-based bucketing
// ═══════════════════════════════════════════════════════════
@OptIn(ExperimentalFoundationApi::class)
@Composable
private fun RollingScanPhotoGrid(
rankedPhotos: List<FaceSimilarityScorer.ScoredPhoto>,
selectedImageIds: Set<String>,
negativeImageIds: Set<String>,
isScanning: Boolean,
onToggleSelection: (String) -> Unit,
onToggleNegative: (String) -> Unit,
modifier: Modifier = Modifier
) {
Column(modifier = modifier.fillMaxSize()) {
// Bucket by similarity score. The ranges are half-open and contiguous
// ([0.60, ..), [0.45, 0.60), (.., 0.45)) so every photo lands in exactly one
// section; a closed upper bound such as 0.599 would drop scores like 0.5995.
val veryLikely = rankedPhotos.filter { it.finalScore >= 0.60f }
val probably = rankedPhotos.filter { it.finalScore >= 0.45f && it.finalScore < 0.60f }
val maybe = rankedPhotos.filter { it.finalScore < 0.45f }
Column(modifier = modifier.fillMaxSize()) {
// Scanning indicator
if (isScanning) {
LinearProgressIndicator(
@@ -180,69 +193,78 @@ private fun RollingScanPhotoGrid(
)
}
// Hint for negative marking
Text(
text = "Tap to select • Long-press to mark as NOT this person",
style = MaterialTheme.typography.bodySmall,
color = MaterialTheme.colorScheme.onSurfaceVariant,
modifier = Modifier.padding(horizontal = 12.dp, vertical = 4.dp)
)
LazyVerticalGrid(
columns = GridCells.Fixed(3),
contentPadding = PaddingValues(8.dp),
horizontalArrangement = Arrangement.spacedBy(8.dp),
verticalArrangement = Arrangement.spacedBy(8.dp)
) {
// Section: Most Similar (top 10)
val topMatches = rankedPhotos.take(10)
if (topMatches.isNotEmpty()) {
// Section: Very Likely (>60%)
if (veryLikely.isNotEmpty()) {
item(span = { GridItemSpan(3) }) {
SectionHeader(
icon = Icons.Default.Whatshot,
text = "🔥 Most Similar (${topMatches.size})",
color = MaterialTheme.colorScheme.primary
text = "🟢 Very Likely (${veryLikely.size})",
color = Color(0xFF4CAF50)
)
}
items(topMatches, key = { it.imageId }) { photo ->
items(veryLikely, key = { it.imageId }) { photo ->
PhotoCard(
photo = photo,
isSelected = photo.imageId in selectedImageIds,
isNegative = photo.imageId in negativeImageIds,
onToggle = { onToggleSelection(photo.imageId) },
onLongPress = { onToggleNegative(photo.imageId) },
showSimilarityBadge = true
)
}
}
// Section: Good Matches (11-30)
val goodMatches = rankedPhotos.drop(10).take(20)
if (goodMatches.isNotEmpty()) {
// Section: Probably (45-60%)
if (probably.isNotEmpty()) {
item(span = { GridItemSpan(3) }) {
SectionHeader(
icon = Icons.Default.CheckCircle,
text = "📊 Good Matches (${goodMatches.size})",
color = MaterialTheme.colorScheme.tertiary
text = "🟡 Probably (${probably.size})",
color = Color(0xFFFFC107)
)
}
items(goodMatches, key = { it.imageId }) { photo ->
items(probably, key = { it.imageId }) { photo ->
PhotoCard(
photo = photo,
isSelected = photo.imageId in selectedImageIds,
onToggle = { onToggleSelection(photo.imageId) }
isNegative = photo.imageId in negativeImageIds,
onToggle = { onToggleSelection(photo.imageId) },
onLongPress = { onToggleNegative(photo.imageId) },
showSimilarityBadge = true
)
}
}
// Section: Other Photos
val otherPhotos = rankedPhotos.drop(30)
if (otherPhotos.isNotEmpty()) {
// Section: Maybe (<45%)
if (maybe.isNotEmpty()) {
item(span = { GridItemSpan(3) }) {
SectionHeader(
icon = Icons.Default.Photo,
text = "📷 Other Photos (${otherPhotos.size})",
color = MaterialTheme.colorScheme.onSurfaceVariant
text = "🟠 Maybe (${maybe.size})",
color = Color(0xFFFF9800)
)
}
items(otherPhotos, key = { it.imageId }) { photo ->
items(maybe, key = { it.imageId }) { photo ->
PhotoCard(
photo = photo,
isSelected = photo.imageId in selectedImageIds,
onToggle = { onToggleSelection(photo.imageId) }
isNegative = photo.imageId in negativeImageIds,
onToggle = { onToggleSelection(photo.imageId) },
onLongPress = { onToggleNegative(photo.imageId) }
)
}
}
@@ -258,24 +280,34 @@ private fun RollingScanPhotoGrid(
}
// ═══════════════════════════════════════════════════════════
// PHOTO CARD
// PHOTO CARD - with long-press for negative marking
// ═══════════════════════════════════════════════════════════
@OptIn(ExperimentalFoundationApi::class)
@Composable
private fun PhotoCard(
photo: FaceSimilarityScorer.ScoredPhoto,
isSelected: Boolean,
isNegative: Boolean = false,
onToggle: () -> Unit,
onLongPress: () -> Unit = {},
showSimilarityBadge: Boolean = false
) {
val borderColor = when {
isNegative -> Color(0xFFE53935) // Red for negative
isSelected -> MaterialTheme.colorScheme.primary
else -> MaterialTheme.colorScheme.outline.copy(alpha = 0.3f)
}
val borderWidth = if (isSelected || isNegative) 3.dp else 1.dp
Card(
modifier = Modifier
.aspectRatio(1f)
.clickable(onClick = onToggle),
border = if (isSelected)
BorderStroke(3.dp, MaterialTheme.colorScheme.primary)
else
BorderStroke(1.dp, MaterialTheme.colorScheme.outline.copy(alpha = 0.3f)),
.combinedClickable(
onClick = onToggle,
onLongClick = onLongPress
),
border = BorderStroke(borderWidth, borderColor),
elevation = CardDefaults.cardElevation(
defaultElevation = if (isSelected) 4.dp else 1.dp
)
@@ -289,22 +321,47 @@ private fun PhotoCard(
contentScale = ContentScale.Crop
)
// Similarity badge (top-left) - Only for top matches
if (showSimilarityBadge) {
// Dim overlay for negatives
if (isNegative) {
Box(
modifier = Modifier
.fillMaxSize()
.padding(0.dp),
contentAlignment = Alignment.Center
) {
Surface(
modifier = Modifier.fillMaxSize(),
color = Color.Black.copy(alpha = 0.5f)
) {}
Icon(
Icons.Default.Close,
contentDescription = "Not this person",
tint = Color.White,
modifier = Modifier.size(32.dp)
)
}
}
// Similarity badge (top-left)
if (showSimilarityBadge && !isNegative) {
Surface(
modifier = Modifier
.align(Alignment.TopStart)
.padding(6.dp),
shape = RoundedCornerShape(8.dp),
color = MaterialTheme.colorScheme.primary,
color = when {
photo.finalScore >= 0.60f -> Color(0xFF4CAF50)
photo.finalScore >= 0.45f -> Color(0xFFFFC107)
else -> Color(0xFFFF9800)
},
shadowElevation = 4.dp
) {
Text(
text = "${(photo.similarityScore * 100).toInt()}%",
text = "${(photo.finalScore * 100).toInt()}%",
modifier = Modifier.padding(horizontal = 8.dp, vertical = 4.dp),
style = MaterialTheme.typography.labelSmall,
fontWeight = FontWeight.Bold,
color = MaterialTheme.colorScheme.onPrimary
color = Color.White
)
}
}
@@ -332,7 +389,7 @@ private fun PhotoCard(
}
// Face count badge (bottom-right)
if (photo.faceCount > 1) {
if (photo.faceCount > 1 && !isNegative) {
Surface(
modifier = Modifier
.align(Alignment.BottomEnd)
@@ -395,6 +452,7 @@ private fun RollingScanBottomBar(
isReadyForTraining: Boolean,
validationMessage: String?,
onSelectTopN: (Int) -> Unit,
onSelectAboveThreshold: (Float) -> Unit,
onSubmit: () -> Unit
) {
Surface(
@@ -416,30 +474,41 @@ private fun RollingScanBottomBar(
)
}
// First row: threshold selection
Row(
modifier = Modifier.fillMaxWidth(),
horizontalArrangement = Arrangement.spacedBy(8.dp)
horizontalArrangement = Arrangement.spacedBy(6.dp)
) {
// Quick select buttons
OutlinedButton(
onClick = { onSelectTopN(10) },
modifier = Modifier.weight(1f)
onClick = { onSelectAboveThreshold(0.60f) },
modifier = Modifier.weight(1f),
contentPadding = PaddingValues(horizontal = 8.dp, vertical = 4.dp)
) {
Text("Top 10")
Text(">60%", style = MaterialTheme.typography.labelSmall)
}
OutlinedButton(
onClick = { onSelectAboveThreshold(0.50f) },
modifier = Modifier.weight(1f),
contentPadding = PaddingValues(horizontal = 8.dp, vertical = 4.dp)
) {
Text(">50%", style = MaterialTheme.typography.labelSmall)
}
OutlinedButton(
onClick = { onSelectTopN(15) },
modifier = Modifier.weight(1f),
contentPadding = PaddingValues(horizontal = 8.dp, vertical = 4.dp)
) {
Text("Top 15", style = MaterialTheme.typography.labelSmall)
}
}
OutlinedButton(
onClick = { onSelectTopN(20) },
modifier = Modifier.weight(1f)
) {
Text("Top 20")
}
Spacer(Modifier.height(8.dp))
// Submit button
// Second row: submit
Button(
onClick = onSubmit,
enabled = isReadyForTraining,
modifier = Modifier.weight(1.5f)
modifier = Modifier.fillMaxWidth()
) {
Icon(
Icons.Default.Done,
@@ -447,8 +516,7 @@ private fun RollingScanBottomBar(
modifier = Modifier.size(18.dp)
)
Spacer(Modifier.width(8.dp))
Text("Train ($selectedCount)")
}
Text("Train Model ($selectedCount photos)")
}
}
}

View File

@@ -44,6 +44,11 @@ class RollingScanViewModel @Inject constructor(
private const val TAG = "RollingScanVM"
private const val DEBOUNCE_DELAY_MS = 300L
private const val MIN_PHOTOS_FOR_TRAINING = 15
// Progressive thresholds based on selection count
private const val FLOOR_FEW_SEEDS = 0.30f // 1-3 seeds
private const val FLOOR_MEDIUM_SEEDS = 0.40f // 4-10 seeds
private const val FLOOR_MANY_SEEDS = 0.50f // 10+ seeds
}
// ═══════════════════════════════════════════════════════════
@@ -71,6 +76,11 @@ class RollingScanViewModel @Inject constructor(
// Cache of selected embeddings
private val selectedEmbeddings = mutableListOf<FloatArray>()
// Negative embeddings (marked as "not this person")
private val _negativeImageIds = MutableStateFlow<Set<String>>(emptySet())
val negativeImageIds: StateFlow<Set<String>> = _negativeImageIds.asStateFlow()
private val negativeEmbeddings = mutableListOf<FloatArray>()
// All available image IDs
private var allImageIds: List<String> = emptyList()
@@ -156,24 +166,55 @@ class RollingScanViewModel @Inject constructor(
current.remove(imageId)
viewModelScope.launch {
// Remove embedding from cache
val cached = faceCacheDao.getEmbeddingByImageId(imageId)
cached?.getEmbedding()?.let { selectedEmbeddings.remove(it) }
}
} else {
// Select
// Select (and remove from negatives if present)
current.add(imageId)
if (imageId in _negativeImageIds.value) {
toggleNegative(imageId)
}
viewModelScope.launch {
// Add embedding to cache
val cached = faceCacheDao.getEmbeddingByImageId(imageId)
cached?.getEmbedding()?.let { selectedEmbeddings.add(it) }
}
}
_selectedImageIds.value = current
_selectedImageIds.value = current.toSet() // Immutable copy
scanDebouncer.debounce {
triggerRollingScan()
}
}
/**
* Toggle negative marking ("Not this person")
*/
fun toggleNegative(imageId: String) {
val current = _negativeImageIds.value.toMutableSet()
if (imageId in current) {
current.remove(imageId)
viewModelScope.launch {
val cached = faceCacheDao.getEmbeddingByImageId(imageId)
cached?.getEmbedding()?.let { negativeEmbeddings.remove(it) }
}
} else {
current.add(imageId)
// Remove from selected if present
if (imageId in _selectedImageIds.value) {
toggleSelection(imageId)
}
viewModelScope.launch {
val cached = faceCacheDao.getEmbeddingByImageId(imageId)
cached?.getEmbedding()?.let { negativeEmbeddings.add(it) }
}
}
_negativeImageIds.value = current.toSet() // Immutable copy
// Debounced rescan
scanDebouncer.debounce {
triggerRollingScan()
}
@@ -190,13 +231,33 @@ class RollingScanViewModel @Inject constructor(
val current = _selectedImageIds.value.toMutableSet()
current.addAll(topPhotos)
_selectedImageIds.value = current
_selectedImageIds.value = current.toSet() // Immutable copy
viewModelScope.launch {
// Add embeddings
val embeddings = faceCacheDao.getEmbeddingsForImages(topPhotos.toList())
selectedEmbeddings.addAll(embeddings.mapNotNull { it.getEmbedding() })
triggerRollingScan()
}
}
/**
 * Select all photos above a similarity threshold.
 *
 * Every ranked photo whose finalScore is at least [threshold] is added to the
 * current selection. Embeddings are then fetched only for the photos that were
 * not selected before, appended to the selected-embedding cache, and a rescan
 * is triggered.
 *
 * @param threshold inclusive lower bound on finalScore
 */
fun selectAllAboveThreshold(threshold: Float) {
    val photosAbove = _rankedPhotos.value
        .filter { it.finalScore >= threshold }
        .map { it.imageId }

    // Work out which IDs are genuinely new BEFORE publishing the updated selection;
    // computing this afterwards always yields an empty list, so no embeddings
    // would ever be added and the rescan would ignore the bulk selection.
    val previouslySelected = _selectedImageIds.value
    val newIds = photosAbove.filter { it !in previouslySelected }

    _selectedImageIds.value = previouslySelected + photosAbove // Immutable copy

    viewModelScope.launch {
        if (newIds.isNotEmpty()) {
            val embeddings = faceCacheDao.getEmbeddingsForImages(newIds)
            selectedEmbeddings.addAll(embeddings.mapNotNull { it.getEmbedding() })
        }
        triggerRollingScan()
    }
}
@@ -207,17 +268,24 @@ class RollingScanViewModel @Inject constructor(
fun clearSelection() {
_selectedImageIds.value = emptySet()
selectedEmbeddings.clear()
// Reset ranking
_rankedPhotos.value = emptyList()
}
/**
 * Drop every "not this person" marking.
 *
 * Empties both the published set of negative image IDs and the cached negative
 * embeddings, then schedules a debounced rescan so the ranking is recomputed.
 */
fun clearNegatives() {
    _negativeImageIds.value = emptySet()
    negativeEmbeddings.clear()
    scanDebouncer.debounce {
        triggerRollingScan()
    }
}
// ═══════════════════════════════════════════════════════════
// ROLLING SCAN LOGIC
// ═══════════════════════════════════════════════════════════
/**
* CORE: Trigger rolling similarity scan
* CORE: Trigger rolling similarity scan with progressive filtering
*/
private suspend fun triggerRollingScan() {
if (selectedEmbeddings.isEmpty()) {
@@ -228,7 +296,15 @@ class RollingScanViewModel @Inject constructor(
try {
_isScanning.value = true
Log.d(TAG, "Starting scan with ${selectedEmbeddings.size} selected embeddings")
val selectionCount = selectedEmbeddings.size
Log.d(TAG, "Starting scan with $selectionCount selected, ${negativeEmbeddings.size} negative")
// Progressive threshold based on selection count
val similarityFloor = when {
selectionCount <= 3 -> FLOOR_FEW_SEEDS
selectionCount <= 10 -> FLOOR_MEDIUM_SEEDS
else -> FLOOR_MANY_SEEDS
}
// Calculate centroid from selected embeddings
val centroid = faceSimilarityScorer.calculateCentroid(selectedEmbeddings)
@@ -240,17 +316,38 @@ class RollingScanViewModel @Inject constructor(
centroid = centroid
)
// Update image URIs in scored photos
val photosWithUris = scoredPhotos.map { photo ->
// Apply negative penalty, quality boost, and floor filter
val filteredPhotos = scoredPhotos
.map { photo ->
// Calculate max similarity to any negative embedding
val negativePenalty = if (negativeEmbeddings.isNotEmpty()) {
negativeEmbeddings.maxOfOrNull { neg ->
cosineSimilarity(photo.cachedEmbedding, neg)
} ?: 0f
} else 0f
// Quality multiplier: solo face, large face, good quality
val qualityMultiplier = 1f +
(if (photo.faceCount == 1) 0.15f else 0f) +
(if (photo.faceAreaRatio > 0.15f) 0.10f else 0f) +
(if (photo.qualityScore > 0.7f) 0.10f else 0f)
// Final score = (similarity - negativePenalty) * qualityMultiplier
val adjustedScore = ((photo.similarityScore - negativePenalty * 0.5f) * qualityMultiplier)
.coerceIn(0f, 1f)
photo.copy(
imageUri = imageUriCache[photo.imageId] ?: photo.imageId
imageUri = imageUriCache[photo.imageId] ?: photo.imageId,
finalScore = adjustedScore
)
}
.filter { it.finalScore >= similarityFloor } // Apply floor
.filter { it.imageId !in _negativeImageIds.value } // Hide negatives
.sortedByDescending { it.finalScore }
Log.d(TAG, "Scan complete. Scored ${photosWithUris.size} photos")
Log.d(TAG, "Scan complete. ${filteredPhotos.size} photos above floor $similarityFloor")
// Update ranked list
_rankedPhotos.value = photosWithUris
_rankedPhotos.value = filteredPhotos
} catch (e: Exception) {
Log.e(TAG, "Scan failed", e)
@@ -259,6 +356,19 @@ class RollingScanViewModel @Inject constructor(
}
}
/**
 * Cosine similarity of two vectors.
 *
 * Returns 0 when the arrays differ in length, and also unless both squared
 * norms are strictly positive (which covers empty and all-zero vectors).
 */
private fun cosineSimilarity(a: FloatArray, b: FloatArray): Float {
    if (a.size != b.size) return 0f

    var dotProduct = 0f
    var squaredNormA = 0f
    var squaredNormB = 0f
    a.forEachIndexed { index, x ->
        val y = b[index]
        dotProduct += x * y
        squaredNormA += x * x
        squaredNormB += y * y
    }

    if (squaredNormA > 0 && squaredNormB > 0) {
        return dotProduct / (kotlin.math.sqrt(squaredNormA) * kotlin.math.sqrt(squaredNormB))
    }
    return 0f
}
// ═══════════════════════════════════════════════════════════
// SUBMISSION
// ═══════════════════════════════════════════════════════════
@@ -299,9 +409,11 @@ class RollingScanViewModel @Inject constructor(
fun reset() {
_uiState.value = RollingScanState.Idle
_selectedImageIds.value = emptySet()
_negativeImageIds.value = emptySet()
_rankedPhotos.value = emptyList()
_isScanning.value = false
selectedEmbeddings.clear()
negativeEmbeddings.clear()
allImageIds = emptyList()
imageUriCache = emptyMap()
scanDebouncer.cancel()

View File

@@ -67,13 +67,14 @@ class FaceDetectionHelper(private val context: Context) {
val inputImage = InputImage.fromBitmap(bitmap, 0)
val faces = detector.process(inputImage).await()
// Filter to quality faces only
// Filter to quality faces - use lenient scanning filter
// (Discovery filter was too strict, rejecting faces from rolling scan)
val qualityFaces = faces.filter { face ->
FaceQualityFilter.validateForDiscovery(
FaceQualityFilter.validateForScanning(
face = face,
imageWidth = bitmap.width,
imageHeight = bitmap.height
).isValid
)
}
// Sort by face size (area) to get the largest quality face

View File

@@ -192,11 +192,10 @@ class TrainViewModel @Inject constructor(
.first()
if (backgroundTaggingEnabled) {
// Lower threshold (0.55) since we use multi-centroid matching
// Use default threshold (0.62 solo, 0.68 group)
val scanRequest = LibraryScanWorker.createWorkRequest(
personId = personId,
personName = personName,
threshold = 0.55f
personName = personName
)
workManager.enqueue(scanRequest)
}
@@ -382,7 +381,7 @@ class TrainViewModel @Inject constructor(
faceDetectionResults = updatedFaceResults,
validationErrors = updatedErrors,
validImagesWithFaces = updatedValidImages,
excludedImages = excludedImages
excludedImages = excludedImages.toSet() // Immutable copy for Compose state detection
)
}

View File

@@ -9,6 +9,7 @@ import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.face.FaceDetection
import com.google.mlkit.vision.face.FaceDetectorOptions
import com.placeholder.sherpai2.data.local.dao.FaceModelDao
import com.placeholder.sherpai2.data.local.dao.PersonDao
import com.placeholder.sherpai2.domain.clustering.FaceQualityFilter
import com.placeholder.sherpai2.ml.FaceNormalizer
import com.placeholder.sherpai2.data.local.dao.ImageDao
@@ -54,7 +55,8 @@ class LibraryScanWorker @AssistedInject constructor(
@Assisted workerParams: WorkerParameters,
private val imageDao: ImageDao,
private val faceModelDao: FaceModelDao,
private val photoFaceTagDao: PhotoFaceTagDao
private val photoFaceTagDao: PhotoFaceTagDao,
private val personDao: PersonDao
) : CoroutineWorker(context, workerParams) {
companion object {
@@ -67,7 +69,8 @@ class LibraryScanWorker @AssistedInject constructor(
const val KEY_MATCHES_FOUND = "matches_found"
const val KEY_PHOTOS_SCANNED = "photos_scanned"
private const val DEFAULT_THRESHOLD = 0.70f // Slightly looser than validation
private const val DEFAULT_THRESHOLD = 0.62f // Solo photos
private const val GROUP_THRESHOLD = 0.68f // Group photos (stricter)
private const val BATCH_SIZE = 20
private const val MAX_RETRIES = 3
@@ -139,21 +142,40 @@ class LibraryScanWorker @AssistedInject constructor(
)
}
// Step 2.5: Load person to check isChild flag
val person = withContext(Dispatchers.IO) {
personDao.getPersonById(personId)
}
val isChildTarget = person?.isChild ?: false
// Step 3: Initialize ML components
val faceNetModel = FaceNetModel(context)
val detector = FaceDetection.getClient(
FaceDetectorOptions.Builder()
.setPerformanceMode(FaceDetectorOptions.PERFORMANCE_MODE_ACCURATE)
.setLandmarkMode(FaceDetectorOptions.LANDMARK_MODE_ALL) // Needed for age estimation
.setMinFaceSize(0.15f)
.build()
)
// Distribution-based minimum threshold (self-calibrating)
val distributionMin = (faceModel.averageConfidence - 2 * faceModel.similarityStdDev)
.coerceAtLeast(faceModel.similarityMin - 0.05f)
.coerceAtLeast(0.50f) // Never go below 0.50 absolute floor
// Get ALL centroids for multi-centroid matching (critical for children)
val modelCentroids = faceModel.getCentroids().map { it.getEmbeddingArray() }
if (modelCentroids.isEmpty()) {
return@withContext Result.failure(workDataOf("error" to "No centroids in model"))
}
// Load ALL other models for "best match wins" comparison
// This prevents tagging siblings incorrectly
val allModels = withContext(Dispatchers.IO) { faceModelDao.getAllActiveFaceModels() }
val otherModelCentroids = allModels
.filter { it.id != faceModel.id }
.map { model -> model.id to model.getCentroids().map { it.getEmbeddingArray() } }
var matchesFound = 0
var photosScanned = 0
@@ -172,9 +194,12 @@ class LibraryScanWorker @AssistedInject constructor(
personId = personId,
faceModelId = faceModel.id,
modelCentroids = modelCentroids,
otherModelCentroids = otherModelCentroids,
faceNetModel = faceNetModel,
detector = detector,
threshold = threshold
threshold = threshold,
distributionMin = distributionMin,
isChildTarget = isChildTarget
)
if (tags.isNotEmpty()) {
@@ -236,9 +261,12 @@ class LibraryScanWorker @AssistedInject constructor(
personId: String,
faceModelId: String,
modelCentroids: List<FloatArray>,
otherModelCentroids: List<Pair<String, List<FloatArray>>>,
faceNetModel: FaceNetModel,
detector: com.google.mlkit.vision.face.FaceDetector,
threshold: Float
threshold: Float,
distributionMin: Float,
isChildTarget: Boolean
): List<PhotoFaceTagEntity> = withContext(Dispatchers.IO) {
try {
@@ -250,45 +278,94 @@ class LibraryScanWorker @AssistedInject constructor(
val inputImage = InputImage.fromBitmap(bitmap, 0)
val faces = detector.process(inputImage).await()
if (faces.isEmpty()) {
bitmap.recycle()
return@withContext emptyList()
}
// Use higher threshold for group photos
val isGroupPhoto = faces.size > 1
val effectiveThreshold = if (isGroupPhoto) GROUP_THRESHOLD else threshold
// Track best match (only tag ONE face per image to avoid false positives)
var bestMatch: PhotoFaceTagEntity? = null
var bestSimilarity = 0f
// Check each face (filter by quality first)
val tags = faces.mapNotNull { face ->
for (face in faces) {
// Quality check
if (!FaceQualityFilter.validateForScanning(face, bitmap.width, bitmap.height)) {
return@mapNotNull null
continue
}
// Skip very small faces
val faceArea = face.boundingBox.width() * face.boundingBox.height()
val imageArea = bitmap.width * bitmap.height
if (faceArea.toFloat() / imageArea < 0.02f) continue
// SIGNAL 2: Age plausibility check (if target is a child)
if (isChildTarget) {
val ageGroup = FaceQualityFilter.estimateAgeGroup(face, bitmap.width, bitmap.height)
if (ageGroup == FaceQualityFilter.AgeGroup.ADULT) {
continue // Reject clearly adult faces when searching for a child
}
}
try {
// Crop and normalize face for best recognition
val faceBitmap = FaceNormalizer.cropAndNormalize(bitmap, face)
?: return@mapNotNull null
?: continue
// Generate embedding
val faceEmbedding = faceNetModel.generateEmbedding(faceBitmap)
faceBitmap.recycle()
// Match against ALL centroids, use best match (critical for children)
val similarity = modelCentroids.maxOfOrNull { centroid ->
// Match against target person's centroids
val targetSimilarity = modelCentroids.maxOfOrNull { centroid ->
faceNetModel.calculateSimilarity(faceEmbedding, centroid)
} ?: 0f
if (similarity >= threshold) {
PhotoFaceTagEntity.create(
// SIGNAL 1: Distribution-based rejection
// If similarity is below (mean - 2*stdDev) or (min - 0.05), it's a statistical outlier
if (targetSimilarity < distributionMin) {
continue // Too far below training distribution
}
// SIGNAL 3: Basic threshold check
if (targetSimilarity < effectiveThreshold) {
continue
}
// SIGNAL 4: "Best match wins" - check if any OTHER model scores higher
// This prevents tagging siblings incorrectly
val bestOtherSimilarity = otherModelCentroids.maxOfOrNull { (_, centroids) ->
centroids.maxOfOrNull { centroid ->
faceNetModel.calculateSimilarity(faceEmbedding, centroid)
} ?: 0f
} ?: 0f
val isTargetBestMatch = targetSimilarity > bestOtherSimilarity
// All signals must pass
if (isTargetBestMatch && targetSimilarity > bestSimilarity) {
bestSimilarity = targetSimilarity
bestMatch = PhotoFaceTagEntity.create(
imageId = photo.imageId,
faceModelId = faceModelId,
boundingBox = face.boundingBox,
confidence = similarity,
confidence = targetSimilarity,
faceEmbedding = faceEmbedding
)
} else {
null
}
} catch (e: Exception) {
null
// Skip this face
}
}
bitmap.recycle()
tags
// Return only the best match (or empty)
if (bestMatch != null) listOf(bestMatch) else emptyList()
} catch (e: Exception) {
emptyList()