Fast Nonparametric Estimation of Class Proportions in the Positive-Unlabeled Classification Setting

Install

# Create and activate an isolated Python 3.9 environment for the package.
conda create -n dist_curve_env python=3.9

conda activate dist_curve_env

# Clone over HTTPS so the command also works without a GitHub SSH key configured.
git clone https://github.com/Dzeiberg/dist_curve.git

# Install the cloned checkout in editable (development) mode.
python -m pip install -e dist_curve

Make Curve

import numpy as np
from dist_curve.curve_constructor import makeCurve, plotCurve

# Parameters of the synthetic example.
alpha = 0.4    # fraction of the mixture sample drawn from the same distribution as the positives
posSize = 100  # number of instances in the positive sample
mixSize = 500  # number of instances in the mixture sample
dim = 1        # number of features per instance

# Sizes of the two mixture components. The second count is derived from the
# first so that the two always add up to exactly mixSize; truncating
# mixSize * alpha and mixSize * (1 - alpha) independently can drop an instance
# when those products are not whole numbers.
numMixPos = int(mixSize * alpha)
numMixOther = mixSize - numMixPos

# Positive sample: posSize draws from a normal distribution with mean 1, std 1.
posInstances = np.random.normal(loc=1, scale=1, size=(posSize, dim))

# Mixture sample: numMixPos draws from the positive distribution (mean 1)
# stacked on top of numMixOther draws from a second component (mean 3).
mixInstances = np.concatenate(
    (np.random.normal(loc=1, scale=1, size=(numMixPos, dim)),
     np.random.normal(loc=3, scale=1, size=(numMixOther, dim))),
    axis=0)

# Build the curve from the positive and mixture samples generated above.
curve = makeCurve(posInstances, mixInstances)

# Display the resulting curve.
plotCurve(curve)

Estimate Class Prior

from dist_curve.model import getTrainedEstimator
# Path to the saved estimator file; point this at your own copy of model.hdf5.
pathToModel = "/data/dzeiberg/ClassPriorEstimation/model.hdf5"
model = getTrainedEstimator(pathToModel)

# Flatten the curve into a single row and divide by its sum so the entries
# add up to 1, then pass that row to the estimator.
normalizedCurve = curve.reshape((1, -1)) / curve.sum()
model.predict(normalizedCurve)