1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
def qini_curve(y_true, uplift, treatment):
    """Compute the points of the Qini curve.

    Samples are ranked by predicted uplift in descending order. At every
    cut-off that separates two distinct uplift values (and at the last
    sample) the curve value is the cumulative outcome of the treatment group
    minus the cumulative outcome of the control group rescaled by the
    treatment-to-control size ratio observed up to that cut-off.

    For computing the area under the Qini Curve, see :func:`.qini_auc_score`.

    Args:
        y_true (1d array-like): Correct (true) binary target values.
        uplift (1d array-like): Predicted uplift, as returned by a model.
        treatment (1d array-like): Treatment labels.

    Returns:
        array (shape = [>2]), array (shape = [>2]): Points on a curve. The
        first array holds the number of top-ranked samples at each cut-off,
        the second one the Qini value at that cut-off.

    See also:
        :func:`.qini_auc_score`: Compute the area under the Qini curve.

        :func:`.perfect_qini_curve`: Compute the perfect Qini curve.

        :func:`.plot_qini_curves`: Plot Qini curves from predictions.

        :func:`.uplift_curve`: Compute Uplift curve.

    References:
        Nicholas J Radcliffe. (2007). Using control groups to target on
        predicted lift: Building and assessing uplift model. Direct Marketing
        Analytics Journal, (3):14–21, 2007.

        Devriendt, F., Guns, T., & Verbeke, W. (2020). Learning to rank for
        uplift modeling. ArXiv, abs/2002.05897.
    """
    check_consistent_length(y_true, uplift, treatment)
    check_is_binary(treatment)
    check_is_binary(y_true)

    y_true = np.array(y_true)
    uplift = np.array(uplift)
    treatment = np.array(treatment)

    # mergesort is stable, so the reversed order is deterministic for ties.
    ranking = np.argsort(uplift, kind="mergesort")[::-1]
    ranked_y = y_true[ranking]
    ranked_treatment = treatment[ranking]
    ranked_uplift = uplift[ranking]

    # Per-group outcome vectors: the other group's entries are zeroed so that
    # a cumulative sum only accumulates outcomes of the group of interest.
    ctrl_outcomes = ranked_y.copy()
    ctrl_outcomes[ranked_treatment == 1] = 0
    trmnt_outcomes = ranked_y.copy()
    trmnt_outcomes[ranked_treatment == 0] = 0

    # Evaluate the curve only where the ranked uplift changes value, plus at
    # the very last sample.
    change_points = np.nonzero(np.diff(ranked_uplift))[0]
    cutoffs = np.r_[change_points, ranked_uplift.size - 1]

    n_trmnt = stable_cumsum(ranked_treatment)[cutoffs]
    gain_trmnt = stable_cumsum(trmnt_outcomes)[cutoffs]
    n_total = cutoffs + 1
    n_ctrl = n_total - n_trmnt
    gain_ctrl = stable_cumsum(ctrl_outcomes)[cutoffs]

    # The ratio stays 0 wherever no control sample has been seen yet, which
    # avoids dividing by zero.
    size_ratio = np.divide(
        n_trmnt, n_ctrl, out=np.zeros_like(n_trmnt), where=n_ctrl != 0
    )
    qini_values = gain_trmnt - gain_ctrl * size_ratio

    # Prepend the origin unless the curve already starts at (0, 0).
    starts_at_origin = (
        n_total.size != 0 and qini_values[0] == 0 and n_total[0] == 0
    )
    if not starts_at_origin:
        n_total = np.r_[0, n_total]
        qini_values = np.r_[0, qini_values]

    return n_total, qini_values
def perfect_qini_curve(y_true, treatment, negative_effect=True):
    """Compute the perfect (optimum) Qini curve.

    With ``negative_effect=True`` the curve is obtained by feeding
    :func:`.qini_curve` an ideal score built from ``y_true`` and
    ``treatment``. With ``negative_effect=False`` the curve is the
    three-point polyline ``(0, 0) -> (r, r) -> (n_samples, r)``, where ``r``
    is the treatment-group outcome sum minus the control-group outcome sum
    rescaled by the treatment-to-control size ratio.

    For computing the area under the Qini Curve, see :func:`.qini_auc_score`.

    Args:
        y_true (1d array-like): Correct (true) binary target values.
        treatment (1d array-like): Treatment labels.
        negative_effect (bool): If True, optimum Qini Curve contains the
            negative effects (negative uplift because of campaign).
            Otherwise, optimum Qini Curve will not contain the negative
            effects.

    Returns:
        array (shape = [>2]), array (shape = [>2]): Points on a curve.

    Raises:
        TypeError: If ``negative_effect`` is not a ``bool``.

    See also:
        :func:`.qini_curve`: Compute Qini curve.

        :func:`.qini_auc_score`: Compute the area under the Qini curve.

        :func:`.plot_qini_curves`: Plot Qini curves from predictions.
    """
    check_consistent_length(y_true, treatment)
    check_is_binary(treatment)
    check_is_binary(y_true)

    n_samples = len(y_true)
    y_true = np.array(y_true)
    treatment = np.array(treatment)

    if not isinstance(negative_effect, bool):
        raise TypeError(f'Negative_effects flag should be bool, got: {type(negative_effect)}')

    if negative_effect:
        # Ideal ranking score for 0/1 inputs: +1 for a treated responder,
        # -1 for a control responder and 0 for everybody else.
        treated_part = y_true * treatment
        control_part = y_true * (1 - treatment)
        x_perfect, y_perfect = qini_curve(
            y_true, treated_part - control_part, treatment
        )
        return x_perfect, y_perfect

    treated_outcomes = y_true[treatment == 1]
    control_outcomes = y_true[treatment == 0]
    ratio_random = (
        treated_outcomes.sum()
        - len(treated_outcomes) * control_outcomes.sum() / len(control_outcomes)
    )

    x_perfect = np.array([0, ratio_random, n_samples])
    y_perfect = np.array([0, ratio_random, ratio_random])
    return x_perfect, y_perfect
def qini_auc_score(y_true, uplift, treatment, negative_effect=True):
    """Compute normalized Area Under the Qini curve (aka Qini coefficient) from prediction scores.

    By computing the area under the Qini curve, the curve information is
    summarized in one number. The score is the area between the model's Qini
    curve and the baseline, divided by the area between the perfect Qini
    curve and the same baseline. The baseline is the straight line from the
    origin to the last point of the perfect curve.

    Args:
        y_true (1d array-like): Correct (true) binary target values.
        uplift (1d array-like): Predicted uplift, as returned by a model.
        treatment (1d array-like): Treatment labels.
        negative_effect (bool): If True, optimum Qini Curve contains the
            negative effects (negative uplift because of campaign).
            Otherwise, optimum Qini Curve will not contain the negative
            effects.

            .. versionadded:: 0.2.0

    Returns:
        float: Qini coefficient.

    Raises:
        TypeError: If ``negative_effect`` is not a ``bool``.

    See also:
        :func:`.qini_curve`: Compute Qini curve.

        :func:`.perfect_qini_curve`: Compute the perfect (optimum) Qini curve.

        :func:`.plot_qini_curves`: Plot Qini curves from predictions.

        :func:`.uplift_auc_score`: Compute normalized Area Under the Uplift
        curve from prediction scores.

    References:
        Nicholas J Radcliffe. (2007). Using control groups to target on
        predicted lift: Building and assessing uplift model. Direct Marketing
        Analytics Journal, (3):14–21, 2007.
    """
    check_consistent_length(y_true, uplift, treatment)
    check_is_binary(treatment)
    check_is_binary(y_true)

    y_true = np.array(y_true)
    uplift = np.array(uplift)
    treatment = np.array(treatment)

    if not isinstance(negative_effect, bool):
        raise TypeError(f'Negative_effects flag should be bool, got: {type(negative_effect)}')

    model_x, model_y = qini_curve(y_true, uplift, treatment)
    ideal_x, ideal_y = perfect_qini_curve(y_true, treatment, negative_effect)

    # Baseline: straight line from the origin to the end of the perfect curve.
    random_x = np.array([0, ideal_x[-1]])
    random_y = np.array([0, ideal_y[-1]])

    baseline_area = auc(random_x, random_y)
    ideal_gain = auc(ideal_x, ideal_y) - baseline_area
    model_gain = auc(model_x, model_y) - baseline_area

    return model_gain / ideal_gain
|