2 changes: 0 additions & 2 deletions causalml/inference/tree/_tree/_criterion.pxd
@@ -24,7 +24,6 @@ cdef class Criterion:

# Internal structures
cdef const float64_t[:, ::1] y # Values of y
-cdef const int32_t[:] treatment # Treatment assignment: 1 for treatment, 0 for control
cdef const float64_t[:] sample_weight # Sample weights

cdef const intp_t[:] sample_indices # Sample indices in X, y
@@ -50,7 +49,6 @@ cdef class Criterion:
cdef int init(
self,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
float64_t weighted_n_samples,
const intp_t[:] sample_indices,
4 changes: 0 additions & 4 deletions causalml/inference/tree/_tree/_criterion.pyx
@@ -49,7 +49,6 @@ cdef class Criterion:
cdef int init(
self,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
float64_t weighted_n_samples,
const intp_t[:] sample_indices,
@@ -357,7 +356,6 @@ cdef class ClassificationCriterion(Criterion):
cdef int init(
self,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
float64_t weighted_n_samples,
const intp_t[:] sample_indices,
@@ -871,7 +869,6 @@ cdef class RegressionCriterion(Criterion):
cdef int init(
self,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
float64_t weighted_n_samples,
const intp_t[:] sample_indices,
@@ -1250,7 +1247,6 @@ cdef class MAE(RegressionCriterion):
cdef int init(
self,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
float64_t weighted_n_samples,
const intp_t[:] sample_indices,
2 changes: 0 additions & 2 deletions causalml/inference/tree/_tree/_splitter.pxd
@@ -70,7 +70,6 @@ cdef class Splitter:
# +1: monotonic increase
cdef const int8_t[:] monotonic_cst
cdef bint with_monotonic_cst
-cdef const int32_t[:] treatment
cdef const float64_t[:] sample_weight

# The samples vector `samples` is maintained by the Splitter object such
@@ -94,7 +93,6 @@
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
const unsigned char[::1] missing_values_in_feature_mask,
) except -1
18 changes: 4 additions & 14 deletions causalml/inference/tree/_tree/_splitter.pyx
@@ -125,7 +125,6 @@ cdef class Splitter:
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
const unsigned char[::1] missing_values_in_feature_mask,
) except -1:
@@ -145,9 +144,6 @@
This is the vector of targets, or true labels, for the samples represented
as a Cython memoryview.

-treatment : ndarray, dtype=int32_t
-    The treatment labels for each sample, represented as a Cython memoryview.

sample_weight : ndarray, dtype=float64_t
The weights of the samples, where higher weighted samples are fit
closer than lower weight samples. If not provided, all samples
@@ -194,7 +190,6 @@

self.y = y

-self.treatment = treatment
self.sample_weight = sample_weight
if missing_values_in_feature_mask is not None:
self.criterion.init_sum_missing()
@@ -226,7 +221,6 @@

self.criterion.init(
self.y,
-self.treatment,
self.sample_weight,
self.weighted_n_samples,
self.samples,
@@ -1515,11 +1509,10 @@ cdef class BestSplitter(Splitter):
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
const unsigned char[::1] missing_values_in_feature_mask,
) except -1:
-Splitter.init(self, X, y, treatment, sample_weight, missing_values_in_feature_mask)
+Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
self.partitioner = DensePartitioner(
X, self.samples, self.feature_values, missing_values_in_feature_mask
)
@@ -1546,11 +1539,10 @@ cdef class BestSparseSplitter(Splitter):
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
const unsigned char[::1] missing_values_in_feature_mask,
) except -1:
-Splitter.init(self, X, y, treatment, sample_weight, missing_values_in_feature_mask)
+Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
self.partitioner = SparsePartitioner(
X, self.samples, self.n_samples, self.feature_values, missing_values_in_feature_mask
)
@@ -1577,11 +1569,10 @@ cdef class RandomSplitter(Splitter):
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
const unsigned char[::1] missing_values_in_feature_mask,
) except -1:
-Splitter.init(self, X, y, treatment, sample_weight, missing_values_in_feature_mask)
+Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
self.partitioner = DensePartitioner(
X, self.samples, self.feature_values, missing_values_in_feature_mask
)
@@ -1608,11 +1599,10 @@ cdef class RandomSparseSplitter(Splitter):
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
const unsigned char[::1] missing_values_in_feature_mask,
) except -1:
-Splitter.init(self, X, y, treatment, sample_weight, missing_values_in_feature_mask)
+Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
self.partitioner = SparsePartitioner(
X, self.samples, self.n_samples, self.feature_values, missing_values_in_feature_mask
)
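To make the net effect of this file concrete: after the change, the splitter hands only `y` and `sample_weight` (plus the weighted-sample bookkeeping) to its criterion; no per-sample treatment memoryview is threaded through any longer. The sketch below is a pure-Python stand-in under that assumption, not the actual Cython classes — `ToyCriterion` and `ToySplitter` are illustrative names only.

```python
import numpy as np

class ToyCriterion:
    """Illustrative stand-in for Criterion: init() no longer receives a treatment array."""
    def init(self, y, sample_weight, weighted_n_samples, sample_indices):
        self.y = y
        self.sample_weight = sample_weight
        self.weighted_n_samples = weighted_n_samples
        self.sample_indices = sample_indices

class ToySplitter:
    """Illustrative stand-in for Splitter after the treatment argument was dropped."""
    def __init__(self, criterion):
        self.criterion = criterion

    def init(self, X, y, sample_weight=None, missing_values_in_feature_mask=None):
        n_samples = X.shape[0]
        self.samples = np.arange(n_samples, dtype=np.intp)
        weights = np.ones(n_samples) if sample_weight is None else np.asarray(sample_weight)
        self.weighted_n_samples = float(weights.sum())
        # Only y and the sample weights are forwarded to the criterion now.
        self.criterion.init(y, weights, self.weighted_n_samples, self.samples)

# Usage sketch
rng = np.random.default_rng(0)
X = rng.random((8, 3))
y = rng.random((8, 1))
ToySplitter(ToyCriterion()).init(X, y)
```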
10 changes: 4 additions & 6 deletions causalml/inference/tree/_tree/_tree.pxd
@@ -106,20 +106,19 @@ cdef class TreeBuilder:
# This class controls the various stopping criteria and the node splitting
# evaluation order, e.g. depth-first or best-first.

cdef Splitter splitter # Splitting algorithm

cdef intp_t min_samples_split # Minimum number of samples in an internal node
cdef intp_t min_samples_leaf # Minimum number of samples in a leaf
cdef float64_t min_weight_leaf # Minimum weight in a leaf
cdef intp_t max_depth # Maximal tree depth
cdef float64_t min_impurity_decrease # Impurity threshold for early stopping

cpdef build(
self,
Tree tree,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight=*,
const unsigned char[::1] missing_values_in_feature_mask=*,
)
@@ -128,7 +127,6 @@
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
)

23 changes: 9 additions & 14 deletions causalml/inference/tree/_tree/_tree.pyx
@@ -106,7 +106,6 @@ cdef class TreeBuilder:
Tree tree,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight=None,
const unsigned char[::1] missing_values_in_feature_mask=None,
):
@@ -117,7 +116,6 @@
self,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight,
):
"""Check input dtype, layout and format"""
@@ -142,9 +140,6 @@
if y.base.dtype != DOUBLE or not y.base.flags.contiguous:
y = np.ascontiguousarray(y, dtype=DOUBLE)

-if treatment.base.dtype != INT or not treatment.base.flags.contiguous:
-    treatment = np.ascontiguousarray(treatment, dtype=INT)

if (
sample_weight is not None and
(
@@ -154,15 +149,16 @@
):
sample_weight = np.asarray(sample_weight, dtype=DOUBLE, order="C")

-return X, y, treatment, sample_weight
+return X, y, sample_weight

# Depth first builder ---------------------------------------------------------
cdef class DepthFirstTreeBuilder(TreeBuilder):
"""Build a decision tree in depth-first fashion."""

def __cinit__(self, Splitter splitter, intp_t min_samples_split,
intp_t min_samples_leaf, float64_t min_weight_leaf,
-intp_t max_depth, float64_t min_impurity_decrease):
+intp_t max_depth, float64_t min_impurity_decrease,
+*args, **kwargs):
self.splitter = splitter
self.min_samples_split = min_samples_split
self.min_samples_leaf = min_samples_leaf
@@ -175,14 +171,13 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
Tree tree,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight=None,
const unsigned char[::1] missing_values_in_feature_mask=None,
):
"""Build a decision tree from the training set (X, y)."""

# check input
-X, y, treatment, sample_weight = self._check_input(X, y, treatment, sample_weight)
+X, y, sample_weight = self._check_input(X, y, sample_weight)

# Initial capacity
cdef intp_t init_capacity
@@ -203,7 +198,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
cdef float64_t min_impurity_decrease = self.min_impurity_decrease

# Recursive partition (without actual recursion)
-splitter.init(X, y, treatment, sample_weight, missing_values_in_feature_mask)
+splitter.init(X, y, sample_weight, missing_values_in_feature_mask)

cdef intp_t start
cdef intp_t end
@@ -399,7 +394,8 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
def __cinit__(self, Splitter splitter, intp_t min_samples_split,
intp_t min_samples_leaf, min_weight_leaf,
intp_t max_depth, intp_t max_leaf_nodes,
-float64_t min_impurity_decrease):
+float64_t min_impurity_decrease,
+*args, **kwargs):
self.splitter = splitter
self.min_samples_split = min_samples_split
self.min_samples_leaf = min_samples_leaf
@@ -413,21 +409,20 @@
Tree tree,
object X,
const float64_t[:, ::1] y,
-const int32_t[:] treatment,
const float64_t[:] sample_weight=None,
const unsigned char[::1] missing_values_in_feature_mask=None,
):
"""Build a decision tree from the training set (X, y)."""

# check input
-X, y, treatment, sample_weight = self._check_input(X, y, treatment, sample_weight)
+X, y, sample_weight = self._check_input(X, y, sample_weight)

# Parameters
cdef Splitter splitter = self.splitter
cdef intp_t max_leaf_nodes = self.max_leaf_nodes

# Recursive partition (without actual recursion)
-splitter.init(X, y, treatment, sample_weight, missing_values_in_feature_mask)
+splitter.init(X, y, sample_weight, missing_values_in_feature_mask)

cdef vector[FrontierRecord] frontier
cdef FrontierRecord record
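One addition in this file worth a note: both builders' `__cinit__` methods gain trailing `*args, **kwargs`. In Cython, every `__cinit__` in an extension-type hierarchy is invoked with the arguments passed to the subclass constructor, so the padded signature lets a derived builder accept extra constructor parameters without the base `__cinit__` raising a TypeError. A rough pure-Python analogue of that pattern follows; the subclass, its `alpha` parameter, and the stub argument values are illustrative only.

```python
class BaseBuilder:
    # Analogue of the padded __cinit__ signatures: trailing *args/**kwargs
    # absorb extra constructor arguments defined by subclasses instead of
    # raising a TypeError.
    def __init__(self, splitter, min_samples_split, min_samples_leaf,
                 min_weight_leaf, max_depth, min_impurity_decrease,
                 *args, **kwargs):
        self.splitter = splitter
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_leaf = min_weight_leaf
        self.max_depth = max_depth
        self.min_impurity_decrease = min_impurity_decrease

class HypotheticalCausalBuilder(BaseBuilder):
    # Hypothetical subclass with one extra keyword parameter; it forwards its
    # full argument list to the base, mirroring how Cython hands the
    # constructor arguments to every __cinit__ in the hierarchy.
    def __init__(self, *args, alpha=0.5, **kwargs):
        super().__init__(*args, **kwargs)
        self.alpha = alpha

builder = HypotheticalCausalBuilder("splitter_stub", 2, 1, 0.0, 3, 0.0, alpha=0.9)
print(builder.max_depth, builder.alpha)  # -> 3 0.9
```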
2 changes: 1 addition & 1 deletion causalml/inference/tree/causal/_builder.pxd
@@ -6,6 +6,6 @@

from .._tree._tree cimport Node, Tree, TreeBuilder
from .._tree._splitter cimport Splitter, SplitRecord
-from .._tree._tree cimport intp_t, int32_t, float64_t
+from .._tree._typedefs cimport intp_t, int32_t, int64_t, float32_t, float64_t
from .._tree._tree cimport FrontierRecord, StackRecord
from .._tree._tree cimport ParentInfo, _init_parent_record