27 Eclat(
double min_support);
34 std::vector<std::vector<int>>
run(
const std::vector<std::vector<int>>& transactions);
49 void eclat_recursive(
const std::vector<int>& prefix,
50 const std::vector<int>& items,
51 const std::map<
int, std::vector<int>>& tid_sets);
54 int min_support_count;
55 int total_transactions;
56 std::map<std::vector<int>,
int> support_counts;
60 : min_support(min_support), min_support_count(0), total_transactions(0) {
61 if (min_support <= 0.0 || min_support > 1.0) {
62 throw std::invalid_argument(
"min_support must be between 0 and 1.");
66 std::vector<std::vector<int>>
Eclat::run(
const std::vector<std::vector<int>>& transactions) {
67 total_transactions =
static_cast<int>(transactions.size());
68 min_support_count =
static_cast<int>(std::ceil(min_support * total_transactions));
71 std::map<int, std::vector<int>> item_tidsets;
72 for (
int tid = 0; tid < total_transactions; ++tid) {
73 for (
int item : transactions[tid]) {
74 item_tidsets[item].push_back(tid);
79 for (
auto& [item, tids] : item_tidsets) {
80 std::sort(tids.begin(), tids.end());
84 std::vector<int> frequent_items;
85 for (
const auto& [item, tidset] : item_tidsets) {
86 if (
static_cast<int>(tidset.size()) >= min_support_count) {
87 frequent_items.push_back(item);
92 std::sort(frequent_items.begin(), frequent_items.end());
95 for (
int item : frequent_items) {
96 std::vector<int> itemset = {item};
97 support_counts[itemset] =
static_cast<int>(item_tidsets[item].size());
101 eclat_recursive({}, frequent_items, item_tidsets);
104 std::vector<std::vector<int>> frequent_itemsets;
105 for (
const auto& [itemset, count] : support_counts) {
106 if (count >= min_support_count) {
107 frequent_itemsets.push_back(itemset);
111 return frequent_itemsets;
114 void Eclat::eclat_recursive(
const std::vector<int>& prefix,
115 const std::vector<int>& items,
116 const std::map<
int, std::vector<int>>& tid_sets) {
117 size_t n = items.size();
118 for (
size_t i = 0; i < n; ++i) {
120 std::vector<int> new_prefix = prefix;
121 new_prefix.push_back(item);
124 int support =
static_cast<int>(tid_sets.at(item).size());
125 support_counts[new_prefix] = support;
128 std::vector<int> remaining_items;
129 std::map<int, std::vector<int>> new_tid_sets;
131 for (
size_t j = i + 1; j < n; ++j) {
132 int next_item = items[j];
135 std::vector<int> intersect_tid_set;
136 const auto& tid_set1 = tid_sets.at(item);
137 const auto& tid_set2 = tid_sets.at(next_item);
138 std::set_intersection(tid_set1.begin(), tid_set1.end(),
139 tid_set2.begin(), tid_set2.end(),
140 std::back_inserter(intersect_tid_set));
142 if (
static_cast<int>(intersect_tid_set.size()) >= min_support_count) {
143 remaining_items.push_back(next_item);
144 new_tid_sets[next_item] = std::move(intersect_tid_set);
149 if (!remaining_items.empty()) {
150 eclat_recursive(new_prefix, remaining_items, new_tid_sets);
156 return support_counts;
Class to perform frequent itemset mining using the Eclat algorithm.
Definition: Eclat.hpp:21
std::map< std::vector< int >, int > get_support_counts() const
Gets the support counts for all frequent itemsets found.
Definition: Eclat.hpp:155
std::vector< std::vector< int > > run(const std::vector< std::vector< int >> &transactions)
Runs the Eclat algorithm on the provided dataset.
Definition: Eclat.hpp:66
Eclat(double min_support)
Constructor for the Eclat class.
Definition: Eclat.hpp:59