Cpp ML Library  1.0.0
A library of Machine Learning Algorithmns seen from the Udemy course Machine Learning A to Z.
Eclat.hpp
Go to the documentation of this file.
1 #ifndef ECLAT_HPP
2 #define ECLAT_HPP
3 
4 #include <map>
5 #include <vector>
6 #include <algorithm>
7 #include <iostream>
8 #include <string>
9 #include <cmath>
10 #include <stdexcept>
11 
21 class Eclat {
22 public:
27  Eclat(double min_support);
28 
34  std::vector<std::vector<int>> run(const std::vector<std::vector<int>>& transactions);
35 
40  std::map<std::vector<int>, int> get_support_counts() const;
41 
42 private:
49  void eclat_recursive(const std::vector<int>& prefix,
50  const std::vector<int>& items,
51  const std::map<int, std::vector<int>>& tid_sets);
52 
53  double min_support;
54  int min_support_count;
55  int total_transactions;
56  std::map<std::vector<int>, int> support_counts;
57 };
58 
59 Eclat::Eclat(double min_support)
60  : min_support(min_support), min_support_count(0), total_transactions(0) {
61  if (min_support <= 0.0 || min_support > 1.0) {
62  throw std::invalid_argument("min_support must be between 0 and 1.");
63  }
64 }
65 
66 std::vector<std::vector<int>> Eclat::run(const std::vector<std::vector<int>>& transactions) {
67  total_transactions = static_cast<int>(transactions.size());
68  min_support_count = static_cast<int>(std::ceil(min_support * total_transactions));
69 
70  // Map each item to its TID vector
71  std::map<int, std::vector<int>> item_tidsets;
72  for (int tid = 0; tid < total_transactions; ++tid) {
73  for (int item : transactions[tid]) {
74  item_tidsets[item].push_back(tid);
75  }
76  }
77 
78  // Sort TID vectors
79  for (auto& [item, tids] : item_tidsets) {
80  std::sort(tids.begin(), tids.end());
81  }
82 
83  // Filter items that meet the minimum support
84  std::vector<int> frequent_items;
85  for (const auto& [item, tidset] : item_tidsets) {
86  if (static_cast<int>(tidset.size()) >= min_support_count) {
87  frequent_items.push_back(item);
88  }
89  }
90 
91  // Sort items for consistent order
92  std::sort(frequent_items.begin(), frequent_items.end());
93 
94  // Initialize support counts for single items
95  for (int item : frequent_items) {
96  std::vector<int> itemset = {item};
97  support_counts[itemset] = static_cast<int>(item_tidsets[item].size());
98  }
99 
100  // Start recursive mining
101  eclat_recursive({}, frequent_items, item_tidsets);
102 
103  // Collect frequent itemsets from support counts
104  std::vector<std::vector<int>> frequent_itemsets;
105  for (const auto& [itemset, count] : support_counts) {
106  if (count >= min_support_count) {
107  frequent_itemsets.push_back(itemset);
108  }
109  }
110 
111  return frequent_itemsets;
112 }
113 
114 void Eclat::eclat_recursive(const std::vector<int>& prefix,
115  const std::vector<int>& items,
116  const std::map<int, std::vector<int>>& tid_sets) {
117  size_t n = items.size();
118  for (size_t i = 0; i < n; ++i) {
119  int item = items[i];
120  std::vector<int> new_prefix = prefix;
121  new_prefix.push_back(item);
122 
123  // Update support counts
124  int support = static_cast<int>(tid_sets.at(item).size());
125  support_counts[new_prefix] = support;
126 
127  // Generate new combinations
128  std::vector<int> remaining_items;
129  std::map<int, std::vector<int>> new_tid_sets;
130 
131  for (size_t j = i + 1; j < n; ++j) {
132  int next_item = items[j];
133 
134  // Intersect TID sets
135  std::vector<int> intersect_tid_set;
136  const auto& tid_set1 = tid_sets.at(item);
137  const auto& tid_set2 = tid_sets.at(next_item);
138  std::set_intersection(tid_set1.begin(), tid_set1.end(),
139  tid_set2.begin(), tid_set2.end(),
140  std::back_inserter(intersect_tid_set));
141 
142  if (static_cast<int>(intersect_tid_set.size()) >= min_support_count) {
143  remaining_items.push_back(next_item);
144  new_tid_sets[next_item] = std::move(intersect_tid_set);
145  }
146  }
147 
148  // Recursive call
149  if (!remaining_items.empty()) {
150  eclat_recursive(new_prefix, remaining_items, new_tid_sets);
151  }
152  }
153 }
154 
155 std::map<std::vector<int>, int> Eclat::get_support_counts() const {
156  return support_counts;
157 }
158 
159 #endif // ECLAT_HPP
Class to perform frequent itemset mining using the Eclat algorithm.
Definition: Eclat.hpp:21
std::map< std::vector< int >, int > get_support_counts() const
Gets the support counts for all frequent itemsets found.
Definition: Eclat.hpp:155
std::vector< std::vector< int > > run(const std::vector< std::vector< int >> &transactions)
Runs the Eclat algorithm on the provided dataset.
Definition: Eclat.hpp:66
Eclat(double min_support)
Constructor for the Eclat class.
Definition: Eclat.hpp:59