4 #include <unordered_map>
5 #include <unordered_set>
36 std::vector<std::set<int>>
run(
const std::vector<std::vector<int>>& transactions);
58 std::set<std::set<int>> generate_candidates(
const std::set<std::set<int>>& frequent_itemsets,
int k);
66 std::set<std::set<int>> prune_candidates(
const std::set<std::set<int>>& candidates,
67 const std::set<std::set<int>>& frequent_itemsets_k_minus_1);
75 std::unordered_map<std::string, int> count_support(
const std::set<std::set<int>>& candidates,
76 const std::vector<std::vector<int>>& transactions);
85 bool has_infrequent_subset(
const std::set<int>& candidate,
86 const std::set<std::set<int>>& frequent_itemsets_k_minus_1);
89 int min_support_count;
90 int total_transactions;
91 std::unordered_map<std::string, int> support_counts;
95 : min_support(min_support), min_support_count(0), total_transactions(0) {
96 if (min_support <= 0.0 || min_support > 1.0) {
97 throw std::invalid_argument(
"min_support must be between 0 and 1.");
101 std::vector<std::set<int>>
Apriori::run(
const std::vector<std::vector<int>>& transactions) {
102 total_transactions =
static_cast<int>(transactions.size());
103 min_support_count =
static_cast<int>(std::ceil(min_support * total_transactions));
106 std::unordered_map<int, int> item_counts;
107 for (
const auto& transaction : transactions) {
108 for (
int item : transaction) {
113 std::set<std::set<int>> frequent_itemsets;
114 std::set<std::set<int>> frequent_itemsets_k;
115 for (
const auto& [item, count] : item_counts) {
116 if (count >= min_support_count) {
117 std::set<int> itemset = {item};
118 frequent_itemsets.insert(itemset);
119 frequent_itemsets_k.insert(itemset);
125 while (!frequent_itemsets_k.empty()) {
127 auto candidates_k = generate_candidates(frequent_itemsets_k, k);
130 auto candidate_supports = count_support(candidates_k, transactions);
133 frequent_itemsets_k.clear();
134 for (
const auto& [itemset_str, count] : candidate_supports) {
135 if (count >= min_support_count) {
137 std::set<int> itemset;
140 std::string s = itemset_str;
141 while ((pos = s.find(
',')) != std::string::npos) {
142 token = s.substr(0, pos);
143 itemset.insert(std::stoi(token));
146 itemset.insert(std::stoi(s));
148 frequent_itemsets.insert(itemset);
149 frequent_itemsets_k.insert(itemset);
150 support_counts[itemset_str] = count;
158 std::vector<std::set<int>> result(frequent_itemsets.begin(), frequent_itemsets.end());
162 std::set<std::set<int>> Apriori::generate_candidates(
const std::set<std::set<int>>& frequent_itemsets,
int k) {
163 std::set<std::set<int>> candidates;
164 for (
auto it1 = frequent_itemsets.begin(); it1 != frequent_itemsets.end(); ++it1) {
165 for (
auto it2 = std::next(it1); it2 != frequent_itemsets.end(); ++it2) {
167 std::vector<int> v1(it1->begin(), it1->end());
168 std::vector<int> v2(it2->begin(), it2->end());
169 if (std::equal(v1.begin(), v1.end() - 1, v2.begin())) {
170 std::set<int> candidate = *it1;
171 candidate.insert(*v2.rbegin());
173 if (!has_infrequent_subset(candidate, frequent_itemsets)) {
174 candidates.insert(candidate);
182 bool Apriori::has_infrequent_subset(
const std::set<int>& candidate,
183 const std::set<std::set<int>>& frequent_itemsets_k_minus_1) {
184 for (
auto it = candidate.begin(); it != candidate.end(); ++it) {
185 std::set<int> subset = candidate;
187 if (frequent_itemsets_k_minus_1.find(subset) == frequent_itemsets_k_minus_1.end()) {
194 std::unordered_map<std::string, int> Apriori::count_support(
const std::set<std::set<int>>& candidates,
195 const std::vector<std::vector<int>>& transactions) {
196 std::unordered_map<std::string, int> counts;
197 for (
const auto& transaction : transactions) {
198 std::set<int> transaction_set(transaction.begin(), transaction.end());
199 for (
const auto& candidate : candidates) {
200 if (std::includes(transaction_set.begin(), transaction_set.end(),
201 candidate.begin(), candidate.end())) {
203 counts[candidate_str]++;
211 return support_counts;
216 for (
auto it = itemset.begin(); it != itemset.end(); ++it) {
217 s += std::to_string(*it);
218 if (std::next(it) != itemset.end()) {
Class to perform frequent itemset mining using the Apriori algorithm.
Definition: Apriori.hpp:23
std::vector< std::set< int > > run(const std::vector< std::vector< int >> &transactions)
Runs the Apriori algorithm on the provided dataset.
Definition: Apriori.hpp:101
std::string itemset_to_string(const std::set< int > &itemset) const
Converts an itemset to a string representation for use as a key.
Definition: Apriori.hpp:214
Apriori(double min_support)
Constructor for the Apriori class.
Definition: Apriori.hpp:94
std::unordered_map< std::string, int > get_support_counts() const
Gets the support counts for all frequent itemsets found.
Definition: Apriori.hpp:210