diff --git a/README.md b/README.md index e2a7365f21b7bc59c15342d6cc4f03e89b659f85..10c880c7510f830e48ea324d82ada88b34620101 100644 --- a/README.md +++ b/README.md @@ -67,10 +67,36 @@ closed (double value). - `target` : target word name Here is an example: + +```bash +python3 run_greedy.py ./models/text8 yes 0.0279 5 0.2233 yes queen +``` +Results are stored in a CSV file. + +### The HillClimbing method + +The main program **run_hillclimbing.py** requires several parameters: +- `dataset` : the dataset location +- `only_pos` : 'yes' if the search is limited to positive words, 'no' otherwise. +- `seed` : a seed number (the hill-climbing method is not deterministic) +- `target` : target word name + +Here is an example: + ```bash -python3 run_greedy.py ./models/text8_article yes 0.0279 5 0.2233 yes queen +python3 run_hillclimbing.py ./models/text8 no 16 brother ``` +Results are stored in a CSV file. + +### The _exampleAnalogy.py_ program +This simple program computes and displays the cosine similarity between fixed solutions and a target word. 
+ +The only parameter is the dataset location. Here is an example: + +```bash +python3 exampleAnalogy.py ./models/glove-wiki-gigaword-100 +``` ## Information ### Authors diff --git a/methods/common.py b/methods/common.py index efea003ed74e2f6db08597590f6ea2b206897fb0..d2c3a3a0d8c31709ca6d44915956e0ddd7d077d5 100644 --- a/methods/common.py +++ b/methods/common.py @@ -43,14 +43,12 @@ def cosine_similarity(vector_a, vector_b) -> float: vector_a - the first vector vector_b - the second vector """ - a = vector_a - b = vector_b if len(vector_a) == 0: return 0.0 - norm_a = norm(a) - norm_b = norm(b) + norm_a = norm(vector_a) + norm_b = norm(vector_b) if norm_a == 0.0 or norm_b == 0.0: return 0.0 - cos_sim = dot(a, b) / (norm_a * norm_b) + cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b) return cos_sim \ No newline at end of file diff --git a/methods/greedy/greedy.py b/methods/greedy/greedy.py index e8e9b09f7a8f3564c2ec0bef961c1f0da4424f5d..d27a541748becb93a7ff4f1180312fcb314c578f 100755 --- a/methods/greedy/greedy.py +++ b/methods/greedy/greedy.py @@ -137,7 +137,6 @@ def greedy_prepare_data(norm_model, pos_only, min_d, min_p, threshold, target_wo else: target_word_vector = norm_model[target_word] wv_size = len(target_word_vector) - print("size word:",wv_size) if threshold == 0: coverage = list(range(wv_size)) # init coverage else: diff --git a/methods/hillclimbing/utils.py b/methods/hillclimbing/utils.py index 66a9eb83bd8817ae7a5a2f324349cf9a0fadd0a7..12feddb6212ea8c9a30a955701c7c727fdd348be 100644 --- a/methods/hillclimbing/utils.py +++ b/methods/hillclimbing/utils.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - """ PEACEWORD, Prototype for Extracting And Considering the Explainability of WORD embeddings. 
@@ -71,16 +70,9 @@ class WordEmbedding: :param predict_word: the target word :return: the cosine similarity """ - result = 0 - if len(solution.positive) == 0 and len(solution.negative) == 0: - print("WARNING : EMPTY SOLUTION PASSED") - return 0 - for word in solution.positive: - result = result + self.wv[word] - for word in solution.negative: - result = result - self.wv[word] + result = solution.word_vector(self.wv, len(self.wv[predict_word])) - return round(cosine_similarity(result, self.wv[predict_word]), 6) + return cosine_similarity(result, self.wv[predict_word]) def neighbor_solutions(self, solution, vocab, eval_word): """ diff --git a/methods/solution.py b/methods/solution.py index 3c7524a3ddd022adbb5a1d07c9ec4ec9b3900a46..9508d0ebc43f97db30bcfcc06403fd8d6596f7e8 100644 --- a/methods/solution.py +++ b/methods/solution.py @@ -56,7 +56,8 @@ class Solution: for word in self.negative: if self.negative_words_in_dataset: result = result + map_word_vector["-"+word] - result = result - map_word_vector[word] # to test + else: + result = result - map_word_vector[word] return result def add(self,word):