From b3c20ce1c2308aff4d555c12323bfc7ee43331a1 Mon Sep 17 00:00:00 2001
From: Olivier Caron <Olivier.Caron@univ-lille.fr>
Date: Wed, 12 Feb 2025 13:48:48 +0100
Subject: [PATCH] README.md update, small refactoring

---
 README.md                     | 28 +++++++++++++++++++++++++++-
 methods/common.py             |  8 +++-----
 methods/greedy/greedy.py      |  1 -
 methods/hillclimbing/utils.py | 12 ++----------
 methods/solution.py           |  3 ++-
 5 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index e2a7365..10c880c 100644
--- a/README.md
+++ b/README.md
@@ -67,10 +67,36 @@ closed (double value).
 - `target` : target word name
 
 Here is an example:
+
+```bash
+python3 run_greedy.py ./models/text8 yes 0.0279 5 0.2233 yes queen
+```
+Results are stored in a CSV file.
+
+### The HillClimbing method
+
+The main program **run_hillclimbing.py** requires several parameters:
+- `dataset` : the dataset location
+- `only_pos` : 'yes' if the search is limited to positive words, 'no' otherwise
+- `seed` : a seed number (the hill climbing method is not deterministic)
+- `target` : target word name
+
+Here is an example:
+
 ```bash
-python3 run_greedy.py ./models/text8_article yes 0.0279 5 0.2233 yes queen
+python3 run_hillclimbing.py ./models/text8 no 16 brother
 ```
+Results are stored in a CSV file.
+
+### The _exampleAnalogy.py_ program
+This simple program computes and displays the cosine similarity between fixed solutions and a target word.
+
+The only parameter is the dataset location. Here is an example:
+
+```bash
+python3 exampleAnalogy.py ./models/glove-wiki-gigaword-100
+```
 
 ## Information
 
 ### Authors
diff --git a/methods/common.py b/methods/common.py
index efea003..d2c3a3a 100644
--- a/methods/common.py
+++ b/methods/common.py
@@ -43,14 +43,12 @@ def cosine_similarity(vector_a, vector_b) -> float:
     vector_a - the first vector
     vector_b - the second vector
     """
-    a = vector_a
-    b = vector_b
     if len(vector_a) == 0:
         return 0.0
 
-    norm_a = norm(a)
-    norm_b = norm(b)
+    norm_a = norm(vector_a)
+    norm_b = norm(vector_b)
     if norm_a == 0.0 or norm_b == 0.0:
         return 0.0
-    cos_sim = dot(a, b) / (norm_a * norm_b)
+    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
     return cos_sim
\ No newline at end of file
diff --git a/methods/greedy/greedy.py b/methods/greedy/greedy.py
index e8e9b09..d27a541 100755
--- a/methods/greedy/greedy.py
+++ b/methods/greedy/greedy.py
@@ -137,7 +137,6 @@ def greedy_prepare_data(norm_model, pos_only, min_d, min_p, threshold, target_wo
     else:
         target_word_vector = norm_model[target_word]
     wv_size = len(target_word_vector)
-    print("size word:",wv_size)
     if threshold == 0:
         coverage = list(range(wv_size)) # init coverage
     else:
diff --git a/methods/hillclimbing/utils.py b/methods/hillclimbing/utils.py
index 66a9eb8..12feddb 100644
--- a/methods/hillclimbing/utils.py
+++ b/methods/hillclimbing/utils.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-
 """
 PEACEWORD, Prototype for Extracting And Considering the Explainability of WORD embeddings.
 
@@ -71,16 +70,9 @@ class WordEmbedding:
         :param predict_word: the target word
         :return: the cosine similarity
         """
-        result = 0
-        if len(solution.positive) == 0 and len(solution.negative) == 0:
-            print("WARNING : EMPTY SOLUTION PASSED")
-            return 0
-        for word in solution.positive:
-            result = result + self.wv[word]
-        for word in solution.negative:
-            result = result - self.wv[word]
+        result = solution.word_vector(self.wv, len(self.wv[predict_word]))
 
-        return round(cosine_similarity(result, self.wv[predict_word]), 6)
+        return cosine_similarity(result, self.wv[predict_word])
 
     def neighbor_solutions(self, solution, vocab, eval_word):
         """
diff --git a/methods/solution.py b/methods/solution.py
index 3c7524a..9508d0e 100644
--- a/methods/solution.py
+++ b/methods/solution.py
@@ -56,7 +56,8 @@ class Solution:
         for word in self.negative:
             if self.negative_words_in_dataset:
                 result = result + map_word_vector["-"+word]
-            result = result - map_word_vector[word] # to test
+            else:
+                result = result - map_word_vector[word]
         return result
 
     def add(self,word):
-- 
GitLab
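
As a quick sanity check on the `cosine_similarity` refactoring in this patch, here is a minimal standalone sketch of the function as it reads after the change. The imports are an assumption (the hunk does not show them; NumPy's `dot` and `numpy.linalg.norm` are the likely sources), and the example vectors are purely illustrative.

```python
# Standalone sketch of methods/common.py::cosine_similarity after the patch.
# Assumption: the module gets dot from numpy and norm from numpy.linalg.
from numpy import dot
from numpy.linalg import norm


def cosine_similarity(vector_a, vector_b) -> float:
    """Cosine similarity of two vectors; 0.0 for empty or zero-norm input."""
    if len(vector_a) == 0:
        return 0.0
    norm_a = norm(vector_a)
    norm_b = norm(vector_b)
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
    return cos_sim


if __name__ == "__main__":
    # Illustrative vectors only; real inputs are word-embedding vectors.
    print(cosine_similarity([0.1, 0.3, 0.5], [0.2, 0.1, 0.4]))  # ~0.92
```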