From b3c20ce1c2308aff4d555c12323bfc7ee43331a1 Mon Sep 17 00:00:00 2001
From: Olivier Caron <Olivier.Caron@univ-lille.fr>
Date: Wed, 12 Feb 2025 13:48:48 +0100
Subject: [PATCH] README.md update, small refactoring

---
 README.md                     | 28 +++++++++++++++++++++++++++-
 methods/common.py             |  8 +++-----
 methods/greedy/greedy.py      |  1 -
 methods/hillclimbing/utils.py | 12 ++----------
 methods/solution.py           |  3 ++-
 5 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index e2a7365..10c880c 100644
--- a/README.md
+++ b/README.md
@@ -67,10 +67,36 @@ closed (double value).
 - `target` : target word name
 
 Here is an example:
+
+```bash
+python3 run_greedy.py ./models/text8 yes 0.0279 5 0.2233 yes queen
+```
+Results are stored in a CSV file.
+
+### The HillClimbing method
+
+The main program **run_hillclimbing.py** requires several parameters:
+- `dataset` : the dataset location
+- `only_pos` : 'yes' if the search is limited to positive words, 'no' otherwise
+- `seed` : a seed number (the hill climbing method is not deterministic)
+- `target` : target word name
+
+Here is an example:
+
 ```bash
-python3 run_greedy.py ./models/text8_article yes 0.0279 5 0.2233 yes queen
+python3 run_hillclimbing.py ./models/text8 no 16 brother
 ```
+Results are stored in a CSV file.
+
+### The _exampleAnalogy.py_ program
+This simple program computes and displays the cosine similarity between fixed solutions and a target word.
+
+The only parameter is the dataset location. Here is an example:
+
+```bash
+python3 exampleAnalogy.py ./models/glove-wiki-gigaword-100
+```
 
 ## Information
 
 ### Authors
diff --git a/methods/common.py b/methods/common.py
index efea003..d2c3a3a 100644
--- a/methods/common.py
+++ b/methods/common.py
@@ -43,14 +43,12 @@ def cosine_similarity(vector_a, vector_b) -> float:
     vector_a - the first vector
     vector_b - the second vector
     """
-    a = vector_a
-    b = vector_b
     if len(vector_a) == 0:
         return 0.0
 
-    norm_a = norm(a)
-    norm_b = norm(b)
+    norm_a = norm(vector_a)
+    norm_b = norm(vector_b)
     if norm_a == 0.0 or norm_b == 0.0:
         return 0.0
-    cos_sim = dot(a, b) / (norm_a * norm_b)
+    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
     return cos_sim
\ No newline at end of file
diff --git a/methods/greedy/greedy.py b/methods/greedy/greedy.py
index e8e9b09..d27a541 100755
--- a/methods/greedy/greedy.py
+++ b/methods/greedy/greedy.py
@@ -137,7 +137,6 @@ def greedy_prepare_data(norm_model, pos_only, min_d, min_p, threshold, target_wo
     else:
         target_word_vector = norm_model[target_word]
     wv_size = len(target_word_vector)
-    print("size word:",wv_size)
     if threshold == 0:
         coverage = list(range(wv_size)) # init coverage
     else:
diff --git a/methods/hillclimbing/utils.py b/methods/hillclimbing/utils.py
index 66a9eb8..12feddb 100644
--- a/methods/hillclimbing/utils.py
+++ b/methods/hillclimbing/utils.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-
 """
 PEACEWORD, Prototype for Extracting And Considering the Explainability of WORD embeddings.
 
@@ -71,16 +70,9 @@ class WordEmbedding:
         :param predict_word: the target word
         :return: the cosine similarity
         """
-        result = 0
-        if len(solution.positive) == 0 and len(solution.negative) == 0:
-            print("WARNING : EMPTY SOLUTION PASSED")
-            return 0
-        for word in solution.positive:
-            result = result + self.wv[word]
-        for word in solution.negative:
-            result = result - self.wv[word]
+        result = solution.word_vector(self.wv, len(self.wv[predict_word]))
 
-        return round(cosine_similarity(result, self.wv[predict_word]), 6)
+        return cosine_similarity(result, self.wv[predict_word])
 
     def neighbor_solutions(self, solution, vocab, eval_word):
         """
diff --git a/methods/solution.py b/methods/solution.py
index 3c7524a..9508d0e 100644
--- a/methods/solution.py
+++ b/methods/solution.py
@@ -56,7 +56,8 @@ class Solution:
         for word in self.negative:
             if self.negative_words_in_dataset:
                 result = result + map_word_vector["-"+word]
-            result = result - map_word_vector[word] # to test
+            else:
+                result = result - map_word_vector[word]
         return result
 
     def add(self,word):
-- 
GitLab
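
As a quick sanity check on the `cosine_similarity` refactoring in this patch, here is a minimal standalone sketch of the function as it reads after the change. The imports are an assumption (the hunk does not show them; NumPy's `dot` and `numpy.linalg.norm` are the likely sources), and the example vectors are purely illustrative.

```python
# Standalone sketch of methods/common.py::cosine_similarity after the patch.
# Assumption: the module gets dot from numpy and norm from numpy.linalg.
from numpy import dot
from numpy.linalg import norm


def cosine_similarity(vector_a, vector_b) -> float:
    """Cosine similarity of two vectors; 0.0 for empty or zero-norm input."""
    if len(vector_a) == 0:
        return 0.0
    norm_a = norm(vector_a)
    norm_b = norm(vector_b)
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
    return cos_sim


if __name__ == "__main__":
    # Illustrative vectors only; real inputs are word-embedding vectors.
    print(cosine_similarity([0.1, 0.3, 0.5], [0.2, 0.1, 0.4]))  # ~0.92
```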