Commit b3c20ce1 authored by Caron Olivier

README.md update, small refactoring

parent 76bd6a09
@@ -67,10 +67,36 @@ closed (double value).
 - `target` : target word name
 Here is an example:
-```bash
-python3 run_greedy.py ./models/text8_article yes 0.0279 5 0.2233 yes queen
-```
+```bash
+python3 run_greedy.py ./models/text8 yes 0.0279 5 0.2233 yes queen
+```
+Results are stored in a CSV file.
+
+### The HillClimbing method
+The main program **run_hillclimbing.py** requires several parameters:
+- `dataset` : the dataset location
+- `only_pos` : 'yes' if the search is restricted to positive words, 'no' otherwise
+- `seed` : a seed number (the hillclimbing method is not deterministic)
+- `target` : target word name
+Here is an example:
+```bash
+python3 run_hillclimbing.py ./models/text8 no 16 brother
+```
+Results are stored in a CSV file.
+
+### The _exampleAnalogy.py_ program
+This simple program computes and displays the cosine similarity between fixed solutions and a target word.
+The only parameter is the dataset location; here is an example:
+```bash
+python3 exampleAnalogy.py ./models/glove-wiki-gigaword-100
+```
 ## Information
 ### Authors
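As context for the README examples above, here is a minimal sketch of the kind of analogy check these scripts perform. It assumes the files under `./models/` are saved gensim `KeyedVectors`; the loading call and the word choices are illustrative assumptions, not taken from the repository:

```python
# Illustrative sketch only: the model format, the loading call and the words are assumptions.
from gensim.models import KeyedVectors
from numpy import dot
from numpy.linalg import norm

wv = KeyedVectors.load("./models/text8")  # assumption about how the dataset is stored

# A fixed "solution" (positive words minus negative words) compared to a target word.
solution = wv["king"] - wv["man"] + wv["woman"]
target = wv["queen"]

cos_sim = dot(solution, target) / (norm(solution) * norm(target))
print(f"cosine similarity with 'queen': {cos_sim:.4f}")
```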
@@ -43,14 +43,12 @@ def cosine_similarity(vector_a, vector_b) -> float:
     vector_a - the first vector
     vector_b - the second vector
     """
-    a = vector_a
-    b = vector_b
     if len(vector_a) == 0:
         return 0.0
-    norm_a = norm(a)
-    norm_b = norm(b)
+    norm_a = norm(vector_a)
+    norm_b = norm(vector_b)
     if norm_a == 0.0 or norm_b == 0.0:
         return 0.0
-    cos_sim = dot(a, b) / (norm_a * norm_b)
+    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
     return cos_sim
\ No newline at end of file
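For reference, after this hunk the helper reads roughly as below; the numpy imports are an assumption based on the `dot` and `norm` calls visible in the diff:

```python
# Sketch of the refactored helper; imports assumed from the dot/norm calls in the diff.
from numpy import dot
from numpy.linalg import norm


def cosine_similarity(vector_a, vector_b) -> float:
    """
    Cosine similarity of two vectors; returns 0.0 for empty or zero-norm input.
    vector_a - the first vector
    vector_b - the second vector
    """
    if len(vector_a) == 0:
        return 0.0
    norm_a = norm(vector_a)
    norm_b = norm(vector_b)
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
    return cos_sim
```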
@@ -137,7 +137,6 @@ def greedy_prepare_data(norm_model, pos_only, min_d, min_p, threshold, target_wo
     else:
         target_word_vector = norm_model[target_word]
     wv_size = len(target_word_vector)
-    print("size word:",wv_size)
     if threshold == 0:
         coverage = list(range(wv_size))  # init coverage
     else:
# -*- coding: utf-8 -*-
"""
PEACEWORD, Prototype for Extracting And Considering
the Explainability of WORD embeddings.
@@ -71,16 +70,9 @@ class WordEmbedding:
         :param predict_word: the target word
         :return: the cosine similarity
         """
-        result = 0
-        if len(solution.positive) == 0 and len(solution.negative) == 0:
-            print("WARNING : EMPTY SOLUTION PASSED")
-            return 0
-        for word in solution.positive:
-            result = result + self.wv[word]
-        for word in solution.negative:
-            result = result - self.wv[word]
-        return round(cosine_similarity(result, self.wv[predict_word]), 6)
+        result = solution.word_vector(self.wv, len(self.wv[predict_word]))
+        return cosine_similarity(result, self.wv[predict_word])

     def neighbor_solutions(self, solution, vocab, eval_word):
         """
@@ -56,7 +56,8 @@ class Solution:
         for word in self.negative:
             if self.negative_words_in_dataset:
                 result = result + map_word_vector["-"+word]
-            result = result - map_word_vector[word]  # to test
+            else:
+                result = result - map_word_vector[word]
         return result

     def add(self,word):
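Putting the two refactoring hunks together, `Solution.word_vector` presumably combines the solution's positive and negative words into a single vector, which `WordEmbedding` then compares against the target (`result = solution.word_vector(self.wv, len(self.wv[predict_word]))` in the hunk above). Below is a sketch under that assumption; only the negative-word branch appears verbatim in this commit, while the constructor, the zero-vector initialization and the positive-word loop are inferred from the removed `WordEmbedding` code:

```python
import numpy as np


class Solution:
    """Illustrative sketch only; the real class in the repository carries more state."""

    def __init__(self, positive, negative, negative_words_in_dataset=False):
        self.positive = positive    # words added to the solution vector
        self.negative = negative    # words subtracted (or looked up as "-word")
        self.negative_words_in_dataset = negative_words_in_dataset

    def word_vector(self, map_word_vector, wv_size):
        # Inferred: start from a zero vector of the embedding dimension.
        result = np.zeros(wv_size)
        # Inferred from the removed WordEmbedding loop: add each positive word.
        for word in self.positive:
            result = result + map_word_vector[word]
        for word in self.negative:
            if self.negative_words_in_dataset:
                # The dataset provides explicit "-word" vectors, so add those.
                result = result + map_word_vector["-" + word]
            else:
                # Otherwise subtract the word's own vector (the fix in this hunk).
                result = result - map_word_vector[word]
        return result
```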