Commit b3c20ce1 authored by Caron Olivier

README.md update, small refactoring

parent 76bd6a09
@@ -67,10 +67,36 @@ closed (double value).
 - `target` : target word name
 Here is an example:
-```bash
-python3 run_greedy.py ./models/text8_article yes 0.0279 5 0.2233 yes queen
-```
+```bash
+python3 run_greedy.py ./models/text8 yes 0.0279 5 0.2233 yes queen
+```
+Results are stored in a CSV file.
+
+### The HillClimbing method
+The main program **run_hillclimbing.py** requires several parameters:
+- `dataset` : the dataset location
+- `only_pos` : 'yes' if the search is restricted to positive words, 'no' otherwise
+- `seed` : a seed number (the hillclimbing method is not deterministic)
+- `target` : target word name
+Here is an example:
+```bash
+python3 run_hillclimbing.py ./models/text8 no 16 brother
+```
+Results are stored in a CSV file.
+
+### The _exampleAnalogy.py_ program
+This simple program computes and displays the cosine similarity between fixed solutions and a target word.
+The only parameter is the dataset location; here is an example:
+```bash
+python3 exampleAnalogy.py ./models/glove-wiki-gigaword-100
+```
 ## Information
 ### Authors
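As context for the README examples above, here is a minimal sketch of the kind of analogy check these scripts perform. It assumes the files under `./models/` are saved gensim `KeyedVectors`; the loading call and the word choices are illustrative assumptions, not taken from the repository:

```python
# Illustrative sketch only: the model format, the loading call and the words are assumptions.
from gensim.models import KeyedVectors
from numpy import dot
from numpy.linalg import norm

wv = KeyedVectors.load("./models/text8")  # assumption about how the dataset is stored

# A fixed "solution" (positive words minus negative words) compared to a target word.
solution = wv["king"] - wv["man"] + wv["woman"]
target = wv["queen"]

cos_sim = dot(solution, target) / (norm(solution) * norm(target))
print(f"cosine similarity with 'queen': {cos_sim:.4f}")
```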
@@ -43,14 +43,12 @@ def cosine_similarity(vector_a, vector_b) -> float:
     vector_a - the first vector
     vector_b - the second vector
     """
-    a = vector_a
-    b = vector_b
     if len(vector_a) == 0:
         return 0.0
-    norm_a = norm(a)
-    norm_b = norm(b)
+    norm_a = norm(vector_a)
+    norm_b = norm(vector_b)
     if norm_a == 0.0 or norm_b == 0.0:
         return 0.0
-    cos_sim = dot(a, b) / (norm_a * norm_b)
+    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
     return cos_sim
\ No newline at end of file
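For reference, after this hunk the helper reads roughly as below; the numpy imports are an assumption based on the `dot` and `norm` calls visible in the diff:

```python
# Sketch of the refactored helper; imports assumed from the dot/norm calls in the diff.
from numpy import dot
from numpy.linalg import norm


def cosine_similarity(vector_a, vector_b) -> float:
    """
    Cosine similarity of two vectors; returns 0.0 for empty or zero-norm input.
    vector_a - the first vector
    vector_b - the second vector
    """
    if len(vector_a) == 0:
        return 0.0
    norm_a = norm(vector_a)
    norm_b = norm(vector_b)
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    cos_sim = dot(vector_a, vector_b) / (norm_a * norm_b)
    return cos_sim
```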
@@ -137,7 +137,6 @@ def greedy_prepare_data(norm_model, pos_only, min_d, min_p, threshold, target_wo
     else:
         target_word_vector = norm_model[target_word]
     wv_size = len(target_word_vector)
-    print("size word:",wv_size)
     if threshold == 0:
         coverage = list(range(wv_size))  # init coverage
     else:
# -*- coding: utf-8 -*-
"""
PEACEWORD, Prototype for Extracting And Considering
the Explainability of WORD embeddings.
@@ -71,16 +70,9 @@ class WordEmbedding:
         :param predict_word: the target word
         :return: the cosine similarity
         """
-        result = 0
-        if len(solution.positive) == 0 and len(solution.negative) == 0:
-            print("WARNING : EMPTY SOLUTION PASSED")
-            return 0
-        for word in solution.positive:
-            result = result + self.wv[word]
-        for word in solution.negative:
-            result = result - self.wv[word]
-        return round(cosine_similarity(result, self.wv[predict_word]), 6)
+        result = solution.word_vector(self.wv, len(self.wv[predict_word]))
+        return cosine_similarity(result, self.wv[predict_word])

     def neighbor_solutions(self, solution, vocab, eval_word):
         """
@@ -56,7 +56,8 @@ class Solution:
         for word in self.negative:
             if self.negative_words_in_dataset:
                 result = result + map_word_vector["-"+word]
-            result = result - map_word_vector[word]  # to test
+            else:
+                result = result - map_word_vector[word]
         return result

     def add(self,word):
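Putting the two refactoring hunks together, `Solution.word_vector` presumably combines the solution's positive and negative words into a single vector, which `WordEmbedding` then compares against the target (`result = solution.word_vector(self.wv, len(self.wv[predict_word]))` in the hunk above). Below is a sketch under that assumption; only the negative-word branch appears verbatim in this commit, while the constructor, the zero-vector initialization and the positive-word loop are inferred from the removed `WordEmbedding` code:

```python
import numpy as np


class Solution:
    """Illustrative sketch only; the real class in the repository carries more state."""

    def __init__(self, positive, negative, negative_words_in_dataset=False):
        self.positive = positive    # words added to the solution vector
        self.negative = negative    # words subtracted (or looked up as "-word")
        self.negative_words_in_dataset = negative_words_in_dataset

    def word_vector(self, map_word_vector, wv_size):
        # Inferred: start from a zero vector of the embedding dimension.
        result = np.zeros(wv_size)
        # Inferred from the removed WordEmbedding loop: add each positive word.
        for word in self.positive:
            result = result + map_word_vector[word]
        for word in self.negative:
            if self.negative_words_in_dataset:
                # The dataset provides explicit "-word" vectors, so add those.
                result = result + map_word_vector["-" + word]
            else:
                # Otherwise subtract the word's own vector (the fix in this hunk).
                result = result - map_word_vector[word]
        return result
```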