Skip to content
Snippets Groups Projects
Commit ced992d6 authored by Liam Byrne
Browse files

module embeddings working

parent cb1dc564
No related branches found
No related tags found
No related merge requests found
......@@ -33,6 +33,7 @@ class ModuleEmbeddingTrainer:
def from_files(self, module_pairs_pkl: str):
    """Load pickled training pairs and rebuild the module vocabulary.

    Args:
        module_pairs_pkl: Path to a pickle file; its unpickled payload is
            stored as ``self.training_pairs``.

    Side effects:
        Sets ``self.training_pairs`` to the unpickled object and
        ``self.module_vocab`` to a list of the distinct items found across
        all pairs. The list order is unspecified because it is produced
        from a set.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only pass files that come from a trusted source.
    with open(module_pairs_pkl, 'rb') as f:
        self.training_pairs = pickle.load(f)
    # Read from ``self`` rather than the script-level ``met`` variable so the
    # method works on any instance and when this module is imported.
    self.module_vocab = list({x for y in self.training_pairs for x in y})
def from_db(self, row_limit=100000, save_path: str = None):
post_body_series = pd.read_sql_query(f"SELECT Body FROM Post WHERE (Tags LIKE '%python%') AND (Body LIKE '%import%') LIMIT {row_limit}", self.db)
......@@ -138,7 +139,6 @@ if __name__ == '__main__':
#met.from_db(save_path='../data/raw/module_pairs_1mil.pkl', row_limit=1000000)
met.from_files('../data/raw/module_pairs_1mil.pkl')
print(len(met.training_pairs))
met.module_vocab = list(set([x for y in met.training_pairs for x in y]))
print(len(met.module_vocab))
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment