From 1c2c2453f6595c6c0a2f2f18703b6fdc35fb0149 Mon Sep 17 00:00:00 2001
From: Utaemon Toyota <toyota@cl.uni-heidelberg.de>
Date: Tue, 26 Feb 2019 00:58:42 +0100
Subject: [PATCH] pack code senseval prep

---
 Cora_Preprocessing/{cora => cora_data}/README |  0
 .../{cora => cora_data}/cora.cites            |  0
 .../{cora => cora_data}/cora.content          |  0
 .../{cora => cora_data}/cora_small.content    |  0
 Senseval_Prep/senseval_preprocessing.py       | 24 +++++++++++++++++--
 5 files changed, 22 insertions(+), 2 deletions(-)
 rename Cora_Preprocessing/{cora => cora_data}/README (100%)
 rename Cora_Preprocessing/{cora => cora_data}/cora.cites (100%)
 rename Cora_Preprocessing/{cora => cora_data}/cora.content (100%)
 rename Cora_Preprocessing/{cora => cora_data}/cora_small.content (100%)

diff --git a/Cora_Preprocessing/cora/README b/Cora_Preprocessing/cora_data/README
similarity index 100%
rename from Cora_Preprocessing/cora/README
rename to Cora_Preprocessing/cora_data/README
diff --git a/Cora_Preprocessing/cora/cora.cites b/Cora_Preprocessing/cora_data/cora.cites
similarity index 100%
rename from Cora_Preprocessing/cora/cora.cites
rename to Cora_Preprocessing/cora_data/cora.cites
diff --git a/Cora_Preprocessing/cora/cora.content b/Cora_Preprocessing/cora_data/cora.content
similarity index 100%
rename from Cora_Preprocessing/cora/cora.content
rename to Cora_Preprocessing/cora_data/cora.content
diff --git a/Cora_Preprocessing/cora/cora_small.content b/Cora_Preprocessing/cora_data/cora_small.content
similarity index 100%
rename from Cora_Preprocessing/cora/cora_small.content
rename to Cora_Preprocessing/cora_data/cora_small.content
diff --git a/Senseval_Prep/senseval_preprocessing.py b/Senseval_Prep/senseval_preprocessing.py
index 8a0a567..13f45b0 100644
--- a/Senseval_Prep/senseval_preprocessing.py
+++ b/Senseval_Prep/senseval_preprocessing.py
@@ -1,3 +1,18 @@
+#!/usr/bin/env python3
+
+"""
+@author: Utaemon Toyota
+@date: 25.2.2019
+@project: Software Projekt @ Heidelberg University, Institute for Computational Linguistics
+@members: Nadia Arslan, Lyuba Dimitrova, Nicolas Weber, Utaemon Toyota
+@required data: Senseval English all-words test data and the corresponding Penn Treebank files, in the same directory.
+@usage: python3 senseval_preprocessing.py [-s STOPWORDS] [-g GLOSS] [-v VERSION]
+        -s / --stopwords    Path to the txt-file with stopwords (default: stopwords.txt)
+        -g / --gloss        Path to the txt-file with gloss mappings (default: gloss_mapping.txt)
+        -v / --version      Senseval version, 2 or 3 (default: 3)
+"""
+
+import argparse
 import re
 import pickle as pkl
 from nltk.stem import WordNetLemmatizer
@@ -228,7 +243,7 @@ def get_sats(tokens, info):
             new_info.append(info[idx])
     return [new_tokens, new_info]
 
-def write_pkl(version = 2, stop_path="stopwords.txt", gloss_path = "gloss_mapping.txt"):
+def write_pkl(version = 3, stop_path="stopwords.txt", gloss_path = "gloss_mapping.txt"):
     file_path = ""
     tree_path = ""
     if version == 2:
@@ -245,4 +260,9 @@ def write_pkl(version = 2, stop_path="stopwords.txt", gloss_path = "gloss_mappin
             print (key, "Done")
 
 if __name__ == "__main__":
-    write_pkl(version=3)
+    parser = argparse.ArgumentParser(description="Senseval Preprocessing script.")  # CLI entry point
+    parser.add_argument("-s", "--stopwords", default="stopwords.txt", help="path to stopwords-txt-file")
+    parser.add_argument("-g", "--gloss", default="gloss_mapping.txt", help="path to gloss mapping txt-file")
+    parser.add_argument("-v", "--version", type=int, choices=(2, 3), default=3, help="2 or 3 for senseval version")  # argparse rejects anything else
+    args = parser.parse_args()
+    write_pkl(version=args.version, stop_path=args.stopwords, gloss_path=args.gloss)  # version is already an int
-- 
GitLab