From 22fe70c49db0941178b0c97f0669828e546fbf4c Mon Sep 17 00:00:00 2001
From: opi <opitz@cl.uni-heidelberg.de>
Date: Tue, 23 Jul 2019 15:50:41 +0200
Subject: [PATCH] added comments

---
 src/graph_helpers.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/graph_helpers.py b/src/graph_helpers.py
index 359f92e..581c0d1 100644
--- a/src/graph_helpers.py
+++ b/src/graph_helpers.py
@@ -23,6 +23,11 @@ def contract_gender(G, gender="male"):
     return G,vns[0][0]
 
 def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1):
+    """function takes our graph and simplifies text description nodes
+       E.g., feloniously stealing, on the 10th December,  two silver watches ----> watches
+    
+    """
+    
     if mode == "None":
         return G, node_index_dict
     #collect all descriptions and their neighbor category
@@ -32,32 +37,42 @@ def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1):
     trialnodes=[n for n in G.nodes(data=True) if isinstance(n[1]["nodeobj"],dh.TrialNode)]
     descr_nodes=[]
     mask=[]
+    
+    # we iterate over all trials
     for i,tn in enumerate(trialnodes):
         #get corresponding cat node
         catn = [n for n in G.neighbors(tn[0]) if isinstance(G.nodes[n]["nodeobj"],dh.OffenceNode)][0]
         catn=[catn,G.nodes[catn]]
-        #print(catn)
-        #catn=[cat]
         category=catn[1]["nodeobj"].d["category"]
         descr_vectors = None
         tid = None
+        # we iterate over all neighbors of the trial
         for nb in G[tn[0]]:
             for edge_id in G[tn[0]][nb]:
+
+                # and grab nodes which describe an offence
                 if G[tn[0]][nb][edge_id]["edge_class"] == "with-offence-description":
-                    #print(G.nodes[nb])
+                    
+                    # we collect the noun chunk vectors
                     descr_vectors,_ = G.nodes[nb]["nodeobj"].get_noun_chunk_vectors()
                     descr_nodes.append(G.nodes[nb]["nodeobj"])
-                    #descr_nodes[-1].simplify_to_direct_object()
                     tid=(tn[0],nb)  
                     Xid.append(tid)
                     for dv in descr_vectors:
+                        #put noun chunk vector into training data
                         Xvector.append(dv)
+                        #put label into training data
                         related_cat.append(category)
                     
     if mode=="classifier":
+        #fit a classifier to learn a mapping between noun chunks and labels
         clf=LogisticRegression()
         clf.fit(Xvector,related_cat)
+    
+    
+    # now we can remove the text description nodes and insert their simplified forms
     for i,idx in enumerate(Xid):
+
         if idx[1] in G:
             G.remove_node(idx[1])
             #node_index_dict.pop(descr_nodes[i])
-- 
GitLab