From 22fe70c49db0941178b0c97f0669828e546fbf4c Mon Sep 17 00:00:00 2001 From: opi <opitz@cl.uni-heidelberg.de> Date: Tue, 23 Jul 2019 15:50:41 +0200 Subject: [PATCH] added comments --- src/graph_helpers.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/graph_helpers.py b/src/graph_helpers.py index 359f92e..581c0d1 100644 --- a/src/graph_helpers.py +++ b/src/graph_helpers.py @@ -23,6 +23,11 @@ def contract_gender(G, gender="male"): return G,vns[0][0] def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1): + """function takes our graph and simplifies text description nodes + E.g., feloniously stealing, on the 10th December, two silver watches ----> watches + + """ + if mode == "None": return G, node_index_dict #collect all descriptions and their neighbor category @@ -32,32 +37,42 @@ def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1): trialnodes=[n for n in G.nodes(data=True) if isinstance(n[1]["nodeobj"],dh.TrialNode)] descr_nodes=[] mask=[] + + # we iterate over all trials for i,tn in enumerate(trialnodes): #get corresponding cat node catn = [n for n in G.neighbors(tn[0]) if isinstance(G.nodes[n]["nodeobj"],dh.OffenceNode)][0] catn=[catn,G.nodes[catn]] - #print(catn) - #catn=[cat] category=catn[1]["nodeobj"].d["category"] descr_vectors = None tid = None + # we iterate over all neighbors of the trial for nb in G[tn[0]]: for edge_id in G[tn[0]][nb]: + + # and grab nodes which describe an offence if G[tn[0]][nb][edge_id]["edge_class"] == "with-offence-description": - #print(G.nodes[nb]) + + # we collect the noun chunk vectors descr_vectors,_ = G.nodes[nb]["nodeobj"].get_noun_chunk_vectors() descr_nodes.append(G.nodes[nb]["nodeobj"]) - #descr_nodes[-1].simplify_to_direct_object() tid=(tn[0],nb) Xid.append(tid) for dv in descr_vectors: + #put noun chunk vector into training data Xvector.append(dv) + #put label into training data related_cat.append(category) if 
mode=="classifier": + #fit a classifier to learn a mapping between noun chunks and labels clf=LogisticRegression() clf.fit(Xvector,related_cat) + + + # now we can remove the text description nodes and insert their simplified forms for i,idx in enumerate(Xid): + if idx[1] in G: G.remove_node(idx[1]) #node_index_dict.pop(descr_nodes[i]) -- GitLab