Skip to content
Snippets Groups Projects
Commit 22fe70c4 authored by opitz's avatar opitz
Browse files

added comments

parent 22ddc850
No related branches found
No related tags found
No related merge requests found
...@@ -23,6 +23,11 @@ def contract_gender(G, gender="male"): ...@@ -23,6 +23,11 @@ def contract_gender(G, gender="male"):
return G,vns[0][0] return G,vns[0][0]
def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1): def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1):
"""function takes our graph and simplifies text description nodes
E.g., feloniously stealing, on the 10th December, two silver watches ----> watches
"""
if mode == "None": if mode == "None":
return G, node_index_dict return G, node_index_dict
#collect all descriptions and their neighbor category #collect all descriptions and their neighbor category
...@@ -32,32 +37,42 @@ def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1): ...@@ -32,32 +37,42 @@ def simplify_text_description_nodes(G,node_index_dict,mode="None",min_freq=1):
trialnodes=[n for n in G.nodes(data=True) if isinstance(n[1]["nodeobj"],dh.TrialNode)] trialnodes=[n for n in G.nodes(data=True) if isinstance(n[1]["nodeobj"],dh.TrialNode)]
descr_nodes=[] descr_nodes=[]
mask=[] mask=[]
# we iterate over all trials
for i,tn in enumerate(trialnodes): for i,tn in enumerate(trialnodes):
#get corresponding cat node #get corresponding cat node
catn = [n for n in G.neighbors(tn[0]) if isinstance(G.nodes[n]["nodeobj"],dh.OffenceNode)][0] catn = [n for n in G.neighbors(tn[0]) if isinstance(G.nodes[n]["nodeobj"],dh.OffenceNode)][0]
catn=[catn,G.nodes[catn]] catn=[catn,G.nodes[catn]]
#print(catn)
#catn=[cat]
category=catn[1]["nodeobj"].d["category"] category=catn[1]["nodeobj"].d["category"]
descr_vectors = None descr_vectors = None
tid = None tid = None
# we iterate over all neighbors of the trial
for nb in G[tn[0]]: for nb in G[tn[0]]:
for edge_id in G[tn[0]][nb]: for edge_id in G[tn[0]][nb]:
# and grab nodes which describe an offence
if G[tn[0]][nb][edge_id]["edge_class"] == "with-offence-description": if G[tn[0]][nb][edge_id]["edge_class"] == "with-offence-description":
#print(G.nodes[nb])
# we collect the noun chunk vectors
descr_vectors,_ = G.nodes[nb]["nodeobj"].get_noun_chunk_vectors() descr_vectors,_ = G.nodes[nb]["nodeobj"].get_noun_chunk_vectors()
descr_nodes.append(G.nodes[nb]["nodeobj"]) descr_nodes.append(G.nodes[nb]["nodeobj"])
#descr_nodes[-1].simplify_to_direct_object()
tid=(tn[0],nb) tid=(tn[0],nb)
Xid.append(tid) Xid.append(tid)
for dv in descr_vectors: for dv in descr_vectors:
#put noun chunk vector into training data
Xvector.append(dv) Xvector.append(dv)
#put label into training data
related_cat.append(category) related_cat.append(category)
if mode=="classifier": if mode=="classifier":
#fit a classifier to learn a mapping between noun chunks and labels
clf=LogisticRegression() clf=LogisticRegression()
clf.fit(Xvector,related_cat) clf.fit(Xvector,related_cat)
# now we can remove the text description nodes and insert their simplified forms
for i,idx in enumerate(Xid): for i,idx in enumerate(Xid):
if idx[1] in G: if idx[1] in G:
G.remove_node(idx[1]) G.remove_node(idx[1])
#node_index_dict.pop(descr_nodes[i]) #node_index_dict.pop(descr_nodes[i])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment