diff --git a/src/graph_builder.py b/src/graph_builder.py index 45742a4f9708c6c2ef44fefd6bb8deed635cd914..46204f9686f61e6e9224e4090543521a428dc12d 100644 --- a/src/graph_builder.py +++ b/src/graph_builder.py @@ -13,7 +13,7 @@ import logging def build_graph(years=[],include_utterances=False,subcat=False): - + #a small helper function def maybe_subcat(string): if not subcat: return False @@ -37,16 +37,11 @@ def build_graph(years=[],include_utterances=False,subcat=False): #and the year year = exact_date[:4] - + #add a trial node trialnode = dh.TrialNode(exact_date,fp+"_"+trial_index_infile) nodeindex,trialindex = gh.maybe_new_index(trialnode,node_index_dict) G.add_node(trialindex, label="Trial", nodeobj=trialnode) - - #yearnode = dh.YearNode(year) - #nodeindex,yindex = gh.maybe_new_index(yearnode,node_index_dict) - #G.add_node(yindex, label="Year", nodeobj=yearnode) - #G.add_edge(trialindex,yindex,edge_class="in-year") @@ -55,18 +50,16 @@ def build_graph(years=[],include_utterances=False,subcat=False): #offs = [dh.create_cat_elm(of,"offence") for of in offs] descriptions = [dh.HasDescriptionNode(of) for of in offs] logging.info("descriptions found: {}".format(descriptions)) - #[desc.simplify_to_direct_object() for desc in descriptions] offs = [dh.OffenceNode(of,"offence",subcat=maybe_subcat("offence"),dummy=False) for of in offs] - #print(descriptions) - #asd + for i,off in enumerate(offs): - #each offence has a text and a (symbolic) category + #add offence node nodeindex,offindex = gh.maybe_new_index(off,node_index_dict) G.add_node(offindex, label="Offence", nodeobj=off) G.add_edge(trialindex,offindex,edge_class="with-offence") - #nodeindex+=1 - + + # add textual description node nodeindex,descr_index = gh.maybe_new_index(descriptions[i],node_index_dict) G.add_node(descr_index, label="Description", nodeobj=descriptions[i]) G.add_edge(trialindex,descr_index,edge_class="with-offence-description") @@ -105,6 +98,7 @@ def 
build_graph(years=[],include_utterances=False,subcat=False): ds = [dh.NamedEntityNode(dname) for dname in dnames] for d in ds: + #insert a defendant node and connect it to trial nodeindex,dindex = gh.maybe_new_index(d,node_index_dict) G.add_node(dindex, label=d.get_fullname(), nodeobj=d) G.add_edge(trialindex,dindex,edge_class="with-defendant") @@ -116,7 +110,7 @@ def build_graph(years=[],include_utterances=False,subcat=False): G.add_node(victindex, label=vict.get_fullname(), nodeobj=vict) G.add_edge(trialindex,victindex,edge_class="with-victim") - #we get the text from the trial account to extract named entities and utterances + #we get the text from the trial account to extract various other named entities and utterances ps = div.findAll("p") ps = [p for p in ps if len(p.findAll("persname")) == 1 and len(p.findAll("u"))] ents = [ p.findAll("persname")[0] for p in ps]