diff --git a/code/.ipynb_checkpoints/generalized_graph_building-checkpoint.ipynb b/code/.ipynb_checkpoints/generalized_graph_building-checkpoint.ipynb index 1092f949d6e0f11040f79eaf34262f80c5cf395e..e65187f57c44818a6cdef1358f801b5d9cfbabf7 100644 --- a/code/.ipynb_checkpoints/generalized_graph_building-checkpoint.ipynb +++ b/code/.ipynb_checkpoints/generalized_graph_building-checkpoint.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -76,12 +76,16 @@ "(b / be-located-at-91\n", " :ARG1 (p / person\n", " :name (n / name\n", - " :op1 \"Addie\"))\n", - " :ARG2 (z / zoo)\n", - " :accompanier (p2 / person\n", - " :ARG0-of (h / have-rel-role-91\n", - " :ARG1 p\n", - " :ARG2 (f / family))))\n" + " :op1 \"Max\")\n", + " :ARG0-of (h / have-org-role-91\n", + " :ARG1 (o / organization\n", + " :name (n2 / name\n", + " :op1 \"Some\"\n", + " :op2 \"Germans\"))\n", + " :ARG2 (m / member)))\n", + " :ARG2 (c / country\n", + " :name (n3 / name\n", + " :op1 \"France\")))\n" ] } ], @@ -90,7 +94,7 @@ " with open(file,'r') as f:\n", " return(json.load(f))\n", "amr_dict = open_amr_dict('../amr/amr_dict.json')\n", - "print(amr_dict['Addie was at the zoo with her family'])" + "print(amr_dict['Max who is a member of Some Germans is at France'])" ] }, { @@ -121,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -551,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -594,7 +598,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -615,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -632,7 +636,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -643,7 +647,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -662,7 +666,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -674,50 +678,22 @@ " # get general rule\n", " general_rule = annotation['2parse'][1]\n", " part1, part2 = general_rule[0], general_rule[2]\n", - " \n", - " # get general rule\n", - " #general_rule = annotation['general_0']\n", - " #part1, part2 = general_rule[0].lower(), general_rule[2].lower()\n", - " #print(part1)\n", - " #print(part2)\n", "\n", " # rename fillers\n", " filler1 = [replacers[word.replace(')','')] for word in annotation['general_0'][0].split(' ') if '_' in word]\n", " filler2 = [replacers[word.replace(')','')] for word in annotation['general_0'][2].split(' ') if '_' in word]\n", " filler = [el for el in filler1 if el in filler2]\n", - " #filler = {el:[] for el in filler1 if el in filler2}\n", - " #part1, part2 = rename_fillers(part1), rename_fillers(part2)\n", - " #print(part1)\n", - " #print(part2)\n", - " # save index of fillers in original sentence\n", - " #for fill in filler:\n", - " #filler[fill] = [part1.split().index(fill), part2.split().index(fill)]\n", - " #print(filler)\n", - "\n", - " # parse to amr\n", - " #doc1, doc2 = nlp(part1), nlp(part2)\n", - " #parse_part1, parse_part2 = doc1._.to_amr(), doc2._.to_amr()\n", - " #triples_part1, triples_part2 = get_graph_triples(parse_part1[0]), get_graph_triples(parse_part2[0])\n", - "\n", - " #print(parse_part1)\n", - " #print(parse_part2)\n", - " \n", - " \n", - " # parse to amr\n", + " left_fillers = [el for el in filler1 if el not in filler]+[el for el in filler2 if el not in filler]\n", + " #print(left_fillers,' fillers left')\n", + " # look up amr parse\n", " parse_part1, parse_part2 = replace_names(amr_dict[part1]),replace_names(amr_dict[part2])\n", " triples_part1, triples_part2 = get_graph_triples(parse_part1), get_graph_triples(parse_part2)\n", - " #print(parse_part1)\n", - " #print(triples_part1)\n", - " #print(parse_part2)\n", - " #print(triples_part2)\n", " \n", " for fill in filler:\n", " fill = re.sub('[()]', '', replace_names(amr_dict[fill]).split('/ ')[1].split('\\n')[0])\n", - " #fill = filler[fill][0]\n", - " #fill = re.sub('[()]', '', doc1[fill:fill+1]._.to_amr()[0].split('/ ')[1].split('\\n')[0])\n", - " #print(fill)\n", + " \n", " for triple in triples_part1:\n", - " #print(triple)\n", + " \n", " triple1 = (None,None)\n", " if not '_of' in triple[1]:\n", " if fill in triple[2]:\n", @@ -741,134 +717,143 @@ " mapping = (triple1,triple2)\n", " argument_mappings.append(mapping)\n", " else:\n", - " left_over.append(rule)\n", + " left_over.append(annotation)\n", + " #print(triple1,triple2)\n", " continue\n", - " print(list(set(argument_mappings)))\n", + " #print(list(set(argument_mappings)))\n", " #print('\\n\\n')\n", " return((list(set(argument_mappings)),left_over))" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[(('w / want-01', ':ARG0', 'p / \"Germany\"'), ('b / be-located-at-91', ':ARG1', 'p / \"Germany\"'))]\n" + "['France'] fillers left\n", + "[(('w / want-01', ':ARG0', 'p / \"Germany\"'), ('b / be-located-at-91', ':ARG1', 'p / \"Germany\"'))]\n", + "[]\n" ] - }, - { - "data": { - "text/plain": [ - "([(('w / want-01', ':ARG0', 'p / \"Germany\"'),\n", - " ('b / be-located-at-91', ':ARG1', 'p / \"Germany\"'))],\n", - " [])" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "map_arguments(annotation)" + "argument_mapping, left_over = map_arguments(annotation)\n", + "print(argument_mapping)\n", + "print(left_over)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### In loop for every annotation of the edge" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "annotations = [{'dimension': '1', 'quality': 3, 'worker_id': 152, 'rules': [[\"Addie's family drive to the zoo\", 'Causes/Enables', 'Addie is at the zoo'], [\"{Addie's family}_[subject] {drive}_[verb] {to}_[preposition1] {the zoo}_[object1]\", 'Causes/Enables', '{Addie}_[subject] {is}_[verb] {at}_[preposition1] {the zoo}_[object1]'], ['Some People_A drive to Somewhere_A', 'Causes/Enables', 'Someone_A (who is a member of Some People_A) is at Somewhere_A'], ['{Some People_A}_[subject] {drive}_[verb] {to}_[preposition1] {Somewhere_A}_[object1]', 'Causes/Enables', '{Someone_A ||who is a member of Some People_A||}_[subject] {is}_[verb] {at}_[preposition1] {Somewhere_A}_[object1]']], '2parse': [[\"Addie's family drive to the zoo\", 'Causes/Enables', 'Addie is at the zoo'], ['Some Germans drive to France', 'Causes/Enables', 'Max who is a member of Some Germans is at France']], 'general_0': ['Some people_a drive to somewhere_a', 'Causes/Enables', 'someone_a who is a member of Some people_a is at somewhere_a'], 'general_replaced': ['Some Germans drive to France', 'Causes/Enables', 'Max who is a member of Some Germans is at France'], 'general_no_brackets': ['Some Germans drive to France', 'Causes/Enables', 'Max who is a member of Some Germans is at France']}, {'dimension': '1', 'quality': 2, 'worker_id': 11, 'rules': [['Addie goes to the zoo with her family', 'Causes/Enables', 'Addie is at the zoo with her family'], ['{Addie}_[subject] {goes}_[verb] {to}_[preposition1] {the zoo}_[object1] {with}_[preposition2] {her family}_[object2]', 'Causes/Enables', '{Addie}_[subject] {is}_[verb] {at}_[preposition1] {the zoo}_[object1] {with}_[preposition2] {her family}_[object2]'], ['Someone_A goes to Somewhere_A with Some People_A', 'Causes/Enables', 'Someone_A is at Somewhere_A with Some People_A'], ['{Someone_A}_[subject] {goes}_[verb] {to}_[preposition1] {Somewhere_A}_[object1] {with}_[preposition2] {Some People_A}_[object2]', 'Causes/Enables', '{Someone_A}_[subject] {is}_[verb] {at}_[preposition1] {Somewhere_A}_[object1] {with}_[preposition2] {Some People_A}_[object2]']], '2parse': [['Addie goes to the zoo with her family', 'Causes/Enables', 'Addie is at the zoo with her family'], ['Max goes to France with Some Germans', 'Causes/Enables', 'Max is at France with Some Germans']], 'general_0': ['someone_a goes to somewhere_a with Some people_a', 'Causes/Enables', 'someone_a is at somewhere_a with Some people_a'], 'general_replaced': ['Max goes to France with Some Germans', 'Causes/Enables', 'Max is at France with Some Germans'], 'general_no_brackets': ['Max goes to France with Some Germans', 'Causes/Enables', 'Max is at France with Some Germans']}]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['\"Russia\"']\n", - "['person\\n :mod (c / \"Russia\")']\n", - "(p / possible-01\n", - " :ARG1 (r / run-02\n", - " :ARG0 (c / crown\n", - " :ARG1-of (g / give-01\n", - " :ARG0 (p2 / \"Max\")\n", - " :ARG2 (s / sweater)))\n", - " :ARG1 (a / around\n", - " :op1 (c2 / \"France\"))\n", - " :manner (t / throne))\n", - " p / \"Russia\") \n", - " c / \"Adidas\"\n" + "(('d / drive-01', ':ARG4', 'c2 / \"France\"'), ('b / be-located-at-91', ':ARG2', 'c / \"France\"'))\n", + "(('g / go-02', ':accompanier', 'p2 / \"Germany\"'), ('b / be-located-at-91', ':accompanier', 'p2 / \"Germany\"'))\n", + "(('g / go-02', ':ARG4', 'c / \"France\"'), ('b / be-located-at-91', ':ARG2', 'c / \"France\"'))\n", + "(('g / go-02', ':ARG0', 'p / \"Max\"'), ('b / be-located-at-91', ':ARG1', 'p / \"Max\"'))\n" ] } ], "source": [ - "parse =\"\"\"(f / feed-01\n", - " :ARG0 (p / person\n", - " :name (n / name\n", - " :op1 \"Max\"))\n", - " :ARG2 (c / crown))\"\"\"\n", - "parse=\"\"\"(p / possible-01\n", - " :ARG1 (r / run-02\n", - " :ARG0 (c / crown\n", - " :ARG1-of (g / give-01\n", - " :ARG0 (p2 / person\n", - " :name (n / name\n", - " :op1 \"Max\"))\n", - " :ARG2 (s / sweater)))\n", - " :ARG1 (a / around\n", - " :op1 (c2 / country\n", - " :name (n2 / name\n", - " :op1 \"France\")))\n", - " :manner (t / throne))\n", - " p / person\n", - " :mod (c / country\n", - " :name (n / name\n", - " :op1 \"Russia\"))) \n", - " c / company\n", - " :name (n / name\n", - " :op1 \"Adidas\")\"\"\"\n", - "name = re.findall('[a-z]+[\\s\\n\\t]*:name \\(n[0-9]? / name[\\s\\n\\t]*:op1 (\"[A-Za-z]+\")\\)', parse)\n", - "names = re.findall('[a-z]+[\\s\\n\\t]*:name \\(n[0-9]? / name[\\s\\n\\t]*:op1 \"[A-Za-z]+\"\\)', parse)\n", - "for i,el in enumerate(names):\n", - " parse = parse.replace(el,name[i])\n", - "name = re.findall('[a-z]+[\\s\\n\\t]*:mod \\(c[0-9]? / (\"[A-Za-z]+\")\\)', parse)\n", - "names = re.findall('[a-z]+[\\s\\n\\t]*:mod \\(c[0-9]? / \"[A-Za-z]+\"\\)', parse)\n", - "for i,el in enumerate(names):\n", - " parse = parse.replace(el,name[i])\n", - "print(name)\n", - "print(names)\n", - "print(parse)" + "def get_argument_mappings(edge):\n", + " annotations = graph.edges[edge]['annotations']\n", + " argument_mappings = []\n", + " for annotation in annotations:\n", + " argument_mapping, left_over = map_arguments(annotation)\n", + " argument_mappings += argument_mapping\n", + " #print(left_over)\n", + " return(argument_mappings)\n", + "\n", + "edge = ('3500_EMO_0', '3500_0')\n", + "edge = ('3500_EMO_1', '3500_2')\n", + "edge = ('3500_EVENT_3', '3500_0')\n", + "argument_mappings = get_argument_mappings(edge)\n", + "for map in argument_mappings:\n", + " print(map)" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(p / possible-01\n", - " :ARG1 (r / run-02\n", - " :ARG0 (c / crown\n", - " :ARG1-of (g / give-01\n", - " :ARG0 (p2 / \"Max\")\n", - " :ARG2 (s / sweater)))\n", - " :ARG1 (a / around\n", - " :op1 (c2 / \"France\"))\n", - " :manner (t / throne))\n", - " p / person\n", - " :mod (c / \"Russia\")) \n", - " (c / \"Adidas\"\n" + "['Some Germans drive to France', 'Causes/Enables', 'Max who is a member of Some Germans is at France']\n", + "['Max goes to France with Some Germans', 'Causes/Enables', 'Max is at France with Some Germans']\n", + "('p / \"Germany\"', ':quant', 's / some')\n", + "('g / go-02', ':ARG0', 'p / \"Max\"')\n", + "('n2 / name', ':op1', '\"Some\"')\n", + "('g / go-02', ':accompanier', 'p2 / \"Germany\"')\n", + "('o / organization', ':name', 'n2 / name')\n", + "('b / be-located-at-91', ':ARG2', 'c / \"France\"')\n", + "('p2 / \"Germany\"', ':quant', 's / some')\n", + "('h / have-org-role-91', ':ARG1', 'o / organization')\n", + "('b / be-located-at-91', ':ARG1', 'p / \"Max\"')\n", + "('h / have-org-role-91', ':ARG2', 'm / member')\n", + "('n2 / name', ':op2', '\"Germans\"')\n", + "('g / go-02', ':ARG4', 'c / \"France\"')\n", + "('b / be-located-at-91', ':accompanier', 'p2 / \"Germany\"')\n", + "('p / \"Max\"', ':ARG0-of', 'h / have-org-role-91')\n", + "('d / drive-01', ':ARG0', 'p / \"Germany\"')\n", + "('d / drive-01', ':ARG4', 'c2 / \"France\"')\n" ] } ], "source": [ - "for i,el in enumerate(names):\n", - " parse = parse.replace(el,name[i])\n", - "print(parse)" + "def unite_graph_triples(edge):\n", + " unite_triples = []\n", + " for annotation in graph.edges[edge]['annotations']:\n", + " general_rule = annotation['2parse'][1]\n", + " print(general_rule)\n", + " part1, part2 = general_rule[0], general_rule[2]\n", + " unite_triples += get_graph_triples(replace_names(amr_dict[part1]))+get_graph_triples(replace_names(amr_dict[part2]))\n", + " unite_triples = list(set(unite_triples))\n", + " return(unite_triples)\n", + "unite_triples = unite_graph_triples(edge)\n", + "for triple in unite_triples:\n", + " print(triple)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check :name and :op2" ] }, { @@ -241764,6 +241749,68 @@ "for el i" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#for el in amr_dict.values():\n", + " #if ':op2' in el:\n", + " #print(el,'\\n\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(b / be-located-at-91\n", + " :ARG1 (p / person\n", + " :name (n / name\n", + " :op1 \"Max\")\n", + " :ARG0-of (h / have-org-role-91\n", + " :ARG1 (o / organization\n", + " :name (n2 / name\n", + " :op1 \"Some\"\n", + " :op2 \"Germans\"))\n", + " :ARG2 (m / member)))\n", + " :ARG2 (c / country\n", + " :name (n3 / name\n", + " :op1 \"France\")))\n" + ] + } + ], + "source": [ + "print(amr_dict['Max who is a member of Some Germans is at France'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " (b / be-located-at-91\n", + " :ARG1 (e / elephant\n", + " :ARG1-of (n / new-01))\n", + " :ARG2 (c / city\n", + " :name (n2 / name\n", + " :op1 \"Oklahoma\"))) \n", + " \n", + " (ii / include-91\n", + " :ARG1 (g / game\n", + " :name (n / name\n", + " :op1 \"Animal\"\n", + " :op2 \"Crossing\"))\n", + " :ARG2 (g2 / game\n", + " :ARG1-of (f / favor-01\n", + " :ARG0 (ii2 / i)\n", + " :time (k / kid)))) " + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/data/story_data_final_coref.json b/data/story_data_final_coref.json index 49197eaf23662849c24e6d0359c54acfb3576842..2d7498059c07cbf3a43159647fcc87bc0c9b856b 100644 Binary files a/data/story_data_final_coref.json and b/data/story_data_final_coref.json differ diff --git a/doc/22_01_12.md b/doc/22_01_12.md index 20ae784254db6fd5a6e8ed4acdc6f8d8c1187127..eee0212d32b9833a85403a33494be01ee5466eaf 100644 --- a/doc/22_01_12.md +++ b/doc/22_01_12.md @@ -1,4 +1,11 @@ +PENMAN: +======= + +- https://github.com/goodmami/penman/blob/main/docs/api-demo.ipynb +- https://github.com/goodmami/penman +- https://penman.readthedocs.io/en/latest/library.html + IDEAS: ======