amr_error

09b73969 · wesenberg · 0fde5672 · 09b73969 · 09b73969 · 09b73969
Commit 09b73969 authored 2 years ago by wesenberg
--- a/cmd_amr.py
+++ b/cmd_amr.py
@@ -9,4 +9,4 @@ source = load_txt("", "amr_source")
    # if go_on == "end" or go_on == "'end'":
        # break

-print(calc_one_amr(nlp, source))
+print(calc_one_amr(nlp, source, True))
--- a/scrl/rewards.py
+++ b/scrl/rewards.py
@@ -399,8 +399,8 @@ class AmrReward:
        scores = []
        for source, summary in zip(sources, summaries):
            if source != "" and summary != "":
-                graph_source = util.calc_one_amr(self.nlp, source)
-                graph_summary = util.calc_one_amr(self.nlp, summary)
+                graph_source = util.calc_one_amr(self.nlp, source, True, "train")
+                graph_summary = util.calc_one_amr(self.nlp, summary, False, "train")
                score = calc_smatch_to_r(name="train", source=source, summary=summary, graph_source=graph_source, graph_summary=graph_summary)
            else:
                score = 0

--- a/wesenberg/AmrClass.py
+++ b/wesenberg/AmrClass.py
@@ -42,32 +42,32 @@ class AmrClass:
                    if self.SUMMERY in backup_amr[tmp_source]:
                        graph_summary = backup_amr[tmp_source][self.SUMMERY]
                    else:
-                        graph_summary = util.calc_one_amr(nlp, tmp_summary)
+                        graph_summary = util.calc_one_amr(nlp, tmp_summary, True, "baseline")
                        backup_amr[tmp_source][self.SUMMERY] = graph_summary

                    if self.SOURCE in backup_amr[tmp_source]:
                        graph_source = backup_amr[tmp_source][self.SOURCE]
                    else:
-                        graph_source = util.calc_one_amr(nlp, tmp_source)
+                        graph_source = util.calc_one_amr(nlp, tmp_source, True, "baseline")
                        backup_amr[tmp_source][self.SOURCE] = graph_source

                    if self.GOLD in backup_amr[tmp_source]:
                        graph_gold = backup_amr[tmp_source][self.GOLD]
                    else:
-                        graph_gold = util.calc_one_amr(nlp, tmp_gold)
+                        graph_gold = util.calc_one_amr(nlp, tmp_gold, True, "baseline")
                        backup_amr[tmp_source][self.GOLD] = graph_gold

                    if self.BASELINE in backup_amr[tmp_source]:
                        graph_baseline = backup_amr[tmp_source][self.BASELINE]
                    else:
-                        graph_baseline = util.calc_one_amr(nlp, tmp_baseline)
+                        graph_baseline = util.calc_one_amr(nlp, tmp_baseline, True, "baseline")
                        backup_amr[tmp_source][self.BASELINE] = graph_baseline
                else:

-                    graph_summary = util.calc_one_amr(nlp, tmp_summary)
-                    graph_baseline = util.calc_one_amr(nlp, tmp_baseline)
-                    graph_source = util.calc_one_amr(nlp, tmp_source)
-                    graph_gold = util.calc_one_amr(nlp, tmp_gold)
+                    graph_summary = util.calc_one_amr(nlp, tmp_summary, True, "baseline")
+                    graph_baseline = util.calc_one_amr(nlp, tmp_baseline, True, "baseline")
+                    graph_source = util.calc_one_amr(nlp, tmp_source, True, "baseline")
+                    graph_gold = util.calc_one_amr(nlp, tmp_gold, True, "baseline")

                    backup_amr[tmp_source] = {}
                    backup_amr[tmp_source][self.SUMMERY] = graph_summary

--- a/wesenberg/util.py
+++ b/wesenberg/util.py
@@ -15,24 +15,26 @@ from wesenberg.Konstanten import LIST_SYMBOLS_TO_DELETE, ROOT_PATH, AMRLIB_PATH,
    AMR_ERROR_PATH, SEARCH_AMR_ERROR


-def calc_one_amr(nlp, sentence):
+def remove_bugs(sentence):
    sentence = remove_hash_for_amr(sentence)
-    sentence = remove_single_char_for_amr(sentence)
-    sentence = remove_minus_for_amr(sentence)
+    # replace with "  " (two spaces) so that an "and" between two removed chars can still be found
+    sentence = remove_single_char_for_amr(sentence, "  ")
+    sentence = remove_minus_for_amr(sentence, "  ")
    sentence = remove_and_for_amr(sentence)
    sentence = remove_space_for_amr(sentence)
+    return sentence


-    # sentence = util.strip_for_amr(sentence)
-    # if not sentence.strip().endswith("."):
-    #    sentence = sentence + " ."
+def calc_one_amr(nlp, sentence, safe_error, file_name):
+    sentence = remove_bugs(sentence)
    graphs = nlp(sentence)._.to_amr()

    if graphs[0] is None:
        output = strip_amr_for_smatch(graphs[1])
    else:
        output = strip_amr_for_smatch(graphs[0])
-    if SEARCH_AMR_ERROR:
+
+    if SEARCH_AMR_ERROR and safe_error:
        output_list = output.split("\n")
        return_list = []
        for item in output_list:
@@ -41,7 +43,7 @@ def calc_one_amr(nlp, sentence):
                path = os.path.join(AMR_ERROR_PATH)
                if not os.path.exists(path):
                    os.makedirs(path)
-                with open(path + "amr_error.txt", 'a') as txt:
+                with open(path + file_name + ".txt", 'a') as txt:
                    txt.write(sentence + "\n" + output + "\n\n")
                    break
            else:
@@ -102,13 +104,13 @@ def remove_hash_for_amr(text):
    return text


-def remove_single_char_for_amr(text):
-    text = re.sub(" [b-z] ", "  ", text)
+def remove_single_char_for_amr(text, replace):
+    text = re.sub(" [b-z] ", replace, text)
    return text


-def remove_minus_for_amr(text):
-    text = re.sub(" -[a-z]*- ", "  ", text)
+def remove_minus_for_amr(text, replace):
+    text = re.sub(" -[a-z]*- ", replace, text)
    return text