Skip to content
Snippets Groups Projects
Commit f577199c authored by Simon Will's avatar Simon Will
Browse files

Replace erroneously added symlink with actual mrl directory

parent 6eab12f6
No related branches found
No related tags found
No related merge requests found
/home/gorgor/coli/Bachelor-Arbeit/nlmaps-gan/mrl
\ No newline at end of file
# MRL
The MRL module contains utilities for working with machine readable
languages, in particular for working with the NLMaps MRL described in
Haas and Riezler 2016.
They were written by Carolin Lawrence (née Haas). Small revisions have
been made by Simon Will for this project.
## References
Carolin Haas and Stefan Riezler (2016). A Corpus and Semantic Parser
for Multilingual Natural Language Querying of OpenStreetMap. In
Proceedings of the Conference of the North American Chapter of the
Association for Computational Linguistics – Human Language
Technologies (NAACL HLT 2016), San Diego, CA.
import os.path
# Path of the 'cfgs' directory that sits next to this file.
CFG_DIR = os.path.join(os.path.dirname(__file__), 'cfgs')
from .mrl import MRL, NLmaps, MRLS
[S] ||| [X,1]
[S] ||| dist( [X,1] , unit( [KMMI,2] ) )
[S] ||| dist( [X,1] , [X,2] , unit( [KMMI,3] ) )
[S] ||| dist( [X,1] )
[S] ||| dist( [X,1] , [X,2] )
[S] ||| dist( [X,1] , for( ' [CW,2] ' ) )
[S] ||| dist( [X,1] , [X,2] , for( ' [CW,3] ' ) )
[X] ||| query( north( [AROUND,1] ) , [META,2] )
[X] ||| query( north( [QUERY,1] ) , [META,2] )
[X] ||| query( west( [AROUND,1] ) , [META,2] )
[X] ||| query( west( [QUERY,1] ) , [META,2] )
[X] ||| query( south( [AROUND,1] ) , [META,2] )
[X] ||| query( south( [QUERY,1] ) , [META,2] )
[X] ||| query( east( [AROUND,1] ) , [META,2] )
[X] ||| query( east( [QUERY,1] ) , [META,2] )
[X] ||| query( [AROUND,1] , [META,2] )
[X] ||| query( [QUERY,1] , [META,2] )
[AROUND] ||| around( center( [QUERY,1] ) , search( [QUERY,2] ) , maxdist( [DIST,3] ) )
[AROUND] ||| around( center( [QUERY,1] ) , search( [QUERY,2] ) , maxdist( [DIST,3] ) , [META_TOPX,4] )
[QUERY] ||| [AREA,1] , [OSM,2]
[QUERY] ||| [OSM,1]
[META] ||| qtype( [META_REQ,1] )
[META] ||| qtype( [META_POS,1] )
[META_REQ] ||| [META_REQ,1] , [META_REQ,2]
[META_REQ] ||| [META_REQ,1] , [META_POS,2]
[META_REQ] ||| findkey( and( ' [KEY,1] ' , ' [KEY,2] ' ) )
[META_REQ] ||| findkey( ' [KEY,1] ' )
[META_REQ] ||| findkey( ' [KEY,1] ' , [META_TOPX,2] )
[META_REQ] ||| count
[META_REQ] ||| latlong
[META_REQ] ||| latlong( [META_TOPX,1] )
[META_REQ] ||| least( [META_TOPX,1] )
[META_POS] ||| nodup( [META_REQ,1] )
[META_TOPX] ||| topx( [INT,1] )
[AREA] ||| area( [INNER,1] )
[OSM] ||| nwr( [INNER,1] )
[OSM] ||| nwr( [INNER,1] ) , [OSM,2]
[INNER] ||| and( [INNER,1] , [INNER,2] )
[INNER] ||| or( [INNER,1] , [INNER,2] )
[INNER] ||| keyval( ' [KEY,1] ' , [VAL,2] ) , [INNER,3]
[INNER] ||| keyval( ' [KEY,1] ' , [VAL,2] )
[CW] ||| car
[CW] ||| walk
[KMMI] ||| km
[KMMI] ||| mi
[DIST] ||| WALKING_DIST
[DIST] ||| DIST_INTOWN
[DIST] ||| DIST_OUTTOWN
[DIST] ||| DIST_DAYTRIP
[DIST] ||| [INT,1]
[VAL] ||| or( [VAL,1] , [VAL,2] )
[VAL] ||| and( [VAL,1] , [VAL,2] )
[VAL] ||| ' valvariable '
[KEY] ||| keyvariable
[KEY] ||| monitoring:bicycle
[KEY] ||| fuel:diesel
[KEY] ||| product
[KEY] ||| social_facility:for
[KEY] ||| hazard
[KEY] ||| memorial:type
[KEY] ||| person:date_of_birth
[KEY] ||| Schalansky_ref
[KEY] ||| tower:type
[KEY] ||| sports
[KEY] ||| school:de
[KEY] ||| artwork_type
[KEY] ||| artist_name
[KEY] ||| fire_hydrant:type
[KEY] ||| communication:mobile_phone
[KEY] ||| advertising
[KEY] ||| organic
[KEY] ||| artwork:group
[KEY] ||| collection_times
[KEY] ||| abandoned:tourism
[KEY] ||| network
[KEY] ||| manufacturer
[KEY] ||| generator:source
[KEY] ||| station
[KEY] ||| roof:colour
[KEY] ||| internet_access:fee
[KEY] ||| country
[KEY] ||| shelter_type
[KEY] ||| recycling:glass
[KEY] ||| second_hand
[KEY] ||| recycling:clothes
[KEY] ||| ruins
[KEY] ||| brand
[KEY] ||| bicycle_parking
[KEY] ||| capacity
[KEY] ||| 4wd_only
[KEY] ||| abutters
[KEY] ||| access
[KEY] ||| addr:city
[KEY] ||| addr:country
[KEY] ||| addr:district
[KEY] ||| addr:flats
[KEY] ||| addr:full
[KEY] ||| addr:hamlet
[KEY] ||| addr:housename
[KEY] ||| addr:housenumber
[KEY] ||| addr:inclusion
[KEY] ||| addr:interpolation
[KEY] ||| addr:place
[KEY] ||| addr:postcode
[KEY] ||| addr:province
[KEY] ||| addr:state
[KEY] ||| addr:street
[KEY] ||| addr:street
[KEY] ||| addr:subdistrict
[KEY] ||| addr:suburb
[KEY] ||| aerialway
[KEY] ||| aerodrome
[KEY] ||| aeroway
[KEY] ||| agricultural
[KEY] ||| alt_name
[KEY] ||| alt_name:lg
[KEY] ||| alt_name_1
[KEY] ||| amenity
[KEY] ||| architect
[KEY] ||| area
[KEY] ||| atm
[KEY] ||| attribution
[KEY] ||| atv
[KEY] ||| barrier
[KEY] ||| bdouble
[KEY] ||| bicycle
[KEY] ||| bicycle
[KEY] ||| bicycle_road
[KEY] ||| boat
[KEY] ||| boundary
[KEY] ||| bridge
[KEY] ||| building
[KEY] ||| charge
[KEY] ||| comments
[KEY] ||| contact:diaspora
[KEY] ||| contact:email
[KEY] ||| contact:facebook
[KEY] ||| contact:fax
[KEY] ||| contact:google_plus
[KEY] ||| contact:instagram
[KEY] ||| contact:linkedin
[KEY] ||| contact:phone
[KEY] ||| contact:twitter
[KEY] ||| contact:vhf
[KEY] ||| contact:webcam
[KEY] ||| contact:website
[KEY] ||| contact:xing
[KEY] ||| covered
[KEY] ||| craft
[KEY] ||| crossing
[KEY] ||| cuisine
[KEY] ||| cuisine
[KEY] ||| cutting
[KEY] ||| cycleway
[KEY] ||| de:place
[KEY] ||| de:amtlicher_gemeindeschluessel
[KEY] ||| denomination
[KEY] ||| description
[KEY] ||| diaspora
[KEY] ||| disused
[KEY] ||| drive_in
[KEY] ||| drive_through
[KEY] ||| driving_side
[KEY] ||| ele
[KEY] ||| ele
[KEY] ||| electrified
[KEY] ||| email
[KEY] ||| email
[KEY] ||| embankment
[KEY] ||| emergency
[KEY] ||| end_date
[KEY] ||| est_width
[KEY] ||| facebook
[KEY] ||| fax
[KEY] ||| fire_object:type
[KEY] ||| fire_operator
[KEY] ||| fire_rank
[KEY] ||| fireplace
[KEY] ||| fixme
[KEY] ||| foot
[KEY] ||| ford
[KEY] ||| forestry
[KEY] ||| frequency
[KEY] ||| geological
[KEY] ||| goods
[KEY] ||| google_plus
[KEY] ||| hazmat
[KEY] ||| height
[KEY] ||| hgv
[KEY] ||| highway
[KEY] ||| highway
[KEY] ||| hiking
[KEY] ||| historic
[KEY] ||| horse
[KEY] ||| iata
[KEY] ||| icao
[KEY] ||| ice_road
[KEY] ||| image
[KEY] ||| incline
[KEY] ||| information
[KEY] ||| inline_skates
[KEY] ||| inscription
[KEY] ||| instagram
[KEY] ||| int_name
[KEY] ||| int_ref
[KEY] ||| int_ref
[KEY] ||| intermittent
[KEY] ||| internet_access
[KEY] ||| internet_access
[KEY] ||| is_in:country
[KEY] ||| is_in
[KEY] ||| junction
[KEY] ||| landuse
[KEY] ||| lanes
[KEY] ||| lanes
[KEY] ||| layer
[KEY] ||| lcn_ref
[KEY] ||| leaf_cycle
[KEY] ||| leaf_type
[KEY] ||| leisure
[KEY] ||| leisure
[KEY] ||| lhv
[KEY] ||| linkedin
[KEY] ||| lit
[KEY] ||| loc_name
[KEY] ||| loc_ref
[KEY] ||| location
[KEY] ||| lock
[KEY] ||| man_made
[KEY] ||| maxheight
[KEY] ||| maxlength
[KEY] ||| maxspeed
[KEY] ||| maxspeed
[KEY] ||| maxstay
[KEY] ||| maxweight
[KEY] ||| maxwidth
[KEY] ||| military
[KEY] ||| minspeed
[KEY] ||| mofa
[KEY] ||| mooring
[KEY] ||| moped
[KEY] ||| motor_vehicle
[KEY] ||| motorboat
[KEY] ||| motorcar
[KEY] ||| motorcycle
[KEY] ||| motorroad
[KEY] ||| mountain_pass
[KEY] ||| mtb:description
[KEY] ||| mtb:scale
[KEY] ||| mtb:scale:imba
[KEY] ||| mtb:scale:uphill
[KEY] ||| name
[KEY] ||| name
[KEY] ||| name:lg
[KEY] ||| name:left
[KEY] ||| name:right
[KEY] ||| narrow
[KEY] ||| nat_name
[KEY] ||| nat_ref
[KEY] ||| natural
[KEY] ||| ncn_ref
[KEY] ||| noexit
[KEY] ||| note
[KEY] ||| note
[KEY] ||| nudism
[KEY] ||| office
[KEY] ||| official_name
[KEY] ||| old_name
[KEY] ||| old_name:lg
[KEY] ||| old_ref
[KEY] ||| oneway
[KEY] ||| oneway
[KEY] ||| opening_hours
[KEY] ||| operator
[KEY] ||| operator
[KEY] ||| outdoor_seating
[KEY] ||| overtaking
[KEY] ||| parking
[KEY] ||| parking:condition
[KEY] ||| parking:lane
[KEY] ||| passing_places
[KEY] ||| phone
[KEY] ||| phone
[KEY] ||| place
[KEY] ||| place_numbers
[KEY] ||| population
[KEY] ||| population
[KEY] ||| postal_code
[KEY] ||| power
[KEY] ||| psv
[KEY] ||| public_transport
[KEY] ||| railway
[KEY] ||| rcn_ref
[KEY] ||| ref
[KEY] ||| ref
[KEY] ||| reference_point
[KEY] ||| reg_name
[KEY] ||| reg_ref
[KEY] ||| Relation:restriction
[KEY] ||| religion
[KEY] ||| restaurant
[KEY] ||| roadtrain
[KEY] ||| rooms
[KEY] ||| route
[KEY] ||| sac_scale
[KEY] ||| service
[KEY] ||| service_times
[KEY] ||| shop
[KEY] ||| short_name
[KEY] ||| site_type
[KEY] ||| ski
[KEY] ||| smoking
[KEY] ||| smoking
[KEY] ||| smoothness
[KEY] ||| sorting_name
[KEY] ||| source
[KEY] ||| source:name
[KEY] ||| source:ref
[KEY] ||| source_ref
[KEY] ||| sport
[KEY] ||| star
[KEY] ||| stars
[KEY] ||| stars
[KEY] ||| start_date
[KEY] ||| surface
[KEY] ||| surface
[KEY] ||| tactile_paving
[KEY] ||| tank
[KEY] ||| tidal
[KEY] ||| TMC:LocationCode
[KEY] ||| todo
[KEY] ||| toilets:wheelchair
[KEY] ||| toll
[KEY] ||| tourism
[KEY] ||| tracks
[KEY] ||| tracktype
[KEY] ||| traffic_calming
[KEY] ||| traffic_sign
[KEY] ||| trail_visibility
[KEY] ||| tunnel
[KEY] ||| twitter
[KEY] ||| url
[KEY] ||| usage
[KEY] ||| vehicle
[KEY] ||| vhf
[KEY] ||| voltage
[KEY] ||| waterway
[KEY] ||| webcam
[KEY] ||| website
[KEY] ||| website
[KEY] ||| wheelchair
[KEY] ||| wheelchair
[KEY] ||| width
[KEY] ||| wifi
[KEY] ||| wikipedia
[KEY] ||| wikipedia
[KEY] ||| winter_road
[KEY] ||| xing
[INT] ||| 0 [INT,1]
[INT] ||| 1 [INT,1]
[INT] ||| 2 [INT,1]
[INT] ||| 3 [INT,1]
[INT] ||| 4 [INT,1]
[INT] ||| 5 [INT,1]
[INT] ||| 6 [INT,1]
[INT] ||| 7 [INT,1]
[INT] ||| 8 [INT,1]
[INT] ||| 9 [INT,1]
[INT] ||| 0
[INT] ||| 1
[INT] ||| 2
[INT] ||| 3
[INT] ||| 4
[INT] ||| 5
[INT] ||| 6
[INT] ||| 7
[INT] ||| 8
[INT] ||| 9
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Runs the functionaliser on the supplied file"""
import argparse
from . import local_io, mrl
def parse_arguments():
    """Parse the command line arguments of the functionaliser script.

    Both ``--input``/``-i`` and ``--output``/``-o`` are mandatory.

    :return: the ``argparse.Namespace`` produced by the parser, exposing the
        attributes ``input`` and ``output``
    """
    cli = argparse.ArgumentParser(description='A neural network based semantic parser for NLmaps')
    required_options = (
        ('--input', '-i', 'Location of input file'),
        ('--output', '-o', 'Location of output file'),
    )
    for long_flag, short_flag, help_text in required_options:
        cli.add_argument(long_flag, short_flag, required=True, help=help_text)
    return cli.parse_args()
def main():
    """Functionalise every line of the input file and write the results.

    Each line read from ``--input`` is passed to ``functionalise`` of the
    'nlmaps' MRL handler; the results are written to ``--output``, one per line.
    """
    cli_args = parse_arguments()
    nlmaps_handler = mrl.MRLS['nlmaps']()
    linearised_lines = local_io.read_lines_in_list(cli_args.input)
    functionalised = [nlmaps_handler.functionalise(entry) for entry in linearised_lines]
    local_io.write_list_to_file(functionalised, cli_args.output)


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Runs the MRL preprocessor (lineariser) on the supplied file"""
import argparse
from . import local_io, mrl
def parse_arguments():
    """Parse the command line arguments of the preprocessing script.

    Both ``--input``/``-i`` and ``--output``/``-o`` are mandatory.

    :return: the ``argparse.Namespace`` produced by the parser, exposing the
        attributes ``input`` and ``output``
    """
    cli = argparse.ArgumentParser(description='A neural network based semantic parser for NLmaps')
    required_options = (
        ('--input', '-i', 'Location of input file'),
        ('--output', '-o', 'Location of output file'),
    )
    for long_flag, short_flag, help_text in required_options:
        cli.add_argument(long_flag, short_flag, required=True, help=help_text)
    return cli.parse_args()
def main():
    """Preprocess (linearise) every line of the input file and write the results.

    Each line read from ``--input`` is passed to ``preprocess_mrl`` of the
    'nlmaps' MRL handler; the results are written to ``--output``, one per line.
    """
    cli_args = parse_arguments()
    nlmaps_handler = mrl.MRLS['nlmaps']()
    formulae = local_io.read_lines_in_list(cli_args.input)
    linearised = [nlmaps_handler.preprocess_mrl(formula) for formula in formulae]
    local_io.write_list_to_file(linearised, cli_args.output)


if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
"""Holds various read and write functions"""
def read_lines_in_list(file_to_read):
    """Read a UTF-8 text file into a list of lines.

    Only the trailing newline character of each line is removed; any other
    whitespace is kept.

    :param file_to_read: the location of the file to be read
    :return: a list where each entry corresponds to a line in the file
    """
    with open(file_to_read, 'r', encoding='utf8') as handle:
        return [raw_line.rstrip('\n') for raw_line in handle]
def write_list_to_file(list, file_to_write):
    """Write the entries of a list to a UTF-8 text file, one entry per line.

    Every entry is formatted with ``str.format`` and followed by a newline.

    :param list: the list to be written to a file
    :param file_to_write: the file to write to (overwritten if it exists)
    :return: 0 on success
    """
    with open(file_to_write, 'w', encoding='utf8') as handle:
        handle.writelines('{}\n'.format(entry) for entry in list)
    return 0
# -*- coding: utf-8 -*-
"""Contains the superclass MRL for basic handling of an MRL formula whose language is not further specified,
as well as subclasses for specific MRL languages"""
import collections
import re
import tempfile
import shutil
import subprocess
import os
class MRL:
    """Superclass that implements basic functions one might want to apply to a MRL formula"""

    def __init__(self):
        """
        Initializes a generic MRL object for pre- & postprocessing
        """
        self.mrl_type = 'MRL'

    def preprocess_mrl(self, mrl):
        """Preprocessing consists of simple splitting at white space

        :param mrl: the MRL formula to be preprocessed
        :return: the preprocessed MRL formula (a list of tokens)
        """
        return mrl.split(" ")

    def preprocess_mrl_set(self, mrls):
        """Preprocesses a list of MRL formulae

        :param mrls: a list of MRL formulae
        :return: a preprocessed list of MRL formulae
        """
        return [self.preprocess_mrl(mrl) for mrl in mrls]

    def delete_first_n_occurences(self, string_to_shorten, element, n):
        """Cut everything up to and including the n-th occurrence of a word.

        ``element`` is matched literally and must be delimited by word
        boundaries (``\\b``) on both sides.

        :param string_to_shorten: the string to search in
        :param element: the word whose occurrences are skipped
        :param n: how many occurrences to skip; for ``n <= 0`` the string is
            returned unchanged
        :return: the remainder of the string after the n-th occurrence, or ""
            if there are fewer than ``n`` occurrences
        """
        if n <= 0:
            return string_to_shorten
        # The pattern does not change between iterations, so build it once.
        pattern = re.compile(r'\b%s\b' % re.escape(element))
        while n > 0:
            match = pattern.search(string_to_shorten)
            if match is None:
                return ""
            string_to_shorten = string_to_shorten[match.end():]
            n -= 1
        return string_to_shorten

    def count_arguments(self, s):
        """Count the arguments of the bracket expression that starts ``s``.

        Scans ``s`` for the first "(" and counts the top-level commas up to the
        matching ")". Scanning stops without a result when a "," or ")" is met
        before any "(" was opened.

        :param s: the string that directly follows a function name
        :return: number of top-level commas + 1 if an opening bracket was
            found, otherwise 0
        """
        args_found = False
        depth = 0
        num_commas = 0
        for c in s:
            if c == '(':
                args_found = True
                depth += 1
            elif c == ')':
                depth -= 1
                if depth <= 0:
                    # Either the argument list is closed, or a stray ")"
                    # appeared before any "(": nothing more to count.
                    break
            elif c == ',':
                if depth == 1:
                    num_commas += 1
                elif depth < 1:
                    # A comma outside of any bracket ends the current term.
                    break
        return num_commas + 1 if args_found else 0

    def delete_spaces(self, mrl):
        """Remove all spaces that are not enclosed in single quotes.

        :param mrl: the MRL formula
        :return: the formula without spaces outside of quoted sections
        """
        quotes_seen = 0
        new_chars = []
        for c in mrl:
            if c == "'":
                quotes_seen += 1
            # An odd number of quotes so far means we are inside a quoted value.
            if c != ' ' or quotes_seen % 2 != 0:
                new_chars.append(c)
        return ''.join(new_chars)
class NLmaps(MRL):
    """Overrides MRL functions to work specifically for the NLmaps MRL

    ``preprocess_mrl`` turns a functional formula into a linearised token
    sequence (``name@arity``), ``functionalise`` does the reverse. Characters
    that would clash with the linearised format are replaced by placeholders
    (SAVECOMMA, SAVEAPO, BRACKETOPEN, BRACKETCLOSE and the euro sign for a
    space) during preprocessing and restored by ``functionalise``.
    """

    # Stands in for a space inside a value so that the value stays one token.
    # Written as an escape (it is the euro sign) so it survives re-encoding.
    SPACE_PLACEHOLDER = "\u20ac"

    def __init__(self, cdec=None, query_db=None, db_dir=None):
        """
        Initializes a NLmaps MRL object for pre- & postprocessing

        :param cdec: root directory of a cdec installation (the decoder is
            expected at ``<cdec>/decoder/cdec``); without it ``check_MRL_tree``
            always returns False
        :param query_db: stored on the instance; not used by this class
        :param db_dir: stored on the instance; not used by this class
        """
        self.mrl_type = 'MRL'
        self.cdec = cdec
        self.query_db = query_db
        self.db_dir = db_dir

    def preprocess_mrl(self, mrl):
        """Preprocessing for a NLmaps MRL formula

        :param mrl: the functional MRL formula to be preprocessed
        :return: the linearised formula
        """
        # sequence of characters that does not contain ( or ) : [^\(\)]
        # protect commas that occur inside a value
        mrl = re.sub(r"(','[^\(\)]*?),([^\(\)]*?')", r"\g<1>SAVECOMMA\g<2>", mrl)
        # need to protect brackets that occur in values, assumes that there is at most one open ( and 1 close)
        mrl = re.sub(r",' *([^\(\)]*?)\((.*?) *'\)", r",'\g<1>BRACKETOPEN\g<2>')", mrl)
        mrl = re.sub(r",' *([^\(\)]*?)\)([^\(\)]*?) *'\)", r",'\g<1>BRACKETCLOSE\g<2>')", mrl)
        # keep multi-word values together as one token
        mrl = mrl.replace(" ", self.SPACE_PLACEHOLDER)
        # an apostrophe with ordinary characters on both sides belongs to a value
        mrl = re.sub(r"(?<=([^,\(\)]))'(?=([^,\(\)]))", "SAVEAPO", mrl)
        # for when a and() surrounds two end values
        mrl = re.sub(r"and\(' *([^\(\)]+?) *',' *([^\(\)]+?) *'\)", r"and(\g<1>@s','\g<2>@s)", mrl)
        # a bracket ( or ) is not allowed within any key or value
        mrl = re.sub(r"\(' *([^\(\)]+?) *'\)", r"(\g<1>@s)", mrl)
        # for when a or() surrounds two values
        mrl = re.sub(r"([,\)\(])or\(([^\(\)]+?)','([^\(\)]+?)@s\)", r"\g<1>or(\g<2>@s','\g<3>@s)", mrl)
        mrl = re.sub(r"\s+", " ", mrl)
        mrl = re.sub(r"'", "", mrl)
        mrl = mrl.strip()
        return self.linearise(mrl)

    def linearise(self, mrl):
        """Linearises a NLmaps MRL formula. For example:
        query(area(keyval('name','City of Edinburgh')),nwr(keyval('amenity','police')),qtype(least(topx(1))))
        becomes
        query@3 area@1 keyval@2 name@0 City€of€Edinburgh@s nwr@1 keyval@2 amenity@0 police@s qtype@1 least@1 topx@1 1@0

        :param mrl: the formula to be linearised (quotes already removed,
            values already marked with the suffix @s)
        :return: the linearised formula
        """
        just_words = mrl.replace("(", " ").replace(")", " ").replace(",", " ")
        just_words = re.sub(r"\s+", " ", just_words).strip()
        lin = []
        seen_string_x_times = collections.defaultdict(int)
        for element in just_words.split(" "):
            seen_string_x_times[element] += 1
            if element.endswith("@s"):
                # values are leaves and already carry their marker
                lin.append(element)
                continue
            # The arity is read off the text that follows this particular
            # occurrence of the element in the bracketed formula.
            shortened_string = self.delete_first_n_occurences(mrl, element, seen_string_x_times[element])
            args = self.count_arguments(shortened_string)
            lin.append("%s@%s" % (element, args))
        return ' '.join(lin)

    def insert_pass_through_words(self, lin, non_stemmed, stemmed):
        """Replace passed-through stemmed words by their non-stemmed form.

        A token of ``lin`` without "@" that equals a word of ``stemmed`` is
        replaced by the word at the same position in ``non_stemmed`` and marked
        as a value (``@s``). If a word occurs several times in ``stemmed``, the
        last position is used. ``lin`` is modified in place.

        :param lin: list of linearised tokens
        :param non_stemmed: the original sentence, words separated by a space
        :param stemmed: the stemmed sentence, words separated by a space
        :return: ``lin``; unchanged if both sentences differ in length
        """
        non_stemmed = non_stemmed.split(" ")
        stemmed = stemmed.split(" ")
        if len(non_stemmed) != len(stemmed):
            return lin
        # later positions overwrite earlier ones, i.e. the last occurrence wins
        last_position = {word: position for position, word in enumerate(stemmed)}
        for i, element in enumerate(lin):
            if "@" in element:
                continue
            word_pos = last_position.get(element)
            if word_pos is not None:  # then word was passed through
                lin[i] = "%s@s" % non_stemmed[word_pos]
        return lin

    def transform_if_tree(self, lin):
        """Rebuild the bracketed formula from linearised tokens.

        :param lin: list of tokens of the form ``name@arity`` or ``value@s``
        :return: the bracketed formula, or "" if a token has no or an invalid
            arity or the tokens do not form a complete tree
        """
        stack_arity = []  # number of arguments still missing per open bracket
        mrl = []
        prev = ""
        for element in lin:
            if "@" not in element:  # else invalid
                return ""
            # Split at the last "@" only: a value may itself contain "@".
            element, arity = element.rsplit("@", 1)
            arity_s_found = arity == "s"
            if arity_s_found:
                arity = 0
            else:
                try:
                    arity = int(arity)
                except ValueError:
                    return ""
            if arity > 0:
                mrl.append(element)
                mrl.append("(")
                stack_arity.append(arity)
            else:
                if arity_s_found and not stack_arity:
                    # a value cannot stand outside of any bracket
                    return ""
                if arity_s_found or prev == "keyval" or prev == "findkey":
                    # values and keys are quoted; restore protected spaces
                    element = element.replace(self.SPACE_PLACEHOLDER, " ")
                    element = "'%s'" % element
                mrl.append(element)
                # Close every bracket whose last argument was just emitted;
                # stop at the first one that still expects more arguments.
                while stack_arity:
                    top = stack_arity.pop()
                    if top > 1:
                        mrl.append(",")
                        stack_arity.append(top - 1)
                        break
                    mrl.append(")")
            prev = element
        if stack_arity:
            return ""
        return ''.join(mrl)

    @staticmethod
    def _space_out_digits(mrl, functor):
        """Separate the digits of a purely numeric ``functor( N )`` argument by spaces."""
        pattern = r"%s\( (\d+) \)" % re.escape(functor)
        return re.sub(pattern, lambda m: "%s( %s )" % (functor, " ".join(m.group(1))), mrl)

    def _prepare_for_cfg(self, mrl):
        """Tokenise a bracketed formula and abstract its values for the CFG check."""
        mrl = mrl.replace("(", "( ")
        mrl = mrl.replace(",", " , ")
        mrl = mrl.replace(")", " )")
        mrl = re.sub(r"name:.*? \)", "name:lg )", mrl)
        # concrete values are replaced by the placeholder 'valvariable'
        mrl = re.sub(r"keyval\( '([^\(\)]+?)' , '[^\(\)]+?' ", r"keyval( '\g<1>' , 'valvariable' ", mrl)
        mrl = re.sub(r"keyval\( '([^\(\)]+?)' , or\( '[^\(\)]+?' , '[^\(\)]+?' ",
                     r"keyval( '\g<1>' , or( 'valvariable' , 'valvariable' ", mrl)
        mrl = re.sub(r"keyval\( '([^\(\)]+?)' , and\( '[^\(\)]+?' , '[^\(\)]+?' ",
                     r"keyval( '\g<1>' , and( 'valvariable' , 'valvariable' ", mrl)
        mrl = re.sub(r" '(.*?)' ", r" ' \g<1> ' ", mrl)
        # Numbers are spelled digit by digit. Non-numeric arguments (e.g. a
        # named distance such as DIST_INTOWN) are left as a single token.
        mrl = self._space_out_digits(mrl, "topx")
        mrl = self._space_out_digits(mrl, "maxdist")
        return mrl

    def check_MRL_tree(self, mrl, cfg):
        """Check with cdec whether a formula can be parsed by a grammar.

        :param mrl: the bracketed MRL formula
        :param cfg: location of the grammar file handed to cdec
        :return: True if cdec does not report "NO PARSE"; False otherwise, if no
            cdec directory is configured, or if running cdec fails
        """
        if self.cdec is None:
            return False
        temp_dir = tempfile.mkdtemp("", "semparse_functionalizer")
        try:
            sentence = self._prepare_for_cfg(mrl)
            ini_path = os.path.join(temp_dir, 'cdec_validate.ini')
            with open(ini_path, 'w', encoding='utf8') as ini_file:
                ini_file.write("formalism=scfg\n")
                ini_file.write("intersection_strategy=cube_pruning\n")
                ini_file.write("cubepruning_pop_limit=1000\n")
                ini_file.write("grammar=%s\n" % cfg)
                ini_file.write("scfg_max_span_limit=1000\n")
            sent_path = os.path.join(temp_dir, 'sent.tmp')
            with open(sent_path, 'w', encoding='utf8') as sent_file:
                sent_file.write("%s\n" % sentence)
            args = ["%s/decoder/cdec" % self.cdec, '-c', ini_path]
            with open(sent_path, 'rb') as infile, open(os.devnull, 'w') as nullfile:
                process = subprocess.Popen(args, stdin=infile, stdout=nullfile, stderr=subprocess.PIPE)
                # communicate() also waits for the process; stderr is bytes
                _, cfg_log = process.communicate()
            return b"NO PARSE" not in cfg_log
        except Exception:
            # Best effort: a formula that cannot be validated (e.g. cdec cannot
            # be started) is treated as invalid.
            return False
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    def functionalise(self, lin, non_stemmed=None, stemmed=None, cfg=None):
        """Functionalises a NLmaps MRL formula. For example:
        query@3 area@1 keyval@2 name@0 City€of€Edinburgh@s nwr@1 keyval@2 amenity@0 police@s qtype@1 least@1 topx@1 1@0
        becomes
        query(area(keyval('name','City of Edinburgh')),nwr(keyval('amenity','police')),qtype(least(topx(1))))

        :param lin: the linearised formula, tokens separated by a space
        :param non_stemmed: optional original sentence; together with
            ``stemmed`` it enables replacing passed-through words
        :param stemmed: optional stemmed sentence
        :param cfg: optional grammar file; if given, the formula must also pass
            ``check_MRL_tree``
        :return: the functionalised formula, or "" if it is not a valid tree or
            fails the grammar check
        """
        lin = lin.replace("<topx>", "")
        lin = lin.replace("</topx>", "@0")
        lin = lin.split(" ")
        if non_stemmed is not None and stemmed is not None:
            lin = self.insert_pass_through_words(lin, non_stemmed, stemmed)
        mrl = self.transform_if_tree(lin)
        if mrl == "":
            return ""
        if cfg is not None and not self.check_MRL_tree(mrl, cfg):
            return ""
        # restore the characters that were protected during preprocessing
        mrl = mrl.replace("SAVEAPO", "'")
        mrl = mrl.replace("BRACKETOPEN", "(")
        mrl = mrl.replace("BRACKETCLOSE", ")")
        mrl = mrl.replace("SAVECOMMA", ",")
        return mrl
# Maps the name of an MRL to the class handling it; '' selects the generic MRL.
MRLS = collections.OrderedDict()
MRLS[''] = MRL
MRLS['nlmaps'] = NLmaps
# -*- coding: utf-8 -*-
"""Contains tests for the MRL class in mrl.py"""
import os
import unittest
from . import local_io
from . import mrl
from . import CFG_DIR
class MRLTests(unittest.TestCase):
    """Tests linearisation and functionalisation of NLmaps MRL formulae.

    The optional corpus checks run only when the corresponding pair of
    environment variables (e.g. NLMAPS_TRAIN_LIN and NLMAPS_TRAIN_MRL) is set.
    """

    MISMATCH_MESSAGE = "These are not the same:\noutput: %s\ngoal: %s"

    # (functional formula, linearised formula) pairs; each pair has to convert
    # into the other in both directions.
    ROUND_TRIP_CASES = [
        ("query(area(keyval('name','Paris'),keyval('is_in:country','France')),nwr(keyval('cuisine','japanese')),qtype(count))",
         "query@3 area@2 keyval@2 name@0 Paris@s keyval@2 is_in:country@0 France@s nwr@1 keyval@2 cuisine@0 japanese@s qtype@1 count@0"),
        ("query(area(keyval('name','Heidelberg'),keyval('de:place','city')),nwr(keyval('name','McDonald's')),qtype(count))",
         "query@3 area@2 keyval@2 name@0 Heidelberg@s keyval@2 de:place@0 city@s nwr@1 keyval@2 name@0 McDonaldSAVEAPOs@s qtype@1 count@0"),
        ("query(area(keyval('name','Heidelberg'),keyval('de:place','city')),nwr(keyval('name','M(c)Donalds')),qtype(count))",
         "query@3 area@2 keyval@2 name@0 Heidelberg@s keyval@2 de:place@0 city@s nwr@1 keyval@2 name@0 MBRACKETOPENcBRACKETCLOSEDonalds@s qtype@1 count@0"),
        ("query(area(keyval('name','Heidelberg'),keyval('de:place','city')),nwr(keyval('name','Mc Donalds')),qtype(count))",
         "query@3 area@2 keyval@2 name@0 Heidelberg@s keyval@2 de:place@0 city@s nwr@1 keyval@2 name@0 Mc€Donalds@s qtype@1 count@0"),
        ("query(area(keyval('name','Paris'),keyval('is_in:country','France')),nwr(keyval('cuisine','japanese,italian')),qtype(count))",
         "query@3 area@2 keyval@2 name@0 Paris@s keyval@2 is_in:country@0 France@s nwr@1 keyval@2 cuisine@0 japaneseSAVECOMMAitalian@s qtype@1 count@0"),
        ("query(around(center(area(keyval('name','Heidelberg'),keyval('de:place','city')),nwr(keyval('name','Yorckstraße'))),search(nwr(and(keyval('amenity','bank'),keyval('amenity','pharmacy')))),maxdist(DIST_INTOWN),topx(1)),qtype(latlong))",
         "query@2 around@4 center@2 area@2 keyval@2 name@0 Heidelberg@s keyval@2 de:place@0 city@s nwr@1 keyval@2 name@0 Yorckstraße@s search@1 nwr@1 and@2 keyval@2 amenity@0 bank@s keyval@2 amenity@0 pharmacy@s maxdist@1 DIST_INTOWN@0 topx@1 1@0 qtype@1 latlong@0"),
        ("query(area(keyval('name','Paris'),keyval('is_in:country','France')),nwr(keyval('amenity','restaurant'),keyval('cuisine',or('greek','italian'))),qtype(count))",
         "query@3 area@2 keyval@2 name@0 Paris@s keyval@2 is_in:country@0 France@s nwr@2 keyval@2 amenity@0 restaurant@s keyval@2 cuisine@0 or@2 greek@s italian@s qtype@1 count@0"),
    ]

    @classmethod
    def setUpClass(cls):
        """Create the NLmaps handler and read the optional corpus locations."""
        cls.nlmaps_world = mrl.MRLS['nlmaps']()
        cls.nlmaps_train_lin = os.environ.get('NLMAPS_TRAIN_LIN')
        cls.nlmaps_train_mrl = os.environ.get('NLMAPS_TRAIN_MRL')
        cls.nlmaps_dev_lin = os.environ.get('NLMAPS_DEV_LIN')
        cls.nlmaps_dev_mrl = os.environ.get('NLMAPS_DEV_MRL')
        cls.nlmaps_test_lin = os.environ.get('NLMAPS_TEST_LIN')
        cls.nlmaps_test_mrl = os.environ.get('NLMAPS_TEST_MRL')
        cls.cfg = os.path.join(CFG_DIR, 'nlmaps')

    def _assert_same(self, output, goal):
        """Assert equality and report both strings on failure."""
        self.assertEqual(output, goal, self.MISMATCH_MESSAGE % (output, goal))

    def _check_corpus(self, source_file, goal_file, transform):
        """Apply ``transform`` to each line of ``source_file`` and compare it
        with the line at the same position in ``goal_file``.

        Does nothing unless both file locations are set.
        """
        if not (source_file and goal_file):
            return
        sources = local_io.read_lines_in_list(source_file)
        goals = local_io.read_lines_in_list(goal_file)
        for i, line in enumerate(sources):
            self._assert_same(transform(line), goals[i])

    def test_functionalise_from_nlmaps(self):
        functionalise = self.nlmaps_world.functionalise
        # generic tests
        for goal, lin in self.ROUND_TRIP_CASES:
            with self.subTest(lin=lin):
                self._assert_same(functionalise(lin), goal)
        # test pass through
        test_reponse = functionalise("query@3 area@2 keyval@2 name@0 pari keyval@2 is_in:country@0 France@s nwr@1 keyval@2 cuisine@0 japanese@s qtype@1 count@0", non_stemmed="noise noise Paris noise", stemmed="noise noise pari noise")
        goal = "query(area(keyval('name','Paris'),keyval('is_in:country','France')),nwr(keyval('cuisine','japanese')),qtype(count))"
        self._assert_same(test_reponse, goal)
        # test CFG
        test_reponse = functionalise("query@3 area@2 keyval@2 name@0 Paris@s keyval@2 is_in:country@0 France@s nwr@1 keyval@2 cuisine@0 japanese@s qtype@1 count@0", cfg=self.cfg)
        goal = "query(area(keyval('name','Paris'),keyval('is_in:country','France')),nwr(keyval('cuisine','japanese')),qtype(count))"
        # Without a cdec installation the grammar check rejects every formula,
        # so the positive case can only be asserted when cdec is configured.
        if self.nlmaps_world.cdec is not None:
            self._assert_same(test_reponse, goal)
        test_reponse = functionalise("query@3 area@2 failval@2 name@0 Paris@s keyval@2 is_in:country@0 France@s nwr@1 keyval@2 cuisine@0 japanese@s qtype@1 count@0", cfg=self.cfg)
        self._assert_same(test_reponse, "")
        # test to ensure that wrong MRLs actually do fail
        test_reponse = functionalise("query@5 area@2 keyval@2 name@0 Paris@s keyval@2 is_in:country@0 France@s nwr@1 keyval@2 cuisine@0 japanese@s qtype@1 count@0")
        self._assert_same(test_reponse, "")
        # test whole nlmaps
        self._check_corpus(self.nlmaps_train_lin, self.nlmaps_train_mrl, functionalise)
        self._check_corpus(self.nlmaps_dev_lin, self.nlmaps_dev_mrl, functionalise)
        self._check_corpus(self.nlmaps_test_lin, self.nlmaps_test_mrl, functionalise)

    def test_preprocess_mrl_from_nlmaps(self):
        preprocess = self.nlmaps_world.preprocess_mrl
        for formula, goal in self.ROUND_TRIP_CASES:
            with self.subTest(mrl=formula):
                self._assert_same(preprocess(formula), goal)
        # test whole nlmaps
        self._check_corpus(self.nlmaps_train_mrl, self.nlmaps_train_lin, preprocess)
        self._check_corpus(self.nlmaps_dev_mrl, self.nlmaps_dev_lin, preprocess)
        self._check_corpus(self.nlmaps_test_mrl, self.nlmaps_test_lin, preprocess)
def main():
    """Hand control to the unittest command line runner for this module."""
    unittest.main()


if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment