Skip to content
Snippets Groups Projects
Commit b9be7450 authored by Simon Will's avatar Simon Will
Browse files

Make generator able to generate dist queries

parent 1569c357
No related branches found
No related tags found
No related merge requests found
Showing
with 266 additions and 155 deletions
......@@ -37,12 +37,18 @@ def collect_templates(subdir_basename):
# Template collections, each loaded by passing one subdirectory basename to
# collect_templates().
COMMON_TEMPLATES = collect_templates('common')
IN_QUERY_ONLY_TEMPLATES = collect_templates('in_query')
AROUND_QUERY_ONLY_TEMPLATES = collect_templates('around_query')
DIST_SAME_AREA_ONLY_TEMPLATES = collect_templates('dist_same_area')
NAMED_IN_QUERY_TEMPLATES = collect_templates('named_in_query')
CLOSEST_AROUND_QUERY_TEMPLATES = collect_templates('closest_around_query')
DIST_CLOSEST_TEMPLATES = collect_templates('dist_closest')
DIST_DIFF_AREA_TEMPLATES = collect_templates('dist_diff_area')
# Combined collections built with merge_templates().
# NOTE(review): presumably the result offers the templates of both arguments
# per query-type shorthand -- confirm against merge_templates().
IN_QUERY_TEMPLATES = merge_templates(COMMON_TEMPLATES, IN_QUERY_ONLY_TEMPLATES)
AROUND_QUERY_TEMPLATES = merge_templates(COMMON_TEMPLATES,
AROUND_QUERY_ONLY_TEMPLATES)
# Same-area distance queries are also given the different-area templates.
DIST_SAME_AREA_TEMPLATES = merge_templates(DIST_SAME_AREA_ONLY_TEMPLATES,
DIST_DIFF_AREA_TEMPLATES)
SHORTHAND_TO_QTYPE = {
'name': (('findkey', 'name'),),
......@@ -123,53 +129,98 @@ def choose_poi(pois):
def generate_features(thing_table, areas, pois):
    """Generate the feature dict for one random query.

    Draws ``optional('dist', 0.1)`` once; a truthy draw delegates to
    generate_dist_query_features(), otherwise to
    generate_poi_query_features(). All three arguments are passed through
    unchanged.
    """
    if optional('dist', 0.1):
        generator = generate_dist_query_features
    else:
        generator = generate_poi_query_features
    return generator(thing_table, areas, pois)
def generate_dist_query_features(thing_table, areas, pois):
    """Generate the features of a distance ("dist") query.

    The result has ``query_type == 'dist'``, a ``'sub'`` list holding the
    sub-query feature dicts, and rendering features whose ``'dist_type'`` is
    one of:

    * ``'closest'``: one tag sub-query generated with ``closest=True``; its
      ``plural``/``thing_singular``/``thing_plural`` rendering features are
      copied to the top level.
    * ``'same_area'``: two named-entity sub-queries; the second one is
      generated without an area and then given the area of the first.
    * ``'diff_area'``: two independently generated named-entity sub-queries.

    For the two-sub-query types the copied rendering features are prefixed
    with ``first_`` and ``second_``.
    """
    rfeatures = {'qtype_shorthand': 'dist'}
    features = {'rendering_features': rfeatures, 'query_type': 'dist'}

    if optional('closest'):
        around_query = generate_tag_query_features(
            thing_table, areas, pois, closest=True)
        features['sub'] = [around_query]
        rfeatures['dist_type'] = 'closest'
        sub_rfeatures = around_query['rendering_features']
        for key in ('plural', 'thing_singular', 'thing_plural'):
            rfeatures[key] = sub_rfeatures[key]
        return features

    rfeatures['dist_type'] = choose(['same_area', 'diff_area'], [0.3, 0.7])
    if rfeatures['dist_type'] == 'same_area':
        in_query_1 = generate_ne_query_features(areas, pois, with_area=True)
        in_query_2 = generate_ne_query_features(areas, pois, with_area=False)
        # Both named entities are looked up in the very same area.
        in_query_2['area'] = in_query_1['area']
    else:
        in_query_1 = generate_ne_query_features(areas, pois)
        in_query_2 = generate_ne_query_features(areas, pois)
    features['sub'] = [in_query_1, in_query_2]

    for prefix, sub_query in (('first', in_query_1), ('second', in_query_2)):
        sub_rfeatures = sub_query['rendering_features']
        rfeatures[prefix + '_plural'] = sub_rfeatures['plural']
        rfeatures[prefix + '_thing_singular'] = sub_rfeatures['thing_singular']
        rfeatures[prefix + '_thing_plural'] = sub_rfeatures['thing_plural']
    return features
def generate_poi_query_features(thing_table, areas, pois, ne=None,
                                around=None, closest=None):
    """Generate features for a named-entity query or a tag query.

    ``ne`` forces the choice when it is not None (truthy: named-entity
    query, falsy: tag query); when it is None the choice is made by drawing
    ``optional('named_entity_query', 0.05)``. ``around`` and ``closest``
    are only forwarded to generate_tag_query_features().
    """
    if ne is None:
        ne = optional('named_entity_query', 0.05)
    if ne:
        return generate_ne_query_features(areas, pois)
    return generate_tag_query_features(thing_table, areas, pois,
                                       around=around, closest=closest)
def generate_poi_query_features(thing_table, areas, pois):
def generate_tag_query_features(thing_table, areas, pois, around=None,
closest=None):
if closest is True:
around = True
rfeatures = {}
features = {'rendering_features': rfeatures}
idx = random.randint(0, len(thing_table) - 1)
rfeatures['named_entity'] = choose([True, False], [0.05, 0.95])
rfeatures['named_entity'] = True
if rfeatures['named_entity']:
# Query for name tag, e.g. name=Völkerschlachtdenkmal.
features['query_type'] = 'in_query'
rfeatures['thing_singular'] = rfeatures['thing_plural'] = choose(pois)
features['target_nwr'] = [('name', rfeatures['thing_singular'])]
rfeatures['plural'] = False
if optional('with_area', 0.6):
features['area'] = choose(areas)
else:
# Normal query by non-name tags, e.g. amenity=restaurant.
thing = thing_table[idx]
features['target_nwr'] = thing.tags
if thing.singular:
rfeatures['thing_singular'] = choose(thing.singular)
if thing.plural:
plural_chance = 0.7
rfeatures['thing_plural'] = choose(thing.plural)
else:
plural_chance = 0.0
rfeatures['thing_plural'] = rfeatures['thing_singular']
elif thing.plural:
plural_chance = 1.0
thing = thing_table[idx]
features['target_nwr'] = thing.tags
if thing.singular:
rfeatures['thing_singular'] = choose(thing.singular)
if thing.plural:
plural_chance = 0.7
rfeatures['thing_plural'] = choose(thing.plural)
else:
raise ValueError('Neither singular nor plural in thing: {}'
.format(thing))
plural_chance = 0.0
rfeatures['thing_plural'] = rfeatures['thing_singular']
elif thing.plural:
plural_chance = 1.0
rfeatures['thing_plural'] = choose(thing.plural)
else:
raise ValueError('Neither singular nor plural in thing: {}'
.format(thing))
rfeatures['plural'] = choose([True, False],
[plural_chance, 1 - plural_chance])
rfeatures['plural'] = choose([True, False],
[plural_chance, 1 - plural_chance])
features['area'] = choose(areas)
features['area'] = choose(areas)
if around:
features['query_type'] = 'around_query'
elif around is False:
features['query_type'] = 'in_query'
else: # around is None
features['query_type'] = choose(['around_query', 'in_query'], [0.6, 0.4])
if closest:
features['cardinal_direction'] = None
else:
features['cardinal_direction'] = choose(
[None, 'east', 'north', 'south', 'west'],
[0.7, 0.075, 0.075, 0.075, 0.075]
......@@ -179,7 +230,7 @@ def generate_poi_query_features(thing_table, areas, pois):
if features['cardinal_direction']:
features['maxdist'] = Symbol('DIST_OUTTOWN')
else:
if optional('closest', 0.3):
if closest or (closest is None and optional('closest', 0.3)):
features['around_topx'] = Symbol('1')
features['maxdist'] = Symbol('DIST_INTOWN')
rfeatures['plural'] = choose([True, False], [0.1, 0.9])
......@@ -200,12 +251,7 @@ def generate_poi_query_features(thing_table, areas, pois):
features['center_nwr'] = choose_poi(pois)
del features['area']
if rfeatures['named_entity']:
rfeatures['qtype_shorthand'] = choose(
['latlong', 'website', 'opening-hours'],
[0.6, 0.00, 0.4]
)
elif features.get('around_topx'):
if features.get('around_topx'):
rfeatures['qtype_shorthand'] = choose(
['name', 'latlong', 'website', 'opening-hours'],
[0.4, 0.4, 0.00, 0.2]
......@@ -222,8 +268,33 @@ def generate_poi_query_features(thing_table, areas, pois):
return features
def generate_ne_query_features(areas, pois, with_area=None):
    """Generate features for a query by name tag (a named entity).

    A name is chosen from ``pois`` and used both as singular and plural
    rendering form and as the ``('name', ...)`` target tag. An ``'area'``
    chosen from ``areas`` is added when ``with_area`` is True, or when
    ``with_area`` is None and ``optional('with_area', 0.6)`` is truthy; any
    other value of ``with_area`` leaves the area out. The query type
    shorthand is chosen among latlong, website and opening-hours with
    weights 0.6, 0.00 and 0.4.
    """
    poi_name = choose(pois)
    rfeatures = {
        'named_entity': True,
        'thing_singular': poi_name,
        'thing_plural': poi_name,
        'plural': False,
    }
    features = {
        'rendering_features': rfeatures,
        'query_type': 'in_query',
        'target_nwr': [('name', poi_name)],
    }

    if with_area is None:
        wants_area = bool(optional('with_area', 0.6))
    else:
        wants_area = with_area is True
    if wants_area:
        features['area'] = choose(areas)

    shorthand = choose(
        ['latlong', 'website', 'opening-hours'],
        [0.6, 0.00, 0.4]
    )
    rfeatures['qtype_shorthand'] = shorthand
    features['qtype'] = SHORTHAND_TO_QTYPE[shorthand]
    return features
def generate_nl(features, noise=False):
if features['query_type'] == 'in_query':
rfeatures = features['rendering_features']
if rfeatures.get('dist_type') == 'closest':
templates = DIST_CLOSEST_TEMPLATES
elif rfeatures.get('dist_type') == 'same_area':
templates = DIST_SAME_AREA_TEMPLATES
elif rfeatures.get('dist_type') == 'diff_area':
templates = DIST_DIFF_AREA_TEMPLATES
elif features['query_type'] == 'in_query':
if features['rendering_features'].get('named_entity'):
templates = NAMED_IN_QUERY_TEMPLATES
else:
......@@ -235,7 +306,6 @@ def generate_nl(features, noise=False):
templates = AROUND_QUERY_TEMPLATES
else:
templates = COMMON_TEMPLATES
rfeatures = features['rendering_features']
possible_templates = templates[rfeatures['qtype_shorthand']]
template = ENV.get_template(choose(possible_templates))
rfeatures['template'] = template # This is only saved as debugging info.
......@@ -261,6 +331,7 @@ def omit_location(loc):
return (
re.match(r'^[\s\d]+$', loc)
or len(loc) > 40
or len(loc) < 2
# Allow only Unicode code blocks Basic Latin, Latin-1 Supplement, Latin
# Extended-A and Latin Extended-B as well as General Punctuation
or any(ord(char) > 0x024f and not 0x2000 <= ord(char) <= 0x206F
......
......@@ -113,4 +113,25 @@ POI in LOCATION -> area(name: LOCATION), target_nwr(name: POI)
DIST
====
How far is THING1 in LOC1 from THING2 in LOC2
What is the distance between|from A and|to B
What's the distance between|from A and|to B
Tell/Give me the distance between|from A and B
How long|far is the distance between|from A and|to B
How far (apart) is A from B?
How far is A apart from B?
How far (is it) from A to B
Tell me how far it is from A to B
Type 1
In LOCATION, tell me the distance of NE and NE
Tell me the distance from NE to NE in LOCATION
Type 2
Tell me the distance from NE in LOCATION to NE in LOCATION
from NE in LOCATION to NE in LOCATION how far
from NE in LOCATION, how far to NE in LOCATION
How far apart are NE in LOCATION and NE in LOCATION
Type 3
✓Tell me the distance from LOCATION to the closest THING
✓How far is it to the closest THING from LOCATION
......@@ -4,38 +4,24 @@
{% endif %}
{% if optional('show_give_indicate') %}
{% set qword = choose(['show', 'give', 'indicate']) %}
{{ qword }}
{% if qword == 'indicate' %}
{{ choose(['for me', 'for us', ''], [0.2, 0.2, 0.6]) }}
{% else %}
{{ choose(['me', 'us', ''], [0.3, 0.3, 0.4]) }}
{% endif %}
{% set qword = choose(['show', 'give', 'indicate']) %}
{{ qword }}
{% if qword == 'indicate' %}
{{ choose(['for me', 'for us', ''], [0.2, 0.2, 0.6]) }}
{% else %}
{{ choose(['me', 'us', ''], [0.3, 0.3, 0.4]) }}
{% endif %}
{% endif %}
{{ choose(['the location of', 'the locations of', 'the coordinates of', ''], [0.15, 0.1, 0.1, 0.7]) }}
{{ optional('the', 0.5) }}
{% if optional('thing_first') %}
{{ thing_plural if plural else thing_singular }}
{% if plural %}
{{ choose(['that are', 'which are', ''], [0.25, 0.25, 0.5]) }}
{% else %}
{{ choose(['that is', 'which is', ''], [0.25, 0.25, 0.5]) }}
{% endif %}
{{ choose(['closest', 'nearest', 'next']) }}
{% else %}
{{ choose(['closest', 'nearest', 'next']) }}
{{ thing_plural if plural else thing_singular }}
{% endif %}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{{ choose(['from', 'to', 'of'], [0.4, 0.4, 0.2]) }}
{% if features['area'] and separate_area_and_nwr %}
{{ features['center_nwr'][0][1] }}
{{ features['center_nwr'][0][1] }}
{% else %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% endif %}
{{ optional('?') }}
......@@ -6,27 +6,13 @@
where
{{ 'are' if plural else 'is' }}
{{ optional('the', 0.8) }}
{% if optional('thing_first') %}
{{ thing_plural if plural else thing_singular }}
{% if plural %}
{{ choose(['that are', 'which are', ''], [0.25, 0.25, 0.5]) }}
{% else %}
{{ choose(['that is', 'which is', ''], [0.25, 0.25, 0.5]) }}
{% endif %}
{{ choose(['closest', 'nearest', 'next']) }}
{% else %}
{{ choose(['closest', 'nearest', 'next']) }}
{{ thing_plural if plural else thing_singular }}
{% endif %}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{{ choose(['from', 'to', 'of'], [0.4, 0.4, 0.2]) }}
{% if features['area'] and separate_area_and_nwr %}
{{ features['center_nwr'][0][1] }}
{{ features['center_nwr'][0][1] }}
{% else %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% endif %}
{{ optional('?') }}
......@@ -4,39 +4,25 @@
{% endif %}
{% if optional('give', 0.6) %}
{{ choose(['show', 'give']) }}
{{ choose(['me', 'us', ''], [0.3, 0.3, 0.4]) }}
{{ optional('the', 0.8) }}
name
of
{{ optional('the', 0.8) }}
{{ choose(['show', 'give']) }}
{{ choose(['me', 'us', ''], [0.3, 0.3, 0.4]) }}
{{ optional('the', 0.8) }}
name
of
{{ optional('the', 0.8) }}
{% else %}
name
{{ optional('of') }}
{{ optional('the', 0.8) }}
name
{{ optional('of') }}
{{ optional('the', 0.8) }}
{% endif %}
{% if optional('thing_first') %}
{{ thing_plural if plural else thing_singular }}
{% if plural %}
{{ choose(['that are', 'which are', ''], [0.25, 0.25, 0.5]) }}
{% else %}
{{ choose(['that is', 'which is', ''], [0.25, 0.25, 0.5]) }}
{% endif %}
{{ choose(['closest', 'nearest', 'next']) }}
{% else %}
{{ choose(['closest', 'nearest', 'next']) }}
{{ thing_plural if plural else thing_singular }}
{% endif %}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{{ choose(['from', 'to', 'of'], [0.4, 0.4, 0.2]) }}
{% if features['area'] and separate_area_and_nwr %}
{{ features['center_nwr'][0][1] }}
{{ features['center_nwr'][0][1] }}
{% else %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% endif %}
{{ choose(['!', '.', ''], [0.25, 0.25, 0.5]) }}
......@@ -14,21 +14,7 @@
opening {{ choose(['hours', 'times']) }}
of
{{ optional('the', 0.8) }}
{% if optional('thing_first') %}
{{ thing_plural if plural else thing_singular }}
{% if plural %}
{{ choose(['that are', 'which are', ''], [0.25, 0.25, 0.5]) }}
{% else %}
{{ choose(['that is', 'which is', ''], [0.25, 0.25, 0.5]) }}
{% endif %}
{{ choose(['closest', 'nearest', 'next']) }}
{% else %}
{{ choose(['closest', 'nearest', 'next']) }}
{{ thing_plural if plural else thing_singular }}
{% endif %}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{{ choose(['from', 'to', 'of'], [0.4, 0.4, 0.2]) }}
......
......@@ -10,21 +10,7 @@ when
{{ choose(['is', 'does']) }}
{% endif %}
{{ optional('the', 0.8) }}
{% if optional('thing_first') %}
{{ thing_plural if plural else thing_singular }}
{% if plural %}
{{ choose(['that are', 'which are', ''], [0.25, 0.25, 0.5]) }}
{% else %}
{{ choose(['that is', 'which is', ''], [0.25, 0.25, 0.5]) }}
{% endif %}
{{ choose(['closest', 'nearest', 'next']) }}
{% else %}
{{ choose(['closest', 'nearest', 'next']) }}
{{ thing_plural if plural else thing_singular }}
{% endif %}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{{ choose(['from', 'to', 'of'], [0.4, 0.4, 0.2]) }}
......
......@@ -7,27 +7,13 @@ when
{{ choose(['can I', 'can we', 'to'], [0.3, 0.3, 0.4]) }}
{{ choose(['visit', 'go to'], [0.6, 0.4]) }}
{{ optional('the', 0.8) }}
{% if optional('thing_first') %}
{{ thing_plural if plural else thing_singular }}
{% if plural %}
{{ choose(['that are', 'which are', ''], [0.25, 0.25, 0.5]) }}
{% else %}
{{ choose(['that is', 'which is', ''], [0.25, 0.25, 0.5]) }}
{% endif %}
{{ choose(['closest', 'nearest', 'next']) }}
{% else %}
{{ choose(['closest', 'nearest', 'next']) }}
{{ thing_plural if plural else thing_singular }}
{% endif %}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{{ choose(['from', 'to', 'of'], [0.4, 0.4, 0.2]) }}
{% if features['area'] and separate_area_and_nwr %}
{{ features['center_nwr'][0][1] }}
{{ features['center_nwr'][0][1] }}
{% else %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% include 'meta/around_query_bare_location.jinja2' %}
{% endif %}
{{ optional('?') }}
{#- Distance question with one sub-query: `features` is temporarily rebound
    to features['sub'][0] while the bare-location and closest-thing snippets
    are included, then restored. Either the location or the thing comes
    first, depending on optional('location_first').
    NOTE(review): assumes the included snippets read `features` -- confirm. -#}
{% include 'meta/dist_tell_distance.jinja2' %}
{% set old_features = features %}
{% set features = features['sub'][0] %}
{% if optional('location_first') %}
{{ choose(['from', 'between']) }}
{% include 'meta/around_query_bare_location.jinja2' %}
{{ choose(['to', 'and']) }}
{{ optional('the', 0.8) }}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{% else %}
{{ choose(['from', 'between']) }}
{{ optional('the', 0.8) }}
{% include 'meta/closest_around_query_closest_thing.jinja2' %}
{{ choose(['to', 'and']) }}
{% include 'meta/around_query_bare_location.jinja2' %}
{% endif %}
{% set features = old_features %}
{{ choose(['?', '!', '.', ''], [0.3, 0.15, 0.15, 0.4]) }}
{#- Distance question between two things, each followed by its own
    in_location snippet. `features` is rebound to features['sub'][0] for the
    first snippet and to features['sub'][1] for the second, and restored
    after each include. -#}
{% include 'meta/dist_tell_distance.jinja2' %}
{{ choose(['from', 'between']) }}
{{ first_thing_plural if first_plural else first_thing_singular }}
{% set old_features = features %}
{% set features = features['sub'][0] %}
{% include 'meta/in_location.jinja2' %}
{% set features = old_features %}
{{ choose(['to', 'and']) }}
{{ second_thing_plural if second_plural else second_thing_singular }}
{% set old_features = features %}
{% set features = features['sub'][1] %}
{% include 'meta/in_location.jinja2' %}
{% set features = old_features %}
{{ choose(['?', '!', '.', ''], [0.3, 0.15, 0.15, 0.4]) }}
{#- Distance question that starts with a single in_location snippet (rendered
    with `features` rebound to features['sub'][0]), optionally followed by a
    comma, and then asks for the distance between the first and second
    thing. -#}
{% set old_features = features %}
{% set features = features['sub'][0] %}
{% include('meta/in_location.jinja2') %}{{ optional(',', 0.5) }}
{% set features = old_features %}
{% include 'meta/dist_tell_distance.jinja2' %}
{{ choose(['from', 'between']) }}
{{ first_thing_plural if first_plural else first_thing_singular }}
{{ choose(['to', 'and']) }}
{{ second_thing_plural if second_plural else second_thing_singular }}
{{ choose(['?', '!', '.', ''], [0.3, 0.15, 0.15, 0.4]) }}
{#- Emits the thing (plural or singular form, depending on `plural`) together
    with one of closest/nearest/next. With optional('thing_first') the thing
    comes first, optionally followed by a "that is/are" or "which is/are"
    relative clause; otherwise the superlative precedes the thing. -#}
{% if optional('thing_first') %}
{{ thing_plural if plural else thing_singular }}
{% if plural %}
{{ choose(['that are', 'which are', ''], [0.25, 0.25, 0.5]) }}
{% else %}
{{ choose(['that is', 'which is', ''], [0.25, 0.25, 0.5]) }}
{% endif %}
{{ choose(['closest', 'nearest', 'next']) }}
{% else %}
{{ choose(['closest', 'nearest', 'next']) }}
{{ thing_plural if plural else thing_singular }}
{% endif %}
{#- Opening words of a distance question. qword1 selects the overall shape:
    'distance' -> "<give me | what is | how far is> (the) distance",
    'how'      -> "how far|long (apart) (is it|is)",
    'tell_how' -> "tell (me|us) how far|long (it is|is it|is)".
    For 'distance', qword2 selects which of the three openers is used. -#}
{% set qword1 = choose(['distance', 'how', 'tell_how'], [0.6, 0.3, 0.1]) %}
{% if qword1 == 'distance' %}
{% set qword2 = choose(['give', 'what', 'how']) %}
{% if qword2 == 'give' %}
{{ choose(['tell', 'give', 'show']) }}
{{ choose(['me', 'us', ''], [0.3, 0.3, 0.4]) }}
{% elif qword2 == 'what' %}
{{ choose(['what is', 'what’s', "what's"]) }}
{% else %}
how
{{ choose(['far', 'long']) }}
{{ optional('is', 0.8) }}
{% endif %}
{{ optional('the', 0.8) }}
distance
{% elif qword1 == 'how' %}
how
{{ choose(['far', 'long']) }}
{{ optional('apart', 0.2) }}
{{ choose(['is it', 'is', ''], [0.5, 0.1, 0.4]) }}
{% else %}
tell
{{ choose(['me', 'us', ''], [0.3, 0.3, 0.4]) }}
how
{{ choose(['far', 'long']) }}
{{ choose(['it is', 'is it', 'is', ''], [0.4, 0.2, 0.1, 0.3]) }}
{% endif %}
......@@ -6569,6 +6569,18 @@
|-
| Hi-Fis near || shop || hifi || near || Y
|-
| Hi-Fi Shop || shop || hifi || - || N
|-
| Hi-Fi Shops || shop || hifi || - || Y
|-
| Hi-Fi Shop in || shop || hifi || in || N
|-
| Hi-Fi Shops in || shop || hifi || in || Y
|-
| Hi-Fi Shop near || shop || hifi || near || N
|-
| Hi-Fi Shops near || shop || hifi || near || Y
|-
| Insurance || office || insurance || - || N
|-
| Insurances || office || insurance || - || Y
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment