Skip to content
Snippets Groups Projects
Commit 44d5d96b authored by Victor Zimmermann's avatar Victor Zimmermann
Browse files

Targets may be written as one word (e.g. "microchip" vs. "micro chip").

parent 522f47ca
No related branches found
No related tags found
No related merge requests found
......@@ -320,6 +320,7 @@ def process_file(context_list: list, target_string: str,
"""
spaced_target_string = target_string.replace('_', ' ')
onewordtarget = target_string.replace('_', '')
stopword_list = config.stop_words
allowed_tag_list = config.allowed_tags
......@@ -331,12 +332,13 @@ def process_file(context_list: list, target_string: str,
for context in context_list:
context = context.lower()
if spaced_target_string in context: # Pre-select lines greedy.
if spaced_target_string in context or onewordtarget in context: # Pre-select lines greedy.
token_set = set()
# Allow target to be treated as single entity.
context = context.replace(spaced_target_string, target_string)
context = context.replace(onewordtarget, target_string)
processed_context = nlp(context)
if target_string in [token.text for token in processed_context]:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment