From 1edf58ce67a78229a8b5d0ef08b9a4d8797ead38 Mon Sep 17 00:00:00 2001 From: Jakob Moser <moser@cl.uni-heidelberg.de> Date: Tue, 23 May 2023 22:57:08 +0200 Subject: [PATCH] Try supporting utf-8 --- README.md | 12 ++++++++++++ entrypoint.sh | 4 ++++ 2 files changed, 16 insertions(+) diff --git a/README.md b/README.md index 3afa233..f3ac252 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,18 @@ sudo docker container run xle $(cat data.json | base64 -w0) The idea to implement it that way came from [Christian Heusel](https://christian.heusel.eu/), thank you very much! </details> +## :abcd: A word about encoding + +By default, this container lets XLE use its default encoding, whatever that is (not UTF-8). If you want to use UTF-8, you have to add the following line to your grammar file in the `CONFIG` section: + +``` +CHARACTERENCODING utf-8. +``` + +**and** execute `set-character-encoding stdio utf-8` every time after starting XLE. + +`xle-docker` will automatically use `utf-8` as character encoding for standard input if it detects the character encoding configuration line (more precisely, a line matching `^\s*CHARACTERENCODING\s+utf-8\s*\..*$` anywhere in the file). + ## :page_with_curl: About Here, a more detailed explanation of the relevant files is given. The used sources are marked with :books:. diff --git a/entrypoint.sh b/entrypoint.sh index ff56c22..7ce771b 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -15,8 +15,12 @@ else SENTENCE=$1 fi +export LC_ALL="C.utf8" + # Write xlerc containing instructions to parse echo "create-parser /grammar.lfg" > xlerc +# Check if we are using utf-8 +#(grep -E '^\s*CHARACTERENCODING\s+utf-8\s*\..*$' /grammar.lfg > /dev/null) && echo "set-character-encoding stdio utf-8" >> xlerc echo "packed-xml-fs { $SENTENCE } packed-parses.xml" >> xlerc echo "exit" >> xlerc -- GitLab