From 1edf58ce67a78229a8b5d0ef08b9a4d8797ead38 Mon Sep 17 00:00:00 2001
From: Jakob Moser <moser@cl.uni-heidelberg.de>
Date: Tue, 23 May 2023 22:57:08 +0200
Subject: [PATCH] Try supporting utf-8

---
 README.md     | 12 ++++++++++++
 entrypoint.sh |  4 ++++
 2 files changed, 16 insertions(+)

diff --git a/README.md b/README.md
index 3afa233..f3ac252 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,18 @@ sudo docker container run xle $(cat data.json | base64 -w0)
 The idea to implement it that way came from [Christian Heusel](https://christian.heusel.eu/), thank you very much!
 </details>
 
+## :abcd: A word about encoding
+
+By default, this container lets XLE use its own default character encoding (which is not UTF-8). If you want to use UTF-8, you have to add the following line to the `CONFIG` section of your grammar file:
+
+```
+CHARACTERENCODING utf-8.
+```
+
+**and** execute `set-character-encoding stdio utf-8` every time after starting XLE.
+
+`xle-docker` is meant to automatically use `utf-8` as the character encoding for standard input if it detects the character encoding configuration line (more precisely, a line matching `^\s*CHARACTERENCODING\s+utf-8\s*\..*$` anywhere in the grammar file). Note that this detection is currently commented out in `entrypoint.sh`, so it is not active yet.
+
 ## :page_with_curl: About
 
 Here, a more detailed explanation of the relevant files is given. The used sources are marked with :books:.
diff --git a/entrypoint.sh b/entrypoint.sh
index ff56c22..7ce771b 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -15,8 +15,12 @@ else
     SENTENCE=$1
 fi
 
+export LC_ALL="C.utf8"
+
 # Write xlerc containing instructions to parse
 echo "create-parser /grammar.lfg" > xlerc
+# Check if we are using utf-8 (currently disabled: the command below is commented out)
+#(grep -E '^\s*CHARACTERENCODING\s+utf-8\s*\..*$' /grammar.lfg > /dev/null) && echo "set-character-encoding stdio utf-8" >> xlerc
 echo "packed-xml-fs { $SENTENCE } packed-parses.xml" >> xlerc
 echo "exit" >> xlerc
 
-- 
GitLab