#! /bin/sh

# Which characters have a combining compatibility decomposition?
# example:
# 0E33;THAI CHARACTER SARA AM;Lo;0;L;<compat> 0E4D 0E32;;;;N;THAI VOWEL SIGN SARA AM;;;;
# 0E4D;THAI CHARACTER NIKHAHIT;Mn;0;NSM;;;;;N;THAI NIKKHAHIT;;;;
#
# These need to be detected during text rendering so that they are
# not separated from the preceding (base) character.

sed -e "s,^\([0-9A-F]*\);\([^;]*\);[^;]*;[^;]*;[^;]*;<compat> *\([0-9A-F][0-9A-F]* [0-9A-F][0-9A-F]*[^;]*\);.*,\1	\3	\2," -e t -e d UnicodeData.txt

