[PATCH] console utf-8 mode fixes
authorAdam Tlalka <atlka@pg.gda.pl>
Fri, 29 Sep 2006 08:59:53 +0000 (01:59 -0700)
committerLinus Torvalds <torvalds@g5.osdl.org>
Fri, 29 Sep 2006 16:18:11 +0000 (09:18 -0700)
Fix utf-8 mode so alternate charset modes always work according to control
sequences interpreted in do_con_trol function preserving backward US-ASCII
and VT100 semigraphics compatibility.

Malformed utf-8 sequences are represented as sequences of replacement
glyphs,original codes or '?' as a last resort.

unicode-xterm, gnome-terminal, kconsole and other terminal emulators in
utf-8 mode respect acsc, enacs, rmacs sequences.  Also I found that some
important system programs (from Debian distro) uses acsc in utf-8 mode -
dselect, aptitude, w3m for example.

Signed-off-by: Adam Tlalka <atlka@pg.gda.pl>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
drivers/char/vt.c

index d7d880f..0fca83e 100644 (file)
  *
  * Removed console_lock, enabled interrupts across all console operations
  * 13 March 2001, Andrew Morton
+ *
+ * Fixed UTF-8 mode so alternate charset modes always work according
+ * to control sequences interpreted in do_con_trol function
+ * preserving backward VT100 semigraphics compatibility,
+ * malformed UTF sequences represented as sequences of replacement glyphs,
+ * original codes or '?' as a last resort if replacement glyph is undefined
+ * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
  */
 
 #include <linux/module.h>
@@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
                /* Do no translation at all in control states */
                if (vc->vc_state != ESnormal) {
                        tc = c;
-               } else if (vc->vc_utf) {
+               } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
                    /* Combine UTF-8 into Unicode */
-                   /* Incomplete characters silently ignored */
+                   /* Malformed sequences as sequences of replacement glyphs */
+rescan_last_byte:
                    if(c > 0x7f) {
-                       if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) {
-                               vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
-                               vc->vc_utf_count--;
-                               if (vc->vc_utf_count == 0)
-                                   tc = c = vc->vc_utf_char;
-                               else continue;
+                       if (vc->vc_utf_count) {
+                              if ((c & 0xc0) == 0x80) {
+                                      vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
+                                              if (--vc->vc_utf_count) {
+                                              vc->vc_npar++;
+                                              continue;
+                                              }
+                                      tc = c = vc->vc_utf_char;
+                              } else
+                                      goto replacement_glyph;
                        } else {
+                               vc->vc_npar = 0;
                                if ((c & 0xe0) == 0xc0) {
                                    vc->vc_utf_count = 1;
                                    vc->vc_utf_char = (c & 0x1f);
@@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
                                    vc->vc_utf_count = 5;
                                    vc->vc_utf_char = (c & 0x01);
                                } else
-                                   vc->vc_utf_count = 0;
+                                   goto replacement_glyph;
                                continue;
                              }
                    } else {
+                     if (vc->vc_utf_count)
+                             goto replacement_glyph;
                      tc = c;
-                     vc->vc_utf_count = 0;
                    }
-               } else {        /* no utf */
+               } else {        /* no utf or alternate charset mode */
                  tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
                }
 
@@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
                  * direct-to-font zone in UTF-8 mode.
                  */
                 ok = tc && (c >= 32 ||
-                           (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS
-                                               : CTRL_ACTION) >> c) & 1)))
+                           !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
+                                 vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
                        && (c != 127 || vc->vc_disp_ctrl)
                        && (c != 128+27);
 
                if (vc->vc_state == ESnormal && ok) {
                        /* Now try to find out how to display it */
                        tc = conv_uni_to_pc(vc, tc);
-                       if ( tc == -4 ) {
+                       if (tc & ~charmask) {
+                               if ( tc == -4 ) {
                                 /* If we got -4 (not found) then see if we have
                                    defined a replacement character (U+FFFD) */
-                                tc = conv_uni_to_pc(vc, 0xfffd);
-
-                               /* One reason for the -4 can be that we just
-                                  did a clear_unimap();
-                                  try at least to show something. */
-                               if (tc == -4)
-                                    tc = c;
-                        } else if ( tc == -3 ) {
-                                /* Bad hash table -- hope for the best */
-                                tc = c;
-                        }
-                       if (tc & ~charmask)
-                                continue; /* Conversion failed */
+replacement_glyph:
+                                       tc = conv_uni_to_pc(vc, 0xfffd);
+                                       if (!(tc & ~charmask))
+                                               goto display_glyph;
+                               } else if ( tc != -3 )
+                                       continue; /* nothing to display */
+                                /* no hash table or no replacement --
+                                * hope for the best */
+                               if ( c & ~charmask )
+                                       tc = '?';
+                               else
+                                       tc = c;
+                       }
 
+display_glyph:
                        if (vc->vc_need_wrap || vc->vc_decim)
                                FLUSH
                        if (vc->vc_need_wrap) {
@@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
                                vc->vc_x++;
                                draw_to = (vc->vc_pos += 2);
                        }
+                       if (vc->vc_utf_count) {
+                               if (vc->vc_npar) {
+                                       vc->vc_npar--;
+                                       goto display_glyph;
+                               }
+                               vc->vc_utf_count = 0;
+                               c = orig;
+                               goto rescan_last_byte;
+                       }
                        continue;
                }
                FLUSH