/* ** String formatting. ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h */ #include #define lj_strfmt_c #define LUA_CORE #include "lj_obj.h" #include "lj_err.h" #include "lj_buf.h" #include "lj_str.h" #include "lj_meta.h" #include "lj_state.h" #include "lj_char.h" #include "lj_strfmt.h" #if LJ_HASFFI #include "lj_ctype.h" #endif #include "lj_lib.h" /* -- Format parser ------------------------------------------------------- */ static const uint8_t strfmt_map[('x'-'A')+1] = { STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, 0,0,0,0,0,0, STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X }; SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) { const uint8_t *p = fs->p, *e = fs->e; fs->str = (const char *)p; for (; p < e; p++) { if (*p == '%') { /* Escape char? */ if (p[1] == '%') { /* '%%'? */ fs->p = ++p+1; goto retlit; } else { SFormat sf = 0; uint32_t c; if (p != (const uint8_t *)fs->str) break; for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) { /* Parse flags. */ if (*p == '-') sf |= STRFMT_F_LEFT; else if (*p == '+') sf |= STRFMT_F_PLUS; else if (*p == '0') sf |= STRFMT_F_ZERO; else if (*p == ' ') sf |= STRFMT_F_SPACE; else if (*p == '#') sf |= STRFMT_F_ALT; else break; } if ((uint32_t)*p - '0' < 10) { /* Parse width. */ uint32_t width = (uint32_t)*p++ - '0'; if ((uint32_t)*p - '0' < 10) width = (uint32_t)*p++ - '0' + width*10; sf |= (width << STRFMT_SH_WIDTH); } if (*p == '.') { /* Parse precision. */ uint32_t prec = 0; p++; if ((uint32_t)*p - '0' < 10) { prec = (uint32_t)*p++ - '0'; if ((uint32_t)*p - '0' < 10) prec = (uint32_t)*p++ - '0' + prec*10; } sf |= ((prec+1) << STRFMT_SH_PREC); } /* Parse conversion. */ c = (uint32_t)*p - 'A'; if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) { uint32_t sx = strfmt_map[c]; if (sx) { fs->p = p+1; return (sf | sx | ((c & 0x20) ? 
0 : STRFMT_F_UPPER)); } } /* Return error location. */ if (*p >= 32) p++; fs->len = (MSize)(p - (const uint8_t *)fs->str); fs->p = fs->e; return STRFMT_ERR; } } } fs->p = p; retlit: fs->len = (MSize)(p - (const uint8_t *)fs->str); return fs->len ? STRFMT_LIT : STRFMT_EOF; } /* -- Raw conversions ----------------------------------------------------- */ #define WINT_R(x, sh, sc) \ { uint32_t d = (x*(((1<>sh; x -= d*sc; *p++ = (char)('0'+d); } /* Write integer to buffer. */ char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) { uint32_t u = (uint32_t)k; if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } if (u < 10000) { if (u < 10) goto dig1; if (u < 100) goto dig2; if (u < 1000) goto dig3; } else { uint32_t v = u / 10000; u -= v * 10000; if (v < 10000) { if (v < 10) goto dig5; if (v < 100) goto dig6; if (v < 1000) goto dig7; } else { uint32_t w = v / 10000; v -= w * 10000; if (w >= 10) WINT_R(w, 10, 10) *p++ = (char)('0'+w); } WINT_R(v, 23, 1000) dig7: WINT_R(v, 12, 100) dig6: WINT_R(v, 10, 10) dig5: *p++ = (char)('0'+v); } WINT_R(u, 23, 1000) dig3: WINT_R(u, 12, 100) dig2: WINT_R(u, 10, 10) dig1: *p++ = (char)('0'+u); return p; } #undef WINT_R /* Write pointer to buffer. */ char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) { ptrdiff_t x = (ptrdiff_t)v; MSize i, n = STRFMT_MAXBUF_PTR; if (x == 0) { *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; return p; } #if LJ_64 /* Shorten output for 64 bit pointers. */ n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); #endif p[0] = '0'; p[1] = 'x'; for (i = n-1; i >= 2; i--, x >>= 4) p[i] = "0123456789abcdef"[(x & 15)]; return p+n; } /* Write ULEB128 to buffer. */ char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) { for (; v >= 0x80; v >>= 7) *p++ = (char)((v & 0x7f) | 0x80); *p++ = (char)v; return p; } /* Return string or write number to tmp buffer and return pointer to start. 
*/ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) { SBuf *sb; if (tvisstr(o)) { *lenp = strV(o)->len; return strVdata(o); } else if (tvisbuf(o)) { SBufExt *sbx = bufV(o); *lenp = sbufxlen(sbx); return sbx->r; } else if (tvisint(o)) { sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); } else { return NULL; } *lenp = sbuflen(sb); return sb->b; } /* -- Unformatted conversions to buffer ----------------------------------- */ /* Add integer to buffer. */ SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) { sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k); return sb; } #if LJ_HASJIT /* Add number to buffer. */ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) { return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); } #endif SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) { sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v); return sb; } /* Add quoted string to buffer. */ static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len) { lj_buf_putb(sb, '"'); while (len--) { uint32_t c = (uint32_t)(uint8_t)*s++; char *w = lj_buf_more(sb, 4); if (c == '"' || c == '\\' || c == '\n') { *w++ = '\\'; } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ uint32_t d; *w++ = '\\'; if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; goto tens; } else if (c >= 10) { tens: d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d); } c += '0'; } *w++ = (char)c; sb->w = w; } lj_buf_putb(sb, '"'); return sb; } #if LJ_HASJIT SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) { return strfmt_putquotedlen(sb, strdata(str), str->len); } #endif /* -- Formatted conversions to buffer ------------------------------------- */ /* Add formatted char to buffer. 
*/ SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) { MSize width = STRFMT_WIDTH(sf); char *w = lj_buf_more(sb, width > 1 ? width : 1); if ((sf & STRFMT_F_LEFT)) *w++ = (char)c; while (width-- > 1) *w++ = ' '; if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c; sb->w = w; return sb; } /* Add formatted string to buffer. */ static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len) { MSize width = STRFMT_WIDTH(sf); char *w; if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf); w = lj_buf_more(sb, width > len ? width : len); if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); while (width-- > len) *w++ = ' '; if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); sb->w = w; return sb; } #if LJ_HASJIT SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) { return strfmt_putfstrlen(sb, sf, strdata(str), str->len); } #endif /* Add formatted signed/unsigned integer to buffer. */ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) { char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w; #ifdef LUA_USE_ASSERT char *ws; #endif MSize prefix = 0, len, prec, pprec, width, need; /* Figure out signed prefixes. */ if (STRFMT_TYPE(sf) == STRFMT_INT) { if ((int64_t)k < 0) { k = (uint64_t)-(int64_t)k; prefix = 256 + '-'; } else if ((sf & STRFMT_F_PLUS)) { prefix = 256 + '+'; } else if ((sf & STRFMT_F_SPACE)) { prefix = 256 + ' '; } } /* Convert number and store to fixed-size buffer in reverse order. */ prec = STRFMT_PREC(sf); if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO; if (k == 0) { /* Special-case zero argument. */ if (prec != 0 || (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT)) *--q = '0'; } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */ uint32_t k2; while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; } k2 = (uint32_t)k; do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2); } else if ((sf & STRFMT_T_HEX)) { /* Hex. */ const char *hexdig = (sf & STRFMT_F_UPPER) ? 
"0123456789ABCDEF" : "0123456789abcdef"; do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k); if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x'); } else { /* Octal. */ do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k); if ((sf & STRFMT_F_ALT)) *--q = '0'; } /* Calculate sizes. */ len = (MSize)(buf + sizeof(buf) - q); if ((int32_t)len >= (int32_t)prec) prec = len; width = STRFMT_WIDTH(sf); pprec = prec + (prefix >> 8); need = width > pprec ? width : pprec; w = lj_buf_more(sb, need); #ifdef LUA_USE_ASSERT ws = w; #endif /* Format number with leading/trailing whitespace and zeros. */ if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) while (width-- > pprec) *w++ = ' '; if (prefix) { if ((char)prefix >= 'X') *w++ = '0'; *w++ = (char)prefix; } if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) while (width-- > pprec) *w++ = '0'; while (prec-- > len) *w++ = '0'; while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */ if ((sf & STRFMT_F_LEFT)) while (width-- > pprec) *w++ = ' '; lj_assertX(need == (MSize)(w - ws), "miscalculated format size"); sb->w = w; return sb; } /* Add number formatted as signed integer to buffer. */ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) { int64_t k = (int64_t)n; if (checki32(k) && sf == STRFMT_INT) return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ else return lj_strfmt_putfxint(sb, sf, (uint64_t)k); } /* Add number formatted as unsigned integer to buffer. */ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) { int64_t k; if (n >= 9223372036854775808.0) k = (int64_t)(n - 18446744073709551616.0); else k = (int64_t)n; return lj_strfmt_putfxint(sb, sf, (uint64_t)k); } /* Format stack arguments to buffer. 
*/
/*
** The format string is taken from argument number 'arg'; the values to be
** formatted are the arguments following it. Output is appended to sb.
**
** 'retry' controls the __tostring handling for %s and %q: the metamethod is
** only looked up if retry >= 0. After calling it, the result replaces the
** argument on the stack and, if retry < 2, retry is set to 1 and nothing is
** written for that item. The (possibly updated) retry value is returned.
** NOTE(review): presumably the caller restarts formatting when 1 is
** returned -- confirm against the callers.
**
** Raises an error for an invalid format or a missing/mistyped argument.
*/
int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
{
  int narg = (int)(L->top - L->base);
  GCstr *fmt = lj_lib_checkstr(L, arg);
  FormatState fs;
  SFormat sf;
  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
    if (sf == STRFMT_LIT) {
      lj_buf_putmem(sb, fs.str, fs.len);
    } else if (sf == STRFMT_ERR) {
      /* Report the offending part of the format string. */
      lj_err_callerv(L, LJ_ERR_STRFMT,
                     strdata(lj_str_new(L, fs.str, fs.len)));
    } else {
      /* Slot of the next value; arg is now its 1-based argument number. */
      TValue *o = &L->base[arg++];
      if (arg > narg)
        lj_err_arg(L, arg, LJ_ERR_NOVAL);
      switch (STRFMT_TYPE(sf)) {
      case STRFMT_INT:
        if (tvisint(o)) {
          int32_t k = intV(o);
          if (sf == STRFMT_INT)
            lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
          else
            lj_strfmt_putfxint(sb, sf, k);
          break;
        }
#if LJ_HASFFI
        /* 64 bit integer cdata is formatted with full precision. */
        if (tviscdata(o)) {
          GCcdata *cd = cdataV(o);
          if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
            lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
            break;
          }
        }
#endif
        lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
        break;
      case STRFMT_UINT:
        if (tvisint(o)) {
          lj_strfmt_putfxint(sb, sf, intV(o));
          break;
        }
#if LJ_HASFFI
        if (tviscdata(o)) {
          GCcdata *cd = cdataV(o);
          if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
            lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
            break;
          }
        }
#endif
        lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
        break;
      case STRFMT_NUM:
        lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
        break;
      case STRFMT_STR: {
        MSize len;
        const char *s;
        cTValue *mo;
        if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&
            !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
          /* Call __tostring metamethod once. */
          copyTV(L, L->top++, mo);
          copyTV(L, L->top++, o);
          lua_call(L, 1, 1);
          o = &L->base[arg-1];  /* Stack may have been reallocated. */
          copyTV(L, o, --L->top);  /* Replace inline for retry. */
          if (retry < 2) {  /* Global buffer may have been overwritten. */
            retry = 1;
            break;  /* Leaves the switch: nothing is written for this item. */
          }
        }
        if (LJ_LIKELY(tvisstr(o))) {
          len = strV(o)->len;
          s = strVdata(o);
#if LJ_HASBUFFER
        } else if (tvisbuf(o)) {
          SBufExt *sbx = bufV(o);
          /* Formatting a buffer into itself is rejected. */
          if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
          len = sbufxlen(sbx);
          s = sbx->r;
#endif
        } else {
          GCstr *str = lj_strfmt_obj(L, o);
          len = str->len;
          s = strdata(str);
        }
        if ((sf & STRFMT_T_QUOTED))
          strfmt_putquotedlen(sb, s, len);  /* No formatting. */
        else
          strfmt_putfstrlen(sb, sf, s, len);
        break;
        }
      case STRFMT_CHAR:
        lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
        break;
      case STRFMT_PTR:  /* No formatting. */
        lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
        break;
      default:
        lj_assertL(0, "bad string format type");
        break;
      }
    }
  }
  return retry;
}

/* -- Conversions to strings ---------------------------------------------- */

/* Convert integer to string. */
GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
{
  char buf[STRFMT_MAXBUF_INT];
  /* Format into a stack buffer, then create the string object from it. */
  MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
  return lj_str_new(L, buf, len);
}

/* Convert integer or number to string. */
GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
{
  return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
}

#if LJ_HASJIT
/* Convert char value to string. Creates a string of length one. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char buf[1];
  buf[0] = c;
  return lj_str_new(L, buf, 1);
}
#endif

/* Raw conversion of object to string.
*/
/*
** Strings are returned as-is, numbers are converted and nil/false/true map
** to their literal names. Any other value is rendered as "typename: "
** followed by "builtin#<id>" for fast functions or by its address.
*/
GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
{
  char tmp[8+2+2+16];
  char *end;
  const char *tname;

  /* Values with a direct textual form need no scratch buffer. */
  if (tvisstr(o))
    return strV(o);
  if (tvisnumber(o))
    return lj_strfmt_number(L, o);
  if (tvisnil(o))
    return lj_str_newlit(L, "nil");
  if (tvisfalse(o))
    return lj_str_newlit(L, "false");
  if (tvistrue(o))
    return lj_str_newlit(L, "true");

  /* Everything else: type name, separator, then an identifying suffix. */
  tname = lj_typename(o);
  end = lj_buf_wmem(tmp, tname, (MSize)strlen(tname));
  *end++ = ':';
  *end++ = ' ';
  if (tvisfunc(o) && isffunc(funcV(o))) {
    end = lj_buf_wmem(end, "builtin#", 8);
    end = lj_strfmt_wint(end, funcV(o)->c.ffid);
  } else {
    end = lj_strfmt_wptr(end, lj_obj_ptr(G(L), o));
  }
  return lj_str_new(L, tmp, (size_t)(end - tmp));
}

/* -- Internal string formatting ------------------------------------------ */

/*
** These functions are only used for lua_pushfstring(), lua_pushvfstring()
** and for internal string formatting (e.g. error messages). Caveat: unlike
** string.format(), only a limited subset of formats and flags are supported!
**
** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
** - %d %u %o %x with full formatting, 32 bit integers only.
** - %f and other FP formats are really %.14g.
** - %s %c %p without formatting.
*/

/* Push formatted message as a string object to Lua stack. va_list variant.
*/
/*
** The message is assembled in the temporary buffer of L, turned into a
** string object and pushed onto the stack. Returns the string data.
*/
const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
{
  SBuf *out = lj_buf_tmp_(L);
  FormatState st;
  GCstr *res;

  lj_strfmt_init(&st, fmt, (MSize)strlen(fmt));
  for (;;) {
    SFormat spec = lj_strfmt_parse(&st);
    if (spec == STRFMT_EOF)
      break;
    switch (STRFMT_TYPE(spec)) {
    case STRFMT_LIT:
      lj_buf_putmem(out, st.str, st.len);
      break;
    case STRFMT_INT: {
      int32_t ival = va_arg(argp, int32_t);
      lj_strfmt_putfxint(out, spec, ival);
      break;
      }
    case STRFMT_UINT: {
      uint32_t uval = va_arg(argp, uint32_t);
      lj_strfmt_putfxint(out, spec, uval);
      break;
      }
    case STRFMT_NUM: {
      /* Flags, width and precision are ignored: always %.14g. */
      lua_Number nval = va_arg(argp, lua_Number);
      lj_strfmt_putfnum(out, STRFMT_G14, nval);
      break;
      }
    case STRFMT_STR: {
      const char *text = va_arg(argp, char *);
      if (!text)
        text = "(null)";
      lj_buf_putmem(out, text, (MSize)strlen(text));
      break;
      }
    case STRFMT_CHAR: {
      int ch = va_arg(argp, int);
      lj_buf_putb(out, ch);
      break;
      }
    case STRFMT_PTR: {
      void *ptr = va_arg(argp, void *);
      lj_strfmt_putptr(out, ptr);
      break;
      }
    case STRFMT_ERR:
    default:
      /* Unsupported conversion: emit a placeholder, assert in debug builds. */
      lj_buf_putb(out, '?');
      lj_assertL(0, "bad string format near offset %d", st.len);
      break;
    }
  }

  res = lj_buf_str(L, out);
  setstrV(L, L->top, res);
  incr_top(L);
  return strdata(res);
}

/* Push formatted message as a string object to Lua stack. Vararg variant. */
const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
{
  va_list ap;
  const char *result;

  va_start(ap, fmt);
  result = lj_strfmt_pushvf(L, fmt, ap);
  va_end(ap);
  return result;
}