Index: src/parse.y ================================================================== --- src/parse.y +++ src/parse.y @@ -1921,10 +1921,16 @@ IF_NULL_ROW /* the if-null-row operator */ ASTERISK /* The "*" in count(*) and similar */ SPAN /* The span operator */ ERROR /* An expression containing an error */ . + +term(A) ::= QNUMBER(X). { + A=tokenExpr(pParse,@X,X); + sqlite3DequoteNumber(pParse, A); +} + /* There must be no more than 255 tokens defined above. If this grammar ** is extended with new rules and tokens, they must either be so few in ** number that TK_SPAN is no more than 255, or else the new tokens must ** appear after this line. */ Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -607,10 +607,12 @@ */ #if defined(SQLITE_OMIT_VIRTUALTABLE) && !defined(SQLITE_OMIT_ALTERTABLE) # define SQLITE_OMIT_ALTERTABLE #endif +#define SQLITE_DIGIT_SEPARATOR '_' + /* ** Return true (non-zero) if the input is an integer that is too large ** to fit in 32-bits. This macro is used inside of various testcase() ** macros to verify that we have tested SQLite for large-file support. */ @@ -4790,10 +4792,11 @@ void sqlite3ErrorMsg(Parse*, const char*, ...); int sqlite3ErrorToParser(sqlite3*,int); void sqlite3Dequote(char*); void sqlite3DequoteExpr(Expr*); void sqlite3DequoteToken(Token*); +void sqlite3DequoteNumber(Parse*, Expr*); void sqlite3TokenInit(Token*,char*); int sqlite3KeywordCode(const unsigned char*, int); int sqlite3RunParser(Parse*, const char*); void sqlite3FinishCoding(Parse*); int sqlite3GetTempReg(Parse*); Index: src/tokenize.c ================================================================== --- src/tokenize.c +++ src/tokenize.c @@ -435,31 +435,62 @@ testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); testcase( z[0]=='9' ); testcase( z[0]=='.' ); *tokenType = TK_INTEGER; #ifndef SQLITE_OMIT_HEX_INTEGER if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ - for(i=3; sqlite3Isxdigit(z[i]); i++){} - return i; - } + for(i=3; 1; i++){ + if( sqlite3Isxdigit(z[i])==0 ){ + if( z[i]==SQLITE_DIGIT_SEPARATOR ){ + *tokenType = TK_QNUMBER; + }else{ + break; + } + } + } + }else #endif - for(i=0; sqlite3Isdigit(z[i]); i++){} + { + for(i=0; 1; i++){ + if( sqlite3Isdigit(z[i])==0 ){ + if( z[i]==SQLITE_DIGIT_SEPARATOR ){ + *tokenType = TK_QNUMBER; + }else{ + break; + } + } + } #ifndef SQLITE_OMIT_FLOATING_POINT - if( z[i]=='.' ){ - i++; - while( sqlite3Isdigit(z[i]) ){ i++; } - *tokenType = TK_FLOAT; - } - if( (z[i]=='e' || z[i]=='E') && - ( sqlite3Isdigit(z[i+1]) - || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) - ) - ){ - i += 2; - while( sqlite3Isdigit(z[i]) ){ i++; } - *tokenType = TK_FLOAT; - } + if( z[i]=='.' ){ + if( *tokenType==TK_INTEGER ) *tokenType = TK_FLOAT; + for(i++; 1; i++){ + if( sqlite3Isdigit(z[i])==0 ){ + if( z[i]==SQLITE_DIGIT_SEPARATOR ){ + *tokenType = TK_QNUMBER; + }else{ + break; + } + } + } + } + if( (z[i]=='e' || z[i]=='E') && + ( sqlite3Isdigit(z[i+1]) + || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) + ) + ){ + if( *tokenType==TK_INTEGER ) *tokenType = TK_FLOAT; + for(i+=2; 1; i++){ + if( sqlite3Isdigit(z[i])==0 ){ + if( z[i]==SQLITE_DIGIT_SEPARATOR ){ + *tokenType = TK_QNUMBER; + }else{ + break; + } + } + } + } #endif + } while( IdChar(z[i]) ){ *tokenType = TK_ILLEGAL; i++; } return i; @@ -620,14 +651,17 @@ } #ifndef SQLITE_OMIT_WINDOWFUNC if( tokenType>=TK_WINDOW ){ assert( tokenType==TK_SPACE || tokenType==TK_OVER || tokenType==TK_FILTER || tokenType==TK_ILLEGAL || tokenType==TK_WINDOW + || tokenType==TK_QNUMBER ); #else if( tokenType>=TK_SPACE ){ - assert( tokenType==TK_SPACE || tokenType==TK_ILLEGAL ); + assert( tokenType==TK_SPACE || tokenType==TK_ILLEGAL + || tokenType==TK_QNUMBER + ); #endif /* SQLITE_OMIT_WINDOWFUNC */ if( AtomicLoad(&db->u1.isInterrupted) ){ pParse->rc = SQLITE_INTERRUPT; pParse->nErr++; break; @@ -656,11 +690,11 @@ tokenType = analyzeOverKeyword((const u8*)&zSql[4], lastTokenParsed); }else if( tokenType==TK_FILTER ){ assert( n==6 ); tokenType = analyzeFilterKeyword((const u8*)&zSql[6], lastTokenParsed); #endif /* SQLITE_OMIT_WINDOWFUNC */ - }else{ + }else if( tokenType!=TK_QNUMBER ){ Token x; x.z = zSql; x.n = n; sqlite3ErrorMsg(pParse, "unrecognized token: \"%T\"", &x); break; Index: src/util.c ================================================================== --- src/util.c +++ src/util.c @@ -308,10 +308,38 @@ assert( !ExprHasProperty(p, EP_IntValue) ); assert( sqlite3Isquote(p->u.zToken[0]) ); p->flags |= p->u.zToken[0]=='"' ? EP_Quoted|EP_DblQuoted : EP_Quoted; sqlite3Dequote(p->u.zToken); } + +/* +** Expression p is a QNUMBER (quoted number). Dequote the value in p->u.zToken +** and set the type to INTEGER or FLOAT. "Quoted" integers or floats are those +** that contain '_' characters that must be removed before further processing. +*/ +void sqlite3DequoteNumber(Parse *pParse, Expr *p){ + if( p ){ + const char *pIn = p->u.zToken; + char *pOut = p->u.zToken; + int bHex = (pIn[0]=='0' && (pIn[1]=='x' || pIn[1]=='X')); + assert( p->op==TK_QNUMBER ); + p->op = TK_INTEGER; + do { + if( *pIn!=SQLITE_DIGIT_SEPARATOR ){ + *pOut++ = *pIn; + if( *pIn=='e' || *pIn=='E' || *pIn=='.' ) p->op = TK_FLOAT; + }else{ + if( (bHex==0 && (!sqlite3Isdigit(pIn[-1]) || !sqlite3Isdigit(pIn[1]))) + || (bHex==1 && (!sqlite3Isxdigit(pIn[-1]) || !sqlite3Isxdigit(pIn[1]))) + ){ + sqlite3ErrorMsg(pParse, "unrecognized token: \"%s\"", p->u.zToken); + } + } + }while( *pIn++ ); + if( bHex ) p->op = TK_INTEGER; + } +} /* ** If the input token p is quoted, try to adjust the token to remove ** the quotes. This is not always possible: ** Index: test/literal.test ================================================================== --- test/literal.test +++ test/literal.test @@ -17,18 +17,31 @@ set ::testprefix literal proc test_literal {tn lit type val} { do_execsql_test $tn.1 "SELECT typeof( $lit ), $lit" [list $type $val] - do_execsql_test $tn.2 " + ifcapable altertable { + do_execsql_test $tn.2 " + DROP TABLE IF EXISTS x1; + CREATE TABLE x1(a); + INSERT INTO x1 VALUES(123); + ALTER TABLE x1 ADD COLUMN b DEFAULT $lit ; + SELECT typeof(b), b FROM x1; + " [list $type $val] + } + + do_execsql_test $tn.3 " DROP TABLE IF EXISTS x1; - CREATE TABLE x1(a); - INSERT INTO x1 VALUES(123); - ALTER TABLE x1 ADD COLUMN b DEFAULT $lit ; - SELECT typeof(b), b FROM x1; + CREATE TABLE x1(a DEFAULT $lit); + INSERT INTO x1 DEFAULT VALUES; + SELECT typeof(a), a FROM x1; " [list $type $val] } + +proc test_literal_error {tn lit unrec} { + do_catchsql_test $tn "SELECT $lit" "1 {unrecognized token: \"$unrec\"}" +} test_literal 1.0 45 integer 45 test_literal 1.1 0xFF integer 255 test_literal 1.2 0xFFFFFFFF integer [expr 0xFFFFFFFF] @@ -40,14 +53,44 @@ test_literal 1.9 +0x7FFFFFFFFFFFFFFF integer [expr +0x7FFFFFFFFFFFFFFF] test_literal 1.10 -45 integer -45 test_literal 1.11 '0xFF' text 0xFF test_literal 1.12 '-0xFF' text -0xFF test_literal 1.13 -'0xFF' integer 0 - test_literal 1.14 -9223372036854775808 integer -9223372036854775808 test_literal 2.1 1e12 real 1000000000000.0 test_literal 2.2 1.0 real 1.0 test_literal 2.3 1e1000 real Inf test_literal 2.4 -1e1000 real -Inf + +test_literal 3.1 1_000 integer 1000 +test_literal 3.2 1.1_1 real 1.11 +test_literal 3.3 1_0.1_1 real 10.11 +test_literal 3.4 1e1_000 real Inf +test_literal 3.5 12_3_456.7_8_9 real 123456.789 +test_literal 3.6 9_223_372_036_854_775_807 integer 9223372036854775807 +test_literal 3.7 9_223_372_036_854_775_808 real 9.22337203685478e+18 +test_literal 3.8 -9_223_372_036_854_775_808 integer -9223372036854775808 + +foreach {tn lit unrec} { + 0 123a456 123a456 + 1 1_ 1_ + 2 1_.4 1_.4 + 3 1e_4 1e_4 + 4 1_e4 1_e4 + 5 1.4_e4 1.4_e4 + 6 1.4e+_4 1.4e + 7 1.4e-_4 1.4e + 8 1.4e4_ 1.4e4_ + 9 1.4_e4 1.4_e4 + 10 1.4e_4 1.4e_4 + 11 12__34 12__34 + 12 1234_ 1234_ + 13 12._34 12._34 + 14 12_.34 12_.34 + 15 12.34_ 12.34_ + 16 1.0e1_______2 1.0e1_______2 +} { + test_literal_error 4.$tn $lit $unrec +} finish_test ADDED test/literal2.tcl Index: test/literal2.tcl ================================================================== --- /dev/null +++ test/literal2.tcl @@ -0,0 +1,40 @@ +# 2018 May 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +source [file join [file dirname $argv0] pg_common.tcl] + +#========================================================================= + + +start_test literal2 "2024 Jan 23" + +execsql_test 1.0 { SELECT 123_456 } +errorsql_test 1.1 { SELECT 123__456 } + +execsql_float_test 2.1 { SELECT 1.0e1_2 } + + +execsql_test 3.0.0 { SELECT 0xFF_FF } +execsql_test 3.0.1 { SELECT 0xFF_EF } +errorsql_test 3.0.2 { SELECT 0xFF__EF } +# errorsql_test 3.0.3 { SELECT 0x_FFEF } +errorsql_test 3.0.4 { SELECT 0xFFEF_ } + +execsql_test 3.1.0 { SELECT 0XFF_FF } +execsql_test 3.1.1 { SELECT 0XFF_EF } +errorsql_test 3.1.2 { SELECT 0XFF__EF } +# errorsql_test 3.1.3 { SELECT 0X_FFEF } +errorsql_test 3.1.4 { SELECT 0XFFEF_ } + +finish_test + + ADDED test/literal2.test Index: test/literal2.test ================================================================== --- /dev/null +++ test/literal2.test @@ -0,0 +1,84 @@ +# 2024 Jan 23 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# + +#################################################### +# DO NOT EDIT! THIS FILE IS AUTOMATICALLY GENERATED! +#################################################### + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix literal2 + +do_execsql_test 1.0 { + SELECT 123_456 +} {123456} + +# PG says ERROR: trailing junk after numeric literal at or near "123_" +do_test 1.1 { catch { execsql { + SELECT 123__456 +} } } 1 + + +do_test 2.1 { + set myres {} + foreach r [db eval {SELECT 1.0e1_2}] { + lappend myres [format %.4f [set r]] + } + set res2 {1000000000000.0000} + set i 0 + foreach r [set myres] r2 [set res2] { + if {[set r]<([set r2]-0.0001) || [set r]>([set r2]+0.0001)} { + error "list element [set i] does not match: got=[set r] expected=[set r2]" + } + incr i + } + set {} {} +} {} + +do_execsql_test 3.0.0 { + SELECT 0xFF_FF +} {65535} + +do_execsql_test 3.0.1 { + SELECT 0xFF_EF +} {65519} + +# PG says ERROR: trailing junk after numeric literal at or near "0xFF_" +do_test 3.0.2 { catch { execsql { + SELECT 0xFF__EF +} } } 1 + +# PG says ERROR: trailing junk after numeric literal at or near "0xFFEF_" +do_test 3.0.4 { catch { execsql { + SELECT 0xFFEF_ +} } } 1 + +do_execsql_test 3.1.0 { + SELECT 0XFF_FF +} {65535} + +do_execsql_test 3.1.1 { + SELECT 0XFF_EF +} {65519} + +# PG says ERROR: trailing junk after numeric literal at or near "0XFF_" +do_test 3.1.2 { catch { execsql { + SELECT 0XFF__EF +} } } 1 + +# PG says ERROR: trailing junk after numeric literal at or near "0XFFEF_" +do_test 3.1.4 { catch { execsql { + SELECT 0XFFEF_ +} } } 1 + +finish_test Index: test/misc1.test ================================================================== --- test/misc1.test +++ test/misc1.test @@ -652,11 +652,11 @@ do_catchsql_test misc1-21.1 { select''like''like''like#0; } {1 {near "#0": syntax error}} do_catchsql_test misc1-21.2 { VALUES(0,0x0MATCH#0; -} {1 {near ";": syntax error}} +} {1 {unrecognized token: "0x0MATCH"}} # 2015-04-15 do_execsql_test misc1-22.1 { SELECT ''+3 FROM (SELECT ''+5); } {3} Index: test/speedtest1.c ================================================================== --- test/speedtest1.c +++ test/speedtest1.c @@ -2147,10 +2147,54 @@ x2 = swizzle(x1, n); speedtest1_numbername(x1, zNum, sizeof(zNum)); printf("%5d %5d %5d %s\n", i, x1, x2, zNum); } } + +/* +** This testset focuses on the speed of parsing numeric literals (integers +** and real numbers). This was added to test the impact of allowing "_" +** characters to appear in numeric SQL literals to make them easier to read. +** For example, "SELECT 1_000_000;" instead of "SELECT 1000000;". +*/ +void testset_parsenumber(void){ + const char *zSql1 = "SELECT 1, 12, 123, 1234, 12345, 123456"; + const char *zSql2 = "SELECT 8227256643844975616, 7932208612563860480, " + "2010730661871032832, 9138463067404021760, " + "2557616153664746496, 2557616153664746496"; + const char *zSql3 = "SELECT 1.0, 1.2, 1.23, 123.4, 1.2345, 1.23456"; + const char *zSql4 = "SELECT 8.227256643844975616, 7.932208612563860480, " + "2.010730661871032832, 9.138463067404021760, " + "2.557616153664746496, 2.557616153664746496"; + + const int NROW = 100*g.szTest; + int ii; + + speedtest1_begin_test(100, "parsing small integers"); + for(ii=0; ii #include @@ -2555,10 +2599,12 @@ testset_cte(); }else if( strcmp(zThisTest,"fp")==0 ){ testset_fp(); }else if( strcmp(zThisTest,"trigger")==0 ){ testset_trigger(); + }else if( strcmp(zThisTest,"parsenumber")==0 ){ + testset_parsenumber(); }else if( strcmp(zThisTest,"rtree")==0 ){ #ifdef SQLITE_ENABLE_RTREE testset_rtree(6, 147); #else fatal_error("compile with -DSQLITE_ENABLE_RTREE to enable " Index: tool/speed-check.sh ================================================================== --- tool/speed-check.sh +++ tool/speed-check.sh @@ -159,10 +159,13 @@ SPEEDTEST_OPTS="$SPEEDTEST_OPTS --testset cte" ;; --fp) SPEEDTEST_OPTS="$SPEEDTEST_OPTS --testset fp" ;; + --parsenumber) + SPEEDTEST_OPTS="$SPEEDTEST_OPTS --testset parsenumber" + ;; --stmtscanstatus) SPEEDTEST_OPTS="$SPEEDTEST_OPTS --stmtscanstatus" ;; -*) CC_OPTS="$CC_OPTS $1"