sqlglot.dialects.clickhouse
from __future__ import annotations
import typing as t
import datetime
from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
"groupArraySample", 322 "groupBitAnd", 323 "groupBitOr", 324 "groupBitXor", 325 "groupBitmap", 326 "groupBitmapAnd", 327 "groupBitmapOr", 328 "groupBitmapXor", 329 "sumWithOverflow", 330 "sumMap", 331 "minMap", 332 "maxMap", 333 "skewSamp", 334 "skewPop", 335 "kurtSamp", 336 "kurtPop", 337 "uniq", 338 "uniqExact", 339 "uniqCombined", 340 "uniqCombined64", 341 "uniqHLL12", 342 "uniqTheta", 343 "quantile", 344 "quantiles", 345 "quantileExact", 346 "quantilesExact", 347 "quantileExactLow", 348 "quantilesExactLow", 349 "quantileExactHigh", 350 "quantilesExactHigh", 351 "quantileExactWeighted", 352 "quantilesExactWeighted", 353 "quantileTiming", 354 "quantilesTiming", 355 "quantileTimingWeighted", 356 "quantilesTimingWeighted", 357 "quantileDeterministic", 358 "quantilesDeterministic", 359 "quantileTDigest", 360 "quantilesTDigest", 361 "quantileTDigestWeighted", 362 "quantilesTDigestWeighted", 363 "quantileBFloat16", 364 "quantilesBFloat16", 365 "quantileBFloat16Weighted", 366 "quantilesBFloat16Weighted", 367 "simpleLinearRegression", 368 "stochasticLinearRegression", 369 "stochasticLogisticRegression", 370 "categoricalInformationValue", 371 "contingency", 372 "cramersV", 373 "cramersVBiasCorrected", 374 "theilsU", 375 "maxIntersections", 376 "maxIntersectionsPosition", 377 "meanZTest", 378 "quantileInterpolatedWeighted", 379 "quantilesInterpolatedWeighted", 380 "quantileGK", 381 "quantilesGK", 382 "sparkBar", 383 "sumCount", 384 "largestTriangleThreeBuckets", 385 "histogram", 386 "sequenceMatch", 387 "sequenceCount", 388 "windowFunnel", 389 "retention", 390 "uniqUpTo", 391 "sequenceNextNode", 392 "exponentialTimeDecayedAvg", 393 } 394 395 AGG_FUNCTIONS_SUFFIXES = [ 396 "If", 397 "Array", 398 "ArrayIf", 399 "Map", 400 "SimpleState", 401 "State", 402 "Merge", 403 "MergeState", 404 "ForEach", 405 "Distinct", 406 "OrDefault", 407 "OrNull", 408 "Resample", 409 "ArgMin", 410 "ArgMax", 411 ] 412 413 FUNC_TOKENS = { 414 *parser.Parser.FUNC_TOKENS, 415 TokenType.SET, 416 } 417 418 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 419 420 ID_VAR_TOKENS = { 421 *parser.Parser.ID_VAR_TOKENS, 422 TokenType.LIKE, 423 } 424 425 AGG_FUNC_MAPPING = ( 426 lambda functions, suffixes: { 427 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 428 } 429 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 430 431 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 432 433 FUNCTION_PARSERS = { 434 **parser.Parser.FUNCTION_PARSERS, 435 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 436 "QUANTILE": lambda self: self._parse_quantile(), 437 "MEDIAN": lambda self: self._parse_quantile(), 438 "COLUMNS": lambda self: self._parse_columns(), 439 } 440 441 FUNCTION_PARSERS.pop("MATCH") 442 443 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 444 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 445 446 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 447 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 448 449 RANGE_PARSERS = { 450 **parser.Parser.RANGE_PARSERS, 451 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 452 and self._parse_in(this, is_global=True), 453 } 454 455 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 456 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
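            #
            # Illustrative contrast (example inputs assumed): `EXTRACT(YEAR FROM col)` is
            # re-parsed through the standard path above, while ClickHouse's two-argument
            # form, e.g. `extract('foobar', 'b')`, falls through to the Anonymous branch below.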
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()
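
            # For a hypothetical constraint such as INDEX idx expr TYPE bloom_filter(0.025)
            # GRANULARITY 4, the TYPE argument may be a parameterized call, hence the
            # self._parse_function() attempt below before falling back to a bare identifier.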
            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
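            # Illustratively: in `SELECT a, b APPLY(sum) FROM t` the APPLY(...) is left for
            # _parse_expression below to consume as an exp.Apply modifier, whereas a bare
            # `SELECT x apply` still parses "apply" as x's alias.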
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False
        ARRAY_SIZE_NAME = "LENGTH"

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: trim_sql,
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql
However, "SELECT <expr> apply" is a valid alias 818 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 819 return this 820 821 return super()._parse_alias(this=this, explicit=explicit) 822 823 def _parse_expression(self) -> t.Optional[exp.Expression]: 824 this = super()._parse_expression() 825 826 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 827 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 828 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 829 self._match(TokenType.R_PAREN) 830 831 return this 832 833 def _parse_columns(self) -> exp.Expression: 834 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 835 836 while self._next and self._match_text_seq(")", "APPLY", "("): 837 self._match(TokenType.R_PAREN) 838 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 839 return this 840 841 class Generator(generator.Generator): 842 QUERY_HINTS = False 843 STRUCT_DELIMITER = ("(", ")") 844 NVL2_SUPPORTED = False 845 TABLESAMPLE_REQUIRES_PARENS = False 846 TABLESAMPLE_SIZE_IS_ROWS = False 847 TABLESAMPLE_KEYWORDS = "SAMPLE" 848 LAST_DAY_SUPPORTS_DATE_PART = False 849 CAN_IMPLEMENT_ARRAY_ANY = True 850 SUPPORTS_TO_NUMBER = False 851 JOIN_HINTS = False 852 TABLE_HINTS = False 853 GROUPINGS_SEP = "" 854 SET_OP_MODIFIERS = False 855 SUPPORTS_TABLE_ALIAS_COLUMNS = False 856 VALUES_AS_TABLE = False 857 ARRAY_SIZE_NAME = "LENGTH" 858 859 STRING_TYPE_MAPPING = { 860 exp.DataType.Type.CHAR: "String", 861 exp.DataType.Type.LONGBLOB: "String", 862 exp.DataType.Type.LONGTEXT: "String", 863 exp.DataType.Type.MEDIUMBLOB: "String", 864 exp.DataType.Type.MEDIUMTEXT: "String", 865 exp.DataType.Type.TINYBLOB: "String", 866 exp.DataType.Type.TINYTEXT: "String", 867 exp.DataType.Type.TEXT: "String", 868 exp.DataType.Type.VARBINARY: "String", 869 exp.DataType.Type.VARCHAR: "String", 870 } 871 872 SUPPORTED_JSON_PATH_PARTS = { 873 exp.JSONPathKey, 874 exp.JSONPathRoot, 875 exp.JSONPathSubscript, 876 } 877 878 TYPE_MAPPING = { 879 **generator.Generator.TYPE_MAPPING, 880 **STRING_TYPE_MAPPING, 881 exp.DataType.Type.ARRAY: "Array", 882 exp.DataType.Type.BOOLEAN: "Bool", 883 exp.DataType.Type.BIGINT: "Int64", 884 exp.DataType.Type.DATE32: "Date32", 885 exp.DataType.Type.DATETIME: "DateTime", 886 exp.DataType.Type.DATETIME64: "DateTime64", 887 exp.DataType.Type.DECIMAL: "Decimal", 888 exp.DataType.Type.DECIMAL32: "Decimal32", 889 exp.DataType.Type.DECIMAL64: "Decimal64", 890 exp.DataType.Type.DECIMAL128: "Decimal128", 891 exp.DataType.Type.DECIMAL256: "Decimal256", 892 exp.DataType.Type.TIMESTAMP: "DateTime", 893 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 894 exp.DataType.Type.DOUBLE: "Float64", 895 exp.DataType.Type.ENUM: "Enum", 896 exp.DataType.Type.ENUM8: "Enum8", 897 exp.DataType.Type.ENUM16: "Enum16", 898 exp.DataType.Type.FIXEDSTRING: "FixedString", 899 exp.DataType.Type.FLOAT: "Float32", 900 exp.DataType.Type.INT: "Int32", 901 exp.DataType.Type.MEDIUMINT: "Int32", 902 exp.DataType.Type.INT128: "Int128", 903 exp.DataType.Type.INT256: "Int256", 904 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 905 exp.DataType.Type.MAP: "Map", 906 exp.DataType.Type.NESTED: "Nested", 907 exp.DataType.Type.SMALLINT: "Int16", 908 exp.DataType.Type.STRUCT: "Tuple", 909 exp.DataType.Type.TINYINT: "Int8", 910 exp.DataType.Type.UBIGINT: "UInt64", 911 exp.DataType.Type.UINT: "UInt32", 912 exp.DataType.Type.UINT128: "UInt128", 913 exp.DataType.Type.UINT256: "UInt256", 914 exp.DataType.Type.USMALLINT: "UInt16", 915 
exp.DataType.Type.UTINYINT: "UInt8", 916 exp.DataType.Type.IPV4: "IPv4", 917 exp.DataType.Type.IPV6: "IPv6", 918 exp.DataType.Type.POINT: "Point", 919 exp.DataType.Type.RING: "Ring", 920 exp.DataType.Type.LINESTRING: "LineString", 921 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 922 exp.DataType.Type.POLYGON: "Polygon", 923 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 924 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 925 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 926 } 927 928 TRANSFORMS = { 929 **generator.Generator.TRANSFORMS, 930 exp.AnyValue: rename_func("any"), 931 exp.ApproxDistinct: rename_func("uniq"), 932 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 933 exp.ArraySum: rename_func("arraySum"), 934 exp.ArgMax: arg_max_or_min_no_count("argMax"), 935 exp.ArgMin: arg_max_or_min_no_count("argMin"), 936 exp.Array: inline_array_sql, 937 exp.CastToStrType: rename_func("CAST"), 938 exp.CountIf: rename_func("countIf"), 939 exp.CompressColumnConstraint: lambda self, 940 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 941 exp.ComputedColumnConstraint: lambda self, 942 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 943 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 944 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 945 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 946 exp.DateStrToDate: rename_func("toDate"), 947 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 948 exp.Explode: rename_func("arrayJoin"), 949 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 950 exp.IsNan: rename_func("isNaN"), 951 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 952 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 953 exp.JSONPathKey: json_path_key_only_name, 954 exp.JSONPathRoot: lambda *_: "", 955 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 956 exp.Median: rename_func("median"), 957 exp.Nullif: rename_func("nullIf"), 958 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 959 exp.Pivot: no_pivot_sql, 960 exp.Quantile: _quantile_sql, 961 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 962 exp.Rand: rename_func("randCanonical"), 963 exp.StartsWith: rename_func("startsWith"), 964 exp.StrPosition: lambda self, e: self.func( 965 "position", e.this, e.args.get("substr"), e.args.get("position") 966 ), 967 exp.TimeToStr: lambda self, e: self.func( 968 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 969 ), 970 exp.TimeStrToTime: _timestrtotime_sql, 971 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 972 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 973 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 974 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 975 exp.MD5Digest: rename_func("MD5"), 976 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 977 exp.SHA: rename_func("SHA1"), 978 exp.SHA2: sha256_sql, 979 exp.UnixToTime: _unix_to_time_sql, 980 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 981 exp.Trim: trim_sql, 982 exp.Variance: rename_func("varSamp"), 983 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 984 exp.Stddev: rename_func("stddevSamp"), 985 exp.Chr: rename_func("CHAR"), 986 exp.Lag: lambda self, e: self.func( 987 "lagInFrame", e.this, e.args.get("offset"), 
e.args.get("default") 988 ), 989 exp.Lead: lambda self, e: self.func( 990 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 991 ), 992 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 993 rename_func("editDistance") 994 ), 995 } 996 997 PROPERTIES_LOCATION = { 998 **generator.Generator.PROPERTIES_LOCATION, 999 exp.OnCluster: exp.Properties.Location.POST_NAME, 1000 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1001 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1002 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1003 } 1004 1005 # There's no list in docs, but it can be found in Clickhouse code 1006 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1007 ON_CLUSTER_TARGETS = { 1008 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1009 "DATABASE", 1010 "TABLE", 1011 "VIEW", 1012 "DICTIONARY", 1013 "INDEX", 1014 "FUNCTION", 1015 "NAMED COLLECTION", 1016 } 1017 1018 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1019 NON_NULLABLE_TYPES = { 1020 exp.DataType.Type.ARRAY, 1021 exp.DataType.Type.MAP, 1022 exp.DataType.Type.STRUCT, 1023 exp.DataType.Type.POINT, 1024 exp.DataType.Type.RING, 1025 exp.DataType.Type.LINESTRING, 1026 exp.DataType.Type.MULTILINESTRING, 1027 exp.DataType.Type.POLYGON, 1028 exp.DataType.Type.MULTIPOLYGON, 1029 } 1030 1031 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1032 strtodate_sql = self.function_fallback_sql(expression) 1033 1034 if not isinstance(expression.parent, exp.Cast): 1035 # StrToDate returns DATEs in other dialects (eg. postgres), so 1036 # this branch aims to improve the transpilation to clickhouse 1037 return f"CAST({strtodate_sql} AS DATE)" 1038 1039 return strtodate_sql 1040 1041 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1042 this = expression.this 1043 1044 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1045 return self.sql(this) 1046 1047 return super().cast_sql(expression, safe_prefix=safe_prefix) 1048 1049 def trycast_sql(self, expression: exp.TryCast) -> str: 1050 dtype = expression.to 1051 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1052 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1053 dtype.set("nullable", True) 1054 1055 return super().cast_sql(expression) 1056 1057 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1058 this = self.json_path_part(expression.this) 1059 return str(int(this) + 1) if is_int(this) else this 1060 1061 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1062 return f"AS {self.sql(expression, 'this')}" 1063 1064 def _any_to_has( 1065 self, 1066 expression: exp.EQ | exp.NEQ, 1067 default: t.Callable[[t.Any], str], 1068 prefix: str = "", 1069 ) -> str: 1070 if isinstance(expression.left, exp.Any): 1071 arr = expression.left 1072 this = expression.right 1073 elif isinstance(expression.right, exp.Any): 1074 arr = expression.right 1075 this = expression.left 1076 else: 1077 return default(expression) 1078 1079 return prefix + self.func("has", arr.this.unnest(), this) 1080 1081 def eq_sql(self, expression: exp.EQ) -> str: 1082 return self._any_to_has(expression, super().eq_sql) 1083 1084 def neq_sql(self, expression: exp.NEQ) -> str: 1085 return self._any_to_has(expression, super().neq_sql, "NOT ") 1086 1087 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1088 # Manually add a flag to make the 
search case-insensitive 1089 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1090 return self.func("match", expression.this, regex) 1091 1092 def datatype_sql(self, expression: exp.DataType) -> str: 1093 # String is the standard ClickHouse type, every other variant is just an alias. 1094 # Additionally, any supplied length parameter will be ignored. 1095 # 1096 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1097 if expression.this in self.STRING_TYPE_MAPPING: 1098 dtype = "String" 1099 else: 1100 dtype = super().datatype_sql(expression) 1101 1102 # This section changes the type to `Nullable(...)` if the following conditions hold: 1103 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1104 # and change their semantics 1105 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1106 # constraint: "Type of Map key must be a type, that can be represented by integer or 1107 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1108 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1109 parent = expression.parent 1110 nullable = expression.args.get("nullable") 1111 if nullable is True or ( 1112 nullable is None 1113 and not ( 1114 isinstance(parent, exp.DataType) 1115 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1116 and expression.index in (None, 0) 1117 ) 1118 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1119 ): 1120 dtype = f"Nullable({dtype})" 1121 1122 return dtype 1123 1124 def cte_sql(self, expression: exp.CTE) -> str: 1125 if expression.args.get("scalar"): 1126 this = self.sql(expression, "this") 1127 alias = self.sql(expression, "alias") 1128 return f"{this} AS {alias}" 1129 1130 return super().cte_sql(expression) 1131 1132 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1133 return super().after_limit_modifiers(expression) + [ 1134 ( 1135 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1136 if expression.args.get("settings") 1137 else "" 1138 ), 1139 ( 1140 self.seg("FORMAT ") + self.sql(expression, "format") 1141 if expression.args.get("format") 1142 else "" 1143 ), 1144 ] 1145 1146 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1147 params = self.expressions(expression, key="params", flat=True) 1148 return self.func(expression.name, *expression.expressions) + f"({params})" 1149 1150 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1151 return self.func(expression.name, *expression.expressions) 1152 1153 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1154 return self.anonymousaggfunc_sql(expression) 1155 1156 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1157 return self.parameterizedagg_sql(expression) 1158 1159 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1160 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1161 1162 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1163 return f"ON CLUSTER {self.sql(expression, 'this')}" 1164 1165 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1166 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1167 exp.Properties.Location.POST_NAME 1168 ): 1169 this_name = self.sql( 1170 expression.this if isinstance(expression.this, exp.Schema) else expression, 1171 "this", 1172 ) 1173 this_properties = " 
".join( 1174 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1175 ) 1176 this_schema = self.schema_columns_sql(expression.this) 1177 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1178 1179 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1180 1181 return super().createable_sql(expression, locations) 1182 1183 def create_sql(self, expression: exp.Create) -> str: 1184 # The comment property comes last in CTAS statements, i.e. after the query 1185 query = expression.expression 1186 if isinstance(query, exp.Query): 1187 comment_prop = expression.find(exp.SchemaCommentProperty) 1188 if comment_prop: 1189 comment_prop.pop() 1190 query.replace(exp.paren(query)) 1191 else: 1192 comment_prop = None 1193 1194 create_sql = super().create_sql(expression) 1195 1196 comment_sql = self.sql(comment_prop) 1197 comment_sql = f" {comment_sql}" if comment_sql else "" 1198 1199 return f"{create_sql}{comment_sql}" 1200 1201 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1202 this = self.indent(self.sql(expression, "this")) 1203 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1204 1205 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1206 this = self.sql(expression, "this") 1207 this = f" {this}" if this else "" 1208 expr = self.sql(expression, "expression") 1209 expr = f" {expr}" if expr else "" 1210 index_type = self.sql(expression, "index_type") 1211 index_type = f" TYPE {index_type}" if index_type else "" 1212 granularity = self.sql(expression, "granularity") 1213 granularity = f" GRANULARITY {granularity}" if granularity else "" 1214 1215 return f"INDEX{this}{expr}{index_type}{granularity}" 1216 1217 def partition_sql(self, expression: exp.Partition) -> str: 1218 return f"PARTITION {self.expressions(expression, flat=True)}" 1219 1220 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1221 return f"ID {self.sql(expression.this)}" 1222 1223 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1224 return ( 1225 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1226 ) 1227 1228 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1229 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1230 1231 def is_sql(self, expression: exp.Is) -> str: 1232 is_sql = super().is_sql(expression) 1233 1234 if isinstance(expression.parent, exp.Not): 1235 # value IS NOT NULL -> NOT (value IS NULL) 1236 is_sql = self.wrap(is_sql) 1237 1238 return is_sql
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.

NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

    WITH data AS (
      SELECT
        1 AS id,
        2 AS my_id
    )
    SELECT
      id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1" (see the sketch below)
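A minimal sketch of the ClickHouse behaviour, assuming sqlglot's qualify rule is used to resolve the alias references (the CTE and column names mirror the example above):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1"

    # Under ClickHouse semantics the alias reference in WHERE resolves to "id";
    # most other dialects would resolve it to the column "data.my_id" instead.
    resolved = qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse")
    print(resolved.sql(dialect="clickhouse"))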
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
ESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (the corresponding literal character, e.g. an actual newline).
CREATABLE_KIND_MAPPING
Helper for dialects that use a different name for the same creatable kind. For example, the ClickHouse equivalent of CREATE SCHEMA is CREATE DATABASE.
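A hedged sketch of the mapping in action (the expected output is an assumption based on the description above):

    import sqlglot

    # ClickHouse has no CREATE SCHEMA, so the creatable kind is rewritten.
    print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])
    # expected (assumed): CREATE DATABASE foo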
SET_OP_DISTINCT_BY_DEFAULT
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
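A short sketch of the consequence for transpilation (the generated modifier is an assumption based on the description above):

    import sqlglot

    # A bare UNION from a dialect where DISTINCT is implicit should come out
    # with the modifier spelled explicitly for ClickHouse.
    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0])
    # expected (assumed): SELECT 1 UNION DISTINCT SELECT 2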
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
185 class Tokenizer(tokens.Tokenizer): 186 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 187 IDENTIFIERS = ['"', "`"] 188 IDENTIFIER_ESCAPES = ["\\"] 189 STRING_ESCAPES = ["'", "\\"] 190 BIT_STRINGS = [("0b", "")] 191 HEX_STRINGS = [("0x", ""), ("0X", "")] 192 HEREDOC_STRINGS = ["$"] 193 194 KEYWORDS = { 195 **tokens.Tokenizer.KEYWORDS, 196 "ATTACH": TokenType.COMMAND, 197 "DATE32": TokenType.DATE32, 198 "DATETIME64": TokenType.DATETIME64, 199 "DICTIONARY": TokenType.DICTIONARY, 200 "ENUM8": TokenType.ENUM8, 201 "ENUM16": TokenType.ENUM16, 202 "FINAL": TokenType.FINAL, 203 "FIXEDSTRING": TokenType.FIXEDSTRING, 204 "FLOAT32": TokenType.FLOAT, 205 "FLOAT64": TokenType.DOUBLE, 206 "GLOBAL": TokenType.GLOBAL, 207 "INT256": TokenType.INT256, 208 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 209 "MAP": TokenType.MAP, 210 "NESTED": TokenType.NESTED, 211 "SAMPLE": TokenType.TABLE_SAMPLE, 212 "TUPLE": TokenType.STRUCT, 213 "UINT128": TokenType.UINT128, 214 "UINT16": TokenType.USMALLINT, 215 "UINT256": TokenType.UINT256, 216 "UINT32": TokenType.UINT, 217 "UINT64": TokenType.UBIGINT, 218 "UINT8": TokenType.UTINYINT, 219 "IPV4": TokenType.IPV4, 220 "IPV6": TokenType.IPV6, 221 "POINT": TokenType.POINT, 222 "RING": TokenType.RING, 223 "LINESTRING": TokenType.LINESTRING, 224 "MULTILINESTRING": TokenType.MULTILINESTRING, 225 "POLYGON": TokenType.POLYGON, 226 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 227 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 228 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 229 "SYSTEM": TokenType.COMMAND, 230 "PREWHERE": TokenType.PREWHERE, 231 } 232 KEYWORDS.pop("/*+") 233 234 SINGLE_TOKENS = { 235 **tokens.Tokenizer.SINGLE_TOKENS, 236 "$": TokenType.HEREDOC_STRING, 237 }
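A small sketch of the Tokenizer's ClickHouse-specific keywords in action (token names follow the KEYWORDS table above):

    from sqlglot.dialects.clickhouse import ClickHouse

    # FINAL and SAMPLE are keywords here (TokenType.FINAL / TokenType.TABLE_SAMPLE),
    # while uniq(...) is tokenized like any other identifier.
    tokens = ClickHouse().tokenize("SELECT uniq(x) FROM t FINAL SAMPLE 0.1")
    print([(token.token_type, token.text) for token in tokens])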
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
239 class Parser(parser.Parser): 240 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 241 # * select x from t1 union all select x from t2 limit 1; 242 # * select x from t1 union all (select x from t2 limit 1); 243 MODIFIERS_ATTACHED_TO_SET_OP = False 244 INTERVAL_SPANS = False 245 246 FUNCTIONS = { 247 **parser.Parser.FUNCTIONS, 248 "ANY": exp.AnyValue.from_arg_list, 249 "ARRAYSUM": exp.ArraySum.from_arg_list, 250 "COUNTIF": _build_count_if, 251 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 252 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 254 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATE_FORMAT": _build_date_format, 256 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 257 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 258 "FORMATDATETIME": _build_date_format, 259 "JSONEXTRACTSTRING": build_json_extract_path( 260 exp.JSONExtractScalar, zero_based_indexing=False 261 ), 262 "MAP": parser.build_var_map, 263 "MATCH": exp.RegexpLike.from_arg_list, 264 "RANDCANONICAL": exp.Rand.from_arg_list, 265 "STR_TO_DATE": _build_str_to_date, 266 "TUPLE": exp.Struct.from_arg_list, 267 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 268 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 270 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "UNIQ": exp.ApproxDistinct.from_arg_list, 272 "XOR": lambda args: exp.Xor(expressions=args), 273 "MD5": exp.MD5Digest.from_arg_list, 274 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 275 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 276 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 277 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 278 } 279 280 AGG_FUNCTIONS = { 281 "count", 282 "min", 283 "max", 284 "sum", 285 "avg", 286 "any", 287 "stddevPop", 288 "stddevSamp", 289 "varPop", 290 "varSamp", 291 "corr", 292 "covarPop", 293 "covarSamp", 294 "entropy", 295 "exponentialMovingAverage", 296 "intervalLengthSum", 297 "kolmogorovSmirnovTest", 298 "mannWhitneyUTest", 299 "median", 300 "rankCorr", 301 "sumKahan", 302 "studentTTest", 303 "welchTTest", 304 "anyHeavy", 305 "anyLast", 306 "boundingRatio", 307 "first_value", 308 "last_value", 309 "argMin", 310 "argMax", 311 "avgWeighted", 312 "topK", 313 "topKWeighted", 314 "deltaSum", 315 "deltaSumTimestamp", 316 "groupArray", 317 "groupArrayLast", 318 "groupUniqArray", 319 "groupArrayInsertAt", 320 "groupArrayMovingAvg", 321 "groupArrayMovingSum", 322 "groupArraySample", 323 "groupBitAnd", 324 "groupBitOr", 325 "groupBitXor", 326 "groupBitmap", 327 "groupBitmapAnd", 328 "groupBitmapOr", 329 "groupBitmapXor", 330 "sumWithOverflow", 331 "sumMap", 332 "minMap", 333 "maxMap", 334 "skewSamp", 335 "skewPop", 336 "kurtSamp", 337 "kurtPop", 338 "uniq", 339 "uniqExact", 340 "uniqCombined", 341 "uniqCombined64", 342 "uniqHLL12", 343 "uniqTheta", 344 "quantile", 345 "quantiles", 346 "quantileExact", 347 "quantilesExact", 348 "quantileExactLow", 349 "quantilesExactLow", 350 "quantileExactHigh", 351 "quantilesExactHigh", 352 "quantileExactWeighted", 353 "quantilesExactWeighted", 354 "quantileTiming", 355 "quantilesTiming", 356 "quantileTimingWeighted", 357 "quantilesTimingWeighted", 358 "quantileDeterministic", 359 
"quantilesDeterministic", 360 "quantileTDigest", 361 "quantilesTDigest", 362 "quantileTDigestWeighted", 363 "quantilesTDigestWeighted", 364 "quantileBFloat16", 365 "quantilesBFloat16", 366 "quantileBFloat16Weighted", 367 "quantilesBFloat16Weighted", 368 "simpleLinearRegression", 369 "stochasticLinearRegression", 370 "stochasticLogisticRegression", 371 "categoricalInformationValue", 372 "contingency", 373 "cramersV", 374 "cramersVBiasCorrected", 375 "theilsU", 376 "maxIntersections", 377 "maxIntersectionsPosition", 378 "meanZTest", 379 "quantileInterpolatedWeighted", 380 "quantilesInterpolatedWeighted", 381 "quantileGK", 382 "quantilesGK", 383 "sparkBar", 384 "sumCount", 385 "largestTriangleThreeBuckets", 386 "histogram", 387 "sequenceMatch", 388 "sequenceCount", 389 "windowFunnel", 390 "retention", 391 "uniqUpTo", 392 "sequenceNextNode", 393 "exponentialTimeDecayedAvg", 394 } 395 396 AGG_FUNCTIONS_SUFFIXES = [ 397 "If", 398 "Array", 399 "ArrayIf", 400 "Map", 401 "SimpleState", 402 "State", 403 "Merge", 404 "MergeState", 405 "ForEach", 406 "Distinct", 407 "OrDefault", 408 "OrNull", 409 "Resample", 410 "ArgMin", 411 "ArgMax", 412 ] 413 414 FUNC_TOKENS = { 415 *parser.Parser.FUNC_TOKENS, 416 TokenType.SET, 417 } 418 419 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 420 421 ID_VAR_TOKENS = { 422 *parser.Parser.ID_VAR_TOKENS, 423 TokenType.LIKE, 424 } 425 426 AGG_FUNC_MAPPING = ( 427 lambda functions, suffixes: { 428 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 429 } 430 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 431 432 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 433 434 FUNCTION_PARSERS = { 435 **parser.Parser.FUNCTION_PARSERS, 436 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 437 "QUANTILE": lambda self: self._parse_quantile(), 438 "MEDIAN": lambda self: self._parse_quantile(), 439 "COLUMNS": lambda self: self._parse_columns(), 440 } 441 442 FUNCTION_PARSERS.pop("MATCH") 443 444 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 445 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 446 447 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 448 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 449 450 RANGE_PARSERS = { 451 **parser.Parser.RANGE_PARSERS, 452 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 453 and self._parse_in(this, is_global=True), 454 } 455 456 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 457 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
458 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 459 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 460 461 JOIN_KINDS = { 462 *parser.Parser.JOIN_KINDS, 463 TokenType.ANY, 464 TokenType.ASOF, 465 TokenType.ARRAY, 466 } 467 468 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 469 TokenType.ANY, 470 TokenType.ARRAY, 471 TokenType.FINAL, 472 TokenType.FORMAT, 473 TokenType.SETTINGS, 474 } 475 476 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 477 TokenType.FORMAT, 478 } 479 480 LOG_DEFAULTS_TO_LN = True 481 482 QUERY_MODIFIER_PARSERS = { 483 **parser.Parser.QUERY_MODIFIER_PARSERS, 484 TokenType.SETTINGS: lambda self: ( 485 "settings", 486 self._advance() or self._parse_csv(self._parse_assignment), 487 ), 488 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 489 } 490 491 CONSTRAINT_PARSERS = { 492 **parser.Parser.CONSTRAINT_PARSERS, 493 "INDEX": lambda self: self._parse_index_constraint(), 494 "CODEC": lambda self: self._parse_compress(), 495 } 496 497 ALTER_PARSERS = { 498 **parser.Parser.ALTER_PARSERS, 499 "REPLACE": lambda self: self._parse_alter_table_replace(), 500 } 501 502 SCHEMA_UNNAMED_CONSTRAINTS = { 503 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 504 "INDEX", 505 } 506 507 PLACEHOLDER_PARSERS = { 508 **parser.Parser.PLACEHOLDER_PARSERS, 509 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 510 } 511 512 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 513 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 514 return self._parse_lambda() 515 516 def _parse_types( 517 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 518 ) -> t.Optional[exp.Expression]: 519 dtype = super()._parse_types( 520 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 521 ) 522 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 523 # Mark every type as non-nullable which is ClickHouse's default, unless it's 524 # already marked as nullable. This marker helps us transpile types from other 525 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 526 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 527 # fail in ClickHouse without the `Nullable` type constructor. 528 dtype.set("nullable", False) 529 530 return dtype 531 532 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 533 index = self._index 534 this = self._parse_bitwise() 535 if self._match(TokenType.FROM): 536 self._retreat(index) 537 return super()._parse_extract() 538 539 # We return Anonymous here because extract and regexpExtract have different semantics, 540 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 541 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 542 # 543 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
544 self._match(TokenType.COMMA) 545 return self.expression( 546 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 547 ) 548 549 def _parse_assignment(self) -> t.Optional[exp.Expression]: 550 this = super()._parse_assignment() 551 552 if self._match(TokenType.PLACEHOLDER): 553 return self.expression( 554 exp.If, 555 this=this, 556 true=self._parse_assignment(), 557 false=self._match(TokenType.COLON) and self._parse_assignment(), 558 ) 559 560 return this 561 562 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 563 """ 564 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 565 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 566 """ 567 this = self._parse_id_var() 568 self._match(TokenType.COLON) 569 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 570 self._match_text_seq("IDENTIFIER") and "Identifier" 571 ) 572 573 if not kind: 574 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 575 elif not self._match(TokenType.R_BRACE): 576 self.raise_error("Expecting }") 577 578 return self.expression(exp.Placeholder, this=this, kind=kind) 579 580 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 581 this = super()._parse_in(this) 582 this.set("is_global", is_global) 583 return this 584 585 def _parse_table( 586 self, 587 schema: bool = False, 588 joins: bool = False, 589 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 590 parse_bracket: bool = False, 591 is_db_reference: bool = False, 592 parse_partition: bool = False, 593 ) -> t.Optional[exp.Expression]: 594 this = super()._parse_table( 595 schema=schema, 596 joins=joins, 597 alias_tokens=alias_tokens, 598 parse_bracket=parse_bracket, 599 is_db_reference=is_db_reference, 600 ) 601 602 if self._match(TokenType.FINAL): 603 this = self.expression(exp.Final, this=this) 604 605 return this 606 607 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 608 return super()._parse_position(haystack_first=True) 609 610 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 611 def _parse_cte(self) -> exp.CTE: 612 # WITH <identifier> AS <subquery expression> 613 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 614 615 if not cte: 616 # WITH <expression> AS <identifier> 617 cte = self.expression( 618 exp.CTE, 619 this=self._parse_assignment(), 620 alias=self._parse_table_alias(), 621 scalar=True, 622 ) 623 624 return cte 625 626 def _parse_join_parts( 627 self, 628 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 629 is_global = self._match(TokenType.GLOBAL) and self._prev 630 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 631 632 if kind_pre: 633 kind = self._match_set(self.JOIN_KINDS) and self._prev 634 side = self._match_set(self.JOIN_SIDES) and self._prev 635 return is_global, side, kind 636 637 return ( 638 is_global, 639 self._match_set(self.JOIN_SIDES) and self._prev, 640 self._match_set(self.JOIN_KINDS) and self._prev, 641 ) 642 643 def _parse_join( 644 self, skip_join_token: bool = False, parse_bracket: bool = False 645 ) -> t.Optional[exp.Join]: 646 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 647 if join: 648 join.set("global", join.args.pop("method", None)) 649 650 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 651 # 
https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 652 if join.kind == "ARRAY": 653 for table in join.find_all(exp.Table): 654 table.replace(table.to_column()) 655 656 return join 657 658 def _parse_function( 659 self, 660 functions: t.Optional[t.Dict[str, t.Callable]] = None, 661 anonymous: bool = False, 662 optional_parens: bool = True, 663 any_token: bool = False, 664 ) -> t.Optional[exp.Expression]: 665 expr = super()._parse_function( 666 functions=functions, 667 anonymous=anonymous, 668 optional_parens=optional_parens, 669 any_token=any_token, 670 ) 671 672 func = expr.this if isinstance(expr, exp.Window) else expr 673 674 # Aggregate functions can be split in 2 parts: <func_name><suffix> 675 parts = ( 676 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 677 ) 678 679 if parts: 680 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 681 params = self._parse_func_params(anon_func) 682 683 kwargs = { 684 "this": anon_func.this, 685 "expressions": anon_func.expressions, 686 } 687 if parts[1]: 688 kwargs["parts"] = parts 689 exp_class: t.Type[exp.Expression] = ( 690 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 691 ) 692 else: 693 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 694 695 kwargs["exp_class"] = exp_class 696 if params: 697 kwargs["params"] = params 698 699 func = self.expression(**kwargs) 700 701 if isinstance(expr, exp.Window): 702 # The window's func was parsed as Anonymous in base parser, fix its 703 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 704 expr.set("this", func) 705 elif params: 706 # Params have blocked super()._parse_function() from parsing the following window 707 # (if that exists) as they're standing between the function call and the window spec 708 expr = self._parse_window(func) 709 else: 710 expr = func 711 712 return expr 713 714 def _parse_func_params( 715 self, this: t.Optional[exp.Func] = None 716 ) -> t.Optional[t.List[exp.Expression]]: 717 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 718 return self._parse_csv(self._parse_lambda) 719 720 if self._match(TokenType.L_PAREN): 721 params = self._parse_csv(self._parse_lambda) 722 self._match_r_paren(this) 723 return params 724 725 return None 726 727 def _parse_quantile(self) -> exp.Quantile: 728 this = self._parse_lambda() 729 params = self._parse_func_params() 730 if params: 731 return self.expression(exp.Quantile, this=params[0], quantile=this) 732 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 733 734 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 735 return super()._parse_wrapped_id_vars(optional=True) 736 737 def _parse_primary_key( 738 self, wrapped_optional: bool = False, in_props: bool = False 739 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 740 return super()._parse_primary_key( 741 wrapped_optional=wrapped_optional or in_props, in_props=in_props 742 ) 743 744 def _parse_on_property(self) -> t.Optional[exp.Expression]: 745 index = self._index 746 if self._match_text_seq("CLUSTER"): 747 this = self._parse_id_var() 748 if this: 749 return self.expression(exp.OnCluster, this=this) 750 else: 751 self._retreat(index) 752 return None 753 754 def _parse_index_constraint( 755 self, kind: t.Optional[str] = None 756 ) -> exp.IndexColumnConstraint: 757 # INDEX name1 expr TYPE type1(args) GRANULARITY value 758 this = self._parse_id_var() 759 expression = self._parse_assignment() 760 761 
index_type = self._match_text_seq("TYPE") and ( 762 self._parse_function() or self._parse_var() 763 ) 764 765 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 766 767 return self.expression( 768 exp.IndexColumnConstraint, 769 this=this, 770 expression=expression, 771 index_type=index_type, 772 granularity=granularity, 773 ) 774 775 def _parse_partition(self) -> t.Optional[exp.Partition]: 776 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 777 if not self._match(TokenType.PARTITION): 778 return None 779 780 if self._match_text_seq("ID"): 781 # Corresponds to the PARTITION ID <string_value> syntax 782 expressions: t.List[exp.Expression] = [ 783 self.expression(exp.PartitionId, this=self._parse_string()) 784 ] 785 else: 786 expressions = self._parse_expressions() 787 788 return self.expression(exp.Partition, expressions=expressions) 789 790 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 791 partition = self._parse_partition() 792 793 if not partition or not self._match(TokenType.FROM): 794 return None 795 796 return self.expression( 797 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 798 ) 799 800 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 801 if not self._match_text_seq("PROJECTION"): 802 return None 803 804 return self.expression( 805 exp.ProjectionDef, 806 this=self._parse_id_var(), 807 expression=self._parse_wrapped(self._parse_statement), 808 ) 809 810 def _parse_constraint(self) -> t.Optional[exp.Expression]: 811 return super()._parse_constraint() or self._parse_projection_def() 812 813 def _parse_alias( 814 self, this: t.Optional[exp.Expression], explicit: bool = False 815 ) -> t.Optional[exp.Expression]: 816 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 817 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 818 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 819 return this 820 821 return super()._parse_alias(this=this, explicit=explicit) 822 823 def _parse_expression(self) -> t.Optional[exp.Expression]: 824 this = super()._parse_expression() 825 826 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 827 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 828 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 829 self._match(TokenType.R_PAREN) 830 831 return this 832 833 def _parse_columns(self) -> exp.Expression: 834 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 835 836 while self._next and self._match_text_seq(")", "APPLY", "("): 837 self._match(TokenType.R_PAREN) 838 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 839 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
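A minimal sketch of constructing this Parser directly with the arguments listed above, here parsing a ClickHouse query parameter placeholder:

    from sqlglot.dialects.clickhouse import ClickHouse
    from sqlglot.errors import ErrorLevel

    dialect = ClickHouse()
    parser = dialect.parser(error_level=ErrorLevel.RAISE, max_errors=1)

    # {id: UInt32} is handled by _parse_query_parameter() above.
    ast = parser.parse(dialect.tokenize("SELECT {id: UInt32}"))[0]
    print(ast.sql(dialect="clickhouse"))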
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
841 class Generator(generator.Generator): 842 QUERY_HINTS = False 843 STRUCT_DELIMITER = ("(", ")") 844 NVL2_SUPPORTED = False 845 TABLESAMPLE_REQUIRES_PARENS = False 846 TABLESAMPLE_SIZE_IS_ROWS = False 847 TABLESAMPLE_KEYWORDS = "SAMPLE" 848 LAST_DAY_SUPPORTS_DATE_PART = False 849 CAN_IMPLEMENT_ARRAY_ANY = True 850 SUPPORTS_TO_NUMBER = False 851 JOIN_HINTS = False 852 TABLE_HINTS = False 853 GROUPINGS_SEP = "" 854 SET_OP_MODIFIERS = False 855 SUPPORTS_TABLE_ALIAS_COLUMNS = False 856 VALUES_AS_TABLE = False 857 ARRAY_SIZE_NAME = "LENGTH" 858 859 STRING_TYPE_MAPPING = { 860 exp.DataType.Type.CHAR: "String", 861 exp.DataType.Type.LONGBLOB: "String", 862 exp.DataType.Type.LONGTEXT: "String", 863 exp.DataType.Type.MEDIUMBLOB: "String", 864 exp.DataType.Type.MEDIUMTEXT: "String", 865 exp.DataType.Type.TINYBLOB: "String", 866 exp.DataType.Type.TINYTEXT: "String", 867 exp.DataType.Type.TEXT: "String", 868 exp.DataType.Type.VARBINARY: "String", 869 exp.DataType.Type.VARCHAR: "String", 870 } 871 872 SUPPORTED_JSON_PATH_PARTS = { 873 exp.JSONPathKey, 874 exp.JSONPathRoot, 875 exp.JSONPathSubscript, 876 } 877 878 TYPE_MAPPING = { 879 **generator.Generator.TYPE_MAPPING, 880 **STRING_TYPE_MAPPING, 881 exp.DataType.Type.ARRAY: "Array", 882 exp.DataType.Type.BOOLEAN: "Bool", 883 exp.DataType.Type.BIGINT: "Int64", 884 exp.DataType.Type.DATE32: "Date32", 885 exp.DataType.Type.DATETIME: "DateTime", 886 exp.DataType.Type.DATETIME64: "DateTime64", 887 exp.DataType.Type.DECIMAL: "Decimal", 888 exp.DataType.Type.DECIMAL32: "Decimal32", 889 exp.DataType.Type.DECIMAL64: "Decimal64", 890 exp.DataType.Type.DECIMAL128: "Decimal128", 891 exp.DataType.Type.DECIMAL256: "Decimal256", 892 exp.DataType.Type.TIMESTAMP: "DateTime", 893 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 894 exp.DataType.Type.DOUBLE: "Float64", 895 exp.DataType.Type.ENUM: "Enum", 896 exp.DataType.Type.ENUM8: "Enum8", 897 exp.DataType.Type.ENUM16: "Enum16", 898 exp.DataType.Type.FIXEDSTRING: "FixedString", 899 exp.DataType.Type.FLOAT: "Float32", 900 exp.DataType.Type.INT: "Int32", 901 exp.DataType.Type.MEDIUMINT: "Int32", 902 exp.DataType.Type.INT128: "Int128", 903 exp.DataType.Type.INT256: "Int256", 904 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 905 exp.DataType.Type.MAP: "Map", 906 exp.DataType.Type.NESTED: "Nested", 907 exp.DataType.Type.SMALLINT: "Int16", 908 exp.DataType.Type.STRUCT: "Tuple", 909 exp.DataType.Type.TINYINT: "Int8", 910 exp.DataType.Type.UBIGINT: "UInt64", 911 exp.DataType.Type.UINT: "UInt32", 912 exp.DataType.Type.UINT128: "UInt128", 913 exp.DataType.Type.UINT256: "UInt256", 914 exp.DataType.Type.USMALLINT: "UInt16", 915 exp.DataType.Type.UTINYINT: "UInt8", 916 exp.DataType.Type.IPV4: "IPv4", 917 exp.DataType.Type.IPV6: "IPv6", 918 exp.DataType.Type.POINT: "Point", 919 exp.DataType.Type.RING: "Ring", 920 exp.DataType.Type.LINESTRING: "LineString", 921 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 922 exp.DataType.Type.POLYGON: "Polygon", 923 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 924 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 925 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 926 } 927 928 TRANSFORMS = { 929 **generator.Generator.TRANSFORMS, 930 exp.AnyValue: rename_func("any"), 931 exp.ApproxDistinct: rename_func("uniq"), 932 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 933 exp.ArraySum: rename_func("arraySum"), 934 exp.ArgMax: arg_max_or_min_no_count("argMax"), 935 exp.ArgMin: arg_max_or_min_no_count("argMin"), 936 exp.Array: 
inline_array_sql, 937 exp.CastToStrType: rename_func("CAST"), 938 exp.CountIf: rename_func("countIf"), 939 exp.CompressColumnConstraint: lambda self, 940 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 941 exp.ComputedColumnConstraint: lambda self, 942 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 943 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 944 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 945 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 946 exp.DateStrToDate: rename_func("toDate"), 947 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 948 exp.Explode: rename_func("arrayJoin"), 949 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 950 exp.IsNan: rename_func("isNaN"), 951 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 952 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 953 exp.JSONPathKey: json_path_key_only_name, 954 exp.JSONPathRoot: lambda *_: "", 955 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 956 exp.Median: rename_func("median"), 957 exp.Nullif: rename_func("nullIf"), 958 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 959 exp.Pivot: no_pivot_sql, 960 exp.Quantile: _quantile_sql, 961 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 962 exp.Rand: rename_func("randCanonical"), 963 exp.StartsWith: rename_func("startsWith"), 964 exp.StrPosition: lambda self, e: self.func( 965 "position", e.this, e.args.get("substr"), e.args.get("position") 966 ), 967 exp.TimeToStr: lambda self, e: self.func( 968 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 969 ), 970 exp.TimeStrToTime: _timestrtotime_sql, 971 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 972 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 973 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 974 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 975 exp.MD5Digest: rename_func("MD5"), 976 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 977 exp.SHA: rename_func("SHA1"), 978 exp.SHA2: sha256_sql, 979 exp.UnixToTime: _unix_to_time_sql, 980 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 981 exp.Trim: trim_sql, 982 exp.Variance: rename_func("varSamp"), 983 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 984 exp.Stddev: rename_func("stddevSamp"), 985 exp.Chr: rename_func("CHAR"), 986 exp.Lag: lambda self, e: self.func( 987 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 988 ), 989 exp.Lead: lambda self, e: self.func( 990 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 991 ), 992 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 993 rename_func("editDistance") 994 ), 995 } 996 997 PROPERTIES_LOCATION = { 998 **generator.Generator.PROPERTIES_LOCATION, 999 exp.OnCluster: exp.Properties.Location.POST_NAME, 1000 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1001 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1002 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1003 } 1004 1005 # There's no list in docs, but it can be found in Clickhouse code 1006 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1007 ON_CLUSTER_TARGETS = { 1008 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1009 "DATABASE", 1010 "TABLE", 1011 "VIEW", 1012 "DICTIONARY", 1013 "INDEX", 1014 
"FUNCTION", 1015 "NAMED COLLECTION", 1016 } 1017 1018 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1019 NON_NULLABLE_TYPES = { 1020 exp.DataType.Type.ARRAY, 1021 exp.DataType.Type.MAP, 1022 exp.DataType.Type.STRUCT, 1023 exp.DataType.Type.POINT, 1024 exp.DataType.Type.RING, 1025 exp.DataType.Type.LINESTRING, 1026 exp.DataType.Type.MULTILINESTRING, 1027 exp.DataType.Type.POLYGON, 1028 exp.DataType.Type.MULTIPOLYGON, 1029 } 1030 1031 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1032 strtodate_sql = self.function_fallback_sql(expression) 1033 1034 if not isinstance(expression.parent, exp.Cast): 1035 # StrToDate returns DATEs in other dialects (eg. postgres), so 1036 # this branch aims to improve the transpilation to clickhouse 1037 return f"CAST({strtodate_sql} AS DATE)" 1038 1039 return strtodate_sql 1040 1041 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1042 this = expression.this 1043 1044 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1045 return self.sql(this) 1046 1047 return super().cast_sql(expression, safe_prefix=safe_prefix) 1048 1049 def trycast_sql(self, expression: exp.TryCast) -> str: 1050 dtype = expression.to 1051 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1052 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1053 dtype.set("nullable", True) 1054 1055 return super().cast_sql(expression) 1056 1057 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1058 this = self.json_path_part(expression.this) 1059 return str(int(this) + 1) if is_int(this) else this 1060 1061 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1062 return f"AS {self.sql(expression, 'this')}" 1063 1064 def _any_to_has( 1065 self, 1066 expression: exp.EQ | exp.NEQ, 1067 default: t.Callable[[t.Any], str], 1068 prefix: str = "", 1069 ) -> str: 1070 if isinstance(expression.left, exp.Any): 1071 arr = expression.left 1072 this = expression.right 1073 elif isinstance(expression.right, exp.Any): 1074 arr = expression.right 1075 this = expression.left 1076 else: 1077 return default(expression) 1078 1079 return prefix + self.func("has", arr.this.unnest(), this) 1080 1081 def eq_sql(self, expression: exp.EQ) -> str: 1082 return self._any_to_has(expression, super().eq_sql) 1083 1084 def neq_sql(self, expression: exp.NEQ) -> str: 1085 return self._any_to_has(expression, super().neq_sql, "NOT ") 1086 1087 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1088 # Manually add a flag to make the search case-insensitive 1089 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1090 return self.func("match", expression.this, regex) 1091 1092 def datatype_sql(self, expression: exp.DataType) -> str: 1093 # String is the standard ClickHouse type, every other variant is just an alias. 1094 # Additionally, any supplied length parameter will be ignored. 1095 # 1096 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1097 if expression.this in self.STRING_TYPE_MAPPING: 1098 dtype = "String" 1099 else: 1100 dtype = super().datatype_sql(expression) 1101 1102 # This section changes the type to `Nullable(...)` if the following conditions hold: 1103 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1104 # and change their semantics 1105 # - It's not the key type of a `Map`. 
    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)
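    # Editor's illustration (not part of the source): after_limit_modifiers() above
    # is what keeps trailing ClickHouse clauses such as `SETTINGS max_threads = 4`
    # and `FORMAT JSONEachRow` at the end of a SELECT, and placeholder_sql() renders
    # query parameters in ClickHouse's `{name: Type}` syntax, e.g. `{id: Int32}`.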
    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql
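A quick usage sketch (an editor's addition, not part of the module): the class above is typically exercised through sqlglot's top-level API rather than instantiated directly. The expected outputs in the comments follow from the transforms shown above, but may vary across sqlglot versions.

    import sqlglot

    # TRY_CAST is emulated by casting to a Nullable type (see trycast_sql).
    print(sqlglot.transpile("SELECT TRY_CAST(x AS TEXT) FROM t", write="clickhouse")[0])
    # e.g. SELECT CAST(x AS Nullable(String)) FROM t

    # `= ANY(...)` is rewritten to ClickHouse's has() (see _any_to_has).
    print(
        sqlglot.transpile(
            "SELECT 1 WHERE x = ANY(ARRAY[1, 2])", read="postgres", write="clickhouse"
        )[0]
    )
    # e.g. SELECT 1 WHERE has([1, 2], x)

    # IS NOT NULL is parenthesized as NOT (x IS NULL) (see is_sql).
    print(sqlglot.transpile("SELECT x IS NOT NULL FROM t", write="clickhouse")[0])
    # e.g. SELECT NOT (x IS NULL) FROM t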
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): never quote, except in cases where it's mandatory by the dialect; True or 'always': always quote; 'safe': only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): convert names to uppercase; "lower": convert names to lowercase; False: disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
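These options are usually supplied through the top-level helpers, which forward extra keyword arguments to the generator. An editor's sketch (not part of the module; exact output formatting may differ by version):

    import sqlglot

    print(
        sqlglot.transpile(
            "SELECT a, b FROM t LIMIT 10 SETTINGS max_threads = 4",
            read="clickhouse",
            write="clickhouse",
            pretty=True,  # format the output
            identify=True,  # always quote identifiers
        )[0]
    )
    # The SETTINGS clause stays after LIMIT, courtesy of after_limit_modifiers().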
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql