sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    DATE_ADD_OR_SUB,
    Dialect,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.transforms import (
    remove_unique_constraints,
    ctas_with_tmp_tables_to_create_tmp_view,
    preprocess,
    move_schema_columns_to_partitioned_by,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between or multiplier_sql:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
        # For the same reason, we want to truncate if there's a divisor present.
        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"

    return diff_sql


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    if isinstance(expression.this, exp.TsOrDsToDate):
        return this
    return f"TO_DATE({this})"


def _parse_ignore_nulls(
    exp_class: t.Type[exp.Expression],
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
        this = exp_class(this=seq_get(args, 0))
        if seq_get(args, 1) == exp.true():
            return exp.IgnoreNulls(this=this)
        return this

    return _parse


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FIRST": _parse_ignore_nulls(exp.First),
            "FIRST_VALUE": _parse_ignore_nulls(exp.FirstValue),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LAST": _parse_ignore_nulls(exp.Last),
            "LAST_VALUE": _parse_ignore_nulls(exp.LastValue),
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: (
                        node.replace(exp.DataType.build("text"))
                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                        else node
                    ),
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                (
                    self._parse_csv(self._parse_conjunction)
                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                    else []
                ),
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: (
                "" if e.args.get("allow_null") else "NOT NULL"
            ),
            exp.VarMap: var_map_sql,
            exp.Create: preprocess(
                [
                    remove_unique_constraints,
                    ctas_with_tmp_tables_to_create_tmp_view,
                    move_schema_columns_to_partitioned_by,
                ]
            ),
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _add_date_sql,
            exp.TsOrDsDiff: _date_diff_sql,
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
            if isinstance(expression.this, exp.JSONPathWildcard):
                self.unsupported("Unsupported wildcard in JSONPathKey expression")
                return ""

            return super()._jsonpathkey_sql(expression)

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
                expression.set("this", exp.DataType.Type.VARCHAR)
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}

TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}

DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')
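These module-level tables drive how _add_date_sql and _date_diff_sql lower date arithmetic onto Hive's ADD_MONTHS/DATE_ADD built-ins. A small sketch of the effect through sqlglot's top-level API; the query and the shown result are illustrative, not guaranteed verbatim:

import sqlglot

# A WEEK delta has no native Hive unit, so DATE_DELTA_INTERVAL maps it to
# DATE_ADD with a multiplier of 7 applied to the increment.
print(sqlglot.transpile("SELECT DATE_ADD(col, INTERVAL 2 WEEK) FROM t", read="mysql", write="hive")[0])
# roughly: SELECT DATE_ADD(col, 14) FROM t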
class Hive(Dialect):
IDENTIFIERS_CAN_START_WITH_DIGIT = True
Determines whether or not an unquoted identifier can start with a digit.

SUPPORTS_USER_DEFINED_TYPES = False
Determines whether or not user-defined data types are supported.

SAFE_DIVISION = True
Determines whether division by zero throws an error (False) or returns NULL (True).

NORMALIZATION_STRATEGY = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>
Specifies the strategy according to which identifiers should be normalized.
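Because identifiers resolve case-insensitively in this dialect, sqlglot's identifier normalization lowercases unquoted names. A brief illustration (table and column names are arbitrary):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = parse_one("SELECT FOO.Bar FROM FOO", read="hive")
# Unquoted identifiers are folded to lowercase under CASE_INSENSITIVE normalization.
print(normalize_identifiers(expr, dialect="hive").sql(dialect="hive"))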
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
Associates this dialect's time formats with their equivalent Python strftime format.
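This mapping is what lets sqlglot rewrite Hive/Spark-style format strings such as 'yyyy-MM-dd' into strftime-style tokens for other dialects, and back. A hedged sketch; the exact target function depends on the write dialect:

import sqlglot

# 'yyyy-MM-dd' is translated through TIME_MAPPING into '%Y-%m-%d' for the target dialect.
print(sqlglot.transpile("SELECT DATE_FORMAT(ds, 'yyyy-MM-dd') FROM t", read="hive", write="duckdb")[0])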
tokenizer_class = <class 'Hive.Tokenizer'>
parser_class = <class 'Hive.Parser'>
generator_class = <class 'Hive.Generator'>
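Instantiating the dialect wires these classes together; the inherited Dialect helpers listed further below (tokenize, parse, generate, transpile) are the usual entry points. A minimal sketch (the query text is arbitrary):

from sqlglot.dialects.hive import Hive

hive = Hive()
# Hive.Tokenizer and Hive.Parser build the syntax tree; Hive.Generator renders it back to SQL.
expressions = hive.parse("SELECT `col` FROM db.tbl LIMIT 5")
print(hive.generate(expressions[0]))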
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- TABLESAMPLE_SIZE_IS_PERCENT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- CONCAT_COALESCE
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Hive.Tokenizer(tokens.Tokenizer):
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 
'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 
'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 
'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
NUMERIC_LITERALS = {'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}
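NUMERIC_LITERALS teaches the tokenizer Hive's typed literal suffixes (10Y, 10S, 10L, ...), which sqlglot turns into explicit casts when transpiling out of Hive. A sketch whose expected output is indicative only:

import sqlglot

# The suffix selects the cast target: Y -> TINYINT, S -> SMALLINT, L -> BIGINT, ...
print(sqlglot.transpile("SELECT 10Y, 10S, 10L", read="hive", write="duckdb")[0])
# e.g. SELECT CAST(10 AS TINYINT), CAST(10 AS SMALLINT), CAST(10 AS BIGINT)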
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- COMMENTS
- dialect
- reset
- tokenize
- peek
- tokenize_rs
- size
- sql
- tokens
class Hive.Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
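A minimal usage sketch: this parser is normally reached through the top-level sqlglot API rather than instantiated directly, and the constructor options listed above can be forwarded as keyword arguments. The query below is illustrative only.

import sqlglot
from sqlglot.errors import ErrorLevel

# Parse a Hive query; error_level (and max_errors) are forwarded to this Parser.
expression = sqlglot.parse_one(
    "SELECT DATE_ADD('2020-01-01', 7)",
    read="hive",
    error_level=ErrorLevel.RAISE,
)
print(repr(expression))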
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_OVERLAPS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayOverlaps'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CBRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cbrt'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 
'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <function _parse_ignore_nulls.<locals>._parse>, 'FIRST_VALUE': <function _parse_ignore_nulls.<locals>._parse>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'IIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <function parse_extract_json_with_path.<locals>._parser>, 'JSON_EXTRACT_SCALAR': <function parse_extract_json_with_path.<locals>._parser>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lag'>>, 'LAST': <function _parse_ignore_nulls.<locals>._parse>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_VALUE': <function _parse_ignore_nulls.<locals>._parse>, 'LEAD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lead'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <function parse_logarithm>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NTH_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NthValue'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RAND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDN': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMPDIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'JSON_EXTRACT_PATH_TEXT': <function parse_extract_json_with_path.<locals>._parser>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
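As a quick illustration of the table above (a sketch, not part of the module): Hive built-ins are rewritten into dialect-agnostic expressions at parse time, e.g. DATE_ADD becomes TsOrDsAdd with an implicit DAY unit and COLLECT_SET becomes ArrayUniqueAgg. The example query is illustrative.

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT DATE_ADD(ds, 7), COLLECT_SET(x) FROM t", read="hive")
print(ast.find(exp.TsOrDsAdd))       # DATE_ADD(ds, 7) parsed as TsOrDsAdd with unit DAY
print(ast.find(exp.ArrayUniqueAgg))  # COLLECT_SET(x) parsed as ArrayUniqueAgg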
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
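For reference, the TRANSFORM entry above drives Hive's map-reduce style SELECT TRANSFORM ... USING syntax, which _parse_transform turns into an exp.QueryTransform node. A sketch with an illustrative query:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT TRANSFORM(a, b) USING 'cat' AS (x STRING, y STRING) FROM t",
    read="hive",
)
print(type(ast.find(exp.QueryTransform)))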
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True

    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.Union,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self,
        e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self,
        e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: (
            "" if e.args.get("allow_null") else "NOT NULL"
        ),
        exp.VarMap: var_map_sql,
        exp.Create: preprocess(
            [
                remove_unique_constraints,
                ctas_with_tmp_tables_to_create_tmp_view,
                move_schema_columns_to_partitioned_by,
            ]
        ),
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self,
        e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _add_date_sql,
        exp.TsOrDsDiff: _date_diff_sql,
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self,
        e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self,
        e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
        if isinstance(expression.this, exp.JSONPathWildcard):
            self.unsupported("Unsupported wildcard in JSONPathKey expression")
            return ""

        return super()._jsonpathkey_sql(expression)

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
            expression.set("this", exp.DataType.Type.VARCHAR)
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
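A minimal sketch of how these options are typically supplied, going through the top-level transpile API rather than instantiating the generator directly (the input query is illustrative):

import sqlglot

sql = sqlglot.transpile(
    "SELECT a AS x, CAST(b AS TEXT) FROM t",
    write="hive",
    pretty=True,    # format the output
    identify=True,  # always quote identifiers (backticks in Hive)
)[0]
print(sql)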
EXPRESSIONS_WITHOUT_NESTED_CTES =
{<class 'sqlglot.expressions.Union'>, <class 'sqlglot.expressions.Subquery'>, <class 'sqlglot.expressions.Select'>, <class 'sqlglot.expressions.Insert'>}
SUPPORTED_JSON_PATH_PARTS =
{<class 'sqlglot.expressions.JSONPathWildcard'>, <class 'sqlglot.expressions.JSONPathSubscript'>, <class 'sqlglot.expressions.JSONPathKey'>, <class 'sqlglot.expressions.JSONPathRoot'>}
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Timestamp'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 
'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InheritsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 
'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SetConfigProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def parameter_sql(self, expression: exp.Parameter) -> str:
    this = self.sql(expression, "this")
    expression_sql = self.sql(expression, "expression")

    parent = expression.parent
    this = f"{this}:{expression_sql}" if expression_sql else this

    if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"
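A hedged sketch of the two branches above: a parameter referenced in a query keeps its ${...} wrapper, while the key on the left-hand side of a SET statement is emitted bare. The variable name hiveconf:x is only an illustrative value, and if your sqlglot version handles SET as a raw command the second statement should still come back unchanged.

import sqlglot

# Hedged sketch: expected to print the inputs back unchanged, exercising the
# ${...} form for a query reference and the bare form inside SET.
print(sqlglot.transpile("SELECT ${hiveconf:x}", read="hive", write="hive")[0])
print(sqlglot.transpile("SET hiveconf:x = 1", read="hive", write="hive")[0])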
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
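The body of this method is not shown here. As a hedged usage sketch only: ROW FORMAT SERDE is a POST_SCHEMA property for Hive, so a clause like the one below is expected to round-trip after the column list; the serde class name is just an example value.

import sqlglot

# Hedged sketch: round-trip a ROW FORMAT SERDE clause through the Hive dialect.
ddl = (
    "CREATE TABLE t (a INT) "
    "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'"
)
print(sqlglot.transpile(ddl, read="hive", write="hive")[0])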
def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
        expression.set("this", exp.DataType.Type.VARCHAR)
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
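A hedged sketch of these coercions: an un-sized VARCHAR is rebuilt as TEXT, which Hive renders as STRING, and a FLOAT whose declared precision exceeds 32 bits is widened to DOUBLE. The expected outputs in the comments are assumptions and may vary slightly across sqlglot versions.

import sqlglot

# Hedged sketch: un-sized VARCHAR becomes STRING, FLOAT(53) becomes DOUBLE.
print(sqlglot.transpile("CAST(x AS VARCHAR)", write="hive")[0])    # expected: CAST(x AS STRING)
print(sqlglot.transpile("CAST(x AS FLOAT(53))", write="hive")[0])  # expected: CAST(x AS DOUBLE)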
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql