From 79b30daff39dc53c4d822250946e1667cae1798e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jana=20D=C3=B6nszelmann?=
Date: Mon, 5 Jan 2026 16:14:52 +0100
Subject: [PATCH] add to parser

---
 Grammar/Tokens          |  1 +
 Grammar/python.gram     | 84 +++++++++++++++++++++++++++++----------
 Parser/action_helpers.c | 52 ++++++++++++++++++++++++
 Parser/pegen.h          |  1 +
 4 files changed, 116 insertions(+), 22 deletions(-)

diff --git a/Grammar/Tokens b/Grammar/Tokens
index 0547e6ed08f..fd0ce412f6c 100644
--- a/Grammar/Tokens
+++ b/Grammar/Tokens
@@ -59,6 +59,7 @@ RARROW                  '->'
 ELLIPSIS                '...'
 COLONEQUAL              ':='
 EXCLAMATION             '!'
+PIPE                    '|>'
 
 OP
 TYPE_IGNORE
diff --git a/Grammar/python.gram b/Grammar/python.gram
index 110136af81b..30f7950d1a9 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -726,13 +726,13 @@ star_expressions[expr_ty]:
     | star_expression
 
 star_expression[expr_ty] (memo):
-    | '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
+    | '*' a=pipe { _PyAST_Starred(a, Load, EXTRA) }
     | expression
 
 star_named_expressions[asdl_expr_seq*]: a[asdl_expr_seq*]=','.star_named_expression+ [','] { a }
 
 star_named_expression[expr_ty]:
-    | '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
+    | '*' a=pipe { _PyAST_Starred(a, Load, EXTRA) }
     | named_expression
 
 assignment_expression[expr_ty]:
@@ -767,13 +767,13 @@ inversion[expr_ty] (memo):
 # --------------------
 
 comparison[expr_ty]:
-    | a=bitwise_or b=compare_op_bitwise_or_pair+ {
+    | a=pipe b=compare_op_bitwise_or_pair+ {
         _PyAST_Compare(
             a,
             CHECK(asdl_int_seq*, _PyPegen_get_cmpops(p, b)),
             CHECK(asdl_expr_seq*, _PyPegen_get_exprs(p, b)),
             EXTRA) }
-    | bitwise_or
+    | pipe
 
 compare_op_bitwise_or_pair[CmpopExprPair*]:
     | eq_bitwise_or
@@ -787,21 +787,54 @@ compare_op_bitwise_or_pair[CmpopExprPair*]:
     | isnot_bitwise_or
     | is_bitwise_or
 
-eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) }
+eq_bitwise_or[CmpopExprPair*]: '==' a=pipe { _PyPegen_cmpop_expr_pair(p, Eq, a) }
 noteq_bitwise_or[CmpopExprPair*]:
-    | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
-lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) }
-lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) }
-gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) }
-gt_bitwise_or[CmpopExprPair*]: '>' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Gt, a) }
-notin_bitwise_or[CmpopExprPair*]: 'not' 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, NotIn, a) }
-in_bitwise_or[CmpopExprPair*]: 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, In, a) }
-isnot_bitwise_or[CmpopExprPair*]: 'is' 'not' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, IsNot, a) }
-is_bitwise_or[CmpopExprPair*]: 'is' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Is, a) }
+    | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=pipe {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
+lte_bitwise_or[CmpopExprPair*]: '<=' a=pipe { _PyPegen_cmpop_expr_pair(p, LtE, a) }
+lt_bitwise_or[CmpopExprPair*]: '<' a=pipe { _PyPegen_cmpop_expr_pair(p, Lt, a) }
+gte_bitwise_or[CmpopExprPair*]: '>=' a=pipe { _PyPegen_cmpop_expr_pair(p, GtE, a) }
+gt_bitwise_or[CmpopExprPair*]: '>' a=pipe { _PyPegen_cmpop_expr_pair(p, Gt, a) }
+notin_bitwise_or[CmpopExprPair*]: 'not' 'in' a=pipe { _PyPegen_cmpop_expr_pair(p, NotIn, a) }
+in_bitwise_or[CmpopExprPair*]: 'in' a=pipe { _PyPegen_cmpop_expr_pair(p, In, a) }
+isnot_bitwise_or[CmpopExprPair*]: 'is' 'not' a=pipe { _PyPegen_cmpop_expr_pair(p, IsNot, a) }
+is_bitwise_or[CmpopExprPair*]: 'is' a=pipe { _PyPegen_cmpop_expr_pair(p, Is, a) }
 
 # Bitwise operators
 # -----------------
 
+# Pipe operator (binds looser than '|', tighter than comparisons):
+#   lhs |> name        is parsed as   name := lhs
+#   lhs |> f(args)     is parsed as   f(args, lhs), or lhs replaces a lone `_` in args
+pipe[expr_ty]:
+    | lhs=pipe '|>' rhs=NAME !'(' {
+        CHECK_VERSION(expr_ty, 8, "Assignment expressions are",
+        _PyAST_NamedExpr(CHECK(expr_ty, _PyPegen_set_expr_context(p, rhs, Store)), lhs, EXTRA))
+    }
+    | lhs=pipe '|>' rhs=primary_nocall b=genexp {
+        _PyAST_Call(rhs,
+            CHECK(asdl_expr_seq*, _PyPegen_desugar_pipe(
+                p,
+                CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)),
+                lhs
+            )),
+            NULL,
+            EXTRA
+        )
+    }
+    | lhs=pipe '|>' rhs=primary_nocall '(' arg=[arguments] ')' {
+        _PyAST_Call(
+            rhs,
+            CHECK(asdl_expr_seq*, _PyPegen_desugar_pipe(
+                p,
+                (arg) ? ((expr_ty) arg)->v.Call.args : NULL,
+                lhs
+            )),
+            (arg) ? ((expr_ty) arg)->v.Call.keywords : NULL,
+            EXTRA
+        )
+    }
+    | bitwise_or
+
 bitwise_or[expr_ty]:
     | a=bitwise_or '|' b=bitwise_xor { _PyAST_BinOp(a, BitOr, b, EXTRA) }
     | bitwise_xor
@@ -856,6 +889,13 @@ await_primary[expr_ty] (memo):
     | 'await' a=primary { CHECK_VERSION(expr_ty, 5, "Await expressions are", _PyAST_Await(a, EXTRA)) }
     | primary
 
+# A primary without call forms, so that in `lhs |> f(args)` the trailing
+# call is matched by the pipe rule rather than consumed as part of `f`.
+primary_nocall[expr_ty]:
+    | a=primary_nocall '.' b=NAME { _PyAST_Attribute(a, b->v.Name.id, Load, EXTRA) }
+    | a=primary_nocall '[' b=slices ']' { _PyAST_Subscript(a, b, Load, EXTRA) }
+    | atom
+
 primary[expr_ty]:
     | a=primary '.' b=NAME { _PyAST_Attribute(a, b->v.Name.id, Load, EXTRA) }
     | a=primary b=genexp { _PyAST_Call(a, CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)), NULL, EXTRA) }
@@ -1020,7 +1060,7 @@ dict[expr_ty]:
 double_starred_kvpairs[asdl_seq*]: a=','.double_starred_kvpair+ [','] { a }
 
 double_starred_kvpair[KeyValuePair*]:
-    | '**' a=bitwise_or { _PyPegen_key_value_pair(p, NULL, a) }
+    | '**' a=pipe { _PyPegen_key_value_pair(p, NULL, a) }
     | kvpair
 
 kvpair[KeyValuePair*]: a=expression ':' b=expression { _PyPegen_key_value_pair(p, a, b) }
@@ -1098,7 +1138,7 @@ kwarg_or_double_starred[KeywordOrStarred*]:
 # Generic targets
 # ---------------
 
-# NOTE: star_targets may contain *bitwise_or, targets may not.
+# NOTE: star_targets may contain *pipe, targets may not.
 star_targets[expr_ty]:
     | a=star_target !',' { a }
     | a=star_target b=(',' c=star_target { c })* [','] {
@@ -1266,9 +1306,9 @@ invalid_named_expression(memo):
     | a=expression ':=' expression {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
            a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
-    | a=NAME '=' b=bitwise_or !('='|':=') {
+    | a=NAME '=' b=pipe !('='|':=') {
         RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
-    | !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') {
+    | !(list|tuple|genexp|'True'|'None'|'False') a=pipe b='=' pipe !('='|':=') {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
                                           _PyPegen_get_expr_name(a)) }
 
@@ -1334,7 +1374,7 @@ invalid_comprehension:
     | ('[' | '{') a=star_named_expression b=',' for_if_clauses {
         RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "did you forget parentheses around the comprehension target?") }
 invalid_dict_comprehension:
-    | '{' a='**' bitwise_or for_if_clauses '}' {
+    | '{' a='**' pipe for_if_clauses '}' {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "dict unpacking cannot be used in dict comprehension") }
 invalid_parameters:
     | a="/" ',' {
@@ -1397,7 +1437,7 @@ invalid_with_item:
         RAISE_SYNTAX_ERROR_INVALID_TARGET(STAR_TARGETS, a) }
 
 invalid_for_if_clause:
-    | 'async'? 'for' (bitwise_or (',' bitwise_or)* [',']) !'in' {
+    | 'async'? 'for' (pipe (',' pipe)* [',']) !'in' {
         RAISE_SYNTAX_ERROR("'in' expected after for-loop variables") }
 
 invalid_for_target:
@@ -1531,12 +1571,12 @@ invalid_class_def_raw:
 
 invalid_double_starred_kvpairs:
     | ','.double_starred_kvpair+ ',' invalid_kvpair
-    | expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
+    | expression ':' a='*' pipe { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
     | expression a=':' &('}'|',') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
 invalid_kvpair:
     | a=expression !(':') {
         RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, a->end_lineno, -1, "':' expected after dictionary key") }
-    | expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
+    | expression ':' a='*' pipe { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
     | expression a=':' &('}'|',') {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
 invalid_starred_expression_unpacking:
     | a='*' expression '=' b=expression { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "cannot assign to iterable argument unpacking") }
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 50856686335..3241c007aa8 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -1152,6 +1152,58 @@ _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
     return PyPegen_last_item(comprehension->ifs, expr_ty);
 }
 
+/* Build the positional-argument list for a piped call `lhs |> f(args...)`.
+ *
+ * If exactly one argument in `args` is the bare name `_`, that slot is
+ * replaced by `piped_lhs`; otherwise `piped_lhs` is appended as the last
+ * argument.  `args` may be NULL (a call without positional arguments).
+ *
+ * `args` is never modified: it belongs to the parsed `arguments` node, which
+ * the parser may reuse, so the result is always a fresh arena sequence.
+ *
+ * Returns NULL with an exception set if the allocation fails or if `_`
+ * appears more than once (SyntaxError).
+ */
+asdl_expr_seq *
+_PyPegen_desugar_pipe(Parser *p, asdl_expr_seq *args, expr_ty piped_lhs)
+{
+    // asdl_seq_LEN() yields 0 for a NULL sequence.
+    Py_ssize_t orig_args_len = asdl_seq_LEN(args);
+
+    // Locate the `_` placeholder, rejecting a second occurrence.
+    Py_ssize_t underscore_index = -1;
+    for (Py_ssize_t i = 0; i < orig_args_len; i++) {
+        expr_ty arg = asdl_seq_GET(args, i);
+        if (arg->kind != Name_kind
+            || PyUnicode_CompareWithASCIIString(arg->v.Name.id, "_") != 0) {
+            continue;
+        }
+        if (underscore_index != -1) {
+            return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
+                arg, arg, "only one `_` is allowed when piping");
+        }
+        underscore_index = i;
+    }
+
+    // Without a placeholder the piped value takes one extra trailing slot.
+    Py_ssize_t piped_index =
+        underscore_index != -1 ? underscore_index : orig_args_len;
+    Py_ssize_t new_len =
+        underscore_index != -1 ? orig_args_len : orig_args_len + 1;
+
+    asdl_expr_seq *new_args = _Py_asdl_expr_seq_new(new_len, p->arena);
+    if (new_args == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < orig_args_len; i++) {
+        asdl_seq_SET(new_args, i, asdl_seq_GET(args, i));
+    }
+    // Written last so it overrides the copied `_` when one was found.
+    asdl_seq_SET(new_args, piped_index, piped_lhs);
+
+    return new_args;
+}
+
 expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
                      int lineno, int col_offset, int end_lineno,
                      int end_col_offset, PyArena *arena) {
diff --git a/Parser/pegen.h b/Parser/pegen.h
index be5333eb268..d9557494e6c 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -344,6 +344,7 @@ stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
 KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
 asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
 asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
+asdl_expr_seq *_PyPegen_desugar_pipe(Parser *, asdl_expr_seq *, expr_ty);
 expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
                      int lineno, int col_offset, int end_lineno,
                      int end_col_offset, PyArena *arena);
-- 
2.49.0