284 lines
12 KiB
Diff
284 lines
12 KiB
Diff
From 79b30daff39dc53c4d822250946e1667cae1798e Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Jana=20D=C3=B6nszelmann?= <jana@donsz.nl>
|
|
Date: Mon, 5 Jan 2026 16:14:52 +0100
|
|
Subject: [PATCH] Add `|>` pipe operator to the parser
|
|
|
|
---
|
|
Grammar/Tokens | 1 +
|
|
Grammar/python.gram | 77 ++++++++++++++++++++++++++++++-----------
|
|
Parser/action_helpers.c | 62 +++++++++++++++++++++++++++++++++
|
|
Parser/pegen.h | 1 +
|
|
4 files changed, 120 insertions(+), 21 deletions(-)
|
|
|
|
diff --git a/Grammar/Tokens b/Grammar/Tokens
|
|
index 0547e6ed08f..fd0ce412f6c 100644
|
|
--- a/Grammar/Tokens
|
|
+++ b/Grammar/Tokens
|
|
@@ -59,6 +59,7 @@ RARROW '->'
|
|
ELLIPSIS '...'
|
|
COLONEQUAL ':='
|
|
EXCLAMATION '!'
|
|
+PIPE '|>'
|
|
|
|
OP
|
|
TYPE_IGNORE
|
|
diff --git a/Grammar/python.gram b/Grammar/python.gram
|
|
index 110136af81b..30f7950d1a9 100644
|
|
--- a/Grammar/python.gram
|
|
+++ b/Grammar/python.gram
|
|
@@ -726,13 +726,13 @@ star_expressions[expr_ty]:
|
|
| star_expression
|
|
|
|
star_expression[expr_ty] (memo):
|
|
- | '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
|
|
+ | '*' a=pipe { _PyAST_Starred(a, Load, EXTRA) }
|
|
| expression
|
|
|
|
star_named_expressions[asdl_expr_seq*]: a[asdl_expr_seq*]=','.star_named_expression+ [','] { a }
|
|
|
|
star_named_expression[expr_ty]:
|
|
- | '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
|
|
+ | '*' a=pipe { _PyAST_Starred(a, Load, EXTRA) }
|
|
| named_expression
|
|
|
|
assignment_expression[expr_ty]:
|
|
@@ -767,13 +767,13 @@ inversion[expr_ty] (memo):
|
|
# --------------------
|
|
|
|
comparison[expr_ty]:
|
|
- | a=bitwise_or b=compare_op_bitwise_or_pair+ {
|
|
+ | a=pipe b=compare_op_bitwise_or_pair+ {
|
|
_PyAST_Compare(
|
|
a,
|
|
CHECK(asdl_int_seq*, _PyPegen_get_cmpops(p, b)),
|
|
CHECK(asdl_expr_seq*, _PyPegen_get_exprs(p, b)),
|
|
EXTRA) }
|
|
- | bitwise_or
|
|
+ | pipe
|
|
|
|
compare_op_bitwise_or_pair[CmpopExprPair*]:
|
|
| eq_bitwise_or
|
|
@@ -787,21 +787,51 @@ compare_op_bitwise_or_pair[CmpopExprPair*]:
|
|
| isnot_bitwise_or
|
|
| is_bitwise_or
|
|
|
|
-eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) }
|
|
+eq_bitwise_or[CmpopExprPair*]: '==' a=pipe { _PyPegen_cmpop_expr_pair(p, Eq, a) }
|
|
noteq_bitwise_or[CmpopExprPair*]:
-    | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
+    | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=pipe {_PyPegen_cmpop_expr_pair(p, NotEq, a) }  # same operand rule as the other comparison operators
|
|
-lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) }
|
|
-lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) }
|
|
-gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) }
|
|
-gt_bitwise_or[CmpopExprPair*]: '>' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Gt, a) }
|
|
-notin_bitwise_or[CmpopExprPair*]: 'not' 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, NotIn, a) }
|
|
-in_bitwise_or[CmpopExprPair*]: 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, In, a) }
|
|
-isnot_bitwise_or[CmpopExprPair*]: 'is' 'not' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, IsNot, a) }
|
|
-is_bitwise_or[CmpopExprPair*]: 'is' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Is, a) }
|
|
+lte_bitwise_or[CmpopExprPair*]: '<=' a=pipe { _PyPegen_cmpop_expr_pair(p, LtE, a) }
|
|
+lt_bitwise_or[CmpopExprPair*]: '<' a=pipe { _PyPegen_cmpop_expr_pair(p, Lt, a) }
|
|
+gte_bitwise_or[CmpopExprPair*]: '>=' a=pipe { _PyPegen_cmpop_expr_pair(p, GtE, a) }
|
|
+gt_bitwise_or[CmpopExprPair*]: '>' a=pipe { _PyPegen_cmpop_expr_pair(p, Gt, a) }
|
|
+notin_bitwise_or[CmpopExprPair*]: 'not' 'in' a=pipe { _PyPegen_cmpop_expr_pair(p, NotIn, a) }
|
|
+in_bitwise_or[CmpopExprPair*]: 'in' a=pipe { _PyPegen_cmpop_expr_pair(p, In, a) }
|
|
+isnot_bitwise_or[CmpopExprPair*]: 'is' 'not' a=pipe { _PyPegen_cmpop_expr_pair(p, IsNot, a) }
|
|
+is_bitwise_or[CmpopExprPair*]: 'is' a=pipe { _PyPegen_cmpop_expr_pair(p, Is, a) }
|
|
|
|
# Bitwise operators
|
|
# -----------------
|
|
|
|
+pipe[expr_ty]:
+    # `lhs |> name` (no call parentheses): a NamedExpr with `name` as the Store
+    # target and `lhs` as the value.
+    | lhs=pipe '|>' rhs=NAME !'(' {
+        CHECK_VERSION(expr_ty, 8, "Assignment expressions are",
+            _PyAST_NamedExpr(CHECK(expr_ty, _PyPegen_set_expr_context(p, rhs, Store)), lhs, EXTRA))
+    }
+    # `lhs |> f(<genexp>)`: the generator expression is the only explicit
+    # argument; _PyPegen_desugar_pipe combines it with `lhs`.
+    | lhs=pipe '|>' rhs=primary_nocall b=genexp {
+        _PyAST_Call(
+            rhs,
+            CHECK(asdl_expr_seq*, _PyPegen_desugar_pipe(
+                p, CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)), lhs)),
+            NULL,
+            EXTRA)
+    }
+    # `lhs |> f(args...)`: positional arguments go through _PyPegen_desugar_pipe,
+    # keywords are passed on as they are. CHECK flags a NULL from the helper as a
+    # parser error, so it is not used to build a call with a NULL argument list.
+    | lhs=pipe '|>' rhs=primary_nocall '(' arg=[arguments] ')' {
+        _PyAST_Call(
+            rhs,
+            CHECK(asdl_expr_seq*, _PyPegen_desugar_pipe(
+                p, (arg) ? ((expr_ty) arg)->v.Call.args : NULL, lhs)),
+            (arg) ? ((expr_ty) arg)->v.Call.keywords : NULL,
+            EXTRA)
+    }
+    | bitwise_or
|
|
+
|
|
bitwise_or[expr_ty]:
|
|
| a=bitwise_or '|' b=bitwise_xor { _PyAST_BinOp(a, BitOr, b, EXTRA) }
|
|
| bitwise_xor
|
|
@@ -856,6 +886,11 @@ await_primary[expr_ty] (memo):
|
|
| 'await' a=primary { CHECK_VERSION(expr_ty, 5, "Await expressions are", _PyAST_Await(a, EXTRA)) }
|
|
| primary
|
|
|
|
+primary_nocall[expr_ty]:  # callee of a `|>` stage: attribute access and subscripts only, no calls
+    | value=primary_nocall '.' attr=NAME { _PyAST_Attribute(value, attr->v.Name.id, Load, EXTRA) }
+    | value=primary_nocall '[' idx=slices ']' { _PyAST_Subscript(value, idx, Load, EXTRA) }
+    | atom
|
|
+
|
|
primary[expr_ty]:
|
|
| a=primary '.' b=NAME { _PyAST_Attribute(a, b->v.Name.id, Load, EXTRA) }
|
|
| a=primary b=genexp { _PyAST_Call(a, CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)), NULL, EXTRA) }
|
|
@@ -1020,7 +1055,7 @@ dict[expr_ty]:
|
|
double_starred_kvpairs[asdl_seq*]: a=','.double_starred_kvpair+ [','] { a }
|
|
|
|
double_starred_kvpair[KeyValuePair*]:
|
|
- | '**' a=bitwise_or { _PyPegen_key_value_pair(p, NULL, a) }
|
|
+ | '**' a=pipe { _PyPegen_key_value_pair(p, NULL, a) }
|
|
| kvpair
|
|
|
|
kvpair[KeyValuePair*]: a=expression ':' b=expression { _PyPegen_key_value_pair(p, a, b) }
|
|
@@ -1098,7 +1133,7 @@ kwarg_or_double_starred[KeywordOrStarred*]:
|
|
# Generic targets
|
|
# ---------------
|
|
|
|
-# NOTE: star_targets may contain *bitwise_or, targets may not.
|
|
+# NOTE: star_targets may contain *pipe, targets may not.
|
|
star_targets[expr_ty]:
|
|
| a=star_target !',' { a }
|
|
| a=star_target b=(',' c=star_target { c })* [','] {
|
|
@@ -1266,9 +1301,9 @@ invalid_named_expression(memo):
|
|
| a=expression ':=' expression {
|
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
|
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
|
|
- | a=NAME '=' b=bitwise_or !('='|':=') {
|
|
+ | a=NAME '=' b=pipe !('='|':=') {
|
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
|
|
- | !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') {
|
|
+ | !(list|tuple|genexp|'True'|'None'|'False') a=pipe b='=' pipe !('='|':=') {
|
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
|
|
_PyPegen_get_expr_name(a)) }
|
|
|
|
@@ -1334,7 +1369,7 @@ invalid_comprehension:
|
|
| ('[' | '{') a=star_named_expression b=',' for_if_clauses {
|
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "did you forget parentheses around the comprehension target?") }
|
|
invalid_dict_comprehension:
|
|
- | '{' a='**' bitwise_or for_if_clauses '}' {
|
|
+ | '{' a='**' pipe for_if_clauses '}' {
|
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "dict unpacking cannot be used in dict comprehension") }
|
|
invalid_parameters:
|
|
| a="/" ',' {
|
|
@@ -1397,7 +1432,7 @@ invalid_with_item:
|
|
RAISE_SYNTAX_ERROR_INVALID_TARGET(STAR_TARGETS, a) }
|
|
|
|
invalid_for_if_clause:
|
|
- | 'async'? 'for' (bitwise_or (',' bitwise_or)* [',']) !'in' {
|
|
+ | 'async'? 'for' (pipe (',' pipe)* [',']) !'in' {
|
|
RAISE_SYNTAX_ERROR("'in' expected after for-loop variables") }
|
|
|
|
invalid_for_target:
|
|
@@ -1531,12 +1566,12 @@ invalid_class_def_raw:
|
|
|
|
invalid_double_starred_kvpairs:
|
|
| ','.double_starred_kvpair+ ',' invalid_kvpair
|
|
- | expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
|
|
+ | expression ':' a='*' pipe { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
|
|
| expression a=':' &('}'|',') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
|
|
invalid_kvpair:
|
|
| a=expression !(':') {
|
|
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, a->end_lineno, -1, "':' expected after dictionary key") }
|
|
- | expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
|
|
+ | expression ':' a='*' pipe { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
|
|
| expression a=':' &('}'|',') {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
|
|
invalid_starred_expression_unpacking:
|
|
| a='*' expression '=' b=expression { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "cannot assign to iterable argument unpacking") }
|
|
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
|
|
index 50856686335..3241c007aa8 100644
|
|
--- a/Parser/action_helpers.c
|
|
+++ b/Parser/action_helpers.c
|
|
@@ -1152,6 +1152,68 @@ _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
|
|
return PyPegen_last_item(comprehension->ifs, expr_ty);
|
|
}
|
|
|
|
+/*
+ * Build the positional argument list for the call that `lhs |> f(args...)`
+ * desugars to.
+ *
+ * If one element of `args` is the bare name `_`, `piped_lhs` takes its place;
+ * otherwise `piped_lhs` is appended after the existing elements. A NULL `args`
+ * is treated as an empty list. Only the elements of `args` themselves are
+ * inspected: a `_` nested inside another expression is not a placeholder.
+ *
+ * `args` is left untouched (the grammar hands in the argument list of an
+ * already-built Call node); the result is a new sequence in p->arena.
+ *
+ * Returns NULL on failure: allocation failed, or `_` appeared more than once
+ * (reported as a SyntaxError on the second occurrence).
+ */
+asdl_expr_seq *
+_PyPegen_desugar_pipe(Parser *p, asdl_expr_seq *args, expr_ty piped_lhs)
+{
+    Py_ssize_t orig_args_len = (args == NULL) ? 0 : asdl_seq_LEN(args);
+
+    // Locate the `_` placeholder, rejecting a second occurrence.
+    Py_ssize_t underscore_index = -1;
+    for (Py_ssize_t i = 0; i < orig_args_len; i++) {
+        expr_ty arg = asdl_seq_GET(args, i);
+
+        // Anything other than a bare `_` name is passed through as written.
+        if (arg->kind != Name_kind
+            || PyUnicode_CompareWithASCIIString(arg->v.Name.id, "_") != 0)
+        {
+            continue;
+        }
+
+        if (underscore_index != -1) {
+            return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
+                arg, arg, "only one `_` is allowed when piping");
+        }
+        underscore_index = i;
+    }
+
+    // With a placeholder the piped value reuses its slot; without one it
+    // gets an extra slot at the end.
+    Py_ssize_t new_len = orig_args_len;
+    Py_ssize_t piped_index = underscore_index;
+    if (underscore_index == -1) {
+        piped_index = orig_args_len;
+        new_len = orig_args_len + 1;
+    }
+
+    asdl_expr_seq *new_args = _Py_asdl_expr_seq_new(new_len, p->arena);
+    if (new_args == NULL) {
+        return NULL;
+    }
+
+    for (Py_ssize_t i = 0; i < orig_args_len; i++) {
+        asdl_seq_SET(new_args, i, asdl_seq_GET(args, i));
+    }
+
+    // Written last so that it overwrites the copied `_` when there is one.
+    asdl_seq_SET(new_args, piped_index, piped_lhs);
+    return new_args;
+}
|
|
+
|
|
expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
|
|
int lineno, int col_offset, int end_lineno,
|
|
int end_col_offset, PyArena *arena) {
|
|
diff --git a/Parser/pegen.h b/Parser/pegen.h
|
|
index be5333eb268..d9557494e6c 100644
|
|
--- a/Parser/pegen.h
|
|
+++ b/Parser/pegen.h
|
|
@@ -344,6 +344,7 @@ stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
|
|
KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
|
|
asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
|
|
asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
|
|
+asdl_expr_seq *_PyPegen_desugar_pipe(Parser *, asdl_expr_seq *, expr_ty);
|
|
expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
|
|
int lineno, int col_offset, int end_lineno,
|
|
int end_col_offset, PyArena *arena);
|
|
--
|
|
2.49.0
|
|
|