pipethon/0001-add-to-parser.patch

284 lines
12 KiB
Diff

From 79b30daff39dc53c4d822250946e1667cae1798e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jana=20D=C3=B6nszelmann?= <jana@donsz.nl>
Date: Mon, 5 Jan 2026 16:14:52 +0100
Subject: [PATCH] add to parser
---
Grammar/Tokens | 1 +
Grammar/python.gram | 77 ++++++++++++++++++++++++++++++-----------
Parser/action_helpers.c | 62 +++++++++++++++++++++++++++++++++
Parser/pegen.h | 1 +
4 files changed, 120 insertions(+), 21 deletions(-)
diff --git a/Grammar/Tokens b/Grammar/Tokens
index 0547e6ed08f..fd0ce412f6c 100644
--- a/Grammar/Tokens
+++ b/Grammar/Tokens
@@ -59,6 +59,7 @@ RARROW '->'
ELLIPSIS '...'
COLONEQUAL ':='
EXCLAMATION '!'
+PIPE '|>'
OP
TYPE_IGNORE
diff --git a/Grammar/python.gram b/Grammar/python.gram
index 110136af81b..30f7950d1a9 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -726,13 +726,13 @@ star_expressions[expr_ty]:
| star_expression
star_expression[expr_ty] (memo):
- | '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
+ | '*' a=pipe { _PyAST_Starred(a, Load, EXTRA) }
| expression
star_named_expressions[asdl_expr_seq*]: a[asdl_expr_seq*]=','.star_named_expression+ [','] { a }
star_named_expression[expr_ty]:
- | '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
+ | '*' a=pipe { _PyAST_Starred(a, Load, EXTRA) }
| named_expression
assignment_expression[expr_ty]:
@@ -767,13 +767,13 @@ inversion[expr_ty] (memo):
# --------------------
comparison[expr_ty]:
- | a=bitwise_or b=compare_op_bitwise_or_pair+ {
+ | a=pipe b=compare_op_bitwise_or_pair+ {
_PyAST_Compare(
a,
CHECK(asdl_int_seq*, _PyPegen_get_cmpops(p, b)),
CHECK(asdl_expr_seq*, _PyPegen_get_exprs(p, b)),
EXTRA) }
- | bitwise_or
+ | pipe
compare_op_bitwise_or_pair[CmpopExprPair*]:
| eq_bitwise_or
@@ -787,21 +787,51 @@ compare_op_bitwise_or_pair[CmpopExprPair*]:
| isnot_bitwise_or
| is_bitwise_or
-eq_bitwise_or[CmpopExprPair*]: '==' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Eq, a) }
+eq_bitwise_or[CmpopExprPair*]: '==' a=pipe { _PyPegen_cmpop_expr_pair(p, Eq, a) }
+# The right operand is `pipe`, the same as for every other comparison operator.
noteq_bitwise_or[CmpopExprPair*]:
-    | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=bitwise_or {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
+    | (tok='!=' { _PyPegen_check_barry_as_flufl(p, tok) ? NULL : tok}) a=pipe {_PyPegen_cmpop_expr_pair(p, NotEq, a) }
-lte_bitwise_or[CmpopExprPair*]: '<=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, LtE, a) }
-lt_bitwise_or[CmpopExprPair*]: '<' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Lt, a) }
-gte_bitwise_or[CmpopExprPair*]: '>=' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, GtE, a) }
-gt_bitwise_or[CmpopExprPair*]: '>' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Gt, a) }
-notin_bitwise_or[CmpopExprPair*]: 'not' 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, NotIn, a) }
-in_bitwise_or[CmpopExprPair*]: 'in' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, In, a) }
-isnot_bitwise_or[CmpopExprPair*]: 'is' 'not' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, IsNot, a) }
-is_bitwise_or[CmpopExprPair*]: 'is' a=bitwise_or { _PyPegen_cmpop_expr_pair(p, Is, a) }
+lte_bitwise_or[CmpopExprPair*]: '<=' a=pipe { _PyPegen_cmpop_expr_pair(p, LtE, a) }
+lt_bitwise_or[CmpopExprPair*]: '<' a=pipe { _PyPegen_cmpop_expr_pair(p, Lt, a) }
+gte_bitwise_or[CmpopExprPair*]: '>=' a=pipe { _PyPegen_cmpop_expr_pair(p, GtE, a) }
+gt_bitwise_or[CmpopExprPair*]: '>' a=pipe { _PyPegen_cmpop_expr_pair(p, Gt, a) }
+notin_bitwise_or[CmpopExprPair*]: 'not' 'in' a=pipe { _PyPegen_cmpop_expr_pair(p, NotIn, a) }
+in_bitwise_or[CmpopExprPair*]: 'in' a=pipe { _PyPegen_cmpop_expr_pair(p, In, a) }
+isnot_bitwise_or[CmpopExprPair*]: 'is' 'not' a=pipe { _PyPegen_cmpop_expr_pair(p, IsNot, a) }
+is_bitwise_or[CmpopExprPair*]: 'is' a=pipe { _PyPegen_cmpop_expr_pair(p, Is, a) }
# Bitwise operators
# -----------------
+# Pipe expressions: `lhs |> target`. The rule is left-recursive, so chains
+# group to the left; it sits between `comparison` and `bitwise_or`.
+#   lhs |> NAME        -> NamedExpr binding lhs to NAME
+#   lhs |> f(genexp)   -> f(genexp, lhs)
+#   lhs |> f(args...)  -> f(args...) with lhs placed by _PyPegen_desugar_pipe
+# The lookahead on the NAME form rejects '(', '.' and '[' so that call targets
+# such as `obj.method(...)` or `table[key](...)` reach the call alternatives
+# instead of being committed to the binding form.
+pipe[expr_ty]:
+    | lhs=pipe '|>' rhs=NAME !('(' | '.' | '[') {
+        CHECK_VERSION(expr_ty, 8, "Assignment expressions are",
+            _PyAST_NamedExpr(CHECK(expr_ty, _PyPegen_set_expr_context(p, rhs, Store)), lhs, EXTRA))
+    }
+    | lhs=pipe '|>' rhs=primary_nocall b=genexp {
+        _PyAST_Call(
+            rhs,
+            CHECK(asdl_expr_seq*, _PyPegen_desugar_pipe(
+                p,
+                CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)),
+                lhs
+            )),
+            NULL,
+            EXTRA
+        )
+    }
+    | lhs=pipe '|>' rhs=primary_nocall '(' arg=[arguments] ')' {
+        _PyAST_Call(
+            rhs,
+            CHECK(asdl_expr_seq*, _PyPegen_desugar_pipe(
+                p,
+                (arg) ? ((expr_ty) arg)->v.Call.args : NULL,
+                lhs
+            )),
+            (arg) ? ((expr_ty) arg)->v.Call.keywords : NULL,
+            EXTRA
+        )
+    }
+    | bitwise_or
+
bitwise_or[expr_ty]:
| a=bitwise_or '|' b=bitwise_xor { _PyAST_BinOp(a, BitOr, b, EXTRA) }
| bitwise_xor
@@ -856,6 +886,11 @@ await_primary[expr_ty] (memo):
| 'await' a=primary { CHECK_VERSION(expr_ty, 5, "Await expressions are", _PyAST_Await(a, EXTRA)) }
| primary
+# An atom optionally followed by attribute accesses and subscriptions.
+# Unlike `primary`, this rule has no call alternatives.
+primary_nocall[expr_ty]:
+    | obj=primary_nocall '.' attr=NAME { _PyAST_Attribute(obj, attr->v.Name.id, Load, EXTRA) }
+    | obj=primary_nocall '[' idx=slices ']' { _PyAST_Subscript(obj, idx, Load, EXTRA) }
+    | atom
+
primary[expr_ty]:
| a=primary '.' b=NAME { _PyAST_Attribute(a, b->v.Name.id, Load, EXTRA) }
| a=primary b=genexp { _PyAST_Call(a, CHECK(asdl_expr_seq*, (asdl_expr_seq*)_PyPegen_singleton_seq(p, b)), NULL, EXTRA) }
@@ -1020,7 +1055,7 @@ dict[expr_ty]:
double_starred_kvpairs[asdl_seq*]: a=','.double_starred_kvpair+ [','] { a }
double_starred_kvpair[KeyValuePair*]:
- | '**' a=bitwise_or { _PyPegen_key_value_pair(p, NULL, a) }
+ | '**' a=pipe { _PyPegen_key_value_pair(p, NULL, a) }
| kvpair
kvpair[KeyValuePair*]: a=expression ':' b=expression { _PyPegen_key_value_pair(p, a, b) }
@@ -1098,7 +1133,7 @@ kwarg_or_double_starred[KeywordOrStarred*]:
# Generic targets
# ---------------
-# NOTE: star_targets may contain *bitwise_or, targets may not.
+# NOTE: star_targets may contain *pipe, targets may not.
star_targets[expr_ty]:
| a=star_target !',' { a }
| a=star_target b=(',' c=star_target { c })* [','] {
@@ -1266,9 +1301,9 @@ invalid_named_expression(memo):
| a=expression ':=' expression {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
- | a=NAME '=' b=bitwise_or !('='|':=') {
+ | a=NAME '=' b=pipe !('='|':=') {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
- | !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') {
+ | !(list|tuple|genexp|'True'|'None'|'False') a=pipe b='=' pipe !('='|':=') {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
_PyPegen_get_expr_name(a)) }
@@ -1334,7 +1369,7 @@ invalid_comprehension:
| ('[' | '{') a=star_named_expression b=',' for_if_clauses {
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "did you forget parentheses around the comprehension target?") }
invalid_dict_comprehension:
- | '{' a='**' bitwise_or for_if_clauses '}' {
+ | '{' a='**' pipe for_if_clauses '}' {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "dict unpacking cannot be used in dict comprehension") }
invalid_parameters:
| a="/" ',' {
@@ -1397,7 +1432,7 @@ invalid_with_item:
RAISE_SYNTAX_ERROR_INVALID_TARGET(STAR_TARGETS, a) }
invalid_for_if_clause:
- | 'async'? 'for' (bitwise_or (',' bitwise_or)* [',']) !'in' {
+ | 'async'? 'for' (pipe (',' pipe)* [',']) !'in' {
RAISE_SYNTAX_ERROR("'in' expected after for-loop variables") }
invalid_for_target:
@@ -1531,12 +1566,12 @@ invalid_class_def_raw:
invalid_double_starred_kvpairs:
| ','.double_starred_kvpair+ ',' invalid_kvpair
- | expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
+ | expression ':' a='*' pipe { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
| expression a=':' &('}'|',') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
invalid_kvpair:
| a=expression !(':') {
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, a->end_lineno, -1, "':' expected after dictionary key") }
- | expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
+ | expression ':' a='*' pipe { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
| expression a=':' &('}'|',') {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
invalid_starred_expression_unpacking:
| a='*' expression '=' b=expression { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "cannot assign to iterable argument unpacking") }
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 50856686335..3241c007aa8 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -1152,6 +1152,68 @@ _PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
return PyPegen_last_item(comprehension->ifs, expr_ty);
}
+/*
+ * Build the positional-argument list for the call on the right-hand side of
+ * a `|>` pipe expression.
+ *
+ *   args       positional arguments written in the call; NULL is treated as
+ *              an empty list.
+ *   piped_lhs  expression on the left of `|>`.
+ *
+ * If one argument is the bare name `_`, piped_lhs takes its place; otherwise
+ * piped_lhs is appended after the existing arguments. A second `_` raises a
+ * syntax error.
+ *
+ * Returns a new sequence allocated in p->arena, or NULL after a syntax error
+ * or when the allocation returns NULL.
+ */
+asdl_expr_seq * _PyPegen_desugar_pipe(Parser * p, asdl_expr_seq* args, expr_ty piped_lhs) {
+    Py_ssize_t orig_args_len = (args == NULL) ? 0 : asdl_seq_LEN(args);
+
+    // Find the `_` placeholder, rejecting a second occurrence.
+    Py_ssize_t underscore_index = -1;
+    for (Py_ssize_t i = 0; i < orig_args_len; i++) {
+        expr_ty arg = asdl_seq_GET(args, i);
+
+        if (arg->kind != Name_kind
+            || PyUnicode_CompareWithASCIIString(arg->v.Name.id, "_") != 0) {
+            continue;
+        }
+
+        if (underscore_index != -1) {
+            return RAISE_SYNTAX_ERROR_KNOWN_RANGE(arg, arg, "only one `_` is allowed when piping");
+        }
+        underscore_index = i;
+    }
+
+    // A substitution keeps the length; an append needs one extra slot.
+    Py_ssize_t new_len = (underscore_index != -1) ? orig_args_len : orig_args_len + 1;
+
+    // Always build a fresh sequence instead of writing into args.
+    // NOTE(review): args belongs to the node produced by the `arguments` rule,
+    // which the parser may still reference; copying leaves that node intact.
+    asdl_expr_seq *new_args = _Py_asdl_expr_seq_new(new_len, p->arena);
+    if (new_args == NULL) {
+        return NULL;
+    }
+
+    for (Py_ssize_t i = 0; i < orig_args_len; i++) {
+        asdl_seq_SET(new_args, i,
+                     (i == underscore_index) ? piped_lhs : asdl_seq_GET(args, i));
+    }
+    if (underscore_index == -1) {
+        // No placeholder: the piped value becomes the last positional argument.
+        asdl_seq_SET(new_args, orig_args_len, piped_lhs);
+    }
+
+    return new_args;
+}
+
expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
int lineno, int col_offset, int end_lineno,
int end_col_offset, PyArena *arena) {
diff --git a/Parser/pegen.h b/Parser/pegen.h
index be5333eb268..d9557494e6c 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -344,6 +344,7 @@ stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
+asdl_expr_seq *_PyPegen_desugar_pipe(Parser *, asdl_expr_seq *, expr_ty);
expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
int lineno, int col_offset, int end_lineno,
int end_col_offset, PyArena *arena);
--
2.49.0