Hide code cell source
from IPython.display import YouTubeVideo

YouTubeVideo("I2ICNT56Rdc")

EGRAPHS Community Call Talk#

Egglog as a tool for building an optimizing composable type safe DSLs in Python

… to help drive theoretical development of e-graphs in conjunction with impacting (large) real world communities.

Now that I have this great e-graph library in Python, what extra mechanisms do I need to make it useful in existing Python code?

This talk will go thorugh a few techniques developed and also point to how by bringing in use cases from scientific Python can help drive further theoretic research

Optimizing Scikit-learn with Numba#

We are going to work through the different pieces needed to optimize a Scikit-learn pipeline using Numba and egglog.

from __future__ import annotations

import sklearn
from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Tell sklearn to treat arrays as following array API
sklearn.set_config(array_api_dispatch=True)

X_np, y_np = make_classification(random_state=0, n_samples=1000000)


# Assumption: I want to optimize calling this many times on data similar to that above
def run_lda(x, y):
    lda = LinearDiscriminantAnalysis()
    return lda.fit(x, y).transform(x)

We can do this using egglog to generate Python code and Numba to JIT compile it to LLVM, resulting in a speedup:

# The first thing we need to do is create our symbolic arrays and get back our symbolic output

from egglog.exp.array_api import *

X_arr = NDArray.var("X")
assume_dtype(X_arr, X_np.dtype)
assume_shape(X_arr, X_np.shape)
assume_isfinite(X_arr)

y_arr = NDArray.var("y")
assume_dtype(y_arr, y_np.dtype)
assume_shape(y_arr, y_np.shape)
assume_value_one_of(y_arr, tuple(map(int, np.unique(y_np))))  # type: ignore[arg-type]

with EGraph():
    res = run_lda(X_arr, y_arr)
res
_NDArray_1 = NDArray.var("X")
assume_dtype(_NDArray_1, DType.float64)
assume_shape(_NDArray_1, TupleInt(Int(1000000)) + TupleInt(Int(20)))
assume_isfinite(_NDArray_1)
_NDArray_2 = NDArray.var("y")
assume_dtype(_NDArray_2, DType.int64)
assume_shape(_NDArray_2, TupleInt(Int(1000000)))
assume_value_one_of(_NDArray_2, TupleValue(Value.int(Int(0))) + TupleValue(Value.int(Int(1))))
_NDArray_3 = asarray(reshape(asarray(_NDArray_2), TupleInt(Int(-1))))
_NDArray_4 = astype(unique_counts(_NDArray_3)[Int(1)], asarray(_NDArray_1).dtype) / NDArray.scalar(Value.float(Float(1000000.0)))
_NDArray_5 = zeros(
    TupleInt(unique_inverse(_NDArray_3)[Int(0)].shape[Int(0)]) + TupleInt(asarray(_NDArray_1).shape[Int(1)]),
    OptionalDType.some(asarray(_NDArray_1).dtype),
    OptionalDevice.some(asarray(_NDArray_1).device),
)
_MultiAxisIndexKey_1 = MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice()))
_IndexKey_1 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(0))) + _MultiAxisIndexKey_1)
_OptionalIntOrTuple_1 = OptionalIntOrTuple.some(IntOrTuple.int(Int(0)))
_NDArray_5[_IndexKey_1] = mean(asarray(_NDArray_1)[ndarray_index(unique_inverse(_NDArray_3)[Int(1)] == NDArray.scalar(Value.int(Int(0))))], _OptionalIntOrTuple_1)
_IndexKey_2 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(1))) + _MultiAxisIndexKey_1)
_NDArray_5[_IndexKey_2] = mean(asarray(_NDArray_1)[ndarray_index(unique_inverse(_NDArray_3)[Int(1)] == NDArray.scalar(Value.int(Int(1))))], _OptionalIntOrTuple_1)
_NDArray_6 = unique_values(concat(TupleNDArray(unique_values(asarray(_NDArray_3)))))
_NDArray_7 = concat(
    TupleNDArray(asarray(_NDArray_1)[ndarray_index(_NDArray_3 == _NDArray_6[IndexKey.int(Int(0))])] - _NDArray_5[_IndexKey_1])
    + TupleNDArray(asarray(_NDArray_1)[ndarray_index(_NDArray_3 == _NDArray_6[IndexKey.int(Int(1))])] - _NDArray_5[_IndexKey_2]),
    OptionalInt.some(Int(0)),
)
_NDArray_8 = std(_NDArray_7, _OptionalIntOrTuple_1)
_NDArray_8[ndarray_index(std(_NDArray_7, _OptionalIntOrTuple_1) == NDArray.scalar(Value.int(Int(0))))] = NDArray.scalar(Value.float(Float(1.0)))
_TupleNDArray_1 = svd(
    sqrt(asarray(NDArray.scalar(Value.float(Float(1.0) / Float.from_int(asarray(_NDArray_1).shape[Int(0)] - _NDArray_6.shape[Int(0)]))))) * (_NDArray_7 / _NDArray_8), FALSE
)
_Slice_1 = Slice(OptionalInt.none, OptionalInt.some(sum(astype(_TupleNDArray_1[Int(1)] > NDArray.scalar(Value.float(Float(0.0001))), DType.int32)).to_value().to_int))
_NDArray_9 = (_TupleNDArray_1[Int(2)][IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(_Slice_1)) + _MultiAxisIndexKey_1)] / _NDArray_8).T / _TupleNDArray_1[
    Int(1)
][IndexKey.slice(_Slice_1)]
_TupleNDArray_2 = svd(
    (
        sqrt(
            (NDArray.scalar(Value.int(asarray(_NDArray_1).shape[Int(0)])) * _NDArray_4)
            * NDArray.scalar(Value.float(Float(1.0) / Float.from_int(_NDArray_6.shape[Int(0)] - Int(1))))
        )
        * (_NDArray_5 - (_NDArray_4 @ _NDArray_5)).T
    ).T
    @ _NDArray_9,
    FALSE,
)
(
    (asarray(_NDArray_1) - (_NDArray_4 @ _NDArray_5))
    @ (
        _NDArray_9
        @ _TupleNDArray_2[Int(2)].T[
            IndexKey.multi_axis(
                _MultiAxisIndexKey_1
                + MultiAxisIndexKey(
                    MultiAxisIndexKeyItem.slice(
                        Slice(
                            OptionalInt.none,
                            OptionalInt.some(
                                sum(astype(_TupleNDArray_2[Int(1)] > (NDArray.scalar(Value.float(Float(0.0001))) * _TupleNDArray_2[Int(1)][IndexKey.int(Int(0))]), DType.int32))
                                .to_value()
                                .to_int
                            ),
                        )
                    )
                )
            )
        ]
    )
)[IndexKey.multi_axis(_MultiAxisIndexKey_1 + MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice(OptionalInt.none, OptionalInt.some(_NDArray_6.shape[Int(0)] - Int(1))))))]

In order to run this, scikit-learn treated these objects as “array like”, meaning they conformed to the Array API.

Conversions: From Python to egglog#

Use conversions if you want your egglog API to be called with existing Python objects, without manually upcasting them

We will see this in our example:

class LinearDiscriminantAnalysis:
    ...
    def fit(self, X, y):
        ...
        _, cnts = xp.unique_counts(y)  # non-negative ints
        self.priors_ = xp.astype(cnts, X.dtype) / float(y.shape[0])

Ends up resulting in this expression:

astype(unique_counts(_NDArray_3)[Int(1)], asarray(_NDArray_1).dtype) / NDArray.scalar(Value.float(Float(1000000.0)))

How?

We have exposed a global conversion logic, where if you pass an arg to egglog and it isn’t the correct type, it will try to upcast the arg to the required egglog type.

There is a graph of all conversions and it will find the shortest path from the input to the desired type and automatically upcast to that.

Example#

For example, in indexing, if we do a slice (i.e. 1:10:2), we convert this to our custom egglog Slice expressions:

class Slice(Expr):
    def __init__(
        self,
        start: OptionalInt = OptionalInt.none,
        stop: OptionalInt = OptionalInt.none,
        step: OptionalInt = OptionalInt.none,
    ) -> None: ...


converter(
    slice,
    Slice,
    lambda x: Slice(
        convert(x.start, OptionalInt),
        convert(x.stop, OptionalInt),
        convert(x.step, OptionalInt),
    ),
)
class A(Expr):
    def __init__(self) -> None: ...
    def __getitem__(self, s: Slice) -> Int: ...
A()[:1:2]  # Pytohn desugars this to A()[slice(None, 1, 2)]
A()[Slice(OptionalInt.none, OptionalInt.some(Int(1)), OptionalInt.some(Int(2)))]

Preserved Methods#

If you need your egglog objects to interact with Python control flow, you can use preserved methods to stop, compile, and return an eager result to Python

In the fit function in sklearn, there are complicated analysis that must be done eagerly, like this one, which depends on knowing the priors, which are based on the counts of the classes in the training data, which we provided:

class LinearDiscriminantAnalysis:
    def fit(self, x, y):
        ...
        if xp.abs(xp.sum(self.priors_) - 1.0) > 1e-5:
            warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning)
            self.priors_ = self.priors_ / self.priors_.sum()

That is why we have to provide the metadata about the arrays, so we can reduce this expression to a boolean, using some interval analysis:

_NDArray_1 = NDArray.var("y")
assume_dtype(_NDArray_1, DType.int64)
assume_shape(_NDArray_1, TupleInt(Int(1000000)))
assume_value_one_of(_NDArray_1, TupleValue(Value.int(Int(0))) + TupleValue(Value.int(Int(1))))
_NDArray_2 = NDArray.var("X")
assume_dtype(_NDArray_2, DType.float64)
assume_shape(_NDArray_2, TupleInt(Int(1000000)) + TupleInt(Int(20)))
assume_isfinite(_NDArray_2)
(
    abs(
        sum(astype(unique_counts(asarray(reshape(asarray(_NDArray_1), TupleInt(Int(-1)))))[Int(1)], asarray(_NDArray_2).dtype) / NDArray.scalar(Value.float(Float(1000000.0))))
        - NDArray.scalar(Value.float(Float(1.0)))
    )
    > NDArray.scalar(Value.float(Float(1e-05)))
).to_value().to_bool.bool

But how does it move through the control flow if the expression is lazy? We can implement “preserved methods” which can evaluate an expression eagerly by adding it to the EGraph and evalutating it:

Example#

class Boolean(Expr):
    # Can be constructed from and convert to a primitive egglog bool:
    def __init__(self, b: BoolLike) -> None: ...
    @property
    def bool(self) -> Bool: ...

    # support boolean ops
    def __and__(self, other: Boolean) -> Boolean: ...

    # Can be treated like a Python bool
    @method(preserve=True)
    def __bool__(self) -> bool:
        egraph = EGraph()
        egraph.register(self)
        egraph.run(bool_rewrites.saturate())
        return egraph.eval(self.bool)


x = var("x", Boolean)
y = var("y", Bool)
bool_rewrites = ruleset(
    rule(eq(x).to(Boolean(y))).then(set_(x.bool).to(y)),
    rewrite(Boolean(True) & Boolean(True)).to(Boolean(True)),
)

expr = Boolean(True) & Boolean(True)
expr
Boolean(True) & Boolean(True)
if expr:
    print("yep it's true")
yep it's true

Mutations#

Mark a function or method as mutating the first arg, to translate it to pure function, but which acts imperative.

Another pattern that comes up a lot in Python is methods that mutate their arguments. But egglog is a pure functional language, so how do we support that?

Well we can convert functions that mutate an arg into one that returns a modified value of that argument. That way, we can keep using it with existing imperative methods and things work as they should.

For example, arrays support __setitem__, and this is used by scikit-learn:

class LinearDiscriminantAnalysis:
    ...
    def _solve_svd(self, X, y):
        ...
        # 1) within (univariate) scaling by with classes std-dev
        std = xp.std(Xc, axis=0)
        # avoid division by zero in normalization
        std[std == 0] = 1.0

This will be translated to the following expressions, where there will be a new array created in the graph for the modified version:

_NDArray_8 = std(_NDArray_7, _OptionalIntOrTuple_1)
_NDArray_8[ndarray_index(std(_NDArray_7, _OptionalIntOrTuple_1) == NDArray.scalar(Value.int(Int(0))))] = NDArray.scalar(Value.float(Float(1.0)))

Example#

We can see a simpler example of this below:

class ListOfInts(Expr):
    def __init__(self) -> None: ...
    def __getitem__(self, i: i64Like) -> Int: ...

    def __setitem__(self, i: i64Like, v: Int) -> None: ...


xs = ListOfInts()
xs[0] = Int(1)

egraph = EGraph()
egraph.register(xs[0])
egraph.display()
../_images/43e0d9c2daf95abfc7102fdd2ea8dd8f67193e0d8ae7d4a2296fd7ee3efa5bc3.svg

Subsumption#

mark a rewrite as subsumed to replace a smaller expression with a larger one

Now that we have a program, what do we do with it? Well first, we can optimize it, running rewrites, including those to translate from Numba forms to others.

We have added “subsumption” to egglog, to support directional rewrites, so that the left hand side is not extractable and not matchable. This is handy when we want to extract a value with more expressions or a higher cost, in a particular instance:

@array_api_numba_ruleset.register
def _mean(y: NDArray, x: NDArray, i: Int):
    axis = OptionalIntOrTuple.some(IntOrTuple.int(i))
    res = sum(x, axis) / NDArray.scalar(Value.int(x.shape[i]))

    yield rewrite(mean(x, axis, FALSE), subsume=True).to(res)
    yield rewrite(mean(x, axis, TRUE), subsume=True).to(expand_dims(res, i))

We can optimize this with the numba rules and we can see this rule take place in the _NDArray_9 line:

from egglog.exp.array_api_numba import array_api_numba_schedule

simplified_res = EGraph().simplify(res, array_api_numba_schedule)
simplified_res
_NDArray_1 = NDArray.var("X")
assume_dtype(_NDArray_1, DType.float64)
assume_shape(_NDArray_1, TupleInt(Int(1000000)) + TupleInt(Int(20)))
assume_isfinite(_NDArray_1)
_NDArray_2 = NDArray.var("y")
assume_dtype(_NDArray_2, DType.int64)
assume_shape(_NDArray_2, TupleInt(Int(1000000)))
assume_value_one_of(_NDArray_2, TupleValue(Value.int(Int(0))) + TupleValue(Value.int(Int(1))))
_NDArray_3 = astype(
    NDArray.vector(TupleValue(sum(_NDArray_2 == NDArray.scalar(Value.int(Int(0)))).to_value()) + TupleValue(sum(_NDArray_2 == NDArray.scalar(Value.int(Int(1)))).to_value())),
    DType.float64,
) / NDArray.scalar(Value.float(Float.rational(Rational(1000000, 1))))
_NDArray_4 = zeros(TupleInt(Int(2)) + TupleInt(Int(20)), OptionalDType.some(DType.float64), OptionalDevice.some(_NDArray_1.device))
_MultiAxisIndexKey_1 = MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice()))
_IndexKey_1 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(0))) + _MultiAxisIndexKey_1)
_NDArray_5 = _NDArray_1[ndarray_index(_NDArray_2 == NDArray.scalar(Value.int(Int(0))))]
_OptionalIntOrTuple_1 = OptionalIntOrTuple.some(IntOrTuple.int(Int(0)))
_NDArray_4[_IndexKey_1] = sum(_NDArray_5, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_5.shape[Int(0)]))
_IndexKey_2 = IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.int(Int(1))) + _MultiAxisIndexKey_1)
_NDArray_6 = _NDArray_1[ndarray_index(_NDArray_2 == NDArray.scalar(Value.int(Int(1))))]
_NDArray_4[_IndexKey_2] = sum(_NDArray_6, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_6.shape[Int(0)]))
_NDArray_7 = concat(TupleNDArray(_NDArray_5 - _NDArray_4[_IndexKey_1]) + TupleNDArray(_NDArray_6 - _NDArray_4[_IndexKey_2]), OptionalInt.some(Int(0)))
_NDArray_8 = square(_NDArray_7 - expand_dims(sum(_NDArray_7, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_7.shape[Int(0)]))))
_NDArray_9 = sqrt(sum(_NDArray_8, _OptionalIntOrTuple_1) / NDArray.scalar(Value.int(_NDArray_8.shape[Int(0)])))
_NDArray_10 = copy(_NDArray_9)
_NDArray_10[ndarray_index(_NDArray_9 == NDArray.scalar(Value.int(Int(0))))] = NDArray.scalar(Value.float(Float.rational(Rational(1, 1))))
_TupleNDArray_1 = svd(sqrt(NDArray.scalar(Value.float(Float.rational(Rational(1, 999998))))) * (_NDArray_7 / _NDArray_10), FALSE)
_Slice_1 = Slice(OptionalInt.none, OptionalInt.some(sum(astype(_TupleNDArray_1[Int(1)] > NDArray.scalar(Value.float(Float(0.0001))), DType.int32)).to_value().to_int))
_NDArray_11 = (_TupleNDArray_1[Int(2)][IndexKey.multi_axis(MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(_Slice_1)) + _MultiAxisIndexKey_1)] / _NDArray_10).T / _TupleNDArray_1[
    Int(1)
][IndexKey.slice(_Slice_1)]
_TupleNDArray_2 = svd(
    (sqrt((NDArray.scalar(Value.int(Int(1000000))) * _NDArray_3) * NDArray.scalar(Value.float(Float.rational(Rational(1, 1))))) * (_NDArray_4 - (_NDArray_3 @ _NDArray_4)).T).T
    @ _NDArray_11,
    FALSE,
)
(
    (_NDArray_1 - (_NDArray_3 @ _NDArray_4))
    @ (
        _NDArray_11
        @ _TupleNDArray_2[Int(2)].T[
            IndexKey.multi_axis(
                _MultiAxisIndexKey_1
                + MultiAxisIndexKey(
                    MultiAxisIndexKeyItem.slice(
                        Slice(
                            OptionalInt.none,
                            OptionalInt.some(
                                sum(astype(_TupleNDArray_2[Int(1)] > (NDArray.scalar(Value.float(Float(0.0001))) * _TupleNDArray_2[Int(1)][IndexKey.int(Int(0))]), DType.int32))
                                .to_value()
                                .to_int
                            ),
                        )
                    )
                )
            )
        ]
    )
)[IndexKey.multi_axis(_MultiAxisIndexKey_1 + MultiAxisIndexKey(MultiAxisIndexKeyItem.slice(Slice(OptionalInt.none, OptionalInt.some(Int(1))))))]

Program Gen#

Generate an imperative program from your e-graph with replacement rules that walk the graph in a fixed order

Now that we have a program, what do we do with it?

Well we showed how we can use eager evaluation to get a result, but what if we don’t want to do the computation in egglog, but instead export a program so we can execute that back in Python or in this case feed it to Python?

Well in this case we have designed a Program object which we can use to convert a funtional egglog expression back to imperative Python code:

from egglog.exp.array_api_program_gen import *

egraph = EGraph()
fn_program = egraph.let(
    "fn_program",
    ndarray_function_two(simplified_res, NDArray.var("X"), NDArray.var("y")),
)
egraph.run(array_api_program_gen_schedule)
fn = egraph.eval(fn_program.py_object)

fn
<function __fn(X, y)>
import inspect

print(inspect.getsource(fn))
def __fn(X, y):
    assert X.dtype == np.dtype(np.float64)
    assert X.shape == (1000000, 20,)
    assert np.all(np.isfinite(X))
    assert y.dtype == np.dtype(np.int64)
    assert y.shape == (1000000,)
    assert set(np.unique(y)) == set((0, 1,))
    _0 = y == np.array(0)
    _1 = np.sum(_0)
    _2 = y == np.array(1)
    _3 = np.sum(_2)
    _4 = np.array((_1, _3,)).astype(np.dtype(np.float64))
    _5 = _4 / np.array(float(1000000))
    _6 = np.zeros((2, 20,), dtype=np.dtype(np.float64))
    _7 = np.sum(X[_0], axis=0)
    _8 = _7 / np.array(X[_0].shape[0])
    _6[0, :] = _8
    _9 = np.sum(X[_2], axis=0)
    _10 = _9 / np.array(X[_2].shape[0])
    _6[1, :] = _10
    _11 = _5 @ _6
    _12 = X - _11
    _13 = np.sqrt(np.array(float(1 / 999998)))
    _14 = X[_0] - _6[0, :]
    _15 = X[_2] - _6[1, :]
    _16 = np.concatenate((_14, _15,), axis=0)
    _17 = np.sum(_16, axis=0)
    _18 = _17 / np.array(_16.shape[0])
    _19 = np.expand_dims(_18, 0)
    _20 = _16 - _19
    _21 = np.square(_20)
    _22 = np.sum(_21, axis=0)
    _23 = _22 / np.array(_21.shape[0])
    _24 = np.sqrt(_23)
    _25 = _24 == np.array(0)
    _24[_25] = np.array(float(1))
    _26 = _16 / _24
    _27 = _13 * _26
    _28 = np.linalg.svd(_27, full_matrices=False)
    _29 = _28[1] > np.array(0.0001)
    _30 = _29.astype(np.dtype(np.int32))
    _31 = np.sum(_30)
    _32 = _28[2][:_31, :] / _24
    _33 = _32.T / _28[1][:_31]
    _34 = np.array(1000000) * _5
    _35 = _34 * np.array(float(1))
    _36 = np.sqrt(_35)
    _37 = _6 - _11
    _38 = _36 * _37.T
    _39 = _38.T @ _33
    _40 = np.linalg.svd(_39, full_matrices=False)
    _41 = np.array(0.0001) * _40[1][0]
    _42 = _40[1] > _41
    _43 = _42.astype(np.dtype(np.int32))
    _44 = np.sum(_43)
    _45 = _33 @ _40[2].T[:, :_44]
    _46 = _12 @ _45
    return _46[:, :1]

From there we can complete our work, by optimizing with numba and we can call with our original values:

from numba import njit

njit(fn)(X_np, y_np)
/tmp/egglog-b8cb9176-c3d4-4aa3-905f-da2bb0b3ec4c.py:56: NumbaPerformanceWarning: '@' is faster on contiguous arrays, called on (Array(float64, 2, 'C', False, aligned=True), Array(float64, 2, 'A', False, aligned=True))
  _45 = _33 @ _40[2].T[:, :_44]
array([[ 0.64233002],
       [ 0.63661245],
       [-1.603293  ],
       ...,
       [-1.1506433 ],
       [ 0.71687176],
       [-1.51119579]])

These program rewrites work by first translating the code into an intermediate IR of just program assignments and statements, and then turning this into source. It does this by walking the graph first top to bottom, recording the first parent that saw every child. Then it goes from bottom to top, building up a larger set of statements as well as an expression representing each node. If a node has already been emitted somewhere else, we record that, and we always wait for all the children to complete before moving forward. That way, we can enforce some ordering and we know that everything will be emitted only once.

Example#

Here is a small example, we might want to compile, let’s say we want a function like this:

def __fn(x, y)
    x[1] = 10
    z = x + x
    return sum(z) + y

We can build up a functional program for this and then compile it to Python source:

from egglog.exp.program_gen import *

x = Program("x", is_identifier=True)
y = Program("y", is_identifier=True)
# To reference x, we need to first emit the statement
x_modified = Program("x").statement(x + "[1] = 10")

z = (x_modified + " + " + x_modified).assign()
res = Program("sum(") + z + ") + " + y
fn = res.function_two(x, y)

egraph = EGraph()
egraph.register(fn.compile())
egraph.run(program_gen_ruleset.saturate())
print(egraph.eval(fn.statements))
def __fn(x, y):
    x[1] = 10
    _0 = x + x
    return sum(_0) + y

This happens, by first going top to bottom with the compile, which will put a total ordering on all nodes, by defining one, and only one, parent expression for each expression.

Then, from the bottom up, for each node we compute an expression string and a list of statements string.

egraph
%3 outer_cluster_0 cluster_0 outer_cluster_12 cluster_12 outer_cluster_13 cluster_13 outer_cluster_5 cluster_5 outer_cluster_4 cluster_4 outer_cluster_15 cluster_15 outer_cluster_9 cluster_9 outer_cluster_3 cluster_3 outer_cluster_11 cluster_11 outer_cluster_6 cluster_6 outer_cluster_16 cluster_16 outer_cluster_8 cluster_8 outer_cluster_10 cluster_10 outer_cluster_14 cluster_14 outer_cluster_7 cluster_7 outer_cluster_1 cluster_1 outer_cluster_2 cluster_2 outer_cluster_Program_statements-2546176790493825425-value cluster_Program_statements-2546176790493825425-value outer_cluster_Program_expr-16783941965674463102-value cluster_Program_expr-16783941965674463102-value outer_cluster_Program_expr-8417957797057827878-value cluster_Program_expr-8417957797057827878-value outer_cluster_String-12057499341998549458 cluster_String-12057499341998549458 outer_cluster_String-17944947636783212074 cluster_String-17944947636783212074 outer_cluster_Program_expr-0-value cluster_Program_expr-0-value outer_cluster_Program_statements-10080759905092916392-value cluster_Program_statements-10080759905092916392-value outer_cluster_Program_statements-0-value cluster_Program_statements-0-value outer_cluster_Program_expr-5040379952546458196-value cluster_Program_expr-5040379952546458196-value outer_cluster_Program_statements-14289738803621830331-value cluster_Program_statements-14289738803621830331-value outer_cluster_Program_expr-15952540911656918845-value cluster_Program_expr-15952540911656918845-value outer_cluster_Program_expr-9249358851075372135-value cluster_Program_expr-9249358851075372135-value outer_cluster_Program_expr-5871781006564002453-value cluster_Program_expr-5871781006564002453-value outer_cluster_Program_statements-5040379952546458196-value cluster_Program_statements-5040379952546458196-value outer_cluster_Program_expr-17615343019692007359-value cluster_Program_expr-17615343019692007359-value outer_cluster_Program_expr-10912160959110460649-value cluster_Program_expr-10912160959110460649-value outer_cluster_Program_expr-15121139857639374588-value cluster_Program_expr-15121139857639374588-value outer_cluster_Program_statements-11743562013128004906-value cluster_Program_statements-11743562013128004906-value outer_cluster_String-6174351030322916527 cluster_String-6174351030322916527 outer_cluster_Program_statements-15121139857639374588-value cluster_Program_statements-15121139857639374588-value outer_cluster_Program_statements-4208978898528913939-value cluster_Program_statements-4208978898528913939-value outer_cluster_Program_statements-3377577844511369682-value cluster_Program_statements-3377577844511369682-value outer_cluster_String-11719120134779589971 cluster_String-11719120134779589971 outer_cluster_Program_expr-14289738803621830331-value cluster_Program_expr-14289738803621830331-value outer_cluster_Program_expr-4208978898528913939-value cluster_Program_expr-4208978898528913939-value outer_cluster_Program_expr-10080759905092916392-value cluster_Program_expr-10080759905092916392-value outer_cluster_Program_statements-9249358851075372135-value cluster_Program_statements-9249358851075372135-value outer_cluster_Program_expr-11743562013128004906-value cluster_Program_expr-11743562013128004906-value outer_cluster_Program_expr-3377577844511369682-value cluster_Program_expr-3377577844511369682-value outer_cluster_Program_statements-15952540911656918845-value cluster_Program_statements-15952540911656918845-value outer_cluster_Program_statements-8417957797057827878-value cluster_Program_statements-8417957797057827878-value outer_cluster_String-16659035501073256595 cluster_String-16659035501073256595 outer_cluster_Program_statements-16783941965674463102-value cluster_Program_statements-16783941965674463102-value outer_cluster_Program_expr-1714775736476281168-value cluster_Program_expr-1714775736476281168-value outer_cluster_Program_statements-10912160959110460649-value cluster_Program_statements-10912160959110460649-value outer_cluster_String-4817458461998925159 cluster_String-4817458461998925159 outer_cluster_Program_statements-5871781006564002453-value cluster_Program_statements-5871781006564002453-value outer_cluster_String-63661089271772003 cluster_String-63661089271772003 outer_cluster_Program_expr-2546176790493825425-value cluster_Program_expr-2546176790493825425-value outer_cluster_Program_statements-1714775736476281168-value cluster_Program_statements-1714775736476281168-value outer_cluster_Program_statements-17615343019692007359-value cluster_Program_statements-17615343019692007359-value outer_cluster_Program_compile-395596399104602408-value cluster_Program_compile-395596399104602408-value outer_cluster_Program_compile-2868860904042873558-value cluster_Program_compile-2868860904042873558-value outer_cluster_Program_compile-6662973804773207269-value cluster_Program_compile-6662973804773207269-value outer_cluster_Program_compile-17845845248991523397-value cluster_Program_compile-17845845248991523397-value outer_cluster_Program_compile-8179951341697187233-value cluster_Program_compile-8179951341697187233-value outer_cluster_Program_compile-12930351210441812130-value cluster_Program_compile-12930351210441812130-value outer_cluster_Program_compile-1912573936028582372-value cluster_Program_compile-1912573936028582372-value outer_cluster_Program_compile-1351883367118893594-value cluster_Program_compile-1351883367118893594-value outer_cluster_Program_compile-12369660641532123352-value cluster_Program_compile-12369660641532123352-value outer_cluster_Program_compile-5145996267849227305-value cluster_Program_compile-5145996267849227305-value outer_cluster_Program_compile-7619260772787498455-value cluster_Program_compile-7619260772787498455-value outer_cluster_Program_compile-0-value cluster_Program_compile-0-value outer_cluster_Program_compile-5706686836758916083-value cluster_Program_compile-5706686836758916083-value outer_cluster_Program_compile-11413373673517832166-value cluster_Program_compile-11413373673517832166-value outer_cluster_Program_compile-10457086705503540980-value cluster_Program_compile-10457086705503540980-value outer_cluster_Program_compile-9896396136593852202-value cluster_Program_compile-9896396136593852202-value outer_cluster_Program_compile-956286968014291186-value cluster_Program_compile-956286968014291186-value outer_cluster_Program_is_identifer-17615343019692007359-value cluster_Program_is_identifer-17615343019692007359-value outer_cluster_Program_is_identifer-10080759905092916392-value cluster_Program_is_identifer-10080759905092916392-value outer_cluster_bool-0 cluster_bool-0 outer_cluster_Program_is_identifer-16783941965674463102-value cluster_Program_is_identifer-16783941965674463102-value outer_cluster_Program_is_identifer-8417957797057827878-value cluster_Program_is_identifer-8417957797057827878-value outer_cluster_Program_is_identifer-15121139857639374588-value cluster_Program_is_identifer-15121139857639374588-value outer_cluster_Program_is_identifer-5040379952546458196-value cluster_Program_is_identifer-5040379952546458196-value outer_cluster_Program_is_identifer-1714775736476281168-value cluster_Program_is_identifer-1714775736476281168-value outer_cluster_Program_is_identifer-14289738803621830331-value cluster_Program_is_identifer-14289738803621830331-value outer_cluster_Program_is_identifer-9249358851075372135-value cluster_Program_is_identifer-9249358851075372135-value outer_cluster_Program_is_identifer-11743562013128004906-value cluster_Program_is_identifer-11743562013128004906-value outer_cluster_Program_is_identifer-5871781006564002453-value cluster_Program_is_identifer-5871781006564002453-value outer_cluster_Program_is_identifer-10912160959110460649-value cluster_Program_is_identifer-10912160959110460649-value outer_cluster_Program_is_identifer-4208978898528913939-value cluster_Program_is_identifer-4208978898528913939-value outer_cluster_bool-5871781006564002453 cluster_bool-5871781006564002453 outer_cluster_Program_is_identifer-2546176790493825425-value cluster_Program_is_identifer-2546176790493825425-value outer_cluster_Program_is_identifer-3377577844511369682-value cluster_Program_is_identifer-3377577844511369682-value outer_cluster_Program_is_identifer-0-value cluster_Program_is_identifer-0-value outer_cluster_Program_is_identifer-15952540911656918845-value cluster_Program_is_identifer-15952540911656918845-value outer_cluster_Program_next_sym-10912160959110460649-value cluster_Program_next_sym-10912160959110460649-value outer_cluster_Program_next_sym-15121139857639374588-value cluster_Program_next_sym-15121139857639374588-value outer_cluster_Program_next_sym-8417957797057827878-value cluster_Program_next_sym-8417957797057827878-value outer_cluster_Program_next_sym-11743562013128004906-value cluster_Program_next_sym-11743562013128004906-value outer_cluster_Program_next_sym-4208978898528913939-value cluster_Program_next_sym-4208978898528913939-value outer_cluster_Program_next_sym-15952540911656918845-value cluster_Program_next_sym-15952540911656918845-value outer_cluster_Program_next_sym-0-value cluster_Program_next_sym-0-value outer_cluster_Program_next_sym-17615343019692007359-value cluster_Program_next_sym-17615343019692007359-value outer_cluster_Program_next_sym-10080759905092916392-value cluster_Program_next_sym-10080759905092916392-value outer_cluster_Program_next_sym-14289738803621830331-value cluster_Program_next_sym-14289738803621830331-value outer_cluster_Program_next_sym-1714775736476281168-value cluster_Program_next_sym-1714775736476281168-value outer_cluster_Program_next_sym-5040379952546458196-value cluster_Program_next_sym-5040379952546458196-value outer_cluster_Program_next_sym-16783941965674463102-value cluster_Program_next_sym-16783941965674463102-value outer_cluster_Program_next_sym-2546176790493825425-value cluster_Program_next_sym-2546176790493825425-value outer_cluster_Program_next_sym-5871781006564002453-value cluster_Program_next_sym-5871781006564002453-value outer_cluster_Program_next_sym-3377577844511369682-value cluster_Program_next_sym-3377577844511369682-value outer_cluster_Program_next_sym-9249358851075372135-value cluster_Program_next_sym-9249358851075372135-value outer_cluster_i64-0 cluster_i64-0 outer_cluster_i64-5871781006564002453 cluster_i64-5871781006564002453 Program___init__-13367358156562404939:s->String-17944947636783212074 Program___init__-13367358156562404939:s->bool-0 Program___add__-2651793105796594534:s->Program_parent-15952540911656918845 Program___add__-2651793105796594534:s->Program___init__-15610023194365113096 Program_parent-15952540911656918845:s->Program_assign-10080759905092916392 Program___init__-15610023194365113096:s->bool-0 Program___init__-15610023194365113096:s->String-4817458461998925159 Program_parent-3377577844511369682:s->Program_parent-15952540911656918845 Program_parent-9249358851075372135:s->Program___init__-15610023194365113096 Program___init__-15335597026290359925:s->String-63661089271772003 Program___init__-15335597026290359925:s->bool-5871781006564002453 Program___add__-2671062704490572354:s->Program___init__-5046618212005984996 Program___add__-2671062704490572354:s->Program_expr_to_statement-5040379952546458196 Program___init__-5046618212005984996:s->bool-0 Program___init__-5046618212005984996:s->String-12057499341998549458 Program_expr_to_statement-5040379952546458196:s->Program___add__-1081172882011038115 Program_statement-5996666920560749382:s->Program___init__-5046618212005984996 Program_statement-5996666920560749382:s->Program___add__-1081172882011038115 Program___add__-1081172882011038115:s->Program___init__-10918399218569987449 Program___add__-1081172882011038115:s->Program___init__-16164106357233270148 Program_parent-5871781006564002453:s->Program___init__-5046618212005984996 Program_parent-1714775736476281168:s->Program_expr_to_statement-5040379952546458196 Program___init__-10918399218569987449:s->bool-5871781006564002453 Program___init__-10918399218569987449:s->String-12057499341998549458 Program___init__-16164106357233270148:s->bool-0 Program___init__-16164106357233270148:s->String-11719120134779589971 Program_parent-17615343019692007359:s->Program___init__-16164106357233270148 Program_parent-11743562013128004906:s->Program___init__-10918399218569987449 Program_parent-2546176790493825425:s->Program___init__-15335597026290359925 Program_parent-8417957797057827878:s->Program_parent-15121139857639374588 Program_parent-15121139857639374588:s->Program_parent-9249358851075372135 Program_function_two-5368042394236558780:s->Program___init__-15335597026290359925 Program_function_two-5368042394236558780:s->Program___init__-10918399218569987449 Program_function_two-5368042394236558780:s->Program_parent-15121139857639374588 Program_function_two-5368042394236558780:s->String-16659035501073256595 Program_assign-10080759905092916392:s->Program___add__-13241269951358007050 Program___add__-13241269951358007050:s->Program_parent-5871781006564002453 Program___add__-13241269951358007050:s->Program_parent-16783941965674463102 Program_parent-10080759905092916392:s->Program___add__-13241269951358007050 Program___init__-9195563421463438642:s->bool-0 Program___init__-9195563421463438642:s->String-6174351030322916527 Program_parent-16783941965674463102:s->Program___init__-9195563421463438642 Program_parent-4208978898528913939:s->Program_parent-16783941965674463102 Program_parent-5040379952546458196:s->Program___add__-1081172882011038115 Program___add__-13761752264459356387:s->Program_parent-9249358851075372135 Program___add__-13761752264459356387:s->Program___init__-15335597026290359925 Program___add__-15952540911656918845:s->Program___init__-13367358156562404939 Program___add__-15952540911656918845:s->Program_parent-10080759905092916392 Program_parent-0:s->Program___init__-13367358156562404939 Program___add__-13095445380246898500:s->Program_statement-5996666920560749382 Program___add__-13095445380246898500:s->Program___init__-9195563421463438642 Program_parent-10912160959110460649:s->Program_parent-5871781006564002453 Program_statements-2546176790493825425:s->Program___init__-15335597026290359925 Program_expr-16783941965674463102:s->Program___init__-9195563421463438642 Program_expr-8417957797057827878:s->Program___add__-13761752264459356387 Program_expr-0:s->Program___init__-13367358156562404939 Program_statements-10080759905092916392:s->Program_parent-4208978898528913939 Program_statements-0:s->Program___init__-13367358156562404939 Program_expr-5040379952546458196:s->Program_parent-17615343019692007359 Program_statements-14289738803621830331:s->Program_function_two-5368042394236558780 Program_expr-15952540911656918845:s->Program_parent-10080759905092916392 Program_expr-9249358851075372135:s->Program___init__-15610023194365113096 Program_expr-5871781006564002453:s->Program___init__-5046618212005984996 Program_statements-5040379952546458196:s->Program_parent-17615343019692007359 Program_expr-17615343019692007359:s->Program___init__-16164106357233270148 Program_expr-10912160959110460649:s->Program_statement-5996666920560749382 Program_expr-15121139857639374588:s->Program_parent-3377577844511369682 Program_statements-11743562013128004906:s->Program___init__-10918399218569987449 Program_statements-15121139857639374588:s->Program_parent-9249358851075372135 Program_statements-4208978898528913939:s->Program_parent-16783941965674463102 Program_statements-3377577844511369682:s->Program_parent-15952540911656918845 Program_expr-14289738803621830331:s->Program_parent-2546176790493825425 Program_expr-4208978898528913939:s->Program_parent-10912160959110460649 Program_expr-10080759905092916392:s->Program_parent-4208978898528913939 Program_statements-9249358851075372135:s->Program___init__-15610023194365113096 Program_expr-11743562013128004906:s->Program___init__-10918399218569987449 Program_expr-3377577844511369682:s->Program_parent-0 Program_statements-15952540911656918845:s->Program_parent-10080759905092916392 Program_statements-8417957797057827878:s->Program___add__-13761752264459356387 Program_statements-16783941965674463102:s->Program___init__-9195563421463438642 Program_expr-1714775736476281168:s->Program_parent-5040379952546458196 Program_statements-10912160959110460649:s->Program_parent-1714775736476281168 Program_statements-5871781006564002453:s->Program___init__-5046618212005984996 Program_expr-2546176790493825425:s->Program___init__-15335597026290359925 Program_statements-1714775736476281168:s->Program_parent-5040379952546458196 Program_statements-17615343019692007359:s->Program___init__-16164106357233270148 Program_compile-395596399104602408:s->Program___add__-1081172882011038115 Program_compile-395596399104602408:s->i64-0 Program_compile-2868860904042873558:s->Program___init__-16164106357233270148 Program_compile-2868860904042873558:s->i64-0 Program_compile-6662973804773207269:s->Program___add__-13241269951358007050 Program_compile-6662973804773207269:s->i64-0 Program_compile-17845845248991523397:s->Program___init__-15610023194365113096 Program_compile-17845845248991523397:s->i64-5871781006564002453 Program_compile-8179951341697187233:s->Program___init__-9195563421463438642 Program_compile-8179951341697187233:s->i64-0 Program_compile-12930351210441812130:s->Program___add__-2651793105796594534 Program_compile-12930351210441812130:s->i64-0 Program_compile-1912573936028582372:s->Program___init__-10918399218569987449 Program_compile-1912573936028582372:s->i64-0 Program_compile-1351883367118893594:s->Program___add__-2671062704490572354 Program_compile-1351883367118893594:s->i64-0 Program_compile-12369660641532123352:s->Program_parent-11743562013128004906 Program_compile-12369660641532123352:s->i64-0 Program_compile-5145996267849227305:s->Program___add__-15952540911656918845 Program_compile-5145996267849227305:s->i64-0 Program_compile-7619260772787498455:s->Program_assign-10080759905092916392 Program_compile-7619260772787498455:s->i64-0 Program_compile-0:s->Program___init__-13367358156562404939 Program_compile-0:s->i64-0 Program_compile-5706686836758916083:s->Program___add__-13095445380246898500 Program_compile-5706686836758916083:s->i64-0 Program_compile-11413373673517832166:s->Program_parent-15121139857639374588 Program_compile-11413373673517832166:s->i64-0 Program_compile-10457086705503540980:s->Program___init__-15335597026290359925 Program_compile-10457086705503540980:s->i64-0 Program_compile-9896396136593852202:s->Program_expr_to_statement-5040379952546458196 Program_compile-9896396136593852202:s->i64-0 Program_compile-956286968014291186:s->Program___init__-5046618212005984996 Program_compile-956286968014291186:s->i64-0 Program_is_identifer-17615343019692007359:s->Program___init__-16164106357233270148 Program_is_identifer-10080759905092916392:s->Program_parent-4208978898528913939 Program_is_identifer-16783941965674463102:s->Program___init__-9195563421463438642 Program_is_identifer-8417957797057827878:s->Program_parent-15121139857639374588 Program_is_identifer-15121139857639374588:s->Program_parent-3377577844511369682 Program_is_identifer-5040379952546458196:s->Program_parent-17615343019692007359 Program_is_identifer-1714775736476281168:s->Program_expr_to_statement-5040379952546458196 Program_is_identifer-14289738803621830331:s->Program_parent-8417957797057827878 Program_is_identifer-9249358851075372135:s->Program___init__-15610023194365113096 Program_is_identifer-11743562013128004906:s->Program___init__-10918399218569987449 Program_is_identifer-5871781006564002453:s->Program___init__-5046618212005984996 Program_is_identifer-10912160959110460649:s->Program_statement-5996666920560749382 Program_is_identifer-4208978898528913939:s->Program_parent-10912160959110460649 Program_is_identifer-2546176790493825425:s->Program___init__-15335597026290359925 Program_is_identifer-3377577844511369682:s->Program_parent-0 Program_is_identifer-0:s->Program___init__-13367358156562404939 Program_is_identifer-15952540911656918845:s->Program_parent-10080759905092916392 Program_next_sym-10912160959110460649:s->Program___add__-2671062704490572354 Program_next_sym-15121139857639374588:s->Program___add__-2651793105796594534 Program_next_sym-8417957797057827878:s->Program___add__-13761752264459356387 Program_next_sym-11743562013128004906:s->Program___init__-10918399218569987449 Program_next_sym-4208978898528913939:s->Program___add__-13095445380246898500 Program_next_sym-15952540911656918845:s->Program_assign-10080759905092916392 Program_next_sym-0:s->Program___init__-13367358156562404939 Program_next_sym-17615343019692007359:s->Program___init__-16164106357233270148 Program_next_sym-10080759905092916392:s->Program___add__-13241269951358007050 Program_next_sym-14289738803621830331:s->Program_parent-2546176790493825425 Program_next_sym-1714775736476281168:s->Program_parent-5040379952546458196 Program_next_sym-5040379952546458196:s->Program___add__-1081172882011038115 Program_next_sym-16783941965674463102:s->Program___init__-9195563421463438642 Program_next_sym-2546176790493825425:s->Program___init__-15335597026290359925 Program_next_sym-5871781006564002453:s->Program___init__-5046618212005984996 Program_next_sym-3377577844511369682:s->Program___add__-15952540911656918845 Program_next_sym-9249358851075372135:s->Program___init__-15610023194365113096 Program___init__-13367358156562404939 Program String-17944947636783212074 "sum(" bool-0 false Program___add__-2651793105796594534 · + · Program_parent-15952540911656918845 ·.parent Program___init__-15610023194365113096 Program Program_parent-3377577844511369682 ·.parent Program_parent-9249358851075372135 ·.parent Program___init__-15335597026290359925 Program String-63661089271772003 "y" bool-5871781006564002453 true Program___add__-2671062704490572354 · + · Program___init__-5046618212005984996 Program Program_expr_to_statement-5040379952546458196 ·.expr_to_statement Program_statement-5996666920560749382 ·.statement Program___add__-1081172882011038115 · + · Program_parent-5871781006564002453 ·.parent Program_parent-1714775736476281168 ·.parent Program___init__-10918399218569987449 Program Program___init__-16164106357233270148 Program Program_parent-17615343019692007359 ·.parent Program_parent-11743562013128004906 ·.parent Program_parent-2546176790493825425 ·.parent Program_parent-8417957797057827878 ·.parent Program_parent-15121139857639374588 ·.parent Program_function_two-5368042394236558780 ·.function_two String-16659035501073256595 "__fn" Program_assign-10080759905092916392 ·.assign Program___add__-13241269951358007050 · + · Program_parent-10080759905092916392 ·.parent String-11719120134779589971 "[1] = 10" String-4817458461998925159 ") + " Program___init__-9195563421463438642 Program String-6174351030322916527 " + " Program_parent-16783941965674463102 ·.parent Program_parent-4208978898528913939 ·.parent Program_parent-5040379952546458196 ·.parent Program___add__-13761752264459356387 · + · Program___add__-15952540911656918845 · + · Program_parent-0 ·.parent Program___add__-13095445380246898500 · + · Program_parent-10912160959110460649 ·.parent String-12057499341998549458 "x" Program_statements-2546176790493825425 ·.statements Program_statements-2546176790493825425-value "" Program_expr-16783941965674463102 ·.expr Program_expr-16783941965674463102-value " + " Program_expr-8417957797057827878 ·.expr Program_expr-8417957797057827878-value "sum(_0) + y" Program_expr-0 ·.expr Program_expr-0-value "sum(" Program_statements-10080759905092916392 ·.statements Program_statements-10080759905092916392-value "x[1] = 10 " Program_statements-0 ·.statements Program_statements-0-value "" Program_expr-5040379952546458196 ·.expr Program_expr-5040379952546458196-value "x[1] = 10" Program_statements-14289738803621830331 ·.statements Program_statements-14289738803621830331-value "def __fn(x, y):    x[1] = 10    _0 = x + x    return sum(_0) + y " Program_expr-15952540911656918845 ·.expr Program_expr-15952540911656918845-value "_0" Program_expr-9249358851075372135 ·.expr Program_expr-9249358851075372135-value ") + " Program_expr-5871781006564002453 ·.expr Program_expr-5871781006564002453-value "x" Program_statements-5040379952546458196 ·.statements Program_statements-5040379952546458196-value "" Program_expr-17615343019692007359 ·.expr Program_expr-17615343019692007359-value "[1] = 10" Program_expr-10912160959110460649 ·.expr Program_expr-10912160959110460649-value "x" Program_expr-15121139857639374588 ·.expr Program_expr-15121139857639374588-value "sum(_0) + " Program_statements-11743562013128004906 ·.statements Program_statements-11743562013128004906-value "" Program_statements-15121139857639374588 ·.statements Program_statements-15121139857639374588-value "x[1] = 10 _0 = x + x " Program_statements-4208978898528913939 ·.statements Program_statements-4208978898528913939-value "x[1] = 10 " Program_statements-3377577844511369682 ·.statements Program_statements-3377577844511369682-value "x[1] = 10 _0 = x + x " Program_expr-14289738803621830331 ·.expr Program_expr-14289738803621830331-value "__fn" Program_expr-4208978898528913939 ·.expr Program_expr-4208978898528913939-value "x + " Program_expr-10080759905092916392 ·.expr Program_expr-10080759905092916392-value "x + x" Program_statements-9249358851075372135 ·.statements Program_statements-9249358851075372135-value "" Program_expr-11743562013128004906 ·.expr Program_expr-11743562013128004906-value "x" Program_expr-3377577844511369682 ·.expr Program_expr-3377577844511369682-value "sum(_0" Program_statements-15952540911656918845 ·.statements Program_statements-15952540911656918845-value "x[1] = 10 _0 = x + x " Program_statements-8417957797057827878 ·.statements Program_statements-8417957797057827878-value "x[1] = 10 _0 = x + x " Program_statements-16783941965674463102 ·.statements Program_statements-16783941965674463102-value "" Program_expr-1714775736476281168 ·.expr Program_expr-1714775736476281168-value "" Program_statements-10912160959110460649 ·.statements Program_statements-10912160959110460649-value "x[1] = 10 " Program_statements-5871781006564002453 ·.statements Program_statements-5871781006564002453-value "" Program_expr-2546176790493825425 ·.expr Program_expr-2546176790493825425-value "y" Program_statements-1714775736476281168 ·.statements Program_statements-1714775736476281168-value "x[1] = 10 " Program_statements-17615343019692007359 ·.statements Program_statements-17615343019692007359-value "" Program_compile-395596399104602408 ·.compile i64-0 0 Program_compile-395596399104602408-value () Program_compile-2868860904042873558 ·.compile Program_compile-2868860904042873558-value () Program_compile-6662973804773207269 ·.compile Program_compile-6662973804773207269-value () Program_compile-17845845248991523397 ·.compile i64-5871781006564002453 1 Program_compile-17845845248991523397-value () Program_compile-8179951341697187233 ·.compile Program_compile-8179951341697187233-value () Program_compile-12930351210441812130 ·.compile Program_compile-12930351210441812130-value () Program_compile-1912573936028582372 ·.compile Program_compile-1912573936028582372-value () Program_compile-1351883367118893594 ·.compile Program_compile-1351883367118893594-value () Program_compile-12369660641532123352 ·.compile Program_compile-12369660641532123352-value () Program_compile-5145996267849227305 ·.compile Program_compile-5145996267849227305-value () Program_compile-7619260772787498455 ·.compile Program_compile-7619260772787498455-value () Program_compile-0 ·.compile Program_compile-0-value () Program_compile-5706686836758916083 ·.compile Program_compile-5706686836758916083-value () Program_compile-11413373673517832166 ·.compile Program_compile-11413373673517832166-value () Program_compile-10457086705503540980 ·.compile Program_compile-10457086705503540980-value () Program_compile-9896396136593852202 ·.compile Program_compile-9896396136593852202-value () Program_compile-956286968014291186 ·.compile Program_compile-956286968014291186-value () Program_is_identifer-17615343019692007359 ·.is_identifer Program_is_identifer-17615343019692007359-value false Program_is_identifer-10080759905092916392 ·.is_identifer Program_is_identifer-10080759905092916392-value false Program_is_identifer-16783941965674463102 ·.is_identifer Program_is_identifer-16783941965674463102-value false Program_is_identifer-8417957797057827878 ·.is_identifer Program_is_identifer-8417957797057827878-value false Program_is_identifer-15121139857639374588 ·.is_identifer Program_is_identifer-15121139857639374588-value false Program_is_identifer-5040379952546458196 ·.is_identifer Program_is_identifer-5040379952546458196-value false Program_is_identifer-1714775736476281168 ·.is_identifer Program_is_identifer-1714775736476281168-value false Program_is_identifer-14289738803621830331 ·.is_identifer Program_is_identifer-14289738803621830331-value true Program_is_identifer-9249358851075372135 ·.is_identifer Program_is_identifer-9249358851075372135-value false Program_is_identifer-11743562013128004906 ·.is_identifer Program_is_identifer-11743562013128004906-value true Program_is_identifer-5871781006564002453 ·.is_identifer Program_is_identifer-5871781006564002453-value false Program_is_identifer-10912160959110460649 ·.is_identifer Program_is_identifer-10912160959110460649-value false Program_is_identifer-4208978898528913939 ·.is_identifer Program_is_identifer-4208978898528913939-value false Program_is_identifer-2546176790493825425 ·.is_identifer Program_is_identifer-2546176790493825425-value true Program_is_identifer-3377577844511369682 ·.is_identifer Program_is_identifer-3377577844511369682-value false Program_is_identifer-0 ·.is_identifer Program_is_identifer-0-value false Program_is_identifer-15952540911656918845 ·.is_identifer Program_is_identifer-15952540911656918845-value true Program_next_sym-10912160959110460649 ·.next_sym Program_next_sym-10912160959110460649-value 0 Program_next_sym-15121139857639374588 ·.next_sym Program_next_sym-15121139857639374588-value 1 Program_next_sym-8417957797057827878 ·.next_sym Program_next_sym-8417957797057827878-value 1 Program_next_sym-11743562013128004906 ·.next_sym Program_next_sym-11743562013128004906-value 0 Program_next_sym-4208978898528913939 ·.next_sym Program_next_sym-4208978898528913939-value 0 Program_next_sym-15952540911656918845 ·.next_sym Program_next_sym-15952540911656918845-value 1 Program_next_sym-0 ·.next_sym Program_next_sym-0-value 0 Program_next_sym-17615343019692007359 ·.next_sym Program_next_sym-17615343019692007359-value 0 Program_next_sym-10080759905092916392 ·.next_sym Program_next_sym-10080759905092916392-value 0 Program_next_sym-14289738803621830331 ·.next_sym Program_next_sym-14289738803621830331-value 0 Program_next_sym-1714775736476281168 ·.next_sym Program_next_sym-1714775736476281168-value 0 Program_next_sym-5040379952546458196 ·.next_sym Program_next_sym-5040379952546458196-value 0 Program_next_sym-16783941965674463102 ·.next_sym Program_next_sym-16783941965674463102-value 0 Program_next_sym-2546176790493825425 ·.next_sym Program_next_sym-2546176790493825425-value 0 Program_next_sym-5871781006564002453 ·.next_sym Program_next_sym-5871781006564002453-value 0 Program_next_sym-3377577844511369682 ·.next_sym Program_next_sym-3377577844511369682-value 1 Program_next_sym-9249358851075372135 ·.next_sym Program_next_sym-9249358851075372135-value 1

PyObject - Python objects in EGraphs#

We can also add Python objects directly to the e-graph as primitives and run rewrite rules that call back into Python

Future Work#

The next milestone use case is to be able to optimize functional array programs and rewrite them.

To implement this we need to at least support functions as values and ideally also generic types.

Example#

There is a concrete example provided by Siu from the Numba project.

We would want users to be able to write code like this:

def linalg_norm_loopnest_egglog(X: enp.NDArray, axis: enp.TupleInt) -> enp.NDArray:
    # peel off the outer shape for result array
    outshape = ShapeAPI(X.shape).deselect(axis).to_tuple()
    # get only the inner shape for reduction
    reduce_axis = ShapeAPI(X.shape).select(axis).to_tuple()

    return enp.NDArray.from_fn(
        outshape,
        X.dtype,
        lambda k: enp.sqrt(
            LoopNestAPI.from_tuple(reduce_axis)
            .unwrap()
            .reduce(lambda carry, i: carry + enp.real(enp.conj(x := X[i + k]) * x), init=0.0)
        ).to_value(),
    )

Which would then be rewritten to:

def linalg_norm_array_api(X: enp.NDArray, axis: enp.TupleInt) -> enp.NDArray:
    outdim = enp.range_(X.ndim).filter(lambda x: ~axis.contains(x))
    outshape = convert(convert(X.shape, enp.NDArray)[outdim], enp.TupleInt)
    row_axis, col_axis = axis
    return enp.NDArray.from_fn(
        outshape,
        X.dtype,
        lambda k: enp.sqrt(
            enp.int_product(enp.range_(X.shape[row_axis]), enp.range_(X.shape[col_axis]))
            .map_to_ndarray(lambda rc: enp.real(enp.conj(x := X[rc + k]) * x))
            .sum()
        ).to_value(),
    )

And finally could be lowered to Python as:

def linalg_norm_low_level(
    X: np.ndarray[tuple, np.dtype[np.float64]], axis: tuple[int, int]
) -> np.ndarray[tuple, np.dtype[np.float64]]:
    # # If X ndim>=3 and axis is a 2-tuple
    assert X.ndim >= 3
    assert len(axis) == 2
    #  Y - 2
    outdim = [dim for dim in range(X.ndim) if dim not in axis]

    outshape = tuple(np.asarray(X.shape)[outdim])

    res = np.zeros(outshape, dtype=X.dtype)
    row_axis, col_axis = axis
    for k in np.ndindex(outshape):
        tmp = 0.0
        for row in range(X.shape[row_axis]):
            for col in range(X.shape[col_axis]):
                idx = (row, col, *k)
                x = X[idx]
                tmp += (x.conj() * x).real
        res[k] = np.sqrt(tmp)
    return res

Conclusion#

In this talk I have gone through some details of what is needed to connect data science users to egglog:

Overall, the idea is that if we can get egglog in more users hands, in particular for data intensive workloads where the tradeoff of time for pre-computation is worth it, than this can help drive exciting future research directions and also build meaningful useful tools for the scientific open source ecosystem in Python.

If you are building DSLs in Python, or more generally want to play with e-graphs, try out egglog-python!

Around on the e-graphs Zulip for any questions.