Go to the first, previous, next, last section, table of contents.
In this example program the outermost for loop of each loop nest will be tiled.
#include <stdlib.h> #include <suif1.h> #include <builder.h> #include <dependence.h> int tile_size; /************************************************************************* * Tile the given for loop * *************************************************************************/ void tile_for_loop(tree_for * tf) { // create the builder structure for the loop index variable block ind_var(tf->index()); // create a new variable for the tile driver using builder char name[64]; sprintf(name, "%s_tile", tf->index()->name()); block tile_var(block::new_sym(tf->index()->type(), name)); var_sym * tv = (var_sym *)tile_var.get_sym(); // get the bounds of the current loop named_lin_ineq * nli = include_for(tf); // if bounds cannot be found, got out if(nli == NULL) return; // Create a named system of inequalities to represent the tiling inequalities // tile_sz * tile_var <= index_var <= tile_sz + tile_var + tile_sz - 1 name_table nt(new immed(tv), new immed(tf->index())); lin_ineq li(Compose(2, 3, 0, -tile_size, 1, tile_size - 1, tile_size, -1)); // create the named linear inequality combining the names and inequalities named_lin_ineq blk(nt, li); // Add the tiling inequalities to the original set *nli &= blk; // project the original set to tighten bounds nli->project(); // find the lower and upper bound expresstion for the tile block tile_block_lb(nli->create_expression(tf->index(), FALSE)); block tile_block_ub(nli->create_expression(tf->index(), TRUE)); // project away the index variable of the tile nli->project_away(immed(tf->index())); // find the lower and upper bound expresstion for the tile driver block tile_driver_lb(nli->create_expression(tv, FALSE)); block tile_driver_ub(nli->create_expression(tv, TRUE)); // create a builder structure to represent both loop nests block code(block::FOR(tile_var, tile_driver_lb, tile_driver_ub, block::FOR(ind_var, tile_block_lb, tile_block_ub, block(tf->body())))); // create a tree_node (a tree_for in this case) out of the builder representation tree_node * new_tn = code.make_tree_node(); // insert the new tiled loops and remove the input for loop tf->parent()->insert_before(new_tn, tf->list_e()); tf->parent()->remove(tf->list_e()); } /************************************************************************* * Find the outermost for loop and call tile_for_loop * *************************************************************************/ void find_first_for(tree_node_list * tnl) { tree_node_list_iter iter(tnl); while(!iter.is_empty()) { tree_node * tn = iter.step(); switch(tn->kind()) { case TREE_FOR: tree_for * tnf = (tree_for *)tn; tile_for_loop(tnf); break; case TREE_IF: tree_if * tni = (tree_if *)tn; find_first_for(tni->then_part()); find_first_for(tni->else_part()); break; case TREE_LOOP: tree_loop * tnl = (tree_loop *)tn; find_first_for(tnl->body()); break; case TREE_BLOCK: tree_block * tnb = (tree_block *)tn; find_first_for(tnb->body()); break; case TREE_INSTR: break; default: assert(0); break; } } } /************************************************************************* * For each procedure, set the proc. for builder and call find_first_for * * Need to do fill_in_access because of include_for * *************************************************************************/ void do_tile(tree_proc * tp) { block::set_proc(tp); fill_in_access(tp); find_first_for(tp->body()); } /************************************************************************* * Main: set the tile size if supplied, otherwise it is 64. * *************************************************************************/ main(int argc, char * argv[]) { static cmd_line_option option_table[] = { {CLO_INT, "-size", "64", &tile_size} }; start_suif(argc, argv); parse_cmd_line(argc, argv, option_table, sizeof(option_table) / sizeof(cmd_line_option)); suif_proc_iter(argc, argv, do_tile, TRUE); }
After compiling this program and running over the following input program:
double A[2000]; test(int N, int M) { int i, j, k; for(i = 0; i<= 1000; i++) for(j = 1; j<= 1000; j++) A[i] = A[i-1]; for(i = 0; i<=M; i++) for(j = 1; j<=1000; j++) A[j] = A[j-1]; }
the C-form of the output created is
extern int test(int N, int M) { int i; int j; int k; int i_tile6; int i_tile7; for(i_tile6 = 0; i_tile6 <= 15; i_tile6++) for(i = max(0,0+64*i_tile6); i <= min(1000,63+64*i_tile6); i++) for(j = 1; j <= 1000; j++) A[i] = A[i - 1]; for(i_tile7 = 0; i_tile7 <= divfloor(0+M,64); i_tile7++) for(i = max(0,0+64*i_tile7); i <= min(0+M,63+64*i_tile7); i++) for (j = 1; j <= 1000; j++) A[j] = A[j - 1]; return 0; }
Go to the first, previous, next, last section, table of contents.