PostgreSQL在何处处理 sql查询之四十五

更新时间：2022-09-14 15:30:46

现在，将要进入最为重要的部分：

分析如何制作执行计划的。

先把它简化，看看NodeTag 在 PortalStart 之前，是如何得到的。

先找到第一个相关程序：

每一种执行计划（结点单位），都有一个对应的path。

/*****************************************************************************
 *        PATH NODE CREATION ROUTINES
 *****************************************************************************/

/*
 * create_seqscan_path
 *      Creates a path corresponding to a sequential scan, returning the
 *      pathnode.
 */
Path *
create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
{
    Path       *pathnode = makeNode(Path);
    pathnode->pathtype = T_SeqScan;
    pathnode->parent = rel;
    pathnode->param_info = get_baserel_parampathinfo(root, rel,
                                                     required_outer);
    pathnode->pathkeys = NIL;    /* seqscan has unordered result */

    cost_seqscan(pathnode, root, rel, pathnode->param_info);

    return pathnode;
}

/*
 * create_index_path
 *      Creates a path node for an index scan.
 *
 * 'index' is a usable index.
 * 'indexclauses' is a list of RestrictInfo nodes representing clauses
 *            to be used as index qual conditions in the scan.
 * 'indexclausecols' is an integer list of index column numbers (zero based)
 *            the indexclauses can be used with.
 * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
 *            to be used as index ordering operators in the scan.
 * 'indexorderbycols' is an integer list of index column numbers (zero based)
 *            the ordering operators can be used with.
 * 'pathkeys' describes the ordering of the path.
 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
 *            for an ordered index, or NoMovementScanDirection for
 *            an unordered index.
 * 'indexonly' is true if an index-only scan is wanted.
 * 'required_outer' is the set of outer relids for a parameterized path.
 * 'loop_count' is the number of repetitions of the indexscan to factor into
 *        estimates of caching behavior.
 *
 * Returns the new path node.
 */
IndexPath *
create_index_path(PlannerInfo *root,
                  IndexOptInfo *index,
                  List *indexclauses,
                  List *indexclausecols,
                  List *indexorderbys,
                  List *indexorderbycols,
                  List *pathkeys,
                  ScanDirection indexscandir,
                  bool indexonly,
                  Relids required_outer,
                  double loop_count)
{
    IndexPath  *pathnode = makeNode(IndexPath);
    RelOptInfo *rel = index->rel;
    List       *indexquals,
               *indexqualcols;

    pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
    pathnode->path.parent = rel;
    pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
                                                          required_outer);
    pathnode->path.pathkeys = pathkeys;

    /* Convert clauses to indexquals the executor can handle */
    expand_indexqual_conditions(index, indexclauses, indexclausecols,
                                &indexquals, &indexqualcols);

    /* Fill in the pathnode */
    pathnode->indexinfo = index;
    pathnode->indexclauses = indexclauses;
    pathnode->indexquals = indexquals;
    pathnode->indexqualcols = indexqualcols;
    pathnode->indexorderbys = indexorderbys;
    pathnode->indexorderbycols = indexorderbycols;
    pathnode->indexscandir = indexscandir;

    cost_index(pathnode, root, loop_count);

    return pathnode;
}

对seq scan而言，其上层是：

/*
 * set_plain_rel_pathlist
 *      Build access paths for a plain relation (no subquery, no inheritance)
 */
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    fprintf(stderr, "In %s\n",__FUNCTION__);

    /* Consider sequential scan */
    add_path(rel, create_seqscan_path(root, rel, NULL));

    /* Consider index scans */
    create_index_paths(root, rel);

    /* Consider TID scans */
    create_tidscan_paths(root, rel);

    /* Now find the cheapest of the paths for this rel */
    set_cheapest(rel);
}

再上溯：

/*
 * set_rel_pathlist
 *      Build access paths for a base relation
 */
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                 Index rti, RangeTblEntry *rte)
{
    if (IS_DUMMY_REL(rel))
    {
        /* We already proved the relation empty, so nothing more to do */
    }
    else if (rte->inh)
    {
        /* It's an "append relation", process accordingly */
        set_append_rel_pathlist(root, rel, rti, rte);
    }
    else
    {
        switch (rel->rtekind)
        {
            case RTE_RELATION:
                if (rte->relkind == RELKIND_FOREIGN_TABLE)
                {
                    /* Foreign table */
                    set_foreign_pathlist(root, rel, rte);
                }
                else
                {
                    /* Plain relation */
                    set_plain_rel_pathlist(root, rel, rte);
                }
                break;
            case RTE_SUBQUERY:
                /* Subquery --- fully handled during set_rel_size */
                break;
            case RTE_FUNCTION:
                /* RangeFunction */
                set_function_pathlist(root, rel, rte);
                break;
            case RTE_VALUES:
                /* Values list */
                set_values_pathlist(root, rel, rte);
                break;
            case RTE_CTE:
                /* CTE reference --- fully handled during set_rel_size */
                break;
            default:
                elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
                break;
        }
    }

#ifdef OPTIMIZER_DEBUG
    debug_print_rel(root, rel);
#endif
}

接着，就是要分析 rel->rtekind 是什么东西了。

typedef struct RelOptInfo
{
    NodeTag        type;

    RelOptKind    reloptkind;

    /* all relations included in this RelOptInfo */
    Relids        relids;            /* set of base relids (rangetable indexes) */

    /* size estimates generated by planner */
    double        rows;            /* estimated number of result tuples */
    int            width;            /* estimated avg width of result tuples */

    /* materialization information */
    List       *reltargetlist;    /* Vars to be output by scan of relation */
    List       *pathlist;        /* Path structures */
    List       *ppilist;        /* ParamPathInfos used in pathlist */
    struct Path *cheapest_startup_path;
    struct Path *cheapest_total_path;
    struct Path *cheapest_unique_path;
    List       *cheapest_parameterized_paths;

    /* information about a base rel (not set for join rels!) */
    Index        relid;
    Oid            reltablespace;    /* containing tablespace */
    RTEKind        rtekind;        /* RELATION, SUBQUERY, or FUNCTION */
    AttrNumber    min_attr;        /* smallest attrno of rel (often <0) */
    AttrNumber    max_attr;        /* largest attrno of rel */
    Relids       *attr_needed;    /* array indexed [min_attr .. max_attr] */
    int32       *attr_widths;    /* array indexed [min_attr .. max_attr] */
    List       *indexlist;        /* list of IndexOptInfo */
    BlockNumber pages;            /* size estimates derived from pg_class */
    double        tuples;
    double        allvisfrac;
    /* use "struct Plan" to avoid including plannodes.h here */
    struct Plan *subplan;        /* if subquery */
    PlannerInfo *subroot;        /* if subquery */
    /* use "struct FdwRoutine" to avoid including fdwapi.h here */
    struct FdwRoutine *fdwroutine;        /* if foreign table */
    void       *fdw_private;    /* if foreign table */

    /* used by various scans and joins: */
    List       *baserestrictinfo;        /* RestrictInfo structures (if base
                                         * rel) */
    QualCost    baserestrictcost;        /* cost of evaluating the above */
    List       *joininfo;        /* RestrictInfo structures for join clauses
                                 * involving this rel */
    bool        has_eclass_joins;        /* T means joininfo is incomplete */
} RelOptInfo;

RTEKind:

typedef enum RTEKind
{
    RTE_RELATION,                /* ordinary relation reference */
    RTE_SUBQUERY,                /* subquery in FROM */
    RTE_JOIN,                    /* join */
    RTE_FUNCTION,                /* function in FROM */
    RTE_VALUES,                    /* VALUES (<exprlist>), (<exprlist>), ... */
    RTE_CTE                        /* common table expr (WITH list element) */
} RTEKind;

/*
 * Add an entry for a relation to the pstate's range table (p_rtable).
 *
 * If pstate is NULL, we just build an RTE and return it without adding it
 * to an rtable list.
 *
 * Note: formerly this checked for refname conflicts, but that's wrong.
 * Caller is responsible for checking for conflicts in the appropriate scope.
 */
RangeTblEntry *
addRangeTableEntry(ParseState *pstate,
                   RangeVar *relation,
                   Alias *alias,
                   bool inh,
                   bool inFromCl)
{
    RangeTblEntry *rte = makeNode(RangeTblEntry);
    char       *refname = alias ? alias->aliasname : relation->relname;
    LOCKMODE    lockmode;
    Relation    rel;

    //fprintf(stderr,"In addRangeTableEntry: rtekind=RTE_RELATION\n");

    rte->rtekind = RTE_RELATION;
    rte->alias = alias;

    /*
     * Get the rel's OID.  This access also ensures that we have an up-to-date
     * relcache entry for the rel.    Since this is typically the first access
     * to a rel in a statement, be careful to get the right access level
     * depending on whether we're doing SELECT FOR UPDATE/SHARE.
     */
    lockmode = isLockedRefname(pstate, refname) ? RowShareLock : AccessShareLock;
    rel = parserOpenTable(pstate, relation, lockmode);
    rte->relid = RelationGetRelid(rel);
    rte->relkind = rel->rd_rel->relkind;

    /*
     * Build the list of effective column names using user-supplied aliases
     * and/or actual column names.
     */
    rte->eref = makeAlias(refname, NIL);
    buildRelationAliases(rel->rd_att, alias, rte->eref);

    /*
     * Drop the rel refcount, but keep the access lock till end of transaction
     * so that the table can't be deleted or have its schema modified
     * underneath us.
     */
    heap_close(rel, NoLock);

    /*----------
     * Flags:
     * - this RTE should be expanded to include descendant tables,
     * - this RTE is in the FROM clause,
     * - this RTE should be checked for appropriate access rights.
     *
     * The initial default on access checks is always check-for-READ-access,
     * which is the right thing for all except target tables.
     *----------
     */
    rte->inh = inh;
    rte->inFromCl = inFromCl;

    rte->requiredPerms = ACL_SELECT;
    rte->checkAsUser = InvalidOid;        /* not set-uid by default, either */
    rte->selectedCols = NULL;
    rte->modifiedCols = NULL;

    /*
     * Add completed RTE to pstate's range table list, but not to join list
     * nor namespace --- caller must do that if appropriate.
     */
    if (pstate != NULL)
        pstate->p_rtable = lappend(pstate->p_rtable, rte);

    return rte;
}

本文转自健哥的数据花园博客园博客，原文链接：http://www.cnblogs.com/gaojian/archive/2013/06/03/3115643.html，如需转载请自行联系原作者

上一篇 : ：基于Win32的多线程客户/服务器通信下一篇 : emulate sh

PostgreSQL在何处处理 sql查询之四十五

相关阅读

推荐文章