Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1494
  • 博文数量: 2
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 10
  • 用 户 组: 普通用户
  • 注册时间: 2019-05-29 23:28
文章分类
文章存档

2019年(2)

我的朋友
最近访客

分类:

2019-05-29 23:29:11

主要分析文件:
    // basic nodes definition
    src/include/nodes/nodes.h
    // SQL parsed struct
    src/include/nodes/parsenodes.h
    // List定义
    src/include/nodes/pg_list.h
    // List实现
    src/backend/nodes/list.c
    // postgres运行入口文件
    src/backend/tcop/postgres.c
    // utility Stmt运行文件
    src/backend/tcop/utility.c
    // SQL analyze and rewrite
    src/backend/parser/analyze.c
   
PostgreSQL用一种非常简单的形式实现了类似C++的继承,请看nodes.h :
typedef struct Node
{
NodeTag  type;
} Node;
然后请看:parsenodes.h(SQL语句经过parser解析后都先对应该该文件的一个struct中)
假设有一个create table的sql:
create table test (name varchar(100, pass varchar(100));

将会被解析到如下structure:
typedef struct CreateStmt
{
NodeTag  type;
RangeVar   *relation;  /* relation to create */
List    *tableElts;  /* column definitions (list of ColumnDef) */
List    *inhRelations; /* relations to inherit from (list of
         * inhRelation) */
List    *constraints; /* constraints (list of Constraint nodes) */
List    *options;  /* options from WITH clause */
OnCommitAction oncommit; /* what do we do at COMMIT? */
char    *tablespacename; /* table space to use, or NULL */
} CreateStmt;

首先,看看该struct的第一个元素:type是一个NodeTag类型。
PG的很多struct的第一个元素都是NodeTag,这样在函数中传递指针变量时,他可以很
简单的把参数设置成:Node*
说简单点,其实有点像是所有的struct都继承了Node这个struct.
就是因为这个原因,看PG的代码很累,很多函数的参数和返回值都是一个简单的Node.
在nodes.h中有每个Node的值的定义,比如:上面说的CreateStmt的type的值就是:T_CreateStmt

然后,PG中大量的使用了链表类型:List
在pg_list.h中有定义:
typedef struct List
{
NodeTag  type;   /* T_List, T_IntList, or T_OidList */
int   length;
ListCell   *head;
ListCell   *tail;
} List;

可以看到,List的定义是基于基类Node来进行的。
常用的List操作函数有:
//取List第一个元素
ListCell *y = list_head(List *l);
//得到List的元素个数
list_length(List *l);
// 遍历List
foreach(cell, l)
{

}
其他的很多函数具体可以参考pg_list.h和list.c
下面接着说SQL的解释和执行。
所有的SQL都会先解析成一个与之相对应的struct.
Select会解析到:
typedef struct SelectStmt
{
NodeTag  type;

/*
  * These fields are used only in “leaf” SelectStmts.
  *
  * into, intoColNames, intoOptions, intoOnCommit, and intoTableSpaceName
  * are a kluge; they belong somewhere else…
  */
List    *distinctClause; /* NULL, list of DISTINCT ON exprs, or
         * lcons(NIL,NIL) for all (SELECT DISTINCT) */
RangeVar   *into;   /* target table (for select into table) */
List    *intoColNames; /* column names for into table */
List    *intoOptions; /* options from WITH clause */
OnCommitAction intoOnCommit; /* what do we do at COMMIT? */
char    *intoTableSpaceName;  /* table space to use, or NULL */
List    *targetList;  /* the target list (of ResTarget) */
List    *fromClause;  /* the FROM clause */
Node    *whereClause; /* WHERE qualification */
List    *groupClause; /* GROUP BY clauses */
Node    *havingClause; /* HAVING conditional-expression */

/*
  * In a “leaf” node representing a VALUES list, the above fields are all
  * null, and instead this field is set.  Note that the elements of the
  * sublists are just expressions, without ResTarget decoration. Also note
  * that a list element can be DEFAULT (represented as a SetToDefault
  * node), regardless of the context of the VALUES list. It’s up to parse
  * analysis to reject that where not valid.
  */
List    *valuesLists; /* untransformed list of expression lists */

/*
  * These fields are used in both “leaf” SelectStmts and upper-level
  * SelectStmts.
  */
List    *sortClause;  /* sort clause (a list of SortBy’s) */
Node    *limitOffset; /* # of result tuples to skip */
Node    *limitCount;  /* # of result tuples to return */
List    *lockingClause; /* FOR UPDATE (list of LockingClause’s) */

/*
  * These fields are used only in upper-level SelectStmts.
  */
SetOperation op;   /* type of set op */
bool  all;   /* ALL specified? */
struct SelectStmt *larg; /* left child */
struct SelectStmt *rarg; /* right child */
/* Eventually add fields for CORRESPONDING spec here */
} SelectStmt;

Delete会解析到:
typedef struct DeleteStmt
{
NodeTag  type;
RangeVar   *relation;  /* relation to delete from */
List    *usingClause; /* optional using clause for more tables */
Node    *whereClause; /* qualifications */
List    *returningList; /* list of expressions to return */
} DeleteStmt;

Update会解析到:
typedef struct UpdateStmt
{
NodeTag  type;
RangeVar   *relation;  /* relation to update */
List    *targetList;  /* the target list (of ResTarget) */
Node    *whereClause; /* qualifications */
List    *fromClause;  /* optional from clause for more tables */
List    *returningList; /* list of expressions to return */
} UpdateStmt;

从定义上看,Select比较复杂。其实在PG内部,把Select/Delete/Update当成一样处理,只是最后
找到相应的结果集时采取不同的操作。
postgres.c的804行可以看到这一步操作:
parsetree_list = pg_parse_query(query_string);
第一步完成后,只是做了很简单、很粗糙的事情,然后,要进一步进行rewrite, 在交给优化器进行优化和
路径选择之前,所有的执行语句都要转换成struct Query:
typedef struct Query
{
NodeTag  type;

CmdType  commandType; /* select|insert|update|delete|utility */
    /*
     注意:
     如果commandType为: utility,优化器不会对该SQL进行进一步优化,因为这个SQL
     就是一些建表或者其他命令操作,无法进行路径选择和优化,这时候,executor直接
     执行utilityStmt这个Node对应的Struct.
   
     对于select|insert|update|delete这些SQL,优化器都需要进行评估和优化。
   
    */
QuerySource querySource; /* where did I come from? */

bool  canSetTag;  /* do I set the command result tag? */

Node    *utilityStmt; /* non-null if this is a non-optimizable
         * statement */

int   resultRelation; /* rtable index of target relation for
         * INSERT/UPDATE/DELETE; 0 for SELECT */

RangeVar   *into;   /* target relation for SELECT INTO */
List    *intoOptions; /* options from WITH clause */
OnCommitAction intoOnCommit; /* what do we do at COMMIT? */
char    *intoTableSpaceName;  /* table space to use, or NULL */

bool  hasAggs;  /* has aggregates in tlist or havingQual */
bool  hasSubLinks; /* has subquery SubLink */

List    *rtable;   /* list of range table entries */
FromExpr   *jointree;  /* table join tree (FROM and WHERE clauses) */

List    *targetList;  /* target list (of TargetEntry) */

List    *returningList; /* return-values list (of TargetEntry) */

List    *groupClause; /* a list of GroupClause’s */

Node    *havingQual;  /* qualifications applied to groups */

List    *distinctClause; /* a list of SortClause’s */

List    *sortClause;  /* a list of SortClause’s */

Node    *limitOffset; /* # of result tuples to skip (int8 expr) */
Node    *limitCount;  /* # of result tuples to return (int8 expr) */

List    *rowMarks;  /* a list of RowMarkClause’s */

Node    *setOperations; /* set-operation tree if this is top level of
         * a UNION/INTERSECT/EXCEPT query */

/*
  * If the resultRelation turns out to be the parent of an inheritance
  * tree, the planner will add all the child tables to the rtable and store
  * a list of the rtindexes of all the result relations here. This is done
  * at plan time, not parse time, since we don’t want to commit to the
  * exact set of child tables at parse time.  XXX This field ought to go in
  * some sort of TopPlan plan node, not in the Query.
  */
List    *resultRelations; /* integer list of RT indexes, or NIL */

/*
  * If the query has a returningList then the planner will store a list of
  * processed targetlists (one per result relation) here.  We must have a
  * separate RETURNING targetlist for each result rel because column
  * numbers may vary within an inheritance tree.  In the targetlists, Vars
  * referencing the result relation will have their original varno and
  * varattno, while Vars referencing other rels will be converted to have
  * varno OUTER and varattno referencing a resjunk entry in the top plan
  * node’s targetlist.  XXX This field ought to go in some sort of TopPlan
  * plan node, not in the Query.
  */
List    *returningLists; /* list of lists of TargetEntry, or NIL */
} Query;

这些rewrite比较复杂,是由一系列的transform函数完成的,具体可以查看analyze.c.

下面把一些Sql命令转化后的struct做一个简单的分析。
首先分析utilityStmt, 这些命令都比较简单。
(1) Create Table

/* ———————-
*  Create Table Statement
*
* NOTE: in the raw gram.y output, ColumnDef, Constraint, and FkConstraint
* nodes are intermixed in tableElts, and constraints is NIL.  After parse
* analysis, tableElts contains just ColumnDefs, and constraints contains
* just Constraint nodes (in fact, only CONSTR_CHECK nodes, in the present
* implementation).
* ———————-
*/

typedef struct CreateStmt
{
    // type应该为 T_CreateStmt
NodeTag  type;   
// relation->relname 就是要创建的table名字   
RangeVar   *relation;  /* relation to create */
List    *tableElts;  /* column definitions (list of ColumnDef) */
// 由于PG的表是可以继承的,这部分先不分析
List    *inhRelations; /* relations to inherit from (list of
         * inhRelation) */
// 这些都是constraints的定义
List    *constraints; /* constraints (list of Constraint nodes) */
List    *options;  /* options from WITH clause */
OnCommitAction oncommit; /* what do we do at COMMIT? */
char    *tablespacename; /* table space to use, or NULL */
} CreateStmt;

其中最重要的就是:tableElts这个字段,里面包含了要创建table的所有columns.
我写了几行代码,可以遍历这一List.

List *schema;
ListCell *col;

elog(LOG, “begin create:%s”, ((CreateStmt *)parsetree)->relation->relname);
schema = ((CreateStmt *)parsetree)->tableElts;
// 遍历
foreach(col, schema)
{
    ColumnDef  *entry = lfirst(col);
    elog(LOG, “column name:%s column type:%s”,
                    entry->colname, TypeNameToString(entry->typename));
}

建表语句:
pgsql=# create table lijianghua ( name varchar(100), pass varchar(100));
CREATE TABLE

log输出:
LOG:  begin create:lijianghua
LOG:  column name:name column type:pg_catalog.varchar
LOG:  column name:pass column type:pg_catalog.varchar
(2) drop object
删除一个对象时,比如:drop table/drop view……, 命令会被解析到DropStmt struct:
/* ———————-
*  Drop Table|Sequence|View|Index|Type|Domain|Conversion|Schema Statement
* ———————-
*/
typedef struct DropStmt
{
NodeTag  type;
// 需要删除的对象列表
List    *objects;  /* list of sublists of names (as Values) */
// 对象类型
ObjectType removeType;  /* object type */
DropBehavior behavior;  /* RESTRICT or CASCADE behavior */
bool  missing_ok;  /* skip error if object is missing? */
} DropStmt;

ObjectType的定义也非常简单:
typedef enum ObjectType
{
OBJECT_AGGREGATE,
OBJECT_CAST,
OBJECT_COLUMN,
OBJECT_CONSTRAINT,
OBJECT_CONVERSION,
OBJECT_DATABASE,
OBJECT_DOMAIN,
OBJECT_FUNCTION,
OBJECT_INDEX,
OBJECT_LANGUAGE,
OBJECT_LARGEOBJECT,
OBJECT_OPCLASS,
OBJECT_OPERATOR,
OBJECT_ROLE,
OBJECT_RULE,
OBJECT_SCHEMA,
OBJECT_SEQUENCE,
OBJECT_TABLE,
OBJECT_TABLESPACE,
OBJECT_TRIGGER,
OBJECT_TYPE,
OBJECT_VIEW
} ObjectType;
同样,我也写了几行代码,进行了相关验证:
if(parsetree->type == T_DropStmt)
{
      List *tables;
      ListCell *table;
      tables = ((DropStmt *)parsetree)->objects;
      foreach(table, tables)
      {
           List    *names = (List *) lfirst(table);
           RangeVar   *rel;
           if(((DropStmt *)parsetree)->removeType == OBJECT_TABLE)
           {
                  rel = makeRangeVarFromNameList(names);
                  elog(LOG, “want to delete table:%s”, rel->relname);
           }
      }
}
               
SQL语句:
pgsql=# drop table test,test21;
DROP TABLE

log输出:
LOG:  want to delete table:test
LOG:  want to delete table:test21

(3) 把比较复杂的Insert/Update/Delete/Select做一些详细的分析
     未完待续!
阅读(638) | 评论(0) | 转发(0) |
0

上一篇:没有了

下一篇:postgresql源代码的组成

给主人留下些什么吧!~~