likes
comments
collection
share

vue中的解析器parse

作者站长头像
站长
· 阅读数 21

Vue 编译器在编译 .vue 文件的模板时分为三个阶段:parse、transform、generate。

今天来分析parse的简单原理,主要分析template里面的html dom转换成模板AST的过程

1. 主流程

假如在.vue文件的template里面写了如下dom

<section class="main" v-show="todos.length">
  <ul class="todo-list">
    <li
      v-for="todo in filteredTodos"
      class="todo"
      :key="todo.id"
      :class="{ completed: todo.completed, editing: todo === editedTodo }"
    >
      <div class="view">
        <label @dblclick="editTodo(todo)">{{ todo.title }}</label>
      </div>
      <input
        class="edit"
        type="text"
        v-model="todo.title"
        v-todo-focus="todo === editedTodo"
        @blur="doneEdit(todo)"
        @keyup.enter="doneEdit(todo)"
        @keyup.escape="cancelEdit(todo)"
      >
    </li>
  </ul>
</section>

解析这棵dom树主要用到“递归下降”的算法,函数递归过程是 parseChildren->parseElement->parseChildren->parseElement->......parseChildren。直到运行最后一层parseChildren,用parseInterpolation解析“{{}}”停止递归,parseChildren运行的结果是当前层级的所有子元素。

如下是parseChildren, parseElement函数的定义

/**
 * Parse the nodes that sit at the current nesting level of the template.
 *
 * Keeps reading from `context.source` until `isEnd` says this level is done.
 * Each iteration produces exactly one node: an interpolation, an element, or
 * (when neither applies) a text node.
 *
 * @param {object} context - Parser state; `context.source` holds the template
 *   text that has not been consumed yet.
 * @param {Array} ancestors - Stack of enclosing elements, passed through to
 *   `isEnd` and `parseElement`.
 * @returns {Array} The parsed nodes, without text nodes whose content consists
 *   only of tab, CR, LF, FF or space characters.
 */
function parseChildren(context, ancestors) {
  const parsed = [];

  while (!isEnd(context, ancestors)) {
    const remaining = context.source;
    let current = null;

    if (startsWith(remaining, delimiters[0])) {
      // Interpolation: '{{' ... '}}'
      current = parseInterpolation(context);
    } else if (remaining[0] === '<' && /[a-z]/i.test(remaining[1])) {
      // '<' followed by a letter: the start tag of an element
      current = parseElement(context, ancestors);
    }

    // Anything else is plain text, including runs of newlines and
    // indentation such as '\n    '.
    parsed.push(current || parseText(context));
  }

  // Replace whitespace-only text nodes with null ...
  const withBlanksNulled = parsed.map((child) =>
    child.type === NodeTypes.TEXT && !/[^\t\r\n\f ]/.test(child.content)
      ? null
      : child
  );

  // ... and drop the empty slots before returning.
  return withBlanksNulled.filter(Boolean);
}
/**
 * Parse a single element: its start tag, its children and, when present,
 * its end tag.
 *
 * @param {object} context - Parser state; `context.source` holds the template
 *   text that has not been consumed yet.
 * @param {Array} ancestors - Stack of enclosing elements. The new element is
 *   pushed while its children are parsed and popped again afterwards.
 * @returns {object} The node returned by `parseTag` for the start tag, with
 *   its `children` property filled in.
 */
function parseElement(context, ancestors) {
  // Start tag: whatever parseTag returns becomes the element node.
  const node = parseTag(context, TagType.Start);

  // Children are parsed with this element on top of the ancestor stack.
  ancestors.push(node);
  node.children = parseChildren(context, ancestors);
  ancestors.pop();

  // End tag: only consumed when the remaining source opens the end tag of
  // this element; otherwise nothing is consumed here.
  const hasOwnEndTag = startsWithEndTagOpen(context.source, node.tag);
  if (hasOwnEndTag) {
    parseTag(context, TagType.End);
  }

  return node;
}

parseChildren 首先进入 while 循环,是否结束由 isEnd 判断:当剩余的模板字符串以某个祖先元素的闭合标签("</" 加标签名)开头,或者模板字符串已经被消费完时,循环结束。无论当前元素是单标签还是双标签,用的都是这同一个判断。

循环体内首先检查是否遇到"{{}}"这类用双大括号包围起来的插值表达式,然后再判断是否遇到开始标签(“<”后面紧跟字母)。如果都没有遇到,node 仍然为空,说明当前位置是普通文本(包括只含换行和缩进的空白文本),用 parseText 处理。

处理完子元素后,再循环遍历 nodes,把只包含空白字符(换行、空格、制表符等)的文本节点置为 null,然后在 return 的时候过滤掉。

整个过程如下

vue中的解析器parse

最后得到一个简单版的AST数据结构,每一个节点包含tag名字和props属性(原生属性、指令、事件),可以对照这个ast结构和上面的模板dom树做对比

// Simplified template AST for the <section> template shown earlier.
// Every element node has a `tag` and a `props` list, plus `children` when it
// has any. In `props`, plain attributes carry `value`, while directives and
// event listeners carry `exp` (and `arg` where the template supplies one).
// Event modifiers from the template (`.enter`, `.escape`) are not recorded
// in this simplified structure.
const ast = {
    "tag": "section",
    "props": [
        {
            "name": "class",
            "value": "main"
        },
        {
            "name": "show",
            "exp": "todos.length"
        }
    ],
    "children": [
        {
            "tag": "ul",
            "props": [
                {
                    "name": "class",
                    "value": "todo-list"
                }
            ],
            "children": [
                {
                    "tag": "li",
                    "props": [
                        {
                            "name": "for",
                            "exp": "todo in filteredTodos"
                        },
                        {
                            "name": "class",
                            "value": "todo"
                        },
                        {
                            "name": "bind",
                            "exp": "todo.id",
                            "arg": "key"
                        },
                        {
                            "name": "bind",
                            "exp": "{ completed: todo.completed, editing: todo === editedTodo }",
                            "arg": "class"
                        }
                    ],
                    "children": [
                        {
                            "tag": "div",
                            "props": [
                                {
                                    "name": "class",
                                    "value": "view"
                                }
                            ],
                            "children": [
                                {
                                    "tag": "label",
                                    "props": [
                                        {
                                            "name": "on",
                                            "exp": "editTodo(todo)",
                                            "arg": "dblclick"
                                        }
                                    ],
                                    "children": [
                                        {
                                            // Interpolation node for {{ todo.title }}
                                            "content": {
                                                "content": "todo.title"
                                            }
                                        }
                                    ]
                                }
                            ]
                        },
                        {
                            "tag": "input",
                            "props": [
                                {
                                    "name": "class",
                                    "value": "edit"
                                },
                                {
                                    "name": "type",
                                    "value": "text"
                                },
                                {
                                    "name": "model",
                                    "exp": "todo.title"
                                },
                                {
                                    "name": "todo-focus",
                                    "exp": "todo === editedTodo"
                                },
                                {
                                    "name": "on",
                                    "exp": "doneEdit(todo)",
                                    "arg": "blur"
                                },
                                {
                                    "name": "on",
                                    "exp": "doneEdit(todo)",
                                    "arg": "keyup"
                                },
                                {
                                    "name": "on",
                                    "exp": "cancelEdit(todo)",
                                    "arg": "keyup"
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    ]
};

不管是匹配 dom 标签元素,还是匹配标签上的属性,都是用正则表达式来匹配的。匹配过程都放在 while 循环里进行,直到遇到结束点为止。

2. 判断是否结束isEnd

/**
 * Tell parseChildren whether the current level has been fully consumed.
 *
 * @param {object} context - Parser state; `context.source` holds the template
 *   text that has not been consumed yet.
 * @param {Array} ancestors - Stack of enclosing elements; each entry exposes
 *   its tag name as `tag`.
 * @returns {boolean} `true` when the remaining source starts with '</' and
 *   `startsWithEndTagOpen` accepts it for any ancestor's tag, or when no
 *   source is left; `false` otherwise.
 */
function isEnd(context, ancestors) {
  const remaining = context.source;

  if (startsWith(remaining, '</')) {
    // TODO: probably bad performance
    // Probe the stack from the innermost ancestor outwards.
    let depth = ancestors.length;
    while (depth-- > 0) {
      if (startsWithEndTagOpen(remaining, ancestors[depth].tag)) {
        return true;
      }
    }
  }

  // No ancestor matched: stop only if the input is exhausted.
  return !remaining;
}

ancestors是一个栈,维护父级的嵌套关系,比如当递归解析到“{{ todo.title }}”, ancestors的值是

// Snapshot of the ancestor stack at the moment "{{ todo.title }}" is parsed:
// outermost element first, innermost (<label>) last. Only `type` and `tag`
// are shown; the remaining fields of each node are elided.
const ancestors = [
    { type: 'ELEMENT', tag: 'section' /* ... */ },
    { type: 'ELEMENT', tag: 'ul' /* ... */ },
    { type: 'ELEMENT', tag: 'li' /* ... */ },
    { type: 'ELEMENT', tag: 'div' /* ... */ },
    { type: 'ELEMENT', tag: 'label' /* ... */ },
];

从头到尾,就是section到“{{}}”的层级关系链条,最后一个元素也就代表当前parseChildren解析的元素是 "<label>"。 在parseElement解析dom树的时候,每递归一层,执行parseChildren之前,就要在ancestors push一下,记录当前的元素。当解析完所有子元素后,又把当前元素pop出来。

在 while 循环中,isEnd 函数传入的参数是 context 和 ancestors。当剩余源码以“</”开头时,它会从 ancestors 的最后一个元素(也就是当前元素)开始向前逐个检查,只要源码开头的闭合标签和其中某个祖先的标签一致,就停下来。

3. advanceBy 一步一步吃掉字符串

每解析到标签元素、换行符或者属性,就要消费掉对应的一段字符串。这是通过 advanceBy 函数完成的,参数 numberOfCharacters 表示前进几个字符,如此反复,直到把模板字符串消费完。

/**
 * Consume the first `numberOfCharacters` characters of the template.
 *
 * Mutates `context` in place: `context.source` is replaced by what
 * `String.prototype.slice` returns for the given start offset.
 *
 * @param {object} context - Parser state whose `source` string is advanced.
 * @param {number} numberOfCharacters - How many leading characters to drop.
 * @returns {void}
 */
function advanceBy(context, numberOfCharacters) {
  context.source = context.source.slice(numberOfCharacters);
}