likes
comments
collection
share

深拷贝细节远不止递归(下篇)

作者站长头像
站长
· 阅读数 14

上篇主要是总结 lodashcloneDeep 源码处理目标对象的各种类型的复制的细节,这篇主要是总结:

  • 处理循环引用
  • 递归
  • 处理 function
  • 过滤原型属性

对象循环引用

对象循环引用也就是对象 A 引用了对象 B,而对象 B 又引用了对象 A,structedCloneJSON.parse(JSON.stringify()) 对循环引用的对象进行深拷贝会抛出异常。

处理对象循环引用方法就是对拷贝对象进行判断是否拷贝过。

const cacheMap=new WeakMap()
function cloneDeep(source) {
    const result =  Array.isArray(source) ? [] : Object.create(null)
    for (let key in source) {
        if (source.hasOwnProperty(key) &&
            (getTypeof(source[key]) === 'object' || getTypeof(source[key]) === 'array') &&
            !cacheMap.has(source[key])) {
            cacheMap.set(source[key], target[key])
            result[key] = cloneDeep(source[key]);

        } else {
            target[key] = cacheMap.get(source[key]) || source[key]
        }
    }
    return result
}

这里是通过 WeakMap 对象来存拷贝过的对象,如果已经拷贝过就直接赋值。

lodash cloneDeep 处理循环引用的方法类似,考虑的更为细致:

// Check for circular references and return its corresponding clone.
stack || (stack = new Stack);
var stacked = stack.get(value);
if (stacked) {
    return stacked;
}
stack.set(value, result)

代码中用 Stack 来存在拷贝过的对象。

/**
 * Creates a stack cache object to store key-value pairs.
 *
 * @private
 * @constructor
 * @param {Array} [entries] The key-value pairs to cache.
 */
function Stack(entries) {
    var data = this.__data__ = new ListCache(entries);
    this.size = data.size;
}

/**
 * Removes all key-value entries from the stack.
 *
 * @private
 * @name clear
 * @memberOf Stack
 */
function stackClear() {
    this.__data__ = new ListCache;
    this.size = 0;
}

/**
 * Removes `key` and its value from the stack.
 *
 * @private
 * @name delete
 * @memberOf Stack
 * @param {string} key The key of the value to remove.
 * @returns {boolean} Returns `true` if the entry was removed, else `false`.
 */
function stackDelete(key) {
    var data = this.__data__,
        result = data['delete'](key);

    this.size = data.size;
    return result;
}

/**
 * Gets the stack value for `key`.
 *
 * @private
 * @name get
 * @memberOf Stack
 * @param {string} key The key of the value to get.
 * @returns {*} Returns the entry value.
 */
function stackGet(key) {
    return this.__data__.get(key);
}

/**
 * Checks if a stack value for `key` exists.
 *
 * @private
 * @name has
 * @memberOf Stack
 * @param {string} key The key of the entry to check.
 * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`.
 */
function stackHas(key) {
    return this.__data__.has(key);
}

/**
 * Sets the stack `key` to `value`.
 *
 * @private
 * @name set
 * @memberOf Stack
 * @param {string} key The key of the value to set.
 * @param {*} value The value to set.
 * @returns {Object} Returns the stack cache instance.
 */
function stackSet(key, value) {
    var data = this.__data__;
    if (data instanceof ListCache) {
        var pairs = data.__data__;
        if (!Map || (pairs.length < LARGE_ARRAY_SIZE - 1)) {
            pairs.push([key, value]);
            this.size = ++data.size;
            return this;
        }
        data = this.__data__ = new MapCache(pairs);
    }
    data.set(key, value);
    this.size = data.size;
    return this;
}

// Add methods to `Stack`.
Stack.prototype.clear = stackClear;
Stack.prototype['delete'] = stackDelete;
Stack.prototype.get = stackGet;
Stack.prototype.has = stackHas;
Stack.prototype.set = stackSet;

代码中 Stack 对象有 cleardeletegetset 方法,其中 set 方法中长多超过 LARGE_ARRAY_SIZE(200) 时采用 MapCache 对象存储,没超过采用 listCache 存储。MapCache 其实就是对象或 Map 存储,listCache 是数组存储,为什么 200 之前用数组,可能是因为数组占用的空间要少些,200 以内数组的查找性能也不是很大。而超过 200 之后数组的查询性能消耗会大起来了,因此换 hash 表存储,hash 表查快,但内存占用空间大。

listCache:


/**
 * Gets the index at which the `key` is found in `array` of key-value pairs.
 *
 * @private
 * @param {Array} array The array to inspect.
 * @param {*} key The key to search for.
 * @returns {number} Returns the index of the matched value, else `-1`.
 */
function assocIndexOf(array, key) {
    var length = array.length;
    while (length--) {
        if (eq(array[length][0], key)) {
            return length;
        }
    }
    return -1;
}

/**
 * Creates an list cache object.
 *
 * @private
 * @constructor
 * @param {Array} [entries] The key-value pairs to cache.
 */
function ListCache(entries) {
    var index = -1,
        length = entries == null ? 0 : entries.length;

    this.clear();
    while (++index < length) {
        var entry = entries[index];
        this.set(entry[0], entry[1]);
    }
}

/**
 * Removes all key-value entries from the list cache.
 *
 * @private
 * @name clear
 * @memberOf ListCache
 */
function listCacheClear() {
    this.__data__ = [];
    this.size = 0;
}

/**
 * Removes `key` and its value from the list cache.
 *
 * @private
 * @name delete
 * @memberOf ListCache
 * @param {string} key The key of the value to remove.
 * @returns {boolean} Returns `true` if the entry was removed, else `false`.
 */
function listCacheDelete(key) {
    var data = this.__data__,
        index = assocIndexOf(data, key);

    if (index < 0) {
        return false;
    }
    var lastIndex = data.length - 1;
    if (index == lastIndex) {
        data.pop();
    } else {
        splice.call(data, index, 1);
    }
    --this.size;
    return true;
}

/**
 * Gets the list cache value for `key`.
 *
 * @private
 * @name get
 * @memberOf ListCache
 * @param {string} key The key of the value to get.
 * @returns {*} Returns the entry value.
 */
function listCacheGet(key) {
    var data = this.__data__,
        index = assocIndexOf(data, key);

    return index < 0 ? undefined : data[index][1];
}

/**
 * Checks if a list cache value for `key` exists.
 *
 * @private
 * @name has
 * @memberOf ListCache
 * @param {string} key The key of the entry to check.
 * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`.
 */
function listCacheHas(key) {
    return assocIndexOf(this.__data__, key) > -1;
}

/**
 * Sets the list cache `key` to `value`.
 *
 * @private
 * @name set
 * @memberOf ListCache
 * @param {string} key The key of the value to set.
 * @param {*} value The value to set.
 * @returns {Object} Returns the list cache instance.
 */
function listCacheSet(key, value) {
    var data = this.__data__,
        index = assocIndexOf(data, key);

    if (index < 0) {
        ++this.size;
        data.push([key, value]);
    } else {
        data[index][1] = value;
    }
    return this;
}

// Add methods to `ListCache`.
ListCache.prototype.clear = listCacheClear;
ListCache.prototype['delete'] = listCacheDelete;
ListCache.prototype.get = listCacheGet;
ListCache.prototype.has = listCacheHas;
ListCache.prototype.set = listCacheSet;

MapCache:

/**
 * Checks if `value` is suitable for use as unique object key.
 *
 * @private
 * @param {*} value The value to check.
 * @returns {boolean} Returns `true` if `value` is suitable, else `false`.
 */
function isKeyable(value) {
    var type = typeof value;
    return (type == 'string' || type == 'number' || type == 'symbol' || type == 'boolean')
        ? (value !== '__proto__')
        : (value === null);
}

/**
 * Gets the data for `map`.
 *
 * @private
 * @param {Object} map The map to query.
 * @param {string} key The reference key.
 * @returns {*} Returns the map data.
 */
function getMapData(map, key) {
    var data = map.__data__;
    return isKeyable(key)
        ? data[typeof key == 'string' ? 'string' : 'hash']
        : data.map;
}

/**
 * Creates a map cache object to store key-value pairs.
 *
 * @private
 * @constructor
 * @param {Array} [entries] The key-value pairs to cache.
 */
function MapCache(entries) {
    var index = -1,
        length = entries == null ? 0 : entries.length;

    this.clear();
    while (++index < length) {
        var entry = entries[index];
        this.set(entry[0], entry[1]);
    }
}

/**
 * Removes all key-value entries from the map.
 *
 * @private
 * @name clear
 * @memberOf MapCache
 */
 function mapCacheClear() {
    this.size = 0;
    this.__data__ = {
        'hash': new Hash,
        'map': new (Map || ListCache),
        'string': new Hash
    };
}

/**
 * Removes `key` and its value from the map.
 *
 * @private
 * @name delete
 * @memberOf MapCache
 * @param {string} key The key of the value to remove.
 * @returns {boolean} Returns `true` if the entry was removed, else `false`.
 */
function mapCacheDelete(key) {
    var result = getMapData(this, key)['delete'](key);
    this.size -= result ? 1 : 0;
    return result;
}

/**
 * Gets the map value for `key`.
 *
 * @private
 * @name get
 * @memberOf MapCache
 * @param {string} key The key of the value to get.
 * @returns {*} Returns the entry value.
 */
function mapCacheGet(key) {
    return getMapData(this, key).get(key);
}

/**
 * Checks if a map value for `key` exists.
 *
 * @private
 * @name has
 * @memberOf MapCache
 * @param {string} key The key of the entry to check.
 * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`.
 */
function mapCacheHas(key) {
    return getMapData(this, key).has(key);
}

/**
 * Sets the map `key` to `value`.
 *
 * @private
 * @name set
 * @memberOf MapCache
 * @param {string} key The key of the value to set.
 * @param {*} value The value to set.
 * @returns {Object} Returns the map cache instance.
 */
function mapCacheSet(key, value) {
    var data = getMapData(this, key),
        size = data.size;

    data.set(key, value);
    this.size += data.size == size ? 0 : 1;
    return this;
}

// Add methods to `MapCache`.
MapCache.prototype.clear = mapCacheClear;
MapCache.prototype['delete'] = mapCacheDelete;
MapCache.prototype.get = mapCacheGet;
MapCache.prototype.has = mapCacheHas;
MapCache.prototype.set = mapCacheSet;

代码中getMapData方法:

/**
 * Gets the data for `map`.
 *
 * @private
 * @param {Object} map The map to query.
 * @param {string} key The reference key.
 * @returns {*} Returns the map data.
 */
function getMapData(map, key) {
    var data = map.__data__;
    return isKeyable(key)
        ? data[typeof key == 'string' ? 'string' : 'hash']
        : data.map;
}

keystring 时取 data['string'] 对象,keynumberbooleansymbol 时取 data['hash'] 对象,keyobject 时取 data['map'], 也就是 Map 对象。

为什么不统一放入 Map 对象呢?

这种处理方式是为了兼容还不支持 Map 的环境。Map 主要是处理 keyObject 的情况,其他类型的 key 可以放入普通对象中。

而之所以要分 data['string']data['hash'] 是因为如果 keyboolean 类型时,会将 boolean 值转为字符串,就会和字符串'true' 或 'false' 冲突,因此分两个来存储。

Hash:

/**
 * Creates a hash object.
 *
 * @private
 * @constructor
 * @param {Array} [entries] The key-value pairs to cache.
 */
function Hash(entries) {
    var index = -1,
        length = entries == null ? 0 : entries.length;

    this.clear();
    while (++index < length) {
        var entry = entries[index];
        this.set(entry[0], entry[1]);
    }
}

/**
 * Removes all key-value entries from the hash.
 *
 * @private
 * @name clear
 * @memberOf Hash
 */
function hashClear() {
    this.__data__ = nativeCreate ? nativeCreate(null) : {};
    this.size = 0;
}

/**
 * Removes `key` and its value from the hash.
 *
 * @private
 * @name delete
 * @memberOf Hash
 * @param {Object} hash The hash to modify.
 * @param {string} key The key of the value to remove.
 * @returns {boolean} Returns `true` if the entry was removed, else `false`.
 */
function hashDelete(key) {
    var result = this.has(key) && delete this.__data__[key];
    this.size -= result ? 1 : 0;
    return result;
}

/**
 * Gets the hash value for `key`.
 *
 * @private
 * @name get
 * @memberOf Hash
 * @param {string} key The key of the value to get.
 * @returns {*} Returns the entry value.
 */
function hashGet(key) {
    var data = this.__data__;
    if (nativeCreate) {
        var result = data[key];
        return result === HASH_UNDEFINED ? undefined : result;
    }
    return hasOwnProperty.call(data, key) ? data[key] : undefined;
}

/**
 * Checks if a hash value for `key` exists.
 *
 * @private
 * @name has
 * @memberOf Hash
 * @param {string} key The key of the entry to check.
 * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`.
 */
function hashHas(key) {
    var data = this.__data__;
    return nativeCreate ? (data[key] !== undefined) : hasOwnProperty.call(data, key);
}

/**
 * Sets the hash `key` to `value`.
 *
 * @private
 * @name set
 * @memberOf Hash
 * @param {string} key The key of the value to set.
 * @param {*} value The value to set.
 * @returns {Object} Returns the hash instance.
 */
function hashSet(key, value) {
    var data = this.__data__;
    this.size += this.has(key) ? 0 : 1;
    data[key] = (nativeCreate && value === undefined) ? HASH_UNDEFINED : value;
    return this;
}

// Add methods to `Hash`.
Hash.prototype.clear = hashClear;
Hash.prototype['delete'] = hashDelete;
Hash.prototype.get = hashGet;
Hash.prototype.has = hashHas;
Hash.prototype.set = hashSet;

递归

lodash 采用递归的方式来实现深拷贝。之前《不用递归也能实现深拷贝》中提到深拷贝的一个缺点,嵌套多的情况下可能会超出执行栈长度而报错。可以采用深度优先遍历和广度优先遍历非递归方式来实现深拷贝,但这两种方式性能都没有递归的性能好,因为出栈入栈的操作次数要比递归的多很多。这里对非递归的方式再次进行性能优化。

export function originalCloneDeep<T extends Object>(source: T, cacheMap: WeakMap<Object, Object>) {
    const result = Array.isArray(source) ? [] : Object.create(null)
    const stack = [{ target: result, source }]
    while (stack.length) {
        const { source, target } = stack.pop()!
        for (let key in source) {
            if (source.hasOwnProperty(key) &&
                (getTypeof(source[key]) === 'object' || getTypeof(source[key]) === 'array') &&
                !cacheMap.has(<Object>source[key])) {
                target[key] = Array.isArray(source[key]) ? [] : Object.create(null)
                stack.push({ source: <any>source[key], target: target[key] })
                cacheMap.set(<Object>source[key], target[key])
            } else {
                target[key] = cacheMap.get(<Object>source[key]) || source[key]
            }
        }
    }
    return result
}

这里将深度优先遍历和广度优先遍历结合方式来减少 stack 的入栈和出栈操作次数。

处理 function

lodash 会忽略 function 的拷贝。

如果要实现复制函数本身:

  • toString + eval
const fn=()=>{
    console.log('fn')
}

const copyFn= eval(fn.toString())

copyFn() // fn

console.log(fn===copyFn) // false

缺点:

  • 安全问题:eval 内部代码可以获取前作用域和全局作用域的一些变量和方法,如果参数字符串内包含有恶意代码,直接执行就会引发安全问题。

  • 性能问题:将需要调用 javascript 解释器来将字符串代码转为机器码,重新解析,就带来性能销毁。

  • toString + new Function

const fn = () => {
    console.log(1111)
}

const copyFn = new Function(fn.toString())

copyFn()

console.log(fn === copyFn);

安全性要比 eval 好点,new Function 无法访问局部当前作用域,但可以访问全局作用域,new Function 同样会有性能消耗。

4. 过滤原型属性

/**
     * The base implementation of `_.keys` which doesn't treat sparse arrays as dense.
     *
     * @private
     * @param {Object} object The object to query.
     * @returns {Array} Returns the array of property names.
     */
    function baseKeys(object) {
      if (!isPrototype(object)) {
        return nativeKeys(object);
      }
      var result = [];
      for (var key in Object(object)) {
        if (hasOwnProperty.call(object, key) && key != 'constructor') {
          result.push(key);
        }
      }
      return result;
    }

hasOwnProperty 判断是否是自身属性,并且还排除了 constructor

使用 Object(object) 的形式是为了确保将传入的参数转换为一个对象。它的作用是将传入的参数强制转换为对象,即使参数本身就是一个对象也会进行转换。