libplist icon indicating copy to clipboard operation
libplist copied to clipboard

plist_to_bin optimization

Open xdeng opened this issue 5 years ago • 3 comments

The performance of the plist_to_bin function is poor when the plist file has 320000 nodes, I tried to modify the hashtable function and it was half faster.

hashtable.h

typedef struct hashentry_t {
	void *key;
	void *value;
} hashentry_t;

typedef unsigned int(*hash_func_t)(const void* key);
typedef int (*compare_func_t)(const void *a, const void *b);
typedef void (*free_func_t)(void *ptr);

typedef struct he_array_t {
	hashentry_t *pdata;
	long len;
	long capacity;
	long capacity_step;
} he_array_t;

typedef struct hashtable_t {
	he_array_t *entries[4096];
	size_t count;
	hash_func_t hash_func;
	compare_func_t compare_func;
	free_func_t free_func;
} hashtable_t;

hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func, free_func_t free_func);
void hash_table_destroy(hashtable_t *ht);

void hash_table_insert(hashtable_t* ht, void *key, void *value);
void* hash_table_lookup(hashtable_t* ht, void *key);
void hash_table_remove(hashtable_t* ht, void *key);

hashtable.c

he_array_t *he_array_new(int capacity)
{
	he_array_t *ha = (he_array_t*)malloc(sizeof(he_array_t));
	ha->pdata = (hashentry_t*)malloc(sizeof(hashentry_t) * capacity);
	ha->capacity = capacity;
	ha->capacity_step = (capacity > 4096) ? 4096 : capacity;
	ha->len = 0;
	return ha;
}

void he_array_free(he_array_t *ha)
{
	if (!ha) return;
	if (ha->pdata) {
		free(ha->pdata);
	}
	free(ha);
}

void he_array_insert(he_array_t *ha, hashentry_t *data, long array_index)
{
	long remaining;
	if (!ha || !ha->pdata) return;
	remaining = ha->capacity-ha->len;
	if (remaining == 0) {
		ha->pdata = realloc(ha->pdata, sizeof(hashentry_t) * (ha->capacity + ha->capacity_step));
		ha->capacity += ha->capacity_step;
	}
	if (array_index < 0 || array_index >= ha->len) {
		memcpy(&ha->pdata[ha->len], data, sizeof(hashentry_t));
	} else {
		memmove(&ha->pdata[array_index+1], &ha->pdata[array_index], (ha->len-array_index) * sizeof(hashentry_t));
		memcpy(&ha->pdata[array_index], data, sizeof(hashentry_t));
	}
	ha->len++;
}

void he_array_add(he_array_t *ha, hashentry_t *data)
{
	he_array_insert(ha, data, -1);
}

void he_array_remove(he_array_t *ha, long array_index)
{
	if (!ha || !ha->pdata || array_index < 0) return;
	if (ha->len == 0 || array_index >= ha->len) return;
	if (ha->len == 1) {
		memset(&ha->pdata[0], 0, sizeof(hashentry_t));
	} else {
		memmove(&ha->pdata[array_index], &ha->pdata[array_index+1], (ha->len-array_index-1) * sizeof(hashentry_t));
	}
	ha->len--;
}

void he_array_set(he_array_t *ha, hashentry_t *data, long array_index)
{
	if (!ha || !ha->pdata || array_index < 0) return;
	if (ha->len == 0 || array_index >= ha->len) return;
	memcpy(&ha->pdata[array_index], data, sizeof(hashentry_t));
}

hashentry_t* he_array_index(he_array_t *ha, long array_index)
{
	if (!ha) return NULL;
	if (array_index < 0 || array_index >= ha->len) {
		return NULL;
	}
	return &ha->pdata[array_index];
}

long he_array_size(he_array_t *ha)
{
	return ha->len;
}


hashtable_t* hash_table_new(hash_func_t hash_func, compare_func_t compare_func, free_func_t free_func)
{
	hashtable_t* ht = (hashtable_t*)malloc(sizeof(hashtable_t));
	int i;
	for (i = 0; i < 4096; i++) {
		ht->entries[i] = he_array_new(8);
	}
	ht->count = 0;
	ht->hash_func = hash_func;
	ht->compare_func = compare_func;
	ht->free_func = free_func;
	return ht;
}

void hash_table_destroy(hashtable_t *ht)
{
	int i = 0;

	if (!ht) return;

	for (i = 0; i < 4096; i++) {
		he_array_t* a = ht->entries[i];
		if(ht->free_func){
			int j;
			for (j = 0; j < he_array_size(a); j++)
			{
				hashentry_t* e = he_array_index(a, j);
				ht->free_func(e->value);
			}
		}
		he_array_free(a);
	}
	free(ht);
}

void hash_table_insert(hashtable_t* ht, void *key, void *value)
{
	unsigned int hash;
	int idx0, i;
	he_array_t* a;
	hashentry_t entry;

	if (!ht || !key) return;

	hash = ht->hash_func(key);

	idx0 = hash & 0xFFF;

	// get the idx0 list
	a = ht->entries[idx0];
	for (i = 0; i < he_array_size(a); i++)
	{
		hashentry_t* e = he_array_index(a, i);
		if(ht->compare_func(e->key, key)){
			// element already present. replace value.
			e->value = value;
			return;
		}
	}

	// if we get here, the element is not yet in the list.

	// make a new entry.
	entry.key = key;
	entry.value = value;
	he_array_add(a, &entry);
	ht->count++;
}

void* hash_table_lookup(hashtable_t* ht, void *key)
{
	unsigned int hash;
	int idx0, i;
	he_array_t* a;

	if (!ht || !key) return NULL;
	hash = ht->hash_func(key);

	idx0 = hash & 0xFFF;

	a = ht->entries[idx0];
	for (i = 0; i < he_array_size(a); i++)
	{
		hashentry_t* e = he_array_index(a, i);
		if(ht->compare_func(e->key, key)){
			return e->value;
		}
	}
	return NULL;
}

void hash_table_remove(hashtable_t* ht, void *key)
{
	unsigned int hash;
	int idx0, i;
	he_array_t* a;

	if (!ht || !key) return;

	hash = ht->hash_func(key);

	idx0 = hash & 0xFFF;

	// get the idx0 list
	a = ht->entries[idx0];
	for (i = 0; i < he_array_size(a); i++)
	{
		hashentry_t* e = he_array_index(a, i);
		if(ht->compare_func(e->key, key)){
			// found element, remove it from the list
			if(ht->free_func){
				ht->free_func(e->value);
			}
			he_array_remove(a, i);
			return;
		}
	}
}

xdeng avatar Dec 10 '19 07:12 xdeng

The code is very poorly written for reference only. you can try try

xdeng avatar Dec 10 '19 07:12 xdeng

Any interest in finishing this? The array thing looks interesting.

Artoria2e5 avatar May 28 '20 15:05 Artoria2e5

Yes. This will be addressed after the next release which is due in a few days.

nikias avatar May 28 '20 15:05 nikias