/*
 *   (C) Copyright IBM Corp. 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: Multipath Plugin - LVM PV detection.
 * File: evms2/engine/plugins/multipath/mp_lvm.c
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>
#include "multipath.h"

/* NOTE(review): not referenced anywhere in this file; presumably a path-test
 * interval consumed elsewhere in the plugin — units not visible here, confirm.
 */
#define LVM_TEST_INTERVAL 30
/* Path-selector name written into (and expected from) the single priority
 * group of an LVM multipath target.
 */
#define LVM_SELECTOR "round-robin"

/* Objects recognised as LVM PVs by mp_lvm_probe(), held here until
 * mp_lvm_process() groups them. Allocated by mp_lvm_setup() and destroyed by
 * mp_lvm_cleanup().
 */
static list_anchor_t pv_list;

/**
 * mp_lvm_setup
 *
 * Create the list that collects candidate LVM PVs during discovery.
 *
 * Returns 0 on success, or ENOMEM if the list could not be allocated.
 **/
int mp_lvm_setup(void)
{
	int rc;

	LOG_ENTRY();

	pv_list = EngFncs->allocate_list();
	rc = pv_list ? 0 : ENOMEM;

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * mp_lvm_cleanup
 *
 * Destroy the discovery list created by mp_lvm_setup(). The list handle is
 * reset afterwards so that a stale pointer is never left behind, and the
 * call is skipped when no list exists (e.g. mp_lvm_setup() failed).
 **/
void mp_lvm_cleanup(void)
{
	LOG_ENTRY();

	if (pv_list) {
		EngFncs->destroy_list(pv_list);
		pv_list = NULL;
	}

	LOG_EXIT_VOID();
}

/**
 * endian_convert_pv
 * @pv: pv_disk_t structure to be converted
 *
 * Run every multi-byte numeric field of the PV header through the
 * DISK_TO_CPU16/DISK_TO_CPU32 macros, in place. The same routine is used
 * for both conversion directions (disk-order <-> memory-order).
 **/
static inline void endian_convert_pv(pv_disk_t * pv)
{
/* Convert a single field of *pv in place. */
#define PV_CONV16(field) (pv->field = DISK_TO_CPU16(pv->field))
#define PV_CONV32(field) (pv->field = DISK_TO_CPU32(pv->field))

	LOG_ENTRY();

	PV_CONV16(version);

	/* Base/size pairs describing the on-disk metadata areas. */
	PV_CONV32(pv_on_disk.base);
	PV_CONV32(pv_on_disk.size);
	PV_CONV32(vg_on_disk.base);
	PV_CONV32(vg_on_disk.size);
	PV_CONV32(pv_uuidlist_on_disk.base);
	PV_CONV32(pv_uuidlist_on_disk.size);
	PV_CONV32(lv_on_disk.base);
	PV_CONV32(lv_on_disk.size);
	PV_CONV32(pe_on_disk.base);
	PV_CONV32(pe_on_disk.size);

	/* Scalar PV attributes. */
	PV_CONV32(pv_major);
	PV_CONV32(pv_number);
	PV_CONV32(pv_status);
	PV_CONV32(pv_allocatable);
	PV_CONV32(pv_size);
	PV_CONV32(lv_cur);
	PV_CONV32(pe_size);
	PV_CONV32(pe_total);
	PV_CONV32(pe_allocated);
	PV_CONV32(pe_start);

	LOG_EXIT_VOID();

#undef PV_CONV16
#undef PV_CONV32
}

/**
 * read_pv
 * @object:	Storage object to read the metadata from
 * @pv:		Location to put a pointer to the PV metadata
 *
 * Read LVM_PV_DISK_SIZE bytes at offset LVM_PV_DISK_BASE from @object and
 * decide whether they hold an LVM PV header: the id must be "HM", the
 * version 1 or 2, and the recorded pv_size must equal the object's size.
 *
 * On success 0 is returned and *pv points to a newly allocated, CPU-order
 * copy of the header, which the caller owns. On any failure *pv is NULL,
 * nothing remains allocated, and the return value is ENOMEM (allocation
 * failure), EINVAL (not an LVM PV) or the error reported by READ().
 **/
static int read_pv(storage_object_t * object, pv_disk_t ** pv)
{
	pv_disk_t * raw;
	pv_disk_t * copy;
	int is_lvm_pv;
	int rc;

	LOG_ENTRY();
	LOG_EXTRA("Reading LVM PV metadata from object %s\n", object->name);

	*pv = NULL;

	/* Scratch buffer large enough for the whole on-disk PV area. */
	raw = EngFncs->engine_alloc(LVM_PV_DISK_SIZE);
	if (!raw) {
		LOG_CRITICAL("Memory error creating buffer to read LVM PV "
			     "metadata from object %s\n", object->name);
		rc = ENOMEM;
		goto out;
	}

	/* Fetch the on-disk PV area. */
	rc = READ(object, bytes_to_sectors(LVM_PV_DISK_BASE),
		  bytes_to_sectors(LVM_PV_DISK_SIZE), raw);
	if (rc) {
		LOG_SERIOUS("Error reading LVM PV metadata from object %s\n",
			    object->name);
		goto out;
	}

	/* Bring the numeric fields into CPU order before inspecting them. */
	endian_convert_pv(raw);

	/* Signature, supported version (1 or 2) and matching size. */
	is_lvm_pv = raw->id[0] == 'H' &&
		    raw->id[1] == 'M' &&
		    (raw->version == 1 || raw->version == 2) &&
		    raw->pv_size == object->size;
	if (!is_lvm_pv) {
		LOG_EXTRA("Object %s is not an LVM PV\n", object->name);
		rc = EINVAL;
		goto out;
	}

	/* Valid PV: hand back a right-sized private copy of the header. */
	copy = EngFncs->engine_alloc(sizeof(pv_disk_t));
	if (!copy) {
		LOG_CRITICAL("Memory error creating new PV for object %s\n",
			     object->name);
		rc = ENOMEM;
		goto out;
	}

	LOG_DEBUG("Object %s is an LVM PV\n", object->name);
	memcpy(copy, raw, sizeof(pv_disk_t));
	*pv = copy;

out:
	/* The scratch buffer is released on every path (it may be NULL). */
	EngFncs->engine_free(raw);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * mp_lvm_probe
 * @child: Object to examine.
 *
 * Determine if this object is an LVM PV. If so, queue the object on the LVM
 * PV list for later processing by mp_lvm_process() and attach the PV header
 * copy to the object's consuming_private_data.
 *
 * Returns 0 if the object was recognised and queued, the error from
 * read_pv() if it is not an LVM PV (or could not be read), or ENOMEM if the
 * object could not be added to the list. In every failure case the object
 * is left untouched and no memory remains allocated.
 **/
int mp_lvm_probe(storage_object_t *child)
{
	pv_disk_t *pv;
	int rc;

	LOG_ENTRY();

	rc = read_pv(child, &pv);
	if (!rc) {
		/* Only attach the PV data once the object is really queued;
		 * otherwise the copy would leak and the caller would be told
		 * the object had been claimed when it was not.
		 */
		if (EngFncs->insert_thing(pv_list, child, 0, NULL)) {
			child->consuming_private_data = pv;
		} else {
			EngFncs->engine_free(pv);
			rc = ENOMEM;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * compare_pvs
 * @pv1: First PV header.
 * @pv2: Second PV header.
 *
 * Byte-wise comparison of two PV headers. Returns 0 when the two structures
 * are identical, and a non-zero memcmp() result otherwise.
 **/
static int compare_pvs(pv_disk_t *pv1, pv_disk_t *pv2)
{
	int diff;

	LOG_ENTRY();

	/* A raw memory compare is all the matching we do. */
	diff = memcmp(pv1, pv2, sizeof(*pv1));

	LOG_EXIT_INT(diff);
	return diff;
}

/**
 * allocate_parent
 * @child: First path (object) of the multipath PV.
 * @pv:    PV header describing the multipath PV.
 *
 * Allocate a parent segment for an LVM multipath-PV. The segment is named
 * "<MP_NAME>/lvm/<vg_name>-pv<pv_number>", takes its size and geometry from
 * @child, records @child's PV data as the multipath type_data and is linked
 * to @child as its parent.
 *
 * Returns the new segment, or NULL if multipath_allocate() failed.
 **/
static storage_object_t *allocate_parent(storage_object_t *child, pv_disk_t *pv)
{
	storage_object_t *parent;
	char name[EVMS_NAME_SIZE];
	multipath_t *mp;

	LOG_ENTRY();

	/* Generate a name based on the VG name and the PV number. The PV
	 * number is a 32-bit on-disk value (see endian_convert_pv), so print
	 * it as unsigned rather than with a signed conversion.
	 */
	snprintf(name, sizeof(name), "%s/lvm/%s-pv%u",
		 MP_NAME, pv->vg_name, (unsigned int)pv->pv_number);

	LOG_DEBUG("Creating multipath segment %s\n", name);

	parent = multipath_allocate(name, child->size, MULTIPATH_LVM);
	if (parent) {
		parent->geometry = child->geometry;
		mp = parent->private_data;
		mp->type_data = child->consuming_private_data;
		make_parent_and_child(parent, child);
	}

	LOG_EXIT_PTR(parent);
	return parent;
}

/**
 * find_path_status
 * @info:   Info/status string to search (may be NULL).
 * @device: "major:minor" token to look for.
 *
 * Locate @device in @info as a complete, whitespace-delimited token, so that
 * e.g. "8:1" is not mistaken for part of "8:16" or "68:1".
 *
 * Returns a pointer to the text immediately following the token, or NULL if
 * the token does not occur.
 **/
static const char *find_path_status(const char *info, const char *device)
{
	size_t len = strlen(device);
	const char *hit;

	if (!info) {
		return NULL;
	}

	for (hit = strstr(info, device); hit; hit = strstr(hit + 1, device)) {
		int starts_token = (hit == info ||
				    hit[-1] == ' ' || hit[-1] == '\t');
		int ends_token = (hit[len] == ' ' || hit[len] == '\t');

		if (starts_token && ends_token) {
			return hit + len;
		}
	}

	return NULL;
}

/**
 * update_target_info
 * @targets: Multipath target whose first priority group is updated.
 * @info:    Info string containing "major:minor <state> <fail_count>"
 *           entries for the paths.
 *
 * Run through the paths in the MP target and update with status from the
 * info string. A path whose state character is not 'A'/'a' is marked as
 * failed, and its fail count is recorded. Paths that are not mentioned in
 * the info string, or whose entry cannot be parsed, are left untouched.
 **/
static void update_target_info(dm_target_t *targets, char *info)
{
	dm_target_multipath_t *mp = targets->data.multipath;
	dm_priority_group_t *pg = &mp->group[0];
	char device[25];
	const char *status;
	char active;
	unsigned int fail_count;
	int i;

	LOG_ENTRY();

	for (i = 0; i < pg->num_paths; i++) {
		/* Find major:minor in info string. */
		snprintf(device, sizeof(device), "%u:%u",
			 pg->path[i].device.major,
			 pg->path[i].device.minor);
		status = find_path_status(info, device);
		if (!status) {
			continue;
		}

		/* Only trust the values if both fields were parsed. */
		if (sscanf(status, " %c %u", &active, &fail_count) != 2) {
			continue;
		}

		if (!(active == 'A' || active == 'a')) {
			pg->path[i].has_failed = TRUE;
		}
		pg->path[i].fail_count = fail_count;
	}

	LOG_EXIT_VOID();
}

/**
 * compare_mapping
 * @object: Multipath segment as discovered by the engine.
 * @target: Multipath target currently reported for the segment.
 *
 * Check whether the reported mapping is compatible with what the engine
 * discovered.
 *
 * Returns 0 if the mapping matches, EINVAL otherwise.
 **/
static int compare_mapping(storage_object_t *object, dm_target_t *target)
{
	dm_target_multipath_t *mp = target->data.multipath;
	dm_priority_group_t *pg = &mp->group[0];
	storage_object_t *child;
	list_element_t itr;
	int num_paths = EngFncs->list_count(object->child_objects);
	int active_paths = 0;
	int found, i, rc = EINVAL;

	LOG_ENTRY();

	/* Basic target information must match: a single target covering
	 * the whole object.
	 */
	if (target->start != 0 ||
	    target->length != object->size ||
	    target->next != NULL) {
		goto out;
	}

	/* Number of groups must match. */
	if (mp->num_groups != 1) {
		goto out;
	}

	/* Priority-group information must match. The number of paths in the
	 * kernel must be equal to *or greater than* the number found by the
	 * engine. If a path is currently down, EVMS won't discover it, but
	 * that doesn't mean it doesn't exist. :)
	 */
	if (strncmp(pg->selector, LVM_SELECTOR, DM_SELECTOR_NAME_SIZE) != 0 ||
	    pg->num_paths < num_paths) {
		goto out;
	}

	/* Every discovered path must be present in the kernel mapping.
	 * Count how many of them the kernel considers healthy.
	 */
	LIST_FOR_EACH(object->child_objects, itr, child) {
		found = FALSE;
		for (i = 0; i < pg->num_paths; i++) {
			if (pg->path[i].device.major == child->dev_major &&
			    pg->path[i].device.minor == child->dev_minor) {
				found = TRUE;
				if (!pg->path[i].has_failed) {
					active_paths++;
				}
			}
		}
		if (!found) {
			goto out;
		}
	}

	/* Comparison passes if kernel has same number of (or more)
	 * active paths as the engine found.
	 */
	if (active_paths >= num_paths) {
		rc = 0;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * check_state
 * @parent: Multipath segment to check.
 *
 * Check if this segment is active. If not, it needs to be activated. If it
 * is active, get the current mapping and path status and compare them with
 * what the engine discovered.
 *
 * If any step fails, or the mapping does not match, the segment is flagged
 * for (re)activation. The target list and info string obtained for the
 * comparison are released before returning.
 **/
static void check_state(storage_object_t *parent)
{
	multipath_t *mp = parent->private_data;
	dm_target_t *target = NULL;
	char *info = NULL;
	int rc;

	LOG_ENTRY();

	rc = EngFncs->dm_update_status(parent);
	if (rc) {
		goto out;
	}

	if (!(parent->flags & SOFLAG_ACTIVE)) {
		rc = ENODEV;
		goto out;
	}

	rc = EngFncs->dm_get_targets(parent, &target);
	if (rc) {
		goto out;
	}

	rc = EngFncs->dm_get_info(parent, &info);
	if (rc) {
		goto out;
	}

	/* Fold the per-path status into the target before comparing. */
	update_target_info(target, info);

	rc = compare_mapping(parent, target);

out:
	if (rc) {
		parent->flags |= SOFLAG_NEEDS_ACTIVATE;
		mp->flags |= MP_FLAG_ACTIVATE_SEGMENT;
	}

	/* Both results are only needed for the comparison above; release
	 * them on every path so repeated discovery does not leak.
	 */
	if (target) {
		EngFncs->dm_deallocate_targets(target);
	}
	EngFncs->engine_free(info);

	LOG_EXIT_VOID();
}

/**
 * mp_lvm_process
 * @output_list: List receiving the resulting objects.
 *
 * Post-process the list of LVM PVs. This is where we determine if an MP-PV is
 * present, and create a new segment for it.
 *
 * Each queued object is compared with every remaining queued object. Objects
 * with identical PV headers are treated as paths to the same PV and grouped
 * under one new multipath segment, which is placed on @output_list. An object
 * with no duplicate has its PV data freed and is placed on @output_list
 * unchanged.
 *
 * Returns the number of multipath segments created.
 **/
int mp_lvm_process(list_anchor_t output_list)
{
	storage_object_t *object1, *object2, *parent;
	list_element_t itr1, itr2, itr3;
	pv_disk_t *pv1, *pv2;
	int count = 0;

	LOG_ENTRY();

	/* Can't use a LIST_FOR_EACH_SAFE here, since we're doing that
	 * inside of this loop, and they can't be nested. Instead, always
	 * return to the start of the list.
	 */
	for (object1 = EngFncs->first_thing(pv_list, &itr1);
	     object1; object1 = EngFncs->first_thing(pv_list, &itr1)) {

		/* Remove the first item on the list, and compare it against
		 * all remaining items in the list.
		 */
		EngFncs->remove_element(itr1);
		pv1 = object1->consuming_private_data;
		parent = NULL;

		LIST_FOR_EACH_SAFE(pv_list, itr2, itr3, object2) {

			/* Compare these two PVs. */
			LOG_DEBUG("Comparing PVs %s and %s\n",
				  object1->name, object2->name);
			pv2 = object2->consuming_private_data;
			if (compare_pvs(pv1, pv2)) {
				continue;
			}

			if (!parent) {
				/* Create a new MP-PV segment. */
				parent = allocate_parent(object1, pv1);
				if (!parent) {
					break;
				}
				count++;
			}

			make_parent_and_child(parent, object2);
			EngFncs->remove_element(itr2);
		}

		if (parent) {
			/* Check the state only now that every matching path
			 * has been attached: the mapping comparison walks the
			 * parent's child list, so running it earlier would
			 * only ever validate the first path.
			 */
			check_state(parent);
			check_daemon(parent);
			EngFncs->insert_thing(output_list, parent, 0, NULL);
		} else {
			/* Didn't find a multipath-PV. Delete the private
			 * data and return the PV to the output list.
			 */
			LOG_DEBUG("%s is not a multipath PV.\n", object1->name);
			EngFncs->engine_free(pv1);
			object1->consuming_private_data = NULL;
			EngFncs->insert_thing(output_list, object1, 0, NULL);
		}
	}

	LOG_EXIT_INT(count);
	return count;
}

/**
 * mp_lvm_allocate
 * @object: Multipath segment being set up (unused).
 *
 * Nothing to allocate for the LVM type: the PV structs live in the
 * consuming-private-data of the child objects. Always returns 0.
 **/
int mp_lvm_allocate(storage_object_t *object)
{
	int rc = 0;

	LOG_ENTRY();
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * mp_lvm_discard
 * @object: Multipath segment being discarded.
 *
 * Detach every child from the segment and free the PV struct hanging off
 * each child's consuming-private-data. The segment's type_data pointer is
 * cleared first. Always returns 0.
 **/
int mp_lvm_discard(storage_object_t *object)
{
	multipath_t *mp_data = object->private_data;
	storage_object_t *path;
	list_element_t cur, next;
	int rc = 0;

	LOG_ENTRY();

	mp_data->type_data = NULL;

	/* The safe iterator is required: each pass unlinks the current child. */
	LIST_FOR_EACH_SAFE(object->child_objects, cur, next, path) {
		unmake_parent_and_child(object, path);
		EngFncs->engine_free(path->consuming_private_data);
		path->consuming_private_data = NULL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * mp_lvm_delete
 * @object: Multipath segment being deleted.
 *
 * Since we have no metadata to delete, delete can simply call discard.
 * Always returns 0.
 **/
int mp_lvm_delete(storage_object_t *object)
{
	int rc = 0;

	LOG_ENTRY();

	(void) mp_lvm_discard(object);

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * mp_lvm_map
 * @object: In: the multipath segment. Out: the child selected for the I/O.
 * @lsn:    Starting sector (left unchanged).
 * @count:  Sector count (left unchanged).
 *
 * Redirect the request to the first object on the segment's child list;
 * no offset is applied.
 *
 * Returns 0 on success, or EIO if the segment has no children (in which
 * case *object is not modified).
 **/
int mp_lvm_map(storage_object_t **object, lsn_t *lsn, sector_count_t *count)
{
	storage_object_t *first_path;
	int rc;

	LOG_ENTRY();

	first_path = EngFncs->first_thing((*object)->child_objects, NULL);
	if (!first_path) {
		rc = EIO;
	} else {
		*object = first_path;
		rc = 0;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * mp_lvm_build_targets
 * @object:  Multipath segment to describe.
 * @targets: Receives the newly allocated target on success.
 *
 * Build a multipath Device-Mapper target for this segment: one target
 * starting at 0 and spanning the whole object, with a single priority group
 * using the LVM_SELECTOR path selector and one path per child object.
 *
 * Returns 0 on success, or ENOMEM if the target could not be allocated
 * (in which case *targets is not written).
 **/
int mp_lvm_build_targets(storage_object_t *object, dm_target_t **targets)
{
	dm_target_t *target;
	dm_priority_group_t *group;
	storage_object_t *path;
	list_element_t itr;
	int num_children = EngFncs->list_count(object->child_objects);
	int idx = 0;
	int rc = 0;

	LOG_ENTRY();

	target = EngFncs->dm_allocate_target(DM_TARGET_MULTIPATH, 0,
					     object->size, num_children, 1);
	if (target) {
		group = &target->data.multipath->group[0];

		strncpy(group->selector, LVM_SELECTOR,
			DM_SELECTOR_NAME_SIZE);
		group->num_paths = num_children;

		/* One path entry per child, in child-list order. */
		LIST_FOR_EACH(object->child_objects, itr, path) {
			group->path[idx].device.major = path->dev_major;
			group->path[idx].device.minor = path->dev_minor;
			idx++;
		}

		*targets = target;
	} else {
		rc = ENOMEM;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

