/*
 * drivers/video/tegra/host/gk20a/clk_gk20a.c
 *
 * GK20A Clocks
 *
 * Copyright (c) 2011-2013, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/clkdev.h>
#include <linux/delay.h>	/* for mdelay */
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

#include "../dev.h"

#include "clk_pllg.h"
#include "gk20a.h"
#include "gk20a_dvfs.h"
#include "hw_trim_gk20a.h"
#include "hw_timer_gk20a.h"

#define nvhost_dbg_clk(fmt, arg...) \
	nvhost_dbg(dbg_clk, fmt, ##arg)

static int num_gpu_cooling_freq;
static struct gpufreq_table_data *gpu_cooling_freq;
static struct gk20a *gk20a;

struct gpufreq_table_data *tegra_gpufreq_table_get(void)
{
	return gpu_cooling_freq;
}

unsigned int tegra_gpufreq_table_size_get(void)
{
	return num_gpu_cooling_freq;
}

static u8 pl_to_div[] = {
/* PL:   0, 1, 2, 3, 4, 5, 6,  7,  8,  9, 10, 11, 12, 13, 14 */
/* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32 };

struct tegra_clk_pllg_params pllg_params = {
	.min_freq = 144, .max_freq = 2064,
	.min_vco = 1000, .max_vco = 2064,
	.min_u = 12, .max_u = 38,
	.min_m = 1, .max_m = 255,
	.min_n = 8, .max_n = 255,
	.min_pl = 1, .max_pl = 32,
};
/* Needed by the GK20A PMU module */
EXPORT_SYMBOL(pllg_params);

/**
 * gk20a_init_clk_gpcpll - initialize the internal gpu clock
 *
 * @g: instance of gk20a whose platform device provides the reference clock
 *
 * This function should be called during the probe phase.
 */
int gk20a_init_clk_gpcpll(struct gk20a *g)
{
	void __iomem *clk_base;
	struct clk *clk, *parent;
	const char *parent_name;
	struct platform_device *pdev = g->dev;
	struct device_node *np = pdev->dev.of_node;

	if (!g->regs) {
		nvhost_err(dev_from_gk20a(g),
			"tegra124 gk20a bar0 not initialized yet");
		return -EPERM;
	}
	clk_base = g->regs;

	/*
	 * The reference clock of gk20a should always be the first item of the
	 * clocks in DT.
	 */
	parent = clk_get(&pdev->dev, NULL);
	if (IS_ERR(parent)) {
		nvhost_err(dev_from_gk20a(g), "pllg parent not initialized");
		return -EPERM;
	}
	parent_name = __clk_get_name(parent);

	clk = tegra_clk_register_pllg("gpcpll", parent_name, clk_base, 0,
				      &pllg_params, NULL);
	clk_register_clkdev(clk, "gpcpll", "tegra_gk20a");
	of_clk_add_provider(np, of_clk_src_simple_get, clk);

	gk20a = g;

	return 0;
}

static int gk20a_init_clk_reset_enable_hw(struct gk20a *g)
{
	nvhost_dbg_fn("");
	return 0;
}

struct clk *gk20a_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;

		clk = clk_get_sys("tegra_gk20a", "gpcpll");
		if (IS_ERR(clk)) {
			nvhost_err(dev_from_gk20a(g),
				"fail to get tegra gpu clk tegra_gk20a/gpcpll");
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}

static int gk20a_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	unsigned long *freqs;
	int err, num_freqs;

	nvhost_dbg_fn("");

	if (clk->sw_ready) {
		nvhost_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	err = gk20a_dvfs_get_freqs(g, &freqs, &num_freqs);
	if (!err) {
		int i, j;

		/* init j for inverse traversal of frequencies */
		j = num_freqs - 1;

		gpu_cooling_freq = kzalloc(
				(1 + num_freqs) * sizeof(*gpu_cooling_freq),
				GFP_KERNEL);

		/* store frequencies in inverse order */
		for (i = 0; i < num_freqs; ++i, --j) {
			gpu_cooling_freq[i].index = i;
			gpu_cooling_freq[i].frequency = freqs[j];
		}

		/* add 'end of table' marker */
		gpu_cooling_freq[i].index = i;
		gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END;

		/* store number of frequencies */
		num_gpu_cooling_freq = num_freqs + 1;
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	nvhost_dbg_fn("done");
	return 0;
}

static int gk20a_init_clk_setup_hw(struct gk20a *g)
{
	u32 data;

	nvhost_dbg_fn("");

	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data,
			trim_sys_gpc2clk_out_sdiv14_m() |
			trim_sys_gpc2clk_out_vcodiv_m() |
			trim_sys_gpc2clk_out_bypdiv_m(),
			trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
			trim_sys_gpc2clk_out_vcodiv_by1_f() |
			trim_sys_gpc2clk_out_bypdiv_f(0));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	return 0;
}

int gk20a_init_clk_support(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	u32 err;
	int min_rate;

	nvhost_dbg_fn("");

	clk->g = g;

	err = gk20a_init_clk_reset_enable_hw(g);
	if (err)
		return err;

	err = gk20a_init_clk_setup_sw(g);
	if (err)
		return err;

	mutex_lock(&clk->clk_mutex);
	clk->clk_hw_on = true;
	err = gk20a_init_clk_setup_hw(g);
	mutex_unlock(&clk->clk_mutex);
	if (err)
		return err;

	/* Get the minimal rate */
	min_rate = gk20a_clk_round_rate(g, 0);
	if (!min_rate) {
		nvhost_err(dev_from_gk20a(g),
			"failed to get gpcpll init rate\n");
		return -EINVAL;
	}

	err = gk20a_dvfs_adjust_voltage(g, min_rate);
	if (err) {
		nvhost_err(dev_from_gk20a(g),
			"failed to set voltage for %d MHz\n", min_rate);
		return err;
	}

	err = gk20a_clk_set_rate(g, min_rate);
	if (err) {
		nvhost_err(dev_from_gk20a(g),
			"failed to set gpcpll to %d MHz\n", min_rate);
		return err;
	}

	/* FIXME: this effectively prevents host level clock gating */
	err = clk_prepare_enable(g->clk.tegra_clk);
	if (err)
		return err;

	gk20a_writel(g, timer_pri_timeout_r(),
		timer_pri_timeout_period_f(0x186A0) |
		timer_pri_timeout_en_m());

	return err;
}

u32 gk20a_clk_get_rate(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;

	return clk_get_rate(clk->tegra_clk) / MHZ;
}

/**
 * gk20a_clk_round_rate - round rate for gpcpll
 *
 * @g: gk20a instance
 * @rate: target rate in MHz
 */
int gk20a_clk_round_rate(struct gk20a *g, u32 rate)
{
	unsigned long speed_cap;

	/* make sure the clock is available
	 */
	if (!gk20a_clk_get(g))
		return rate;

	speed_cap = g->clk.speed_cap / KHZ;
	if (speed_cap && rate > speed_cap)
		rate = speed_cap;

	return clk_round_rate(g->clk.tegra_clk, rate * MHZ) / MHZ;
}

/**
 * gk20a_clk_set_rate - set rate for gpcpll
 *
 * @g: gk20a instance
 * @rate: target rate in MHz
 */
int gk20a_clk_set_rate(struct gk20a *g, u32 rate)
{
	struct clk_gk20a *clk = &g->clk;
	unsigned long speed_cap;

	speed_cap = clk->speed_cap / KHZ;
	if (speed_cap && rate > speed_cap)
		rate = speed_cap;

	return clk_set_rate(clk->tegra_clk, rate * MHZ);
}

int gk20a_suspend_clk_support(struct gk20a *g)
{
	clk_disable_unprepare(g->clk.tegra_clk);
	return 0;
}

/**
 * tegra_gpu_set_speed_cap - set the speed cap for gk20a
 *
 * @speed_cap: the cap value in KHz
 *
 * If @speed_cap is zero, frequency capping is disabled.
 */
int tegra_gpu_set_speed_cap(unsigned long *speed_cap)
{
	struct clk_gk20a *clk;

	if (!gk20a) {
		WARN(1, "failed to find gpu instance\n");
		return -EPERM;
	}

	if (!speed_cap)
		return -EINVAL;

	clk = &gk20a->clk;
	if (!clk->sw_ready)
		return -EINVAL;

	if (*speed_cap)
		*speed_cap = clk_round_rate(clk->tegra_clk,
					    *speed_cap * KHZ) / KHZ;

	mutex_lock(&clk->clk_mutex);
	if (*speed_cap < (gk20a_dvfs_get_max_freq(gk20a) * KHZ)) {
		clk->speed_cap = *speed_cap;
	} else {
		clk->speed_cap = 0;
		*speed_cap = 0;
	}
	mutex_unlock(&clk->clk_mutex);

	if (*speed_cap && (*speed_cap < (gk20a_clk_get_rate(gk20a) * KHZ)))
		return gk20a_clk_set_rate(gk20a, *speed_cap / KHZ);
	else
		return 0;
}

#ifdef CONFIG_DEBUG_FS

static int rate_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	*val = (u64)gk20a_clk_get_rate(g);
	return 0;
}
static int rate_set(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *)data;
	return gk20a_clk_set_rate(g, (u32)val);
}
DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");

static int cap_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	*val = g->clk.speed_cap / KHZ;
	return 0;
}
static int cap_set(void *data, u64 val)
{
	unsigned long rate = val * KHZ;

	return tegra_gpu_set_speed_cap(&rate);
}
DEFINE_SIMPLE_ATTRIBUTE(cap_fops, cap_get, cap_set, "%llu\n");

static int pll_reg_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 reg, m, n, pl, f;

	mutex_lock(&g->clk.clk_mutex);
	if (!g->clk.clk_hw_on) {
		seq_printf(s, "gk20a powered down - no access to registers\n");
		mutex_unlock(&g->clk.clk_mutex);
		return 0;
	}

	reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	seq_printf(s, "cfg = 0x%x : %s : %s\n", reg,
		   trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
		   trim_sys_gpcpll_cfg_pll_lock_v(reg) ?
"locked" : "unlocked"); reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r()); m = trim_sys_gpcpll_coeff_mdiv_v(reg); n = trim_sys_gpcpll_coeff_ndiv_v(reg); pl = trim_sys_gpcpll_coeff_pldiv_v(reg); f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]); seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl); seq_printf(s, " : pll_f(gpu_f) = %u(%u) MHz\n", f, f/2); mutex_unlock(&g->clk.clk_mutex); return 0; } static int pll_reg_open(struct inode *inode, struct file *file) { return single_open(file, pll_reg_show, inode->i_private); } static const struct file_operations pll_reg_fops = { .open = pll_reg_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static int monitor_get(void *data, u64 *val) { struct gk20a *g = (struct gk20a *)data; struct clk_gk20a *clk = &g->clk; u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */ u32 clkin = clk->gpc_pll.clk_in; u32 count1, count2; if (!__clk_is_enabled(clk->tegra_clk)) { *val = 0; return 0; } gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f()); gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0), trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() | trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() | trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle)); /* start */ /* It should take about 8us to finish 100 cycle of 12MHz. But longer than 100us delay is required here. */ gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0)); udelay(2000); count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); udelay(100); count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0)); *val = (u64)(trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2) * clkin / ncycle); if (count1 != count2) return -EBUSY; return 0; } DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n"); int clk_gk20a_debugfs_init(struct platform_device *dev) { struct dentry *d; struct nvhost_device_data *pdata = platform_get_drvdata(dev); struct gk20a *g = get_gk20a(dev); d = debugfs_create_file( "rate", S_IRUGO|S_IWUSR, pdata->debugfs, g, &rate_fops); if (!d) goto err_out; d = debugfs_create_file( "cap", S_IRUGO|S_IWUSR, pdata->debugfs, g, &cap_fops); if (!d) goto err_out; d = debugfs_create_file( "pll_reg", S_IRUGO, pdata->debugfs, g, &pll_reg_fops); if (!d) goto err_out; d = debugfs_create_file( "monitor", S_IRUGO, pdata->debugfs, g, &monitor_fops); if (!d) goto err_out; return 0; err_out: pr_err("%s: Failed to make debugfs node\n", __func__); debugfs_remove_recursive(pdata->debugfs); return -ENOMEM; } #endif /* CONFIG_DEBUG_FS */