* This code is released under the GNU General Public License version 2 or
* later.
*/
-#include <linux/mc146818rtc.h>
+#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
-#include <asm/mach-bigsmp/mach_apic.h>
#include <asm/mmu_context.h>
-#include <asm/idle.h>
-#include <asm/genapic.h>
-#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/tsc.h>
+#include <asm/irq_vectors.h>
-struct bau_control **uv_bau_table_bases;
-static int uv_bau_retry_limit;
-static int uv_nshift; /* position of pnode (which is nasid>>1) */
-static unsigned long uv_mmask;
+static struct bau_control **uv_bau_table_bases __read_mostly;
+static int uv_bau_retry_limit __read_mostly;
-char *status_table[] = {
- "IDLE",
- "ACTIVE",
- "DESTINATION TIMEOUT",
- "SOURCE TIMEOUT"
-};
+/* base pnode in this partition */
+static int uv_partition_base_pnode __read_mostly;
-DEFINE_PER_CPU(struct ptc_stats, ptcstats);
-DEFINE_PER_CPU(struct bau_control, bau_control);
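+/* mask of the node-local offset bits of a global physical address */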
+static unsigned long uv_mmask __read_mostly;
+
+static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+static DEFINE_PER_CPU(struct bau_control, bau_control);
+
+/*
+ * Determine the first node on a blade.
+ */
+static int __init blade_to_first_node(int blade)
+{
+ int node, b;
+
+ for_each_online_node(node) {
+ b = uv_node_to_blade_id(node);
+ if (blade == b)
+ return node;
+ }
+ return -1; /* shouldn't happen */
+}
+
+/*
+ * Determine the apicid of the first cpu on a blade.
+ */
+static int __init blade_to_first_apicid(int blade)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (blade == uv_cpu_to_blade_id(cpu))
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return -1;
+}
/*
* Free a software acknowledge hardware resource by clearing its Pending
* clear of the Timeout bit (as well) will free the resource. No reply will
* be sent (the hardware will only do one reply per message).
*/
-static void
-uv_reply_to_message(int resource,
- struct bau_payload_queue_entry *msg,
- struct bau_msg_status *msp)
+static void uv_reply_to_message(int resource,
+ struct bau_payload_queue_entry *msg,
+ struct bau_msg_status *msp)
{
- int fw;
+ unsigned long dw;
- fw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
+ dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
msg->replied_to = 1;
msg->sw_ack_vector = 0;
if (msp)
msp->seen_by.bits = 0;
- uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, fw);
- return;
+ uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
}
/*
* Do all the things a cpu should do for a TLB shootdown message.
* Other cpu's may come here at the same time for this message.
*/
-static void
-uv_bau_process_message(struct bau_payload_queue_entry *msg,
- int msg_slot, int sw_ack_slot)
+static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
+ int msg_slot, int sw_ack_slot)
{
- int cpu;
unsigned long this_cpu_mask;
struct bau_msg_status *msp;
+ int cpu;
msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
cpu = uv_blade_processor_id();
msg->number_of_cpus =
- uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
- this_cpu_mask = (unsigned long)1 << cpu;
+ uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+ this_cpu_mask = 1UL << cpu;
if (msp->seen_by.bits & this_cpu_mask)
return;
atomic_or_long(&msp->seen_by.bits, this_cpu_mask);
atomic_inc_short(&msg->acknowledge_count);
if (msg->number_of_cpus == msg->acknowledge_count)
uv_reply_to_message(sw_ack_slot, msg, msp);
- return;
}
/*
- * Examine the payload queue on all the distribution nodes to see
+ * Examine the payload queue on one distribution node to see
* which messages have not been seen, and which cpu(s) have not seen them.
*
* Returns the number of cpu's that have not responded.
*/
-static int
-uv_examine_destinations(struct bau_target_nodemask *distribution)
+static int uv_examine_destination(struct bau_control *bau_tablesp, int sender)
{
- int sender;
- int i;
- int j;
- int k;
- int count = 0;
- struct bau_control *bau_tablesp;
struct bau_payload_queue_entry *msg;
struct bau_msg_status *msp;
+ int count = 0;
+ int i;
+ int j;
- sender = smp_processor_id();
- for (i = 0; i < (sizeof(struct bau_target_nodemask) * BITSPERBYTE);
- i++) {
- if (bau_node_isset(i, distribution)) {
- bau_tablesp = uv_bau_table_bases[i];
- for (msg = bau_tablesp->va_queue_first, j = 0;
- j < DESTINATION_PAYLOAD_QUEUE_SIZE; msg++, j++) {
- if ((msg->sending_cpu == sender) &&
- (!msg->replied_to)) {
- msp = bau_tablesp->msg_statuses + j;
- printk(KERN_DEBUG
- "blade %d: address:%#lx %d of %d, not cpu(s): ",
- i, msg->address,
- msg->acknowledge_count,
- msg->number_of_cpus);
- for (k = 0; k < msg->number_of_cpus;
- k++) {
- if (!((long)1 << k & msp->
- seen_by.bits)) {
- count++;
- printk("%d ", k);
- }
- }
- printk("\n");
+ for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE;
+ msg++, i++) {
+ if ((msg->sending_cpu == sender) && (!msg->replied_to)) {
+ msp = bau_tablesp->msg_statuses + i;
+ printk(KERN_DEBUG
+ "blade %d: address:%#lx %d of %d, not cpu(s): ",
+ i, msg->address, msg->acknowledge_count,
+ msg->number_of_cpus);
+ for (j = 0; j < msg->number_of_cpus; j++) {
+ if (!((1L << j) & msp->seen_by.bits)) {
+ count++;
+ printk("%d ", j);
}
}
+ printk("\n");
}
}
return count;
}
-/**
- * uv_flush_tlb_others - globally purge translation cache of a virtual
- * address or all TLB's
- * @cpumaskp: mask of all cpu's in which the address is to be removed
- * @mm: mm_struct containing virtual address range
- * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
- *
- * This is the entry point for initiating any UV global TLB shootdown.
- *
- * Purges the translation caches of all specified processors of the given
- * virtual address, or purges all TLB's on specified processors.
- *
- * The caller has derived the cpumaskp from the mm_struct and has subtracted
- * the local cpu from the mask. This function is called only if there
- * are bits set in the mask. (e.g. flush_tlb_page())
+/*
+ * Examine the payload queue on all the distribution nodes to see
+ * which messages have not been seen, and which cpu(s) have not seen them.
*
- * The cpumaskp is converted into a nodemask of the nodes containing
- * the cpus.
+ * Returns the number of cpu's that have not responded.
*/
-int
-uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, unsigned long va)
+static int uv_examine_destinations(struct bau_target_nodemask *distribution)
{
+ int sender;
int i;
- int blade;
- int cpu;
- int bit;
- int right_shift;
- int this_blade;
- int exams = 0;
- int tries = 0;
- long source_timeouts = 0;
- long destination_timeouts = 0;
- unsigned long index;
- unsigned long mmr_offset;
- unsigned long descriptor_status;
- struct bau_activation_descriptor *bau_desc;
- ktime_t time1, time2;
-
- cpu = uv_blade_processor_id();
- this_blade = uv_numa_blade_id();
- bau_desc = __get_cpu_var(bau_control).descriptor_base;
- bau_desc += (UV_ITEMS_PER_DESCRIPTOR * cpu);
-
- bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
+ int count = 0;
- i = 0;
- for_each_cpu_mask(bit, *cpumaskp) {
- blade = uv_cpu_to_blade_id(bit);
- if (blade > (UV_DISTRIBUTION_SIZE - 1))
- BUG();
- if (blade == this_blade)
+ sender = smp_processor_id();
+ for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) {
+ if (!bau_node_isset(i, distribution))
continue;
- bau_node_set(blade, &bau_desc->distribution);
- /* leave the bits for the remote cpu's in the mask until
- success; on failure we fall back to the IPI method */
- i++;
- }
- if (i == 0)
- goto none_to_flush;
- __get_cpu_var(ptcstats).requestor++;
- __get_cpu_var(ptcstats).ntargeted += i;
-
- bau_desc->payload.address = va;
- bau_desc->payload.sending_cpu = smp_processor_id();
-
- if (cpu < UV_CPUS_PER_ACT_STATUS) {
- mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
- right_shift = cpu * UV_ACT_STATUS_SIZE;
- } else {
- mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
- right_shift =
- ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
+ count += uv_examine_destination(uv_bau_table_bases[i], sender);
}
- time1 = ktime_get();
+ return count;
+}
-retry:
- tries++;
- index = ((unsigned long)
- 1 << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | cpu;
- uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
+/*
+ * wait for completion of a broadcast message
+ *
+ * return FLUSH_COMPLETE, FLUSH_RETRY or FLUSH_GIVEUP
+ */
+static int uv_wait_completion(struct bau_desc *bau_desc,
+ unsigned long mmr_offset, int right_shift)
+{
+ int exams = 0;
+ long destination_timeouts = 0;
+ long source_timeouts = 0;
+ unsigned long descriptor_status;
while ((descriptor_status = (((unsigned long)
- uv_read_local_mmr(mmr_offset) >>
- right_shift) & UV_ACT_STATUS_MASK)) !=
- DESC_STATUS_IDLE) {
+ uv_read_local_mmr(mmr_offset) >>
+ right_shift) & UV_ACT_STATUS_MASK)) !=
+ DESC_STATUS_IDLE) {
if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
source_timeouts++;
if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
source_timeouts = 0;
__get_cpu_var(ptcstats).s_retry++;
- goto retry;
+ return FLUSH_RETRY;
}
- /* spin here looking for progress at the destinations */
+ /*
+ * spin here looking for progress at the destinations
+ */
if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
destination_timeouts++;
if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
- /* returns # of cpus not responding */
+ /*
+ * returns number of cpus not responding
+ */
if (uv_examine_destinations
(&bau_desc->distribution) == 0) {
__get_cpu_var(ptcstats).d_retry++;
- goto retry;
+ return FLUSH_RETRY;
}
exams++;
if (exams >= uv_bau_retry_limit) {
"uv_flush_tlb_others");
printk("giving up on cpu %d\n",
smp_processor_id());
- goto unsuccessful;
+ return FLUSH_GIVEUP;
}
- /* delays can hang up the simulator
+ /*
+ * delays can hang the simulator
udelay(1000);
*/
destination_timeouts = 0;
}
}
+ cpu_relax();
+ }
+ return FLUSH_COMPLETE;
+}
+
+/**
+ * uv_flush_send_and_wait
+ *
+ * Send a broadcast and wait for a broadcast message to complete.
+ *
+ * The flush_mask contains the cpus the broadcast was sent to.
+ *
+ * Returns NULL if all remote flushing was done. The mask is zeroed.
+ * Returns @flush_mask if some remote flushing remains to be done. The
+ * mask will have some bits still set.
+ */
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
+ struct bau_desc *bau_desc,
+ struct cpumask *flush_mask)
+{
+ int completion_status = 0;
+ int right_shift;
+ int tries = 0;
+ int pnode;
+ int bit;
+ unsigned long mmr_offset;
+ unsigned long index;
+ cycles_t time1;
+ cycles_t time2;
+
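+ /*
+ * The hardware activation status for this cpu's descriptor is in
+ * one of two MMRs; the first UV_CPUS_PER_ACT_STATUS cpus use
+ * ACTIVATION_STATUS_0, the rest use ACTIVATION_STATUS_1.
+ */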
+ if (cpu < UV_CPUS_PER_ACT_STATUS) {
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+ right_shift = cpu * UV_ACT_STATUS_SIZE;
+ } else {
+ mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+ right_shift =
+ ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
}
+ time1 = get_cycles();
+ do {
+ tries++;
+ index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
+ cpu;
+ uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
+ completion_status = uv_wait_completion(bau_desc, mmr_offset,
+ right_shift);
+ } while (completion_status == FLUSH_RETRY);
+ time2 = get_cycles();
+ __get_cpu_var(ptcstats).sflush += (time2 - time1);
if (tries > 1)
__get_cpu_var(ptcstats).retriesok++;
- /* on success, clear the remote cpu's from the mask so we don't
- use the IPI method of shootdown on them */
- for_each_cpu_mask(bit, *cpumaskp) {
- blade = uv_cpu_to_blade_id(bit);
- if (blade == this_blade)
+
+ if (completion_status == FLUSH_GIVEUP) {
+ /*
+ * Cause the caller to do an IPI-style TLB shootdown on
+ * the cpu's, all of which are still in the mask.
+ */
+ __get_cpu_var(ptcstats).ptc_i++;
+ return flush_mask;
+ }
+
+ /*
+ * Success, so clear the remote cpu's from the mask so we don't
+ * use the IPI method of shootdown on them.
+ */
+ for_each_cpu(bit, flush_mask) {
+ pnode = uv_cpu_to_pnode(bit);
+ if (pnode == this_pnode)
continue;
- cpu_clear(bit, *cpumaskp);
+ cpumask_clear_cpu(bit, flush_mask);
}
+ if (!cpumask_empty(flush_mask))
+ return flush_mask;
+ return NULL;
+}
+
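+/*
+ * per-cpu scratch cpumask filled in by uv_flush_tlb_others() and
+ * possibly returned to its caller
+ */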
+static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
+
+/**
+ * uv_flush_tlb_others - globally purge translation cache of a virtual
+ * address or all TLB's
+ * @cpumask: mask of all cpu's in which the address is to be removed
+ * @mm: mm_struct containing virtual address range
+ * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @cpu: the current cpu
+ *
+ * This is the entry point for initiating any UV global TLB shootdown.
+ *
+ * Purges the translation caches of all specified processors of the given
+ * virtual address, or purges all TLB's on specified processors.
+ *
+ * The caller has derived the cpumask from the mm_struct. This function
+ * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
+ *
+ * The cpumask is converted into a nodemask of the nodes containing
+ * the cpus.
+ *
+ * Note that this function should be called with preemption disabled.
+ *
+ * Returns NULL if all remote flushing was done.
+ * Returns pointer to cpumask if some remote flushing remains to be
+ * done. The returned pointer is valid until preemption is re-enabled.
+ */
+const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long va, unsigned int cpu)
+{
+ struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
+ int i;
+ int bit;
+ int pnode;
+ int uv_cpu;
+ int this_pnode;
+ int locals = 0;
+ struct bau_desc *bau_desc;
-unsuccessful:
- time2 = ktime_get();
- __get_cpu_var(ptcstats).sflush_ns += (time2.tv64 - time1.tv64);
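+ /* the target mask is the caller's mask minus the current cpu */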
+ cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
-none_to_flush:
- if (cpus_empty(*cpumaskp))
- return 1;
+ uv_cpu = uv_blade_processor_id();
+ this_pnode = uv_hub_info->pnode;
+ bau_desc = __get_cpu_var(bau_control).descriptor_base;
+ bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
- /* Cause the caller to do an IPI-style TLB shootdown on
- the cpu's still in the mask */
- __get_cpu_var(ptcstats).ptc_i++;
- return 0;
+ bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
+
+ i = 0;
+ for_each_cpu(bit, flush_mask) {
+ pnode = uv_cpu_to_pnode(bit);
+ BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
+ if (pnode == this_pnode) {
+ locals++;
+ continue;
+ }
+ bau_node_set(pnode - uv_partition_base_pnode,
+ &bau_desc->distribution);
+ i++;
+ }
+ if (i == 0) {
+ /*
+ * no off-node flushing; return the status for the local node
+ */
+ if (locals)
+ return flush_mask;
+ else
+ return NULL;
+ }
+ __get_cpu_var(ptcstats).requestor++;
+ __get_cpu_var(ptcstats).ntargeted += i;
+
+ bau_desc->payload.address = va;
+ bau_desc->payload.sending_cpu = cpu;
+
+ return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
}
/*
* (the resource will not be freed until noninterruptable cpus see this
* interrupt; hardware will timeout the s/w ack and reply ERROR)
*/
-void
-uv_bau_message_interrupt(struct pt_regs *regs)
+void uv_bau_message_interrupt(struct pt_regs *regs)
{
- struct bau_payload_queue_entry *pqp;
+ struct bau_payload_queue_entry *va_queue_first;
+ struct bau_payload_queue_entry *va_queue_last;
struct bau_payload_queue_entry *msg;
struct pt_regs *old_regs = set_irq_regs(regs);
- ktime_t time1, time2;
+ cycles_t time1;
+ cycles_t time2;
int msg_slot;
int sw_ack_slot;
int fw;
exit_idle();
irq_enter();
- time1 = ktime_get();
+ time1 = get_cycles();
local_pnode = uv_blade_to_pnode(uv_numa_blade_id());
- pqp = __get_cpu_var(bau_control).va_queue_first;
+ va_queue_first = __get_cpu_var(bau_control).va_queue_first;
+ va_queue_last = __get_cpu_var(bau_control).va_queue_last;
+
msg = __get_cpu_var(bau_control).bau_msg_head;
while (msg->sw_ack_vector) {
count++;
fw = msg->sw_ack_vector;
- msg_slot = msg - pqp;
+ msg_slot = msg - va_queue_first;
sw_ack_slot = ffs(fw) - 1;
uv_bau_process_message(msg, msg_slot, sw_ack_slot);
msg++;
- if (msg > __get_cpu_var(bau_control).va_queue_last)
- msg = __get_cpu_var(bau_control).va_queue_first;
+ if (msg > va_queue_last)
+ msg = va_queue_first;
__get_cpu_var(bau_control).bau_msg_head = msg;
}
if (!count)
else if (count > 1)
__get_cpu_var(ptcstats).multmsg++;
- time2 = ktime_get();
- __get_cpu_var(ptcstats).dflush_ns += (time2.tv64 - time1.tv64);
+ time2 = get_cycles();
+ __get_cpu_var(ptcstats).dflush += (time2 - time1);
irq_exit();
set_irq_regs(old_regs);
- return;
}
-static void
-uv_enable_timeouts(void)
+/*
+ * uv_enable_timeouts
+ *
+ * Each target blade (i.e. blades that have cpu's) needs to have
+ * shootdown message timeouts enabled. The timeout does not cause
+ * an interrupt, but causes an error message to be returned to
+ * the sender.
+ */
+static void uv_enable_timeouts(void)
{
- int i;
int blade;
- int last_blade;
+ int nblades;
int pnode;
- int cur_cpu = 0;
- unsigned long apicid;
+ unsigned long mmr_image;
+
+ nblades = uv_num_possible_blades();
- /* better if we had each_online_blade */
- last_blade = -1;
- for_each_online_node(i) {
- blade = uv_node_to_blade_id(i);
- if (blade == last_blade)
+ for (blade = 0; blade < nblades; blade++) {
+ if (!uv_blade_nr_possible_cpus(blade))
continue;
- last_blade = blade;
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+
pnode = uv_blade_to_pnode(blade);
- cur_cpu += uv_blade_nr_possible_cpus(i);
+ mmr_image =
+ uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+ /*
+ * Set the timeout period and then lock it in, in three
+ * steps; the final write (re-enabling SOFT_ACK_MODE)
+ * captures and locks in the period.
+ *
+ * To program the period, the SOFT_ACK_MODE must be off.
+ */
+ mmr_image &= ~((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Set the 4-bit period.
+ */
+ mmr_image &= ~((unsigned long)0xf <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Subsequent reversals of the timebase bit (3) cause an
+ * immediate timeout of one or all INTD resources as
+ * indicated in bits 2:0 (7 causes all of them to timeout).
+ */
+ mmr_image |= ((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
}
- return;
}
-static void *
-uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
+static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
{
if (*offset < num_possible_cpus())
return offset;
return NULL;
}
-static void *
-uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
+static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
{
(*offset)++;
if (*offset < num_possible_cpus())
return NULL;
}
-static void
-uv_ptc_seq_stop(struct seq_file *file, void *data)
+static void uv_ptc_seq_stop(struct seq_file *file, void *data)
{
}
* Display the statistics thru /proc
* data points to the cpu number
*/
-static int
-uv_ptc_seq_show(struct seq_file *file, void *data)
+static int uv_ptc_seq_show(struct seq_file *file, void *data)
{
struct ptc_stats *stat;
int cpu;
seq_printf(file,
"# cpu requestor requestee one all sretry dretry ptc_i ");
seq_printf(file,
- "sw_ack sflush_us dflush_us sok dnomsg dmult starget\n");
+ "sw_ack sflush dflush sok dnomsg dmult starget\n");
}
if (cpu < num_possible_cpus() && cpu_online(cpu)) {
stat = &per_cpu(ptcstats, cpu);
stat->requestee, stat->onetlb, stat->alltlb,
stat->s_retry, stat->d_retry, stat->ptc_i);
seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
- uv_read_global_mmr64(uv_blade_to_pnode
- (uv_cpu_to_blade_id(cpu)),
+ uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
- stat->sflush_ns / 1000, stat->dflush_ns / 1000,
+ stat->sflush, stat->dflush,
stat->retriesok, stat->nomsg,
stat->multmsg, stat->ntargeted);
}
* 0: display meaning of the statistics
* >0: retry limit
*/
-static ssize_t
-uv_ptc_proc_write(struct file *file, const char __user *user,
- size_t count, loff_t *data)
+static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
+ size_t count, loff_t *data)
{
long newmode;
char optstr[64];
+ if (count == 0 || count > sizeof(optstr))
+ return -EINVAL;
if (copy_from_user(optstr, user, count))
return -EFAULT;
optstr[count - 1] = '\0';
printk(KERN_DEBUG
"sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
printk(KERN_DEBUG
- "sflush_us: microseconds spent in uv_flush_tlb_others()\n");
+ "sflush_us: cycles spent in uv_flush_tlb_others()\n");
printk(KERN_DEBUG
- "dflush_us: microseconds spent in handling flush requests\n");
+ "dflush_us: cycles spent in handling flush requests\n");
printk(KERN_DEBUG "sok: successes on retry\n");
printk(KERN_DEBUG "dnomsg: interrupts with no message\n");
printk(KERN_DEBUG
}
static const struct seq_operations uv_ptc_seq_ops = {
- .start = uv_ptc_seq_start,
- .next = uv_ptc_seq_next,
- .stop = uv_ptc_seq_stop,
- .show = uv_ptc_seq_show
+ .start = uv_ptc_seq_start,
+ .next = uv_ptc_seq_next,
+ .stop = uv_ptc_seq_stop,
+ .show = uv_ptc_seq_show
};
-static int
-uv_ptc_proc_open(struct inode *inode, struct file *file)
+static int uv_ptc_proc_open(struct inode *inode, struct file *file)
{
return seq_open(file, &uv_ptc_seq_ops);
}
static const struct file_operations proc_uv_ptc_operations = {
- .open = uv_ptc_proc_open,
- .read = seq_read,
- .write = uv_ptc_proc_write,
- .llseek = seq_lseek,
- .release = seq_release,
+ .open = uv_ptc_proc_open,
+ .read = seq_read,
+ .write = uv_ptc_proc_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
};
-static struct proc_dir_entry *proc_uv_ptc;
-
-static int __init
-uv_ptc_init(void)
+static int __init uv_ptc_init(void)
{
- static struct proc_dir_entry *sgi_proc_dir;
-
- sgi_proc_dir = NULL;
+ struct proc_dir_entry *proc_uv_ptc;
if (!is_uv_system())
return 0;
- sgi_proc_dir = proc_mkdir("sgi_uv", NULL);
- if (!sgi_proc_dir)
- return -EINVAL;
-
- proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
+ proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
+ &proc_uv_ptc_operations);
if (!proc_uv_ptc) {
printk(KERN_ERR "unable to create %s proc entry\n",
UV_PTC_BASENAME);
return -EINVAL;
}
- proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
return 0;
}
-static void __exit
-uv_ptc_exit(void)
-{
- remove_proc_entry(UV_PTC_BASENAME, NULL);
-}
-
-module_init(uv_ptc_init);
-module_exit(uv_ptc_exit);
-
/*
- * Initialization of BAU-related structures
+ * begin the initialization of the per-blade control structures
*/
-int __init
-uv_bau_init(void)
+static struct bau_control * __init uv_table_bases_init(int blade, int node)
{
int i;
- int j;
- int blade;
- int nblades;
- int *ip;
- int pnode;
- int last_blade;
- int cur_cpu = 0;
- unsigned long pa;
- unsigned long n;
- unsigned long m;
- unsigned long mmr_image;
- unsigned long apicid;
- char *cp;
- struct bau_control *bau_tablesp;
- struct bau_activation_descriptor *adp, *ad2;
- struct bau_payload_queue_entry *pqp;
struct bau_msg_status *msp;
- struct bau_control *bcp;
+ struct bau_control *bau_tabp;
- if (!is_uv_system())
- return 0;
+ bau_tabp =
+ kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node);
+ BUG_ON(!bau_tabp);
- uv_bau_retry_limit = 1;
+ bau_tabp->msg_statuses =
+ kmalloc_node(sizeof(struct bau_msg_status) *
+ DEST_Q_SIZE, GFP_KERNEL, node);
+ BUG_ON(!bau_tabp->msg_statuses);
- if ((sizeof(struct bau_local_cpumask) * BITSPERBYTE) <
- MAX_CPUS_PER_NODE) {
- printk(KERN_ERR
- "uv_bau_init: bau_local_cpumask.bits too small\n");
- BUG();
- }
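+ /* clear the "seen by" cpu bits for each payload queue slot's status */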
+ for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++)
+ bau_cpubits_clear(&msp->seen_by, (int)
+ uv_blade_nr_possible_cpus(blade));
+
+ uv_bau_table_bases[blade] = bau_tabp;
+
+ return bau_tabp;
+}
+
+/*
+ * finish the initialization of the per-blade control structures
+ */
+static void __init
+uv_table_bases_finish(int blade,
+ struct bau_control *bau_tablesp,
+ struct bau_desc *adp)
+{
+ struct bau_control *bcp;
+ int cpu;
- uv_nshift = uv_hub_info->n_val;
- uv_mmask = ((unsigned long)1 << uv_hub_info->n_val) - 1;
- nblades = 0;
- last_blade = -1;
- for_each_online_node(i) {
- blade = uv_node_to_blade_id(i);
- if (blade == last_blade)
+ for_each_present_cpu(cpu) {
+ if (blade != uv_cpu_to_blade_id(cpu))
continue;
- last_blade = blade;
- nblades++;
+
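+ /* point this cpu's bau_control at the blade's shared tables */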
+ bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
+ bcp->bau_msg_head = bau_tablesp->va_queue_first;
+ bcp->va_queue_first = bau_tablesp->va_queue_first;
+ bcp->va_queue_last = bau_tablesp->va_queue_last;
+ bcp->msg_statuses = bau_tablesp->msg_statuses;
+ bcp->descriptor_base = adp;
}
+}
- uv_bau_table_bases = (struct bau_control **)
- kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
- if (!uv_bau_table_bases)
- BUG();
-
- /* better if we had each_online_blade */
- last_blade = -1;
- for_each_online_node(i) {
- blade = uv_node_to_blade_id(i);
- if (blade == last_blade)
- continue;
- last_blade = blade;
-
- bau_tablesp =
- kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, i);
- if (!bau_tablesp)
- BUG();
-
- bau_tablesp->msg_statuses =
- kmalloc_node(sizeof(struct bau_msg_status) *
- DESTINATION_PAYLOAD_QUEUE_SIZE, GFP_KERNEL, i);
- if (!bau_tablesp->msg_statuses)
- BUG();
- for (j = 0, msp = bau_tablesp->msg_statuses;
- j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, msp++) {
- bau_cpubits_clear(&msp->seen_by, (int)
- uv_blade_nr_possible_cpus(blade));
- }
+/*
+ * initialize the sending side's sending buffers
+ */
+static struct bau_desc * __init
+uv_activation_descriptor_init(int node, int pnode)
+{
+ int i;
+ unsigned long pa;
+ unsigned long m;
+ unsigned long n;
+ struct bau_desc *adp;
+ struct bau_desc *ad2;
+
+ /*
+ * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
+ * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade
+ */
+ adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc) *
+ UV_ADP_SIZE * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
+ BUG_ON(!adp);
+
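+ /*
+ * the descriptor base MMR is programmed with the pnode (n) and the
+ * node offset (m) of the descriptor array's global address
+ */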
+ pa = uv_gpa(adp); /* need the real nasid */
+ n = uv_gpa_to_pnode(pa);
+ m = pa & uv_mmask;
+
+ uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+ (n << UV_DESC_BASE_PNODE_SHIFT | m));
+
+ /*
+ * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+ * cpu even though we only use the first one; one descriptor can
+ * describe a broadcast to 256 nodes.
+ */
+ for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
+ i++, ad2++) {
+ memset(ad2, 0, sizeof(struct bau_desc));
+ ad2->header.sw_ack_flag = 1;
+ /*
+ * base_dest_nodeid is the first node in the partition, so
+ * the bit map will indicate partition-relative node numbers.
+ * note that base_dest_nodeid is actually a nasid.
+ */
+ ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
+ ad2->header.dest_subnodeid = 0x10; /* the LB */
+ ad2->header.command = UV_NET_ENDPOINT_INTD;
+ ad2->header.int_both = 1;
+ /*
+ * all others need to be set to zero:
+ * fairness chaining multilevel count replied_to
+ */
+ }
+ return adp;
+}
- bau_tablesp->watching =
- kmalloc_node(sizeof(int) * DESTINATION_NUM_RESOURCES,
- GFP_KERNEL, i);
- if (!bau_tablesp->watching)
- BUG();
- for (j = 0, ip = bau_tablesp->watching;
- j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, ip++) {
- *ip = 0;
- }
+/*
+ * initialize the destination side's receiving buffers
+ */
+static struct bau_payload_queue_entry * __init
+uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
+{
+ struct bau_payload_queue_entry *pqp;
+ unsigned long pa;
+ int pn;
+ char *cp;
- uv_bau_table_bases[i] = bau_tablesp;
+ pqp = (struct bau_payload_queue_entry *) kmalloc_node(
+ (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
+ GFP_KERNEL, node);
+ BUG_ON(!pqp);
+
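+ /*
+ * align the payload queue start on a 32-byte boundary;
+ * each bau_payload_queue_entry is 32 bytes
+ */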
+ cp = (char *)pqp + 31;
+ pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
+ bau_tablesp->va_queue_first = pqp;
+ /*
+ * need the pnode of where the memory was really allocated
+ */
+ pa = uv_gpa(pqp);
+ pn = uv_gpa_to_pnode(pa);
+ uv_write_global_mmr64(pnode,
+ UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
+ ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
+ uv_physnodeaddr(pqp));
+ uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
+ uv_physnodeaddr(pqp));
+ bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
+ uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
+ (unsigned long)
+ uv_physnodeaddr(bau_tablesp->va_queue_last));
+ memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
+
+ return pqp;
+}
- pnode = uv_blade_to_pnode(blade);
+/*
+ * Initialization of each UV blade's structures
+ */
+static int __init uv_init_blade(int blade)
+{
+ int node;
+ int pnode;
+ unsigned long pa;
+ unsigned long apicid;
+ struct bau_desc *adp;
+ struct bau_payload_queue_entry *pqp;
+ struct bau_control *bau_tablesp;
- if (sizeof(struct bau_activation_descriptor) != 64)
- BUG();
-
- adp = (struct bau_activation_descriptor *)
- kmalloc_node(16384, GFP_KERNEL, i);
- if (!adp)
- BUG();
- if ((unsigned long)adp & 0xfff)
- BUG();
- pa = __pa((unsigned long)adp);
- n = pa >> uv_nshift;
- m = pa & uv_mmask;
-
- mmr_image = uv_read_global_mmr64(pnode,
- UVH_LB_BAU_SB_DESCRIPTOR_BASE);
- if (mmr_image)
- uv_write_global_mmr64(pnode, (unsigned long)
- UVH_LB_BAU_SB_DESCRIPTOR_BASE,
- (n << UV_DESC_BASE_PNODE_SHIFT |
- m));
- for (j = 0, ad2 = adp; j < UV_ACTIVATION_DESCRIPTOR_SIZE;
- j++, ad2++) {
- memset(ad2, 0,
- sizeof(struct bau_activation_descriptor));
- ad2->header.sw_ack_flag = 1;
- ad2->header.base_dest_nodeid =
- uv_blade_to_pnode(uv_cpu_to_blade_id(0));
- ad2->header.command = UV_NET_ENDPOINT_INTD;
- ad2->header.int_both = 1;
- /* all others need to be set to zero:
- fairness chaining multilevel count replied_to */
- }
+ node = blade_to_first_node(blade);
+ bau_tablesp = uv_table_bases_init(blade, node);
+ pnode = uv_blade_to_pnode(blade);
+ adp = uv_activation_descriptor_init(node, pnode);
+ pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
+ uv_table_bases_finish(blade, bau_tablesp, adp);
+ /*
+ * the below initialization can't be in firmware because the
+ * messaging IRQ will be determined by the OS
+ */
+ apicid = blade_to_first_apicid(blade);
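+ /* the write below puts the vector in the low byte, the apicid in bits 63:32 */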
+ pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
+ if ((pa & 0xff) != UV_BAU_MESSAGE) {
+ uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+ ((apicid << 32) | UV_BAU_MESSAGE));
+ }
+ return 0;
+}
- pqp = (struct bau_payload_queue_entry *)
- kmalloc_node((DESTINATION_PAYLOAD_QUEUE_SIZE + 1) *
- sizeof(struct bau_payload_queue_entry),
- GFP_KERNEL, i);
- if (!pqp)
- BUG();
- if (sizeof(struct bau_payload_queue_entry) != 32)
- BUG();
- if ((unsigned long)(&((struct bau_payload_queue_entry *)0)->
- sw_ack_vector) != 15)
- BUG();
-
- cp = (char *)pqp + 31;
- pqp = (struct bau_payload_queue_entry *)
- (((unsigned long)cp >> 5) << 5);
- bau_tablesp->va_queue_first = pqp;
- uv_write_global_mmr64(pnode,
- UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
- ((unsigned long)pnode <<
- UV_PAYLOADQ_PNODE_SHIFT) |
- uv_physnodeaddr(pqp));
- uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
- uv_physnodeaddr(pqp));
- bau_tablesp->va_queue_last =
- pqp + (DESTINATION_PAYLOAD_QUEUE_SIZE - 1);
- uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
- (unsigned long)
- uv_physnodeaddr(bau_tablesp->
- va_queue_last));
- memset(pqp, 0, sizeof(struct bau_payload_queue_entry) *
- DESTINATION_PAYLOAD_QUEUE_SIZE);
-
- /* this initialization can't be in firmware because the
- messaging IRQ will be determined by the OS */
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
- pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
- if ((pa & 0xff) != UV_BAU_MESSAGE) {
- uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
- ((apicid << 32) |
- UV_BAU_MESSAGE));
- }
+/*
+ * Initialization of BAU-related structures
+ */
+static int __init uv_bau_init(void)
+{
+ int blade;
+ int nblades;
+ int cur_cpu;
- for (j = cur_cpu; j < (cur_cpu + uv_blade_nr_possible_cpus(i));
- j++) {
- bcp = (struct bau_control *)&per_cpu(bau_control, j);
- bcp->bau_msg_head = bau_tablesp->va_queue_first;
- bcp->va_queue_first = bau_tablesp->va_queue_first;
+ if (!is_uv_system())
+ return 0;
- bcp->va_queue_last = bau_tablesp->va_queue_last;
- bcp->watching = bau_tablesp->watching;
- bcp->msg_statuses = bau_tablesp->msg_statuses;
- bcp->descriptor_base = adp;
- }
- cur_cpu += uv_blade_nr_possible_cpus(i);
- }
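+ /* allocate each possible cpu's scratch flush mask on its own node */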
+ for_each_possible_cpu(cur_cpu)
+ zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+ GFP_KERNEL, cpu_to_node(cur_cpu));
- set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
+ uv_bau_retry_limit = 1;
+ uv_mmask = (1UL << uv_hub_info->m_val) - 1;
+ nblades = uv_num_possible_blades();
+ uv_bau_table_bases = (struct bau_control **)
+ kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
+ BUG_ON(!uv_bau_table_bases);
+
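+ /*
+ * the partition base pnode is the lowest pnode of any blade with
+ * cpus; bits in the distribution map are relative to it
+ */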
+ uv_partition_base_pnode = 0x7fffffff;
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade) &&
+ (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
+ uv_partition_base_pnode = uv_blade_to_pnode(blade);
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade))
+ uv_init_blade(blade);
+
+ alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
uv_enable_timeouts();
return 0;
}
-
__initcall(uv_bau_init);
+__initcall(uv_ptc_init);