Reflections on 30 Years of HPC Programming: So many hardware advances, so little adoption of new languages
This summary of Brad’s HIPS 2025 keynote looks at how HPC programming has (and has not) changed over the past 30 years.
// the classic single-statement greeting
writeln("Hello, world!");

// spawn one task per processor core (0 up to, but excluding, here.maxTaskPar)
coforall taskId in 0..<here.maxTaskPar {
  writeln("Hello from task ", taskId);
}

// print these 1,000 messages with a data-parallel loop over 1..1000
forall k in 1..1000 {
  writeln("Hello from iteration ", k);
}
// one task per compute node (locale), each moved onto its own node
coforall node in Locales {
  on node {
    writeln("Hello from locale ", node.id);
  }
}

// nested version: one task per node, then one task per core on that node
coforall node in Locales {
  on node {
    coforall taskId in 0..<here.maxTaskPar {
      writeln("Hello from task ", taskId, " on locale ", node.id);
    }
  }
}
// print 1,000 messages in parallel using all nodes and cores
use BlockDist;

// the indices 1..1000, created as a block-distributed domain
const Inds = blockDist.createDomain(1..1000);

// every iteration reports the id of the locale it executed on
forall idx in Inds {
  writeln("Hello from iteration ", idx, " running on locale ", here.id);
}
use IO;
// Treat stdin as 1BRC-style 'city name;temperature' lines: construct one
// cityTemperature record per line, then print the resulting collection.
const stats = [entry in stdin.lines()] new cityTemperature(entry);
writeln(stats);
// One parsed 'city name;temperature' entry.
record cityTemperature {
  const city: string;  // the piece before the first ';'
  const temp: real;    // the piece after the first ';', cast to real

  // Build a record from a single 'city name;temperature' string.
  // NOTE(review): a string without any ';' yields a single piece, so the
  // access to fields[1] would be out of range -- confirm inputs are well-formed.
  proc init(str: string) {
    const fields = str.split(";");
    city = fields[0];
    temp = fields[1]: real;
  }
}
// Problem parameters; set different values at runtime with command line
// arguments, e.g. --n=2048 --numSteps=256 --alpha=0.4
//
//   n        - number of grid points in the 1D domain
//   numSteps - number of time steps to simulate
//   alpha    - coefficient of the explicit update
//                u[i] = un[i] + alpha * (un[i-1] - 2*un[i] + un[i+1])
//
// That explicit scheme is only numerically stable for alpha <= 0.5; larger
// values amplify the highest-frequency mode every step, so the result
// oscillates and blows up instead of diffusing. The default is therefore kept
// well inside the stable range.
config const n = 1000,
             numSteps = 100,
             alpha = 0.25;

// index sets: the whole grid, and the points that have both neighbors
const fullDomain = {1..n},
      interior = {2..n-1};
// temperature field: 1.0 everywhere, then 2.0 over indices n/4..3*n/4
var u: [fullDomain] real = 1.0;
u[n/4..3*n/4] = 2.0;

// second buffer, starting as a copy of u; read from while u is written
var un = u;

for step in 1..numSteps {
  // shared-memory parallel update of every interior point from 'un'
  forall i in interior {
    const neighborDiff = un[i-1] - 2*un[i] + un[i+1];
    u[i] = un[i] + alpha * neighborDiff;
  }

  // swap the two arrays so 'un' holds the values just computed
  un <=> u;
}

writeln(un);
use Random, Math;
// Size the problem from the GPU count of the locale running this statement:
// one matrix row per (locale, GPU) pair.
// NOTE(review): the row assignment below strides by this single nGpus value,
// so it presumably expects every locale to have the same number of GPUs.
const nGpus = here.gpus.size,
      n = Locales.size*nGpus;

var A: [1..n, 1..n] real;
fillRandom(A);

// use all nodes: each locale is paired with a starting row 1, 1+nGpus, ...
coforall (node, firstRow) in zip(Locales, 1.. by nGpus) {
  on node {
    // and all GPUs within each: consecutive rows beginning at firstRow
    coforall (device, r) in zip(here.gpus, firstRow..) {
      on device {
        var B: [1..n] real = A[r, ..];  // copy a row onto the device
        B = asin(B);                    // compute (kernel launch)
        A[r, ..] = B;                   // copy the row back
      }
    }
  }
}

writeln(A);