ggmlR provides a high-level Keras-like interface for building and
training neural networks on top of the ggml C tensor library. This
vignette walks through the main APIs using the built-in
iris and mtcars datasets.
The sequential API mirrors keras.Sequential: stack
layers with |>, then compile, fit, evaluate, and predict.
# Prepare the iris data: standardised features, one-hot targets,
# and an 80/20 train/validation split (120 / 30 rows).
data(iris)
set.seed(42)
x <- as.matrix(iris[, 1:4])
x <- scale(x)  # standardise each feature to mean 0, sd 1
# one-hot encode species (model.matrix with -1 drops the intercept)
y <- model.matrix(~ Species - 1, iris)  # [150 x 3]
idx <- sample(nrow(x))
n_tr <- 120L
x_tr <- x[idx[1:n_tr], ]
y_tr <- y[idx[1:n_tr], ]
x_val <- x[idx[(n_tr + 1):150], ]
y_val <- y[idx[(n_tr + 1):150], ]

model <- ggml_model_sequential() |>
ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_batch_norm() |>
  ggml_layer_dropout(0.3, stochastic = TRUE) |>
  ggml_layer_dense(16L, activation = "relu") |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = c("accuracy")
  )
print(model)

# NOTE(review): the original text evaluated the model without ever fitting it —
# the training chunk was presumably lost in extraction. Reconstructed here to
# mirror the functional-API fit below; confirm against the package vignette.
model <- ggml_fit(model, x_tr, y_tr,
                  epochs = 100L, batch_size = 32L, verbose = 0L)

score <- ggml_evaluate(model, x_val, y_val, batch_size = 32L)
cat(sprintf("Val loss: %.4f Val accuracy: %.4f\n", score$loss, score$accuracy))
# Predicted class = column of the largest softmax probability per row
probs <- ggml_predict(model, x_val, batch_size = 32L)
classes <- apply(probs, 1, which.max)
true <- apply(y_val, 1, which.max)
cat("Confusion matrix:\n")
print(table(true = true, predicted = classes))

path <- tempfile(fileext = ".rds")
# Round-trip the model through disk and verify the reloaded copy scores
# identically on the validation set.
ggml_save_model(model, path)
cat(sprintf("Saved: %s (%.1f KB)\n", path, file.size(path) / 1024))
model2 <- ggml_load_model(path)
score2 <- ggml_evaluate(model2, x_val, y_val, batch_size = 32L)
cat(sprintf("Reloaded model accuracy: %.4f\n", score2$accuracy))

The functional API lets you build DAGs with multiple
inputs/branches/outputs. Use ggml_input() to declare
inputs, pipe through layers, then call ggml_model().
# Same architecture as the sequential example, expressed as a graph:
# declare the input, pipe through layers, then bind into a model.
inp <- ggml_input(shape = 4L)
h <- inp |>
  ggml_layer_dense(32L, activation = "relu") |>
  ggml_layer_batch_norm()
h <- h |> ggml_layer_dense(16L, activation = "relu")
out <- h |> ggml_layer_dense(3L, activation = "softmax")
model_fn <- ggml_model(inputs = inp, outputs = out) |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy",
               metrics = c("accuracy"))
model_fn <- ggml_fit(model_fn, x_tr, y_tr,
                     epochs = 100L, batch_size = 32L, verbose = 0L)
score_fn <- ggml_evaluate(model_fn, x_val, y_val, batch_size = 32L)
cat(sprintf("Functional model accuracy: %.4f\n", score_fn$accuracy))

Predict mpg from two input groups: engine features and
transmission features.
data(mtcars)
set.seed(7)
# Input group 1: engine (disp, hp, wt)
# Input group 2: transmission / gearbox (cyl, gear, carb, am)
engine <- as.matrix(scale(mtcars[, c("disp", "hp", "wt")]))
trans <- as.matrix(scale(mtcars[, c("cyl", "gear", "carb", "am")]))
y_mpg <- matrix(scale(mtcars$mpg), ncol = 1L)  # [32 x 1]
# small dataset — use all for training, evaluate on same data for demo
x1 <- engine
x2 <- trans

# Two input branches projected to the same width, merged element-wise,
# then a small regression head with a linear output unit.
inp1 <- ggml_input(shape = 3L, name = "engine")
inp2 <- ggml_input(shape = 4L, name = "transmission")
branch1 <- inp1 |> ggml_layer_dense(16L, activation = "relu")
branch2 <- inp2 |> ggml_layer_dense(16L, activation = "relu")
merged <- ggml_layer_add(list(branch1, branch2))  # element-wise add
out_reg <- merged |>
  ggml_layer_dense(8L, activation = "relu") |>
  ggml_layer_dense(1L, activation = "linear")
model_reg <- ggml_model(inputs = list(inp1, inp2), outputs = out_reg) |>
  ggml_compile(optimizer = "adam", loss = "mse")
model_reg <- ggml_fit(model_reg,
                      x = list(x1, x2), y = y_mpg,
                      epochs = 200L, batch_size = 16L, verbose = 0L)
preds <- ggml_predict(model_reg, x = list(x1, x2), batch_size = 16L)
cat(sprintf("Pearson r (scaled mpg): %.4f\n", cor(preds, y_mpg)))

Callbacks plug into ggml_fit() via the
callbacks argument.
# Stop training once val_loss fails to improve by min_delta for
# `patience` consecutive epochs, restoring the best weights seen.
cb_stop <- callback_early_stopping(
  monitor = "val_loss",
  patience = 15L,
  min_delta = 1e-4,
  restore_best_weights = TRUE
)
model_cb <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")
model_cb <- ggml_fit(model_cb, x_tr, y_tr,
                     epochs = 300L,
                     batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_stop),
                     verbose = 1L)

# Cosine annealing
cb_cosine <- callback_lr_cosine(T_max = 100L, lr_min = 1e-5)
# Step decay: halve LR every 30 epochs
cb_step <- callback_lr_step(step_size = 30L, gamma = 0.5)
# Reduce on plateau
cb_plateau <- callback_reduce_lr_on_plateau(
  monitor = "val_loss",
  factor = 0.5,
  patience = 10L,
  min_lr = 1e-6
)
model_lr <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy",
               lr = 1e-3)
model_lr <- ggml_fit(model_lr, x_tr, y_tr,
                     epochs = 150L,
                     batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_cosine),
                     verbose = 0L)

Autograd (ag_*)

The autograd engine provides PyTorch-style dynamic computation
graphs. Tensors are column-major: shape is
[features, batch].
ag_sequential

# Mini-batch training loop over the column-major training matrix.
BS <- 32L
n <- ncol(x_tr_ag)
# NOTE(review): ag_mod, opt, x_tr_ag and y_tr_ag are used below but their
# defining chunk is not visible here — presumably lost in extraction; confirm
# against the package vignette source.
ag_train(ag_mod)
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    # record the forward pass on the tape, then backprop and step
    with_grad_tape({
      loss <- ag_softmax_cross_entropy_loss(ag_mod$forward(xb), yb)
    })
    grads <- backward(loss)
    opt$step(grads)
    opt$zero_grad()
  }
  if (ep %% 50L == 0L) {
    cat(sprintf("epoch %d loss %.4f\n", ep, loss$data[1]))
  }
}

ag_eval(ag_mod)
# forward in chunks, apply softmax manually
#
# model: object with a $forward() method returning a tensor with $data
# x_cm:  column-major input matrix [features, samples]
# chunk: number of columns forwarded per batch
# Returns a [n_out, samples] matrix of class probabilities.
ag_predict_cm <- function(model, x_cm, chunk = 64L) {
  n <- ncol(x_cm)
  # probe one column to learn the output dimension
  n_out <- nrow(model$forward(ag_tensor(x_cm[, 1, drop = FALSE]))$data)
  out <- matrix(0.0, n_out, n)
  for (s in seq(1L, n, by = chunk)) {
    e <- min(s + chunk - 1L, n)
    lg <- model$forward(ag_tensor(x_cm[, s:e, drop = FALSE]))$data
    # BUGFIX: `lg - apply(lg, 2, max)` and `ev / colSums(ev)` recycled a
    # length-ncol vector DOWN the matrix columns, pairing wrong elements
    # whenever nrow != 1. sweep() applies the stats per column correctly.
    ev <- exp(sweep(lg, 2, apply(lg, 2, max)))          # stable softmax numerator
    out[, s:e] <- sweep(ev, 2, colSums(ev), "/")        # column-wise normalise
  }
  out
}
probs_ag <- ag_predict_cm(ag_mod, x_val_ag)  # [3, 30]
pred_ag <- apply(probs_ag, 2, which.max)
# assumes y_val_ag is [samples x classes] (margin 1 = per-sample argmax) —
# the original "col-major: rows = classes" comment contradicts this; confirm
true_ag <- apply(y_val_ag, 1, which.max)
acc_ag <- mean(pred_ag == true_ag)
cat(sprintf("Autograd val accuracy: %.4f\n", acc_ag))

ag_mod2 <- ag_sequential(
ag_linear(4L, 64L, activation = "relu"),
  ag_batch_norm(64L),
  ag_dropout(0.3),
  ag_linear(64L, 32L, activation = "relu"),
  ag_linear(32L, 3L)
)
params2 <- ag_mod2$parameters()
opt2 <- optimizer_adam(params2, lr = 1e-3)
sch2 <- lr_scheduler_cosine(opt2, T_max = 150L, lr_min = 1e-5)
ag_train(ag_mod2)
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss2 <- ag_softmax_cross_entropy_loss(ag_mod2$forward(xb), yb)
    })
    grads2 <- backward(loss2)
    # clip before the optimiser step to bound the update norm
    clip_grad_norm(params2, grads2, max_norm = 5.0)
    opt2$step(grads2)
    opt2$zero_grad()
  }
  sch2$step()  # advance the cosine schedule once per epoch
}
ag_eval(ag_mod2)
probs2 <- ag_predict_cm(ag_mod2, x_val_ag)
acc2 <- mean(apply(probs2, 2, which.max) == true_ag)
cat(sprintf("ag + cosine + clip val accuracy: %.4f\n", acc2))

ag_param

For full control, build a network directly from parameters:
# Build a two-layer MLP (n_in -> n_hidden -> n_out) from raw ag_param
# tensors. Returns a list with $forward(x) and $parameters().
make_net <- function(n_in, n_hidden, n_out) {
  # He-style initialisation: N(0, 1) scaled by sqrt(2 / fan_in)
  he_init <- function(rows, cols) {
    ag_param(matrix(rnorm(rows * cols) * sqrt(2 / cols), rows, cols))
  }
  w_hid <- he_init(n_hidden, n_in)
  b_hid <- ag_param(matrix(0.0, n_hidden, 1L))
  w_out <- he_init(n_out, n_hidden)
  b_out <- ag_param(matrix(0.0, n_out, 1L))
  list(
    forward = function(x) {
      hidden <- ag_relu(ag_add(ag_matmul(w_hid, x), b_hid))
      ag_add(ag_matmul(w_out, hidden), b_out)
    },
    parameters = function() {
      list(W1 = w_hid, b1 = b_hid, W2 = w_out, b2 = b_out)
    }
  )
}
# Train the hand-built network with the same mini-batch loop as above.
set.seed(1)
net <- make_net(4L, 32L, 3L)
opt_r <- optimizer_adam(net$parameters(), lr = 1e-3)
for (ep in seq_len(200L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss_r <- ag_softmax_cross_entropy_loss(net$forward(xb), yb)
    })
    gr <- backward(loss_r)
    opt_r$step(gr)
    opt_r$zero_grad()
  }
}
probs_r <- ag_predict_cm(net, x_val_ag)
acc_r <- mean(apply(probs_r, 2, which.max) == true_ag)
cat(sprintf("Raw ag_param val accuracy: %.4f\n", acc_r))

# Use Vulkan GPU if available, fall back to CPU
# Try to select the GPU backend; any error (no Vulkan device, no driver)
# falls back to CPU rather than aborting the vignette.
device <- tryCatch({
  ag_device("gpu")
  "gpu"
}, error = function(e) "cpu")
cat("Running on:", device, "\n")
# Mixed precision (f16 on GPU, f32 on CPU)
ag_dtype(if (device == "gpu") "f16" else "f32")

After calling ag_device() and ag_dtype(),
all subsequent ag_param and ag_tensor calls
use the selected device and dtype. The sequential API
(ggml_fit) picks up the backend automatically.
| Task | Sequential / Functional | Autograd |
|---|---|---|
| Build model | `ggml_model_sequential()` + `ggml_layer_*` | `ag_sequential()` or raw `ag_param` |
| Functional DAG | `ggml_input()` → `ggml_model()` | — |
| Compile | `ggml_compile()` | manual optimizer |
| Train | `ggml_fit()` | `with_grad_tape` + `backward` + `opt$step` |
| Evaluate | `ggml_evaluate()` | manual loop |
| Predict | `ggml_predict()` | manual forward |
| Save / load | `ggml_save_model()` / `ggml_load_model()` | — |
| Callbacks | `callback_early_stopping()`, `callback_lr_*` | `lr_scheduler_*`, `clip_grad_norm` |
| Device | automatic | `ag_device()` / `ag_dtype()` |