Skip to content
Cookbook

Sorting & Limiting

Recipes for ordering DataFrame rows and limiting result sets.

Basic Sorting

Sort by Single Column

typescript
import { col, asc, desc } from "molniya";

// Ascending order (default)
df.sort(asc("name"))

// Descending order
df.sort(desc("price"))

Sort by Multiple Columns

typescript
// Sort by category, then by price within each category
df.sort(asc("category"), desc("price"))

// Equivalent using array syntax
df.sort([asc("category"), desc("price")])

Common Sorting Patterns

Top N Items

typescript
import { desc } from "molniya";

// Top 10 most expensive products
const top10 = df.sort(desc("price")).limit(10);

// Top 5 best-selling items
const top5 = df.sort(desc("sales_count")).limit(5);

Bottom N Items

typescript
import { asc } from "molniya";

// 10 cheapest products
const cheapest = df.sort(asc("price")).limit(10);

// 5 lowest scores
const lowest = df.sort(asc("score")).limit(5);

Sort by Computed Column

typescript
import { col, desc } from "molniya";

// Sort by total (price * quantity)
df.withColumn("total", col("price").mul(col("quantity")))
  .sort(desc("total"))

Advanced Sorting

Sort with Nulls

typescript
import { asc, desc } from "molniya";

// Nulls first (default)
df.sort(asc("middle_name"))  // nulls appear first

// Nulls last
df.sort(asc("middle_name").nullsLast())

// Descending with nulls last
df.sort(desc("rating").nullsLast())

Sort by Multiple Criteria

typescript
import { asc, desc } from "molniya";

// Complex sort: active first, then by score descending, then by name
df.sort(
  desc("is_active"),    // true (1) before false (0)
  desc("score"),        // Highest scores first
  asc("name")           // Alphabetical within same score
)

Sort by String Length

typescript
import { col, length, desc } from "molniya";

// Sort by name length (longest first)
df.sort(desc(length(col("name"))))

Limiting Results

Basic Limit

typescript
// Get first 100 rows
const limited = df.limit(100);

// Get first 10 rows
const sample = df.limit(10);

Pagination

typescript
// Page 1: rows 0-9
const page1 = df.limit(10);

// Page 2: rows 10-19
const page2 = df.offset(10).limit(10);

// Page 3: rows 20-29
const page3 = df.offset(20).limit(10);

Head and Tail

typescript
// First 5 rows (convenience method)
const first5 = df.head(5);

// Last 5 rows
const last5 = df.tail(5);

Combining Sort and Limit

Top Performers

typescript
import { desc } from "molniya";

// Top 20 employees by sales
const topPerformers = df
  .sort(desc("total_sales"))
  .limit(20);

Recent Items

typescript
import { desc } from "molniya";

// 50 most recent orders
const recent = df
  .sort(desc("order_date"))
  .limit(50);

Worst Performers

typescript
import { asc, col } from "molniya";

// 10 products with lowest ratings
const worstRated = df
  .filter(col("rating").isNotNull())
  .sort(asc("rating"))
  .limit(10);

Sorting After Aggregation

Top Categories

typescript
import { sum, desc } from "molniya";

// Top 5 categories by revenue
const topCategories = df
  .groupBy("category", [
    { name: "revenue", expr: sum("amount") }
  ])
  .sort(desc("revenue"))
  .limit(5);

Bottom Groups

typescript
import { count, asc } from "molniya";

// 10 categories with the fewest products
const smallestCategories = df
  .groupBy("category", [
    { name: "product_count", expr: count() }
  ])
  .sort(asc("product_count"))
  .limit(10);

Random Sampling

Sample Rows

typescript
// Get 100 random rows (shuffle, then limit)
const sample = df.shuffle().limit(100);

// 10% sample
const tenPercent = df.sample(0.1);

Sorting with Null Handling

Prioritize Non-Null Values

typescript
import { col, desc } from "molniya";

// Rows with a non-null priority first, then by created_at descending
df.sort(
  col("priority").isNull(),  // false (0) before true (1)
  desc("created_at")
)

Fill Then Sort

typescript
import { col, desc } from "molniya";

// Replace nulls with 0, then sort
df.withColumn("score_filled", col("score").fillNull(0))
  .sort(desc("score_filled"))

Performance Tips

Sort Early

typescript
// Good: Sort before limiting
df.sort(desc("date")).limit(100);

// Note: the sort still has to scan every row before the limit applies,
// so the sort (not the limit) is the expensive part.

Use Indexes

When reading from files, filter rows before sorting if possible, so that fewer rows need to be sorted:

typescript
// Filter before sort for better performance
df.filter(col("year").eq(2024))
  .sort(desc("amount"))
  .limit(10);

Common Recipes

Leaderboard

typescript
import { desc } from "molniya";

// Top 100 players by score
const leaderboard = df
  .select("player_name", "score", "level")
  .sort(desc("score"), desc("level"))
  .limit(100)
  .withColumn("rank", range(1, 101));

Recent Activity Feed

typescript
import { col, desc } from "molniya";

// Latest 20 activities
const feed = df
  .filter(col("is_public").eq(true))
  .sort(desc("created_at"))
  .limit(20);

Price Range Display

typescript
import { asc, desc } from "molniya";

// Show price range: 5 cheapest and 5 most expensive
const cheapest = df.sort(asc("price")).limit(5);
const mostExpensive = df.sort(desc("price")).limit(5);
const priceRange = cheapest.union(mostExpensive);

Alphabetical Listing

typescript
import { asc } from "molniya";

// A-Z listing with pagination (pageNum is zero-based: page 0 starts at offset 0)
const page = df
  .sort(asc("last_name"), asc("first_name"))
  .offset(pageNum * pageSize)
  .limit(pageSize);

Error Handling

Empty Results

typescript
// Handle empty DataFrames gracefully
const result = df.sort(desc("score")).limit(10);
const count = await result.count();

if (count === 0) {
  console.log("No results found");
}

Invalid Sort Column

typescript
// Check that the column exists before sorting
if (df.columnNames.includes("score")) {
  const sorted = df.sort(desc("score"));
}