diff --git a/BUILTINS.md b/BUILTINS.md index 78c3d41..091fe4e 100644 --- a/BUILTINS.md +++ b/BUILTINS.md @@ -83,17 +83,23 @@ fn main() -> i32 { --- -#### `attach(handle, target, flags)` +#### `attach(handle, target, flags)` / `attach(handle, opts, flags)` **Signature:** `attach(handle: ProgramHandle, target: str(128), flags: u32) -> u32` +**Signature:** `attach(handle: ProgramHandle, opts: perf_options, flags: u32) -> u32` **Variadic:** No **Context:** Userspace only -**Description:** Attach a loaded eBPF program to a target interface or attachment point. +**Description:** Attach a loaded eBPF program to a target interface or attachment point, or to a perf event counter described by `perf_options`. Both forms take three arguments, keeping a uniform call shape across all program types. **Parameters:** -- `handle`: Program handle returned from `load()` -- `target`: Target interface name (e.g., "eth0", "lo") or attachment point -- `flags`: Attachment flags (context-dependent) +- Standard form: + - `handle`: Program handle returned from `load()` + - `target`: Target interface name (e.g., "eth0", "lo") or attachment point + - `flags`: Attachment flags (context-dependent) +- Perf event form: + - `handle`: Program handle returned from `load()` + - `opts`: `perf_options` value — only `perf_type` and `perf_config` are required; all other fields have defaults + - `flags`: Reserved (pass `0`) **Return Value:** - Returns `0` on success @@ -106,11 +112,17 @@ var result = attach(prog, "eth0", 0) if (result != 0) { print("Failed to attach program") } + +// Minimal perf attach — all non-perf_type/perf_config fields use defaults: +// pid=-1 (all procs), cpu=0, period=1_000_000, wakeup=1, flags=false +var perf_prog = load(on_branch_miss) +attach(perf_prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses }, 0) +detach(perf_prog) ``` **Context-specific implementations:** - **eBPF:** Not available -- **Userspace:** Uses `bpf_prog_attach` system call +- 
**Userspace:** Uses `attach_bpf_program_by_fd` for standard targets and `ks_attach_perf_event` for perf events - **Kernel Module:** Not available --- @@ -340,7 +352,7 @@ fn main() -> i32 { |----------|------|-----------|---------------|-------| | `print()` | ✅ | ✅ | ✅ | Different output destinations | | `load()` | ❌ | ✅ | ❌ | Program management only | -| `attach()` | ❌ | ✅ | ❌ | Program management only | +| `attach()` | ❌ | ✅ | ❌ | Standard attach and perf_options attach | | `detach()` | ❌ | ✅ | ❌ | Program management only | | `register()` | ❌ | ✅ | ❌ | struct_ops registration | | `test()` | ❌ | ✅ | ❌ | Testing framework only | diff --git a/README.md b/README.md index 700c82d..cba9f6a 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,13 @@ fn traffic_shaper(ctx: *__sk_buff) -> i32 { // Trace system call entry return 0 } + +// Perf event program for hardware counter sampling +@perf_event +fn on_branch_miss(ctx: *bpf_perf_event_data) -> i32 { + // Runs on every hardware branch-miss event + return 0 +} ``` ### Type System @@ -261,6 +268,58 @@ fn main() -> i32 { } ``` +### Hardware Performance Counter Programs + +Use `@perf_event` to attach eBPF programs to hardware or software performance counters. `perf_options` keeps the kernel's tagged `perf_type + perf_config` model, so adding new perf event families does not require flattening everything into one enum. Only `perf_type` and `perf_config` are required; all other fields have sensible defaults. 
If you need the current count in userspace, call `perf_read(prog)` after `attach(...)`: + +```kernelscript +// eBPF program fires on every hardware branch-miss sample +@perf_event +fn on_branch_miss(ctx: *bpf_perf_event_data) -> i32 { + return 0 +} + +fn main() -> i32 { + var prog = load(on_branch_miss) + + // Minimal form — defaults: pid=-1 (all procs), cpu=0, + // period=1_000_000, wakeup=1, all flags=false + attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses }, 0) + var count = perf_read(prog) + print("branch misses: %lld", count) + + detach(prog) // disables counter, destroys BPF link, closes fd + return 0 +} +``` + +**Available `perf_type` values:** + +| Enum value | Hardware/software event | +|---|---| +| `perf_type_hardware` | `PERF_TYPE_HARDWARE` | +| `perf_type_software` | `PERF_TYPE_SOFTWARE` | +| `perf_type_tracepoint` | `PERF_TYPE_TRACEPOINT` | +| `perf_type_hw_cache` | `PERF_TYPE_HW_CACHE` | +| `perf_type_raw` | `PERF_TYPE_RAW` | +| `perf_type_breakpoint` | `PERF_TYPE_BREAKPOINT` | + +**Common `perf_config` constants:** + +| Constant | Intended `perf_type` | Linux config | +|---|---|---| +| `cpu_cycles` | `perf_type_hardware` | `PERF_COUNT_HW_CPU_CYCLES` | +| `instructions` | `perf_type_hardware` | `PERF_COUNT_HW_INSTRUCTIONS` | +| `cache_references` | `perf_type_hardware` | `PERF_COUNT_HW_CACHE_REFERENCES` | +| `cache_misses` | `perf_type_hardware` | `PERF_COUNT_HW_CACHE_MISSES` | +| `branch_instructions` | `perf_type_hardware` | `PERF_COUNT_HW_BRANCH_INSTRUCTIONS` | +| `branch_misses` | `perf_type_hardware` | `PERF_COUNT_HW_BRANCH_MISSES` | +| `page_faults` | `perf_type_software` | `PERF_COUNT_SW_PAGE_FAULTS` | +| `context_switches` | `perf_type_software` | `PERF_COUNT_SW_CONTEXT_SWITCHES` | +| `cpu_migrations` | `perf_type_software` | `PERF_COUNT_SW_CPU_MIGRATIONS` | + +For newer families such as `perf_type_hw_cache`, pass the kernel-compatible encoded `perf_config` value directly. 
+ 📖 **For detailed language specification, syntax reference, and advanced features, please read [`SPEC.md`](SPEC.md).** 🔧 **For complete builtin functions reference, see [`BUILTINS.md`](BUILTINS.md).** @@ -304,6 +363,7 @@ my_project/ - `tc` - Traffic control programs - `probe` - Kernel function probing - `tracepoint` - Kernel tracepoint programs +- `perf_event` - Hardware/software performance counter programs **Available struct_ops:** - `tcp_congestion_ops` - TCP congestion control diff --git a/SPEC.md b/SPEC.md index 8e1e2cf..ab2b7d6 100644 --- a/SPEC.md +++ b/SPEC.md @@ -35,7 +35,7 @@ var flows : hash(1024) KernelScript uses a simple and clear scoping model that eliminates ambiguity: - **`@helper` functions**: Kernel-shared functions - accessible by all eBPF programs, compile to eBPF bytecode -- **Attributed functions** (e.g., `@xdp`, `@tc`, `@tracepoint`): eBPF program entry points - compile to eBPF bytecode +- **Attributed functions** (e.g., `@xdp`, `@tc`, `@tracepoint`, `@perf_event`): eBPF program entry points - compile to eBPF bytecode - **Regular functions**: User space - functions and data structures compile to native executable - **Maps and global configs**: Shared resources accessible from both kernel and user space - **No wrapper syntax**: Direct, flat structure without unnecessary nesting @@ -440,6 +440,131 @@ kernelscript init tracepoint/syscalls/sys_enter_read my_syscall_tracer # appropriate KernelScript templates with correct context types ``` +#### 3.1.3 Perf Event Programs + +`@perf_event` programs attach eBPF logic to hardware or software performance counters via `perf_event_open(2)`. The eBPF function is invoked for every counter sample; the userspace side controls which counter to monitor through a `perf_options` struct literal passed to the standard 3-argument `attach()`. 
+ +**Syntax:** +```kernelscript +@perf_event +fn (ctx: *bpf_perf_event_data) -> i32 { + // runs on every sample + return 0 +} +``` + +The context type is always `*bpf_perf_event_data` (from `vmlinux.h`). + +**Userspace lifecycle:** +```kernelscript +fn main() -> i32 { + var prog = load(my_handler) + + // Only perf_type + perf_config are required; all other fields use language-level defaults: + // pid=-1, cpu=0, period=1_000_000, wakeup=1, inherit/exclude_*=false + attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses }, 0) + + // Override specific fields as needed: + attach(prog, perf_options { + perf_type: perf_type_hardware, + perf_config: cache_misses, + cpu: 2, + period: 500000, + exclude_kernel: true, + }, 0) + + var count = perf_read(prog) + print("count: %lld", count) + + detach(prog) // IOC_DISABLE → bpf_link__destroy → close(perf_fd) + return 0 +} +``` + +**`perf_options` fields and defaults:** + +| Field | Type | Default | Description | +|---|---|---|---| +| `perf_type` | `perf_type` | *(required)* | `perf_event_attr.type` tag | +| `perf_config` | `u64` | *(required)* | `perf_event_attr.config` value for that type | +| `pid` | `i32` | `-1` | -1 = all processes; ≥0 = specific PID | +| `cpu` | `i32` | `0` | ≥0 = specific CPU; -1 = any CPU (pid must be ≥0) | +| `period` | `u64` | `1000000` | Sample after this many events | +| `wakeup` | `u32` | `1` | Wake userspace after N samples | +| `inherit` | `bool` | `false` | Inherit to forked children | +| `exclude_kernel` | `bool` | `false` | Exclude kernel-mode samples | +| `exclude_user` | `bool` | `false` | Exclude user-mode samples | + +**`pid` / `cpu` rules enforced at runtime:** + +| `pid` | `cpu` | Meaning | +|---|---|---| +| ≥ 0 | ≥ 0 | Specific process on specific CPU | +| ≥ 0 | -1 | Specific process on any CPU | +| -1 | ≥ 0 | All processes on specific CPU (system-wide) | +| -1 | -1 | **Invalid** — rejected with error | + +**`perf_type` enum:** + +| Value | Linux constant | 
+|---|---| +| `perf_type_hardware` | `PERF_TYPE_HARDWARE` | +| `perf_type_software` | `PERF_TYPE_SOFTWARE` | +| `perf_type_tracepoint` | `PERF_TYPE_TRACEPOINT` | +| `perf_type_hw_cache` | `PERF_TYPE_HW_CACHE` | +| `perf_type_raw` | `PERF_TYPE_RAW` | +| `perf_type_breakpoint` | `PERF_TYPE_BREAKPOINT` | + +**Common `perf_config` constants:** + +| Value | Intended `perf_type` | Linux constant | +|---|---|---| +| `cpu_cycles` | `perf_type_hardware` | `PERF_COUNT_HW_CPU_CYCLES` | +| `instructions` | `perf_type_hardware` | `PERF_COUNT_HW_INSTRUCTIONS` | +| `cache_references` | `perf_type_hardware` | `PERF_COUNT_HW_CACHE_REFERENCES` | +| `cache_misses` | `perf_type_hardware` | `PERF_COUNT_HW_CACHE_MISSES` | +| `branch_instructions` | `perf_type_hardware` | `PERF_COUNT_HW_BRANCH_INSTRUCTIONS` | +| `branch_misses` | `perf_type_hardware` | `PERF_COUNT_HW_BRANCH_MISSES` | +| `page_faults` | `perf_type_software` | `PERF_COUNT_SW_PAGE_FAULTS` | +| `context_switches` | `perf_type_software` | `PERF_COUNT_SW_CONTEXT_SWITCHES` | +| `cpu_migrations` | `perf_type_software` | `PERF_COUNT_SW_CPU_MIGRATIONS` | + +For event families with a richer config space, such as `perf_type_hw_cache`, provide the encoded kernel `perf_config` value directly instead of relying on a flattened enum. + +**Generated C helpers (emitted when `attach(prog, perf_options{...}, flags)` is used):** + +| Function | Signature | Description | +|---|---|---| +| `ks_open_perf_event` | `int (ks_perf_options)` | Calls `perf_event_open(2)`, returns fd | +| `ks_attach_perf_event` | `int (int prog_fd, ks_perf_options, int flags)` | Full open-reset-attach-enable lifecycle | +| `ks_read_perf_count` | `int64_t (int perf_fd)` | Reads current 64-bit counter via `read()` | +| `ks_perf_read` | `int64_t (int prog_fd)` | High-level read via program handle | + +**Attach sequence (compiler-generated, inside `ks_attach_perf_event`):** +1. `ks_attr.attr.disabled = 1` — open counter without starting it +2. 
`syscall(SYS_perf_event_open, ...)` → `perf_fd` +3. `ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0)` — zero the counter +4. `bpf_program__attach_perf_event(prog, perf_fd)` — link BPF program +5. `ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, 0)` — **start counting** + +**Detach sequence (compiler-generated):** +1. `ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, 0)` — stop counting +2. `bpf_link__destroy(link)` — unlink BPF program +3. `close(perf_fd)` — release the kernel perf event + +**Compiler implementation:** +- Detects `attach(prog, perf_options_value, flags)` (three-argument form with `perf_options` second arg) and routes to `ks_attach_perf_event` +- Exposes omitted `perf_options` fields as language-level defaults (partial struct literal) +- Validates `pid ≥ -1`, `cpu ≥ -1`, and rejects `pid == -1 && cpu == -1` at runtime +- Emits `PERF_FLAG_FD_CLOEXEC` for safe fd inheritance +- BPF program section is `SEC("perf_event")` + +**Project Initialization:** +```bash +# Initialize a perf_event project +kernelscript init perf_event my_perf_monitor +``` + ### 3.2 Named Configuration Blocks ```kernelscript // Named configuration blocks - globally accessible diff --git a/examples/perf_cache_miss.ks b/examples/perf_cache_miss.ks new file mode 100644 index 0000000..70b3bf6 --- /dev/null +++ b/examples/perf_cache_miss.ks @@ -0,0 +1,25 @@ +// perf_cache_miss.ks +// Demonstrates @perf_event program type in KernelScript. +// The eBPF program runs on every hardware cache-miss event. +// The userspace side opens the perf event and attaches the BPF program. + +@perf_event +fn on_cache_miss(ctx: *bpf_perf_event_data) -> i32 { + return 0 +} + +fn main() -> i32 { + var prog = load(on_cache_miss) + + // Only perf_type + perf_config are required; pid, cpu, period, wakeup and flag fields + // default to: pid=-1 (all procs), cpu=0, period=1_000_000, wakeup=1, + // inherit/exclude_kernel/exclude_user=false. 
+ attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: cache_misses, period: 10000000, inherit: true }, 0) + print("Cache-miss perf_event demo attached") + var count = perf_read(prog) + print("Cache-miss count: %lld", count) + + detach(prog) + print("Cache-miss perf_event demo detached") + return 0 +} diff --git a/examples/perf_page_fault.ks b/examples/perf_page_fault.ks new file mode 100644 index 0000000..7c07084 --- /dev/null +++ b/examples/perf_page_fault.ks @@ -0,0 +1,32 @@ +// perf_page_fault.ks +// Demonstrates @perf_event program type in KernelScript. +// The eBPF program runs on every software page-fault event. +// The userspace side opens the perf event and attaches the BPF program. + +@perf_event +fn on_page_fault(ctx: *bpf_perf_event_data) -> i32 { + return 0 +} + +fn main() -> i32 { + var prog = load(on_page_fault) + + // pid: 0 = current process, cpu: -1 = any CPU (standard per-process monitoring). + // page_faults (PERF_COUNT_SW_PAGE_FAULTS) is the most reliable software event: + // every heap/stack allocation triggers minor page faults, no scheduler dependency. + attach(prog, perf_options { perf_type: perf_type_software, perf_config: page_faults, pid: 0, cpu: -1, period: 1 }, 0) + print("Page-fault perf_event demo attached") + + // Repeatedly increment a counter; stack/heap activity will generate page faults. 
+ var x: i64 = 0 + for (i in 0..10000000) { + x = x + 1 + } + + var count = perf_read(prog) + print("Page-fault count: %lld", count) + + detach(prog) + print("Page-fault perf_event demo detached") + return 0 +} diff --git a/src/ast.ml b/src/ast.ml index 3ff6ae4..5477bbe 100644 --- a/src/ast.ml +++ b/src/ast.ml @@ -40,7 +40,7 @@ type probe_type = (** Program types supported by KernelScript *) type program_type = - | Xdp | Tc | Probe of probe_type | Tracepoint | StructOps + | Xdp | Tc | Probe of probe_type | Tracepoint | StructOps | PerfEvent (** Map types for eBPF maps *) type map_type = @@ -658,6 +658,7 @@ let string_of_program_type = function | Probe Kprobe -> "kprobe" | Tracepoint -> "tracepoint" | StructOps -> "struct_ops" + | PerfEvent -> "perf_event" let string_of_map_type = function | Hash -> "hash" diff --git a/src/btf_parser.ml b/src/btf_parser.ml index 53230fc..fbc0f4e 100644 --- a/src/btf_parser.ml +++ b/src/btf_parser.ml @@ -106,6 +106,9 @@ let get_program_template prog_type btf_path = | "tc" -> ("*__sk_buff", "i32", [ "__sk_buff" ]) + | "perf_event" -> ("*bpf_perf_event_data", "i32", [ + "bpf_perf_event_data" + ]) | _ -> failwith (sprintf "Unsupported program type '%s' for generic template. Use specific template functions for kprobe/tracepoint." 
prog_type) in @@ -364,6 +367,7 @@ let generate_kernelscript_source ?extra_param ?include_kfuncs template project_n Kernelscript_context.Kprobe_codegen.register (); Kernelscript_context.Tracepoint_codegen.register (); Kernelscript_context.Fprobe_codegen.register (); + Kernelscript_context.Perf_event_codegen.register (); (* Get program description from context codegen system *) let context_comment = "// " ^ (Kernelscript_context.Context_codegen.get_context_program_description template.program_type) in @@ -502,6 +506,31 @@ let generate_kernelscript_source ?extra_param ?include_kfuncs template project_n | None -> "" in + (* perf_event programs use a completely different main() with attach(prog, opts, 0) *) + if template.program_type = "perf_event" then + sprintf {|%s +// Generated by KernelScript compiler with direct BTF parsing%s + +%s +%s { + // TODO: Implement your perf_event logic here + + return %s +} + +fn main() -> i32 { + var prog = load(%s) + + // perf_type + perf_config are required; all other fields default to sensible values. 
+ attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses }, 0) + + detach(prog) + + return 0 +} +|} context_comment include_line attribute_line function_definition sample_return function_name + else + sprintf {|%s // Generated by KernelScript compiler with direct BTF parsing%s %s @@ -549,6 +578,9 @@ let get_program_btf_types prog_type = | "tracepoint" -> [ ("trace_entry", "struct"); ] + | "perf_event" -> [ + ("bpf_perf_event_data", "struct"); + ] | _ -> [] (* Program-type specific kfunc names to extract from BTF *) diff --git a/src/codegen_common.ml b/src/codegen_common.ml index 0ee25c2..2325ca5 100644 --- a/src/codegen_common.ml +++ b/src/codegen_common.ml @@ -43,6 +43,7 @@ let rec ir_type_to_c target = function | UserspaceStd -> "char") (* Base type for userspace string - size handled in declaration *) | IRPointer (inner_type, _) -> sprintf "%s*" (ir_type_to_c target inner_type) | IRArray (inner_type, size, _) -> sprintf "%s[%d]" (ir_type_to_c target inner_type) size + | IRStruct ("perf_options", _) -> "ks_perf_options" (* Namespace KS type away from kernel structs *) | IRStruct (name, _) -> sprintf "struct %s" name | IREnum (name, _) -> sprintf "enum %s" name | IRResult (ok_type, _err_type) -> ir_type_to_c target ok_type (* simplified to ok type *) diff --git a/src/context/dune b/src/context/dune index ede66a7..034d4b0 100644 --- a/src/context/dune +++ b/src/context/dune @@ -1,5 +1,5 @@ (library (public_name kernelscript.context) (name kernelscript_context) - (modules context_codegen xdp_codegen tc_codegen kprobe_codegen tracepoint_codegen fprobe_codegen) + (modules context_codegen xdp_codegen tc_codegen kprobe_codegen tracepoint_codegen fprobe_codegen perf_event_codegen) (libraries unix str)) \ No newline at end of file diff --git a/src/context/perf_event_codegen.ml b/src/context/perf_event_codegen.ml new file mode 100644 index 0000000..78ea9f3 --- /dev/null +++ b/src/context/perf_event_codegen.ml @@ -0,0 +1,83 @@ +(* + * 
Copyright 2026 Siyuan Sun + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *) + +(** perf_event-specific code generation + Handles SEC("perf_event") programs with bpf_perf_event_data context. +*) + +open Printf +open Context_codegen + +(** Generate perf_event-specific includes *) +let generate_perf_event_includes () = [ + "#include <bpf/bpf_helpers.h>"; + "#include <bpf/bpf_tracing.h>"; (* NOTE(review): header names reconstructed — angle-bracket content was stripped from the source; verify against the other *_codegen modules *) +] + +(** Field access for bpf_perf_event_data context. + Phase 1 supports a minimal set of fields. + Full field access is added in Phase 3 (perf_event_codegen expansion). *) +let generate_perf_event_field_access ctx_var field_name = + match field_name with + | "sample_period" -> sprintf "%s->sample_period" ctx_var + | "addr" -> sprintf "%s->addr" ctx_var + | "cpu" -> sprintf "bpf_get_smp_processor_id()" + | _ -> + failwith (sprintf "Unknown perf_event context field: %s. \ + Supported fields in Phase 1: sample_period, addr, cpu." 
field_name) + +(** perf_event programs always return 0 or 1 – no named action constants *) +let map_perf_event_action_constant = function + | 0 -> Some "0" + | _ -> None + +(** Generate SEC("perf_event") attribute *) +let generate_perf_event_section_name _target = + "SEC(\"perf_event\")" + +(** Static field mapping table (minimal Phase 1 set) *) +let perf_event_field_mappings = [ + ("sample_period", { + field_name = "sample_period"; + c_expression = (fun ctx_var -> sprintf "%s->sample_period" ctx_var); + requires_cast = false; + field_type = "__u64"; + }); + ("addr", { + field_name = "addr"; + c_expression = (fun ctx_var -> sprintf "%s->addr" ctx_var); + requires_cast = false; + field_type = "__u64"; + }); +] + +(** Create perf_event code generator *) +let create () = { + name = "PerfEvent"; + c_type = "struct bpf_perf_event_data"; + section_prefix = "perf_event"; + field_mappings = perf_event_field_mappings; + generate_includes = generate_perf_event_includes; + generate_field_access = generate_perf_event_field_access; + map_action_constant = map_perf_event_action_constant; + generate_function_signature = None; + generate_section_name = Some generate_perf_event_section_name; +} + +(** Register this codegen with the context registry *) +let register () = + let codegen = create () in + Context_codegen.register_context_codegen "perf_event" codegen diff --git a/src/ebpf_c_codegen.ml b/src/ebpf_c_codegen.ml index 5747f60..8e1d828 100644 --- a/src/ebpf_c_codegen.ml +++ b/src/ebpf_c_codegen.ml @@ -257,7 +257,8 @@ let initialize_context_generators () = Kernelscript_context.Tc_codegen.register (); Kernelscript_context.Kprobe_codegen.register (); Kernelscript_context.Tracepoint_codegen.register (); - Kernelscript_context.Fprobe_codegen.register () + Kernelscript_context.Fprobe_codegen.register (); + Kernelscript_context.Perf_event_codegen.register () (** Emit all pending string literal declarations *) let emit_pending_string_literals ctx = @@ -1759,6 +1760,7 @@ let rec 
generate_c_function ctx ir_func = (match probe_type with | Ast.Kprobe -> Some "kprobe" (* Only kprobe uses pt_regs context *) | Ast.Fprobe -> None) (* Fprobe uses direct parameters *) + | Some Ast.PerfEvent -> Some "perf_event" | _ -> (* Fall back to parameter-based detection *) (match ir_func.parameters with @@ -1768,13 +1770,16 @@ let rec generate_c_function ctx ir_func = | (_, IRPointer (IRStruct ("__sk_buff", _), _)) :: _ -> Some "tc" (* Handle __sk_buff as TC context *) | (_, IRPointer (IRStruct ("xdp_md", _), _)) :: _ -> Some "xdp" (* Handle xdp_md as XDP context *) | (_, IRPointer (IRStruct ("pt_regs", _), _)) :: _ -> Some "kprobe" (* Handle pt_regs as kprobe context *) + | (_, IRPointer (IRStruct ("bpf_perf_event_data", _), _)) :: _ -> Some "perf_event" (* Handle bpf_perf_event_data *) | (_, IRPointer (IRStruct (struct_name, _), _)) :: _ when String.starts_with struct_name ~prefix:"trace_event_raw_" -> Some "tracepoint" (* Handle tracepoint context *) | _ -> None)); let return_type_str = - (* Special handling for kprobe functions: always use int return type for eBPF compatibility *) + (* Special handling for probe functions: always use int return type for eBPF compatibility *) match ir_func.func_program_type with + | Some (Ast.Probe Ast.Fprobe) -> "__s32" (* eBPF fprobe programs must return int *) | Some (Ast.Probe _) -> "__s32" (* eBPF probe programs must return int *) + | Some Ast.PerfEvent -> "__s32" (* eBPF perf_event programs must return int *) | _ -> match ir_func.return_type with | Some ret_type -> ebpf_type_from_ir_type ret_type @@ -1815,6 +1820,7 @@ let rec generate_c_function ctx ir_func = | Some (Ast.Probe Ast.Fprobe), _ -> Some "fprobe" | Some (Ast.Probe Ast.Kprobe), _ -> Some "kprobe" | Some Ast.Tracepoint, _ -> Some "tracepoint" + | Some Ast.PerfEvent, _ -> Some "perf_event" (* Fall back to parameter-based detection for context functions *) | _, (_, IRStruct ("xdp_md", _)) :: _ -> Some "xdp" | _, (_, IRStruct ("__sk_buff", _)) :: _ -> Some 
"tc" @@ -1823,6 +1829,7 @@ let rec generate_c_function ctx ir_func = | _, (_, IRPointer (IRStruct ("xdp_md", _), _)) :: _ -> Some "xdp" | _, (_, IRPointer (IRStruct ("__sk_buff", _), _)) :: _ -> Some "tc" (* Handle __sk_buff as TC context *) | _, (_, IRPointer (IRStruct ("pt_regs", _), _)) :: _ -> Some "kprobe" + | _, (_, IRPointer (IRStruct ("bpf_perf_event_data", _), _)) :: _ -> Some "perf_event" | _, (_, IRPointer (IRStruct (struct_name, _), _)) :: _ when String.starts_with struct_name ~prefix:"trace_event_raw_" -> Some "tracepoint" | _, [] -> None (* Parameterless function *) | _, _ -> None (* Other context types *) @@ -1843,6 +1850,7 @@ let rec generate_c_function ctx ir_func = | Some (Ast.Probe Ast.Fprobe) -> Some "fprobe" | Some (Ast.Probe Ast.Kprobe) -> Some "kprobe" | Some Ast.Tracepoint -> Some "tracepoint" + | Some Ast.PerfEvent -> Some "perf_event" | _ -> None in diff --git a/src/ir_function_system.ml b/src/ir_function_system.ml index 7804c47..db61078 100644 --- a/src/ir_function_system.ml +++ b/src/ir_function_system.ml @@ -47,8 +47,14 @@ let validate_function_signature (ir_func : ir_function) : signature_info = | Some (Ast.Probe _) -> true | _ -> false in + + (* Check if this is a perf_event function *) + let is_perf_event_function = match ir_func.func_program_type with + | Some Ast.PerfEvent -> true + | _ -> false + in - if ir_func.is_main && not is_struct_ops_function && not is_kprobe_function then ( + if ir_func.is_main && not is_struct_ops_function && not is_kprobe_function && not is_perf_event_function then ( if param_count <> 1 then errors := "Main function must have exactly one parameter (context)" :: !errors; match ir_func.parameters with @@ -91,6 +97,22 @@ let validate_function_signature (ir_func : ir_function) : signature_info = | Some _ -> errors := "Kprobe programs must return int (i32), u32, or void" :: !errors; | None -> errors := "Kprobe functions must have a return type" :: !errors ); + + (* Validation for perf_event functions *) + if 
ir_func.is_main && is_perf_event_function then ( + if param_count <> 1 then + errors := "perf_event functions must have exactly one parameter (context)" :: !errors; + (* Validate context type *) + (match ir_func.parameters with + | [(_, IRPointer (IRStruct ("bpf_perf_event_data", _), _))] -> () + | [(_, IRStruct ("bpf_perf_event_data", _))] -> () + | _ -> errors := "perf_event context must be *bpf_perf_event_data" :: !errors); + (* Validate return type *) + match ir_func.return_type with + | Some (IRI32) -> () + | Some _ -> errors := "perf_event programs must return i32" :: !errors + | None -> errors := "perf_event functions must have a return type" :: !errors + ); (* For struct_ops functions, we have different validation rules *) if is_struct_ops_function then ( diff --git a/src/ir_generator.ml b/src/ir_generator.ml index 5e71e4b..7ad26c8 100644 --- a/src/ir_generator.ml +++ b/src/ir_generator.ml @@ -1445,9 +1445,16 @@ and lower_statement ctx stmt = let _ = lower_expression ctx expr in ()) | _ -> - (* Non-void function - use normal expression handling *) - let _ = lower_expression ctx expr in - ()) + (* Non-void function call used as statement - discard return value *) + (match callee_expr.expr_desc with + | Ast.Identifier name -> + let arg_vals = List.map (lower_expression ctx) args in + let instr = make_ir_instruction (IRCall (DirectCall name, arg_vals, None)) expr.expr_pos in + emit_instruction ctx instr + | _ -> + (* Complex callee (function pointer) - use normal expression handling *) + let _ = lower_expression ctx expr in + ())) | _ -> (* Non-function call expression - use normal handling *) let _ = lower_expression ctx expr in @@ -2933,6 +2940,7 @@ let lower_multi_program ast symbol_table source_name = | "xdp" -> Ast.Xdp | "tc" -> Ast.Tc | "tracepoint" -> Ast.Tracepoint + | "perf_event" -> Ast.PerfEvent | _ -> failwith ("Unknown program type: " ^ prog_type_str) in Some { diff --git a/src/main.ml b/src/main.ml index f37aef7..d4c59bf 100644 --- a/src/main.ml 
+++ b/src/main.ml @@ -202,7 +202,7 @@ let init_project prog_type_or_struct_ops project_name btf_path extract_kfuncs = in (* Check if this is a struct_ops or a regular program type *) - let valid_program_types = ["xdp"; "tc"; "probe"; "tracepoint"] in + let valid_program_types = ["xdp"; "tc"; "probe"; "tracepoint"; "perf_event"] in let is_struct_ops = Struct_ops_registry.is_known_struct_ops prog_type in let is_program_type = List.mem prog_type valid_program_types in @@ -347,6 +347,7 @@ During compilation, the definition is verified against BTF to ensure compatibili (match target_function with | Some category_event -> sprintf "Tracepoint programs provide static tracing points in the kernel. This program traces the '%s' tracepoint." category_event | None -> "Tracepoint programs provide static tracing points in the kernel.") + | "perf_event" -> "Perf event programs run on hardware/software performance events (branch misses, CPU cycles, etc.) and can profile kernel and userspace workloads." | _ -> "eBPF program for kernel-level processing." 
in sprintf {|# %s diff --git a/src/multi_program_analyzer.ml b/src/multi_program_analyzer.ml index fbe94be..6bf5f86 100644 --- a/src/multi_program_analyzer.ml +++ b/src/multi_program_analyzer.ml @@ -69,6 +69,13 @@ let get_execution_context = function execution_stage = "struct_ops_callbacks"; can_drop_packets = false; } + | PerfEvent -> { + program_type = PerfEvent; + hook_point = "perf_event_sampling"; + stack_layer = 0; + execution_stage = "perf_sampling"; + can_drop_packets = false; + } (** Check if two programs execute sequentially (not concurrently) *) let are_sequential prog_type1 prog_type2 = @@ -114,6 +121,7 @@ let extract_programs (ast: declaration list) : program_def list = | "kprobe" -> Probe Kprobe | "tracepoint" -> Tracepoint | "struct_ops" -> StructOps + | "perf_event" -> PerfEvent | _ -> failwith ("Unknown program type: " ^ prog_type_str) in Some { @@ -441,6 +449,7 @@ let get_program_types_from_ast (ast: declaration list) : program_type list = | "tc" -> Tc :: acc | "kprobe" -> Probe Kprobe :: acc | "tracepoint" -> Tracepoint :: acc + | "perf_event" -> PerfEvent :: acc | _ -> acc) | _ -> acc) | _ -> acc diff --git a/src/stdlib.ml b/src/stdlib.ml index 2e84eb0..10144ae 100644 --- a/src/stdlib.ml +++ b/src/stdlib.ml @@ -109,6 +109,18 @@ let validate_register_function arg_types ast_context _pos = | _ -> (false, Some "register() requires an impl block argument") +(** Validation function for attach() - accepts standard 3-arg form, and perf_options 3-arg form *) +let validate_attach_function arg_types _ast_context _pos = + match arg_types with + | [ProgramHandle; Str _; (U8|U16|U32|U64|I8|I16|I32|I64)] -> + (* Standard form: attach(prog, target, flags) *) + (true, None) + | [ProgramHandle; (Struct "perf_options" | UserType "perf_options"); (U8|U16|U32|U64|I8|I16|I32|I64)] -> + (* Perf event form: attach(prog, perf_options { ... 
}, flags) - uniform 3-arg shape *) + (true, None) + | _ -> + (false, Some "attach() requires (handle, target, flags) — target is a string or perf_options { ... }") + (** Standard library built-in functions *) let builtin_functions = [ { @@ -135,14 +147,14 @@ let builtin_functions = [ }; { name = "attach"; - param_types = [ProgramHandle; Str 128; U32]; (* program handle, target interface, flags *) + param_types = []; (* Custom validation handles both standard and perf_options forms *) return_type = U32; (* Returns 0 on success *) - description = "Attach a loaded eBPF program to a target with flags"; + description = "Attach a loaded eBPF program to a target with flags; target is a string or perf_options { ... }"; is_variadic = false; ebpf_impl = ""; (* Not available in eBPF context *) userspace_impl = "bpf_prog_attach"; kernel_impl = ""; - validate = None; + validate = Some validate_attach_function; }; { name = "detach"; @@ -210,7 +222,17 @@ let builtin_functions = [ kernel_impl = ""; (* Not available in kernel context *) validate = Some validate_exec_function; }; - + { + name = "perf_read"; + param_types = [ProgramHandle]; + return_type = I64; (* Raw counter value, or -1 on error *) + description = "Read the current hardware/software counter value for a perf_event program"; + is_variadic = false; + ebpf_impl = ""; (* Not available in eBPF context *) + userspace_impl = "ks_perf_read"; + kernel_impl = ""; + validate = None; + }; ] (** Get built-in function definition by name *) @@ -274,8 +296,66 @@ let builtin_types = [ ("TC_ACT_REDIRECT", Some (Ast.Signed64 7L)); ("TC_ACT_TRAP", Some (Ast.Signed64 8L)); ], builtin_pos)); + + (* perf_type mirrors perf_event_attr.type so config stays a tagged 2D space. 
*) + TypeDef (EnumDef ("perf_type", [ + ("perf_type_hardware", Some (Ast.Signed64 0L)); + ("perf_type_software", Some (Ast.Signed64 1L)); + ("perf_type_tracepoint", Some (Ast.Signed64 2L)); + ("perf_type_hw_cache", Some (Ast.Signed64 3L)); + ("perf_type_raw", Some (Ast.Signed64 4L)); + ("perf_type_breakpoint", Some (Ast.Signed64 5L)); + ], builtin_pos)); + + (* Common config values for PERF_TYPE_HARDWARE. *) + TypeDef (EnumDef ("perf_hw_config", [ + ("cpu_cycles", Some (Ast.Signed64 0L)); + ("instructions", Some (Ast.Signed64 1L)); + ("cache_references", Some (Ast.Signed64 2L)); + ("cache_misses", Some (Ast.Signed64 3L)); + ("branch_instructions", Some (Ast.Signed64 4L)); + ("branch_misses", Some (Ast.Signed64 5L)); + ], builtin_pos)); + + (* Common config values for PERF_TYPE_SOFTWARE. *) + TypeDef (EnumDef ("perf_sw_config", [ + ("page_faults", Some (Ast.Signed64 2L)); + ("context_switches", Some (Ast.Signed64 3L)); + ("cpu_migrations", Some (Ast.Signed64 4L)); + ], builtin_pos)); + + (* perf_options: configuration bag for @perf_event programs. + Only 'perf_type' and 'perf_config' are required; all other fields have language-level defaults. *) + TypeDef (StructDef ("perf_options", [ + ("perf_type", Enum "perf_type"); + ("perf_config", U64); + ("pid", I32); + ("cpu", I32); + ("period", U64); + ("wakeup", U32); + ("inherit", Bool); + ("exclude_kernel", Bool); + ("exclude_user", Bool); + ], builtin_pos)); ] +(** Default field values for structs that support partial initialisation. + Returns [(field_name, default_literal)] for optional fields only. + Required fields (e.g. perf_type/perf_config in perf_options) are absent from the list, + so the type checker will still error if they are omitted. 
*) +let get_struct_field_defaults = function + | "perf_options" -> + Some [ + ("pid", IntLit (Signed64 (-1L), None)); + ("cpu", IntLit (Signed64 0L, None)); + ("period", IntLit (Unsigned64 1000000L, None)); + ("wakeup", IntLit (Unsigned64 1L, None)); + ("inherit", BoolLit false); + ("exclude_kernel", BoolLit false); + ("exclude_user", BoolLit false); + ] + | _ -> None + (** Get all builtin type definitions *) let get_builtin_types () = builtin_types diff --git a/src/type_checker.ml b/src/type_checker.ml index 8a95a99..2a0ae9b 100644 --- a/src/type_checker.ml +++ b/src/type_checker.ml @@ -1177,6 +1177,17 @@ and type_check_struct_literal ctx struct_name field_assignments pos = let type_def = Hashtbl.find ctx.types struct_name in match type_def with | StructDef (_, struct_fields, _) -> + (* Fill in optional fields from language-level defaults before type-checking. + Required fields (absent from the defaults table) still cause an error if omitted. *) + let field_assignments = + match Stdlib.get_struct_field_defaults struct_name with + | None -> field_assignments + | Some defaults -> + List.fold_left (fun acc (field_name, default_lit) -> + if List.mem_assoc field_name acc then acc + else acc @ [(field_name, make_expr (Literal default_lit) pos)] + ) field_assignments defaults + in (* Type check each field assignment *) let typed_field_assignments = List.map (fun (field_name, field_expr) -> let typed_field_expr = type_check_expression ctx field_expr in @@ -2476,6 +2487,7 @@ let type_check_ast ?symbol_table:(provided_symbol_table=None) ast = | "tc" -> Some Tc | "tracepoint" -> Some Tracepoint + | "perf_event" -> Some PerfEvent | "kfunc" -> None (* kfuncs don't have program types *) | "private" -> None (* private functions don't have program types *) | "helper" -> None (* helper functions don't have program types *) @@ -3010,6 +3022,7 @@ let rec type_check_and_annotate_ast ?symbol_table:(provided_symbol_table=None) ? 
| "tracepoint" -> (* Reject old format: @tracepoint without category/event *) type_error ("@tracepoint requires category/event specification. Use @tracepoint(\"category/event\") instead.") attr_func.attr_pos + | "perf_event" -> (Some PerfEvent, None) | "kfunc" -> (None, None) (* kfuncs don't have program types *) | "private" -> (None, None) (* private functions don't have program types *) | "helper" -> (None, None) (* helper functions don't have program types *) @@ -3118,6 +3131,26 @@ let rec type_check_and_annotate_ast ?symbol_table:(provided_symbol_table=None) ? if not valid_return_type then type_error (sprintf "@%s attributed function must return i32" probe_type_name) attr_func.attr_pos + | Some PerfEvent -> + (* @perf_event: must have exactly one param *bpf_perf_event_data and return i32 *) + let params = attr_func.attr_function.func_params in + let resolved_return_type = match get_return_type attr_func.attr_function.func_return_type with + | Some ret_type -> Some (resolve_user_type ctx ret_type) + | None -> None in + if List.length params <> 1 then + type_error "@perf_event attributed function must have exactly one parameter (ctx: *bpf_perf_event_data)" attr_func.attr_pos; + (match params with + | [(_, param_type)] -> + let resolved_param_type = resolve_user_type ctx param_type in + (match resolved_param_type with + | Pointer (Struct "bpf_perf_event_data") -> () + | Pointer (UserType "bpf_perf_event_data") -> () + | _ -> + type_error "@perf_event attributed function parameter must be ctx: *bpf_perf_event_data" attr_func.attr_pos) + | _ -> ()); + (match resolved_return_type with + | Some I32 -> () + | _ -> type_error "@perf_event attributed function must return i32" attr_func.attr_pos) | Some _ -> () (* Other program types - validation can be added later *) | None -> type_error ("Invalid or unsupported attribute") attr_func.attr_pos); @@ -3402,6 +3435,7 @@ and populate_multi_program_context ast multi_prog_analysis = (match prog_type_str with | "xdp" -> Some Xdp 
| "tracepoint" -> Some Tracepoint + | "perf_event" -> Some PerfEvent | _ -> None) | AttributeWithArg (attr_name, _) :: _ -> (match attr_name with diff --git a/src/userspace_codegen.ml b/src/userspace_codegen.ml index 0c07f08..dc4a5dc 100644 --- a/src/userspace_codegen.ml +++ b/src/userspace_codegen.ml @@ -382,6 +382,8 @@ type kfunc_dependency_info = { type function_usage = { mutable uses_load: bool; mutable uses_attach: bool; + mutable uses_attach_perf: bool; + mutable uses_perf_read: bool; mutable uses_detach: bool; mutable uses_map_operations: bool; mutable uses_daemon: bool; @@ -393,6 +395,8 @@ type function_usage = { let create_function_usage () = { uses_load = false; uses_attach = false; + uses_attach_perf = false; + uses_perf_read = false; uses_detach = false; uses_map_operations = false; uses_daemon = false; @@ -470,7 +474,7 @@ let extract_function_calls_from_ir_function ir_func = let get_program_type_from_attributes attr_list = List.fold_left (fun acc attr -> match attr with - | Ast.SimpleAttribute attr_name when List.mem attr_name ["xdp"; "tc"; "kprobe"; "tracepoint"] -> + | Ast.SimpleAttribute attr_name when List.mem attr_name ["xdp"; "tc"; "kprobe"; "tracepoint"; "perf_event"] -> Some attr_name | _ -> acc ) None attr_list @@ -702,7 +706,15 @@ let track_function_usage ctx instr = | DirectCall func_name -> (match func_name with | "load" -> ctx.function_usage.uses_load <- true - | "attach" -> ctx.function_usage.uses_attach <- true + | "attach" -> + (* Detect perf_options 3-arg form: attach(prog, perf_options{...}, flags) *) + (match args with + | [_; opts_val; _] when (match opts_val.val_type with IRStruct ("perf_options", _) -> true | _ -> false) -> + ctx.function_usage.uses_attach_perf <- true + | _ -> + ctx.function_usage.uses_attach <- true) + | "perf_read" -> + ctx.function_usage.uses_perf_read <- true | "detach" -> ctx.function_usage.uses_detach <- true | "daemon" -> ctx.function_usage.uses_daemon <- true | "exec" -> @@ -1064,7 +1076,7 @@ let 
collect_type_aliases_from_userspace_program userspace_prog = List.rev !type_aliases -(** Get printf format specifier for IR type *) +(** Get printf format specifier for IR type (for embedding inside a string literal) *) let get_printf_format_specifier ir_type = match ir_type with | IRU8 -> "%u" @@ -1083,48 +1095,109 @@ let get_printf_format_specifier ir_type = | IRPointer _ -> "%p" | _ -> "%d" (* fallback *) -(** Fix format specifiers in a format string based on argument types *) +(** Build a complete C printf format-string expression for a single value plus \n. + For 64-bit types we use the PRId64/PRIu64 macros via adjacent string-literal + concatenation so the generated code is warning-free on LP64 and LLP64: + int64_t → "%" PRId64 "\n" + uint64_t → "%" PRIu64 "\n" + int32_t → "%d\n" *) +let build_single_format_expr ir_type = + match ir_type with + | IRU64 -> "\"%\" PRIu64 \"\\n\"" + | IRI64 -> "\"%\" PRId64 \"\\n\"" + | t -> sprintf "\"%s\\n\"" (get_printf_format_specifier t) + +(** Normalize explicit printf arguments so their C types match our canonical + format specifiers on LP64/LLP64 targets. *) +let normalize_printf_arg ir_type arg_expr = + match ir_type with + | IRU64 -> sprintf "(unsigned long long)(%s)" arg_expr + | IRI64 -> sprintf "(long long)(%s)" arg_expr + | _ -> arg_expr + +(** Fix format specifiers in a format string based on argument types. + For 64-bit integer types (IRI64 / IRU64) only the length modifier is + updated to "ll"; flags, width, precision and the conversion character + are kept as-is. For every other type the existing specifier is left + completely unchanged. Arguments that have no corresponding specifier + in the format string get a canonical specifier appended at the end. 
*) let fix_format_specifiers format_string arg_types = - (* Count existing format specifiers in the string *) - let count_format_specs str = - let rec count chars spec_count = - match chars with - | [] -> spec_count - | '%' :: '%' :: rest -> count rest spec_count (* Skip escaped %% *) - | '%' :: rest -> - (* Find the end of this format specifier *) - let rec find_spec_end spec_chars = - match spec_chars with - | [] -> rest - | ('d' | 'i' | 'u' | 'o' | 'x' | 'X' | 'f' | 'F' | 'e' | 'E' | 'g' | 'G' | 'c' | 's' | 'p' | 'n') :: remaining -> - remaining - | _ :: remaining -> - find_spec_end remaining + (* Parse one complete printf specifier starting AFTER the leading '%'. + Returns Some (flags, width, prec_opt, length_mod, conv_char, remaining) + or None if the input is malformed. *) + let parse_spec chars = + let rec take_flags cs acc = + match cs with + | ('-'|'+'|' '|'#'|'0') as c :: rest -> take_flags rest (acc ^ String.make 1 c) + | _ -> (acc, cs) + in + let rec take_width cs acc = + match cs with + | ('0'..'9'|'*') as c :: rest -> take_width rest (acc ^ String.make 1 c) + | _ -> (acc, cs) + in + let take_prec cs = + match cs with + | '.' 
:: rest -> + let rec digits cs acc = + match cs with + | ('0'..'9'|'*') as c :: r -> digits r (acc ^ String.make 1 c) + | _ -> (Some acc, cs) in - let remaining = find_spec_end rest in - count remaining (spec_count + 1) - | _ :: rest -> count rest spec_count + digits rest "" + | _ -> (None, cs) in - count (String.to_seq str |> List.of_seq) 0 + let take_length cs = + match cs with + | 'h' :: 'h' :: rest -> ("hh", rest) + | 'l' :: 'l' :: rest -> ("ll", rest) + | ('h'|'l'|'L'|'j'|'z'|'t') as c :: rest -> (String.make 1 c, rest) + | _ -> ("", cs) + in + let is_conv = function + | 'd'|'i'|'u'|'o'|'x'|'X'|'f'|'F'|'e'|'E'|'g'|'G'|'c'|'s'|'p'|'n' -> true + | _ -> false + in + let (flags, cs) = take_flags chars "" in + let (width, cs) = take_width cs "" in + let (prec, cs) = take_prec cs in + let (lmod, cs) = take_length cs in + match cs with + | c :: rest when is_conv c -> Some (flags, width, prec, lmod, c, rest) + | _ -> None in - - let existing_specs = count_format_specs format_string in - let needed_specs = List.length arg_types in - - if existing_specs >= needed_specs then - (* Already has enough format specifiers - don't add more *) - format_string - else - (* Need to add format specifiers for missing arguments *) - let missing_count = needed_specs - existing_specs in - let missing_types = - let rec take n lst = match n, lst with - | 0, _ | _, [] -> [] - | n, x :: xs -> x :: take (n - 1) xs - in - List.rev (take missing_count (List.rev arg_types)) in - let missing_specs = List.map get_printf_format_specifier missing_types in - format_string ^ String.concat "" missing_specs + let is_int64 = function IRU64 | IRI64 -> true | _ -> false in + let rebuild flags width prec lmod conv = + let prec_s = match prec with None -> "" | Some p -> "." 
^ p in + sprintf "%%%s%s%s%s%c" flags width prec_s lmod conv + in + let rec rewrite chars remaining_types acc = + match chars with + | [] -> + let rebuilt = String.concat "" (List.rev acc) in + let missing = List.map get_printf_format_specifier remaining_types |> String.concat "" in + rebuilt ^ missing + | '%' :: '%' :: rest -> rewrite rest remaining_types ("%%" :: acc) + | '%' :: rest -> + (match remaining_types with + | arg_type :: rest_types -> + (match parse_spec rest with + | Some (flags, width, prec, lmod, conv, remaining_chars) -> + let effective_lmod = if is_int64 arg_type then "ll" else lmod in + rewrite remaining_chars rest_types (rebuild flags width prec effective_lmod conv :: acc) + | None -> + (* malformed specifier – leave percent and continue *) + rewrite rest remaining_types ("%" :: acc)) + | [] -> + (* extra specifier with no matching arg – preserve as written *) + (match parse_spec rest with + | Some (flags, width, prec, lmod, conv, remaining_chars) -> + rewrite remaining_chars [] (rebuild flags width prec lmod conv :: acc) + | None -> + rewrite rest [] ("%" :: acc))) + | c :: rest -> rewrite rest remaining_types ((String.make 1 c) :: acc) + in + rewrite (String.to_seq format_string |> List.of_seq) arg_types [] @@ -1845,23 +1918,28 @@ let rec generate_c_instruction_from_ir ctx instruction = (* Special handling for print: convert to printf format with proper type specifiers *) (match c_args, args with | [], [] -> (userspace_impl, ["\"\\n\""]) - | [first], [_] -> - (* For single string argument, check if we need to append newline to format string *) - let format_str = first in - let fixed_format = match format_str with - | str when String.length str >= 2 && String.get str 0 = '"' && String.get str (String.length str - 1) = '"' -> - (* Remove quotes, add newline, add quotes back *) - let inner_str = String.sub str 1 (String.length str - 2) in - sprintf "\"%s\\n\"" inner_str - | str -> - (* Non-quoted string - add newline *) - sprintf "%s \"\\n\"" str 
- in - (userspace_impl, [fixed_format]) + | [first], [ir_arg] -> + (* If the C representation is a string literal, use it as the + format string directly (e.g. print("hello")). + Otherwise synthesise the correct format expression. + For 64-bit types we emit "%" PRId64 "\n" (adjacent + string-literal + macro) so the output is warning-free on + both LP64 and LLP64 targets. *) + if String.length first >= 2 + && String.get first 0 = '"' + && String.get first (String.length first - 1) = '"' then + let inner_str = String.sub first 1 (String.length first - 2) in + (userspace_impl, [sprintf "\"%s\\n\"" inner_str]) + else + let fmt_expr = build_single_format_expr ir_arg.val_type in + (userspace_impl, [fmt_expr; first]) | format_arg :: rest_args, _ :: rest_ir_args -> (* Extract the format string and fix format specifiers based on argument types *) let format_str = format_arg in let arg_types = List.map (fun ir_val -> ir_val.val_type) rest_ir_args in + let normalized_rest_args = + List.map2 normalize_printf_arg arg_types rest_args + in let fixed_format = match format_str with | str when String.length str >= 2 && String.get str 0 = '"' && String.get str (String.length str - 1) = '"' -> (* Remove quotes, fix format specifiers, add newline, add quotes back *) @@ -1873,7 +1951,7 @@ let rec generate_c_instruction_from_ir ctx instruction = let fixed_str = fix_format_specifiers str arg_types in sprintf "\"%s\\n\"" fixed_str in - (userspace_impl, fixed_format :: rest_args) + (userspace_impl, fixed_format :: normalized_rest_args) | args, _ -> (userspace_impl, args @ ["\"\\n\""])) | "load" -> (* Special handling for load: now lightweight - just get program handle from skeleton *) @@ -1888,21 +1966,33 @@ let rec generate_c_instruction_from_ir ctx instruction = | _ -> failwith "load expects exactly one argument") | "attach" -> (* Special handling for attach: now takes program handle (not program name) *) - ctx.function_usage.uses_attach <- true; - (match c_args with - | [program_handle; 
target; flags] -> - (* KernelScript uses "category/name" format for tracepoints, convert to libbpf "category:name" format *) - let normalized_target = - if String.contains target '/' then - (* Convert KernelScript "sched/sched_switch" to libbpf "sched:sched_switch" *) - String.map (function '/' -> ':' | c -> c) target - else - (* For non-tracepoint targets (XDP interfaces, kprobe functions, raw tracepoints), use as-is *) - target - in - (* Use the program handle variable directly instead of extracting program name *) - ("attach_bpf_program_by_fd", [program_handle; normalized_target; flags]) - | _ -> failwith "attach expects exactly three arguments") + (* Detect perf_options 3-arg form: attach(prog, perf_options{...}, flags) *) + (match args with + | [_; opts_val; _] when (match opts_val.val_type with IRStruct ("perf_options", _) -> true | _ -> false) -> + (* Perf event form: delegate entirely to ks_attach_perf_event(prog, opts, flags) *) + ctx.function_usage.uses_attach_perf <- true; + ctx.function_usage.uses_load <- true; + (match c_args with + | [program_handle; opts_arg; flags_arg] -> + ("ks_attach_perf_event", [program_handle; opts_arg; flags_arg]) + | _ -> failwith "attach with perf_options expects exactly three arguments") + | _ -> + (* Standard form: attach(handle, target, flags) *) + ctx.function_usage.uses_attach <- true; + (match c_args with + | [program_handle; target; flags] -> + (* KernelScript uses "category/name" format for tracepoints, convert to libbpf "category:name" format *) + let normalized_target = + if String.contains target '/' then + (* Convert KernelScript "sched/sched_switch" to libbpf "sched:sched_switch" *) + String.map (function '/' -> ':' | c -> c) target + else + (* For non-tracepoint targets (XDP interfaces, kprobe functions, raw tracepoints), use as-is *) + target + in + (* Use the program handle variable directly instead of extracting program name *) + ("attach_bpf_program_by_fd", [program_handle; normalized_target; flags]) + | _ 
-> failwith "attach expects exactly three arguments (handle, target, flags)")) | "detach" -> (* Special handling for detach: takes only program handle *) ctx.function_usage.uses_detach <- true; @@ -1928,6 +2018,11 @@ let rec generate_c_instruction_from_ir ctx instruction = failwith (Printf.sprintf "exec() only supports Python files (.py), got: %s" file_str); (userspace_impl, c_args) | _ -> failwith "exec() expects exactly one argument") + | "perf_read" -> + ctx.function_usage.uses_perf_read <- true; + (match c_args with + | [program_handle] -> ("ks_perf_read", [program_handle]) + | _ -> failwith "perf_read expects exactly one argument") | _ -> (userspace_impl, c_args)) | None -> (* Regular function call *) @@ -3694,6 +3789,8 @@ let generate_complete_userspace_program_from_ir ?(config_declarations = []) ?(ta { uses_load = acc_usage.uses_load || func_usage.uses_load; uses_attach = acc_usage.uses_attach || func_usage.uses_attach; + uses_attach_perf = acc_usage.uses_attach_perf || func_usage.uses_attach_perf; + uses_perf_read = acc_usage.uses_perf_read || func_usage.uses_perf_read; uses_detach = acc_usage.uses_detach || func_usage.uses_detach; uses_map_operations = acc_usage.uses_map_operations || func_usage.uses_map_operations; uses_daemon = acc_usage.uses_daemon || func_usage.uses_daemon; @@ -3729,9 +3826,12 @@ let generate_complete_userspace_program_from_ir ?(config_declarations = []) ?(ta (* For header generation, use all global maps if there are pinned maps, otherwise use the filtered list *) let maps_for_headers = if has_any_pinned_maps then global_maps else used_global_maps_with_exec in - let uses_bpf_functions = all_usage.uses_load || all_usage.uses_attach || all_usage.uses_detach in + let uses_bpf_functions = all_usage.uses_load || all_usage.uses_attach || all_usage.uses_detach || all_usage.uses_attach_perf || all_usage.uses_perf_read in let base_includes = generate_headers_for_maps ~uses_bpf_functions maps_for_headers in - let additional_includes = {|#include 
+ let bpf_attach_includes = if uses_bpf_functions then + "#include \n" + else "" in + let additional_includes = bpf_attach_includes ^ {|#include #include #include #include @@ -3765,8 +3865,58 @@ let generate_complete_userspace_program_from_ir ?(config_declarations = []) ?(ta (* Generate bridge code for imported KernelScript and Python modules *) let bridge_code = generate_mixed_bridge_code resolved_imports userspace_prog.userspace_functions in + + (* Conditional perf_event type definitions *) + let perf_event_defs = if all_usage.uses_attach_perf then {| +#include +#include + +/* KernelScript perf_event type tags */ +typedef enum { + perf_type_hardware = PERF_TYPE_HARDWARE, + perf_type_software = PERF_TYPE_SOFTWARE, + perf_type_tracepoint = PERF_TYPE_TRACEPOINT, + perf_type_hw_cache = PERF_TYPE_HW_CACHE, + perf_type_raw = PERF_TYPE_RAW, + perf_type_breakpoint = PERF_TYPE_BREAKPOINT +} perf_type; + +/* Common config values for PERF_TYPE_HARDWARE */ +typedef enum { + cpu_cycles = PERF_COUNT_HW_CPU_CYCLES, + instructions = PERF_COUNT_HW_INSTRUCTIONS, + cache_references = PERF_COUNT_HW_CACHE_REFERENCES, + cache_misses = PERF_COUNT_HW_CACHE_MISSES, + branch_instructions = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + branch_misses = PERF_COUNT_HW_BRANCH_MISSES +} perf_hw_config; + +/* Common config values for PERF_TYPE_SOFTWARE */ +typedef enum { + page_faults = PERF_COUNT_SW_PAGE_FAULTS, + context_switches = PERF_COUNT_SW_CONTEXT_SWITCHES, + cpu_migrations = PERF_COUNT_SW_CPU_MIGRATIONS +} perf_sw_config; + +/* ks_perf_options holds all KernelScript perf_options fields plus the inner + * kernel perf_event_attr (from linux/perf_event.h) that ks_open_perf_event fills. 
*/ +typedef struct { + struct perf_event_attr attr; /* kernel perf_event_attr filled by ks_open_perf_event */ + int32_t perf_type; /* perf_event_attr.type tag */ + uint64_t perf_config; /* perf_event_attr.config value for the chosen type */ + int32_t pid; /* process ID (-1 = all processes, default) */ + int32_t cpu; /* CPU number (0 = CPU 0, default) */ + uint64_t period; /* sampling period (default 1 000 000) */ + uint32_t wakeup; /* wakeup after N events (default 1) */ + bool inherit; /* inherit to child processes (default false) */ + bool exclude_kernel; /* exclude kernel events (default false) */ + bool exclude_user; /* exclude user events (default false) */ +} ks_perf_options; + +|} + else "" in - let includes = base_includes ^ "\n" ^ additional_includes ^ kmodule_loading_code ^ skeleton_include ^ bridge_code in + let includes = base_includes ^ "\n" ^ additional_includes ^ kmodule_loading_code ^ skeleton_include ^ bridge_code ^ perf_event_defs in (* Reset and use the global config names collector *) global_config_names := []; @@ -3959,60 +4109,33 @@ void cleanup_bpf_maps(void) { let load_function = generate_load_function_with_tail_calls base_name all_usage tail_call_analysis all_setup_code kfunc_dependencies (Ir.get_global_variables ir_multi_prog) in - (* Global attachment storage (generated only when attach/detach are used) *) - let attachment_storage = if all_usage.uses_attach || all_usage.uses_detach then + (* Global attachment storage (generated when attach/detach/perf attach/perf read are used) *) + let attachment_storage = if all_usage.uses_attach || all_usage.uses_detach || all_usage.uses_attach_perf || all_usage.uses_perf_read then {|// Global attachment storage for tracking active program attachments -struct attachment_entry { + struct attachment_entry { int prog_fd; char target[128]; uint32_t flags; struct bpf_link *link; // For kprobe/tracepoint programs (NULL for XDP) int ifindex; // For XDP programs (0 for kprobe/tracepoint) + int perf_fd; // For 
perf_event programs (-1 otherwise) + int detaching; // Non-zero while teardown is in progress enum bpf_prog_type type; struct attachment_entry *next; -}; - -static struct attachment_entry *attached_programs = NULL; -static pthread_mutex_t attachment_mutex = PTHREAD_MUTEX_INITIALIZER; + }; -// Helper function to find attachment entry -static struct attachment_entry *find_attachment(int prog_fd) { - pthread_mutex_lock(&attachment_mutex); - struct attachment_entry *current = attached_programs; - while (current) { - if (current->prog_fd == prog_fd) { - pthread_mutex_unlock(&attachment_mutex); - return current; - } - current = current->next; - } - pthread_mutex_unlock(&attachment_mutex); - return NULL; -} - -// Helper function to remove attachment entry -static void remove_attachment(int prog_fd) { - pthread_mutex_lock(&attachment_mutex); - struct attachment_entry **current = &attached_programs; - while (*current) { - if ((*current)->prog_fd == prog_fd) { - struct attachment_entry *to_remove = *current; - *current = (*current)->next; - free(to_remove); - break; - } - current = &(*current)->next; - } - pthread_mutex_unlock(&attachment_mutex); -} + static struct attachment_entry *attached_programs = NULL; + static pthread_mutex_t attachment_mutex = PTHREAD_MUTEX_INITIALIZER; -// Helper function to add attachment entry -static int add_attachment(int prog_fd, const char *target, uint32_t flags, - struct bpf_link *link, int ifindex, enum bpf_prog_type type) { + // Helper function to add attachment entry. + // Duplicate check is performed atomically under the same lock as insertion. 
+ static int add_attachment(int prog_fd, const char *target, uint32_t flags, + struct bpf_link *link, int ifindex, int perf_fd, + enum bpf_prog_type type) { struct attachment_entry *entry = malloc(sizeof(struct attachment_entry)); if (!entry) { - fprintf(stderr, "Failed to allocate memory for attachment entry\n"); - return -1; + fprintf(stderr, "Failed to allocate memory for attachment entry\n"); + return -1; } entry->prog_fd = prog_fd; @@ -4021,16 +4144,44 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, entry->flags = flags; entry->link = link; entry->ifindex = ifindex; + entry->perf_fd = perf_fd; entry->type = type; + entry->detaching = 0; pthread_mutex_lock(&attachment_mutex); + /* Reject duplicate insertions atomically. + * Skip entries that are currently being torn down (detaching != 0) so that + * a new attach can succeed while the old detach is still running. */ + struct attachment_entry *existing = attached_programs; + while (existing) { + if (existing->prog_fd == prog_fd && !existing->detaching) { + pthread_mutex_unlock(&attachment_mutex); + free(entry); + fprintf(stderr, "Program with fd %d is already attached. Use detach() first.\n", prog_fd); + return -1; + } + existing = existing->next; + } entry->next = attached_programs; attached_programs = entry; pthread_mutex_unlock(&attachment_mutex); return 0; -} -|} + } + + /* Helper: find the bpf_program in the skeleton object for a given fd. + * Returns NULL if the skeleton is not loaded or no program matches. 
*/ + static struct bpf_program *find_prog_by_fd(int prog_fd) { + if (!obj) return NULL; + struct bpf_program *prog = NULL; + bpf_object__for_each_program(prog, obj->obj) { + if (bpf_program__fd(prog) == prog_fd) { + return prog; + } + } + return NULL; + } + |} else "" in let attach_function = if all_usage.uses_attach then @@ -4040,12 +4191,6 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, return -1; } - // Check if program is already attached - if (find_attachment(prog_fd)) { - fprintf(stderr, "Program with fd %d is already attached. Use detach() first.\n", prog_fd); - return -1; - } - // Get program type from file descriptor struct bpf_prog_info info = {}; uint32_t info_len = sizeof(info); @@ -4071,7 +4216,7 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, } // Store XDP attachment (no bpf_link for XDP) - if (add_attachment(prog_fd, target, flags, NULL, ifindex, BPF_PROG_TYPE_XDP) != 0) { + if (add_attachment(prog_fd, target, flags, NULL, ifindex, -1, BPF_PROG_TYPE_XDP) != 0) { // If storage fails, detach and return error bpf_xdp_detach(ifindex, flags, NULL); return -1; @@ -4084,23 +4229,7 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, // For probe programs, target should be the kernel function name (e.g., "sys_read") // Use libbpf high-level API for probe attachment - // Get the bpf_program struct from the object and file descriptor - struct bpf_program *prog = NULL; - struct bpf_object *obj_iter; - - // Find the program object corresponding to this fd - // We need to get the program from the skeleton object - if (!obj) { - fprintf(stderr, "eBPF skeleton not loaded for probe attachment\n"); - return -1; - } - - bpf_object__for_each_program(prog, obj->obj) { - if (bpf_program__fd(prog) == prog_fd) { - break; - } - } - + struct bpf_program *prog = find_prog_by_fd(prog_fd); if (!prog) { fprintf(stderr, "Failed to find bpf_program for fd %d\n", prog_fd); return -1; @@ -4109,14 
+4238,15 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, // BPF_PROG_TYPE_KPROBE programs always use kprobe attachment // (these are generated from @probe("target+offset")) struct bpf_link *link = bpf_program__attach_kprobe(prog, false, target); - if (!link) { - fprintf(stderr, "Failed to attach kprobe to function '%s': %s\n", target, strerror(errno)); + long link_err = libbpf_get_error(link); + if (link_err) { + fprintf(stderr, "Failed to attach kprobe to function '%s': %s\n", target, strerror((int)-link_err)); return -1; } printf("Kprobe attached to function: %s\n", target); // Store probe attachment for later cleanup - if (add_attachment(prog_fd, target, flags, link, 0, BPF_PROG_TYPE_KPROBE) != 0) { + if (add_attachment(prog_fd, target, flags, link, 0, -1, BPF_PROG_TYPE_KPROBE) != 0) { // If storage fails, destroy link and return error bpf_link__destroy(link); return -1; @@ -4128,21 +4258,7 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, // For fentry/fexit programs (BPF_PROG_TYPE_TRACING) // These are loaded with SEC("fentry/target") or SEC("fexit/target") - // Get the bpf_program struct from the object and file descriptor - struct bpf_program *prog = NULL; - - // Find the program object corresponding to this fd - if (!obj) { - fprintf(stderr, "eBPF skeleton not loaded for tracing program attachment\n"); - return -1; - } - - bpf_object__for_each_program(prog, obj->obj) { - if (bpf_program__fd(prog) == prog_fd) { - break; - } - } - + struct bpf_program *prog = find_prog_by_fd(prog_fd); if (!prog) { fprintf(stderr, "Failed to find bpf_program for fd %d\n", prog_fd); return -1; @@ -4150,15 +4266,16 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, // For fentry/fexit programs, use bpf_program__attach_trace struct bpf_link *link = bpf_program__attach_trace(prog); - if (!link) { - fprintf(stderr, "Failed to attach fentry/fexit program to function '%s': %s\n", target, 
strerror(errno)); + long link_err = libbpf_get_error(link); + if (link_err) { + fprintf(stderr, "Failed to attach fentry/fexit program to function '%s': %s\n", target, strerror((int)-link_err)); return -1; } printf("Fentry/fexit program attached to function: %s\n", target); // Store tracing attachment for later cleanup - if (add_attachment(prog_fd, target, flags, link, 0, BPF_PROG_TYPE_TRACING) != 0) { + if (add_attachment(prog_fd, target, flags, link, 0, -1, BPF_PROG_TYPE_TRACING) != 0) { // If storage fails, destroy link and return error bpf_link__destroy(link); return -1; @@ -4187,22 +4304,7 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, return -1; } - // Get the bpf_program struct from the object and file descriptor - struct bpf_program *prog = NULL; - - // Find the program object corresponding to this fd - // We need to get the program from the skeleton object - if (!obj) { - fprintf(stderr, "eBPF skeleton not loaded for tracepoint attachment\n"); - return -1; - } - - bpf_object__for_each_program(prog, obj->obj) { - if (bpf_program__fd(prog) == prog_fd) { - break; - } - } - + struct bpf_program *prog = find_prog_by_fd(prog_fd); if (!prog) { fprintf(stderr, "Failed to find bpf_program for fd %d\n", prog_fd); return -1; @@ -4210,13 +4312,14 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, // Use libbpf's high-level tracepoint attachment API with category and event name struct bpf_link *link = bpf_program__attach_tracepoint(prog, category, event_name); - if (!link) { - fprintf(stderr, "Failed to attach tracepoint to '%s:%s': %s\n", category, event_name, strerror(errno)); + long link_err = libbpf_get_error(link); + if (link_err) { + fprintf(stderr, "Failed to attach tracepoint to '%s:%s': %s\n", category, event_name, strerror((int)-link_err)); return -1; } // Store tracepoint attachment for later cleanup - if (add_attachment(prog_fd, target, flags, link, 0, BPF_PROG_TYPE_TRACEPOINT) != 0) { + if 
(add_attachment(prog_fd, target, flags, link, 0, -1, BPF_PROG_TYPE_TRACEPOINT) != 0) { // If storage fails, destroy link and return error bpf_link__destroy(link); return -1; @@ -4235,21 +4338,7 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, return -1; } - // Get the bpf_program struct from the object and file descriptor - struct bpf_program *prog = NULL; - - // Find the program object corresponding to this fd - if (!obj) { - fprintf(stderr, "eBPF skeleton not loaded for TC attachment\n"); - return -1; - } - - bpf_object__for_each_program(prog, obj->obj) { - if (bpf_program__fd(prog) == prog_fd) { - break; - } - } - + struct bpf_program *prog = find_prog_by_fd(prog_fd); if (!prog) { fprintf(stderr, "Failed to find bpf_program for fd %d\n", prog_fd); return -1; @@ -4260,13 +4349,14 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, // Use libbpf's TC attachment API struct bpf_link *link = bpf_program__attach_tcx(prog, ifindex, &tcx_opts); - if (!link) { - fprintf(stderr, "Failed to attach TC program to interface '%s': %s\n", target, strerror(errno)); + long link_err = libbpf_get_error(link); + if (link_err) { + fprintf(stderr, "Failed to attach TC program to interface '%s': %s\n", target, strerror((int)-link_err)); return -1; } // Store TC attachment for later cleanup (flags no longer needed for direction) - if (add_attachment(prog_fd, target, 0, link, ifindex, BPF_PROG_TYPE_SCHED_CLS) != 0) { + if (add_attachment(prog_fd, target, 0, link, ifindex, -1, BPF_PROG_TYPE_SCHED_CLS) != 0) { // If storage fails, destroy link and return error bpf_link__destroy(link); return -1; @@ -4283,17 +4373,45 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, }|} else "" in - let detach_function = if all_usage.uses_detach then - {|void detach_bpf_program_by_fd(int prog_fd) { + let detach_perf_case = if all_usage.uses_attach_perf then + {| case BPF_PROG_TYPE_PERF_EVENT: { + if (entry->perf_fd >= 0 && 
ioctl(entry->perf_fd, PERF_EVENT_IOC_DISABLE, 0) != 0) { + fprintf(stderr, "Failed to disable perf event: %s\n", strerror(errno)); + } + if (entry->link) { + bpf_link__destroy(entry->link); + } else { + fprintf(stderr, "Invalid perf event link for program fd %d\n", prog_fd); + } + if (entry->perf_fd >= 0) { + close(entry->perf_fd); + } + printf("Perf event program detached\n"); + break; + }|} + else "" in + let detach_function = if all_usage.uses_detach || all_usage.uses_attach_perf then + sprintf {|void detach_bpf_program_by_fd(int prog_fd) { if (prog_fd < 0) { - fprintf(stderr, "Invalid program file descriptor: %d\n", prog_fd); + fprintf(stderr, "Invalid program file descriptor: %%d\n", prog_fd); return; } - // Find the attachment entry - struct attachment_entry *entry = find_attachment(prog_fd); + /* Phase 1: mark the entry as detaching under the lock so concurrent + * add_attachment can proceed without treating this entry as active. */ + pthread_mutex_lock(&attachment_mutex); + struct attachment_entry *entry = attached_programs; + while (entry) { + if (entry->prog_fd == prog_fd && !entry->detaching) { + entry->detaching = 1; + break; + } + entry = entry->next; + } + pthread_mutex_unlock(&attachment_mutex); + if (!entry) { - fprintf(stderr, "No active attachment found for program fd %d\n", prog_fd); + fprintf(stderr, "No active attachment found for program fd %%d\n", prog_fd); return; } @@ -4302,56 +4420,66 @@ static int add_attachment(int prog_fd, const char *target, uint32_t flags, case BPF_PROG_TYPE_XDP: { int ret = bpf_xdp_detach(entry->ifindex, entry->flags, NULL); if (ret) { - fprintf(stderr, "Failed to detach XDP program from interface: %s\n", strerror(errno)); + fprintf(stderr, "Failed to detach XDP program from interface: %%s\n", strerror(errno)); } else { - printf("XDP detached from interface index: %d\n", entry->ifindex); + printf("XDP detached from interface index: %%d\n", entry->ifindex); } break; } case BPF_PROG_TYPE_KPROBE: { if (entry->link) { 
bpf_link__destroy(entry->link); - printf("Kprobe detached from: %s\n", entry->target); + printf("Kprobe detached from: %%s\n", entry->target); } else { - fprintf(stderr, "Invalid kprobe link for program fd %d\n", prog_fd); + fprintf(stderr, "Invalid kprobe link for program fd %%d\n", prog_fd); } break; } case BPF_PROG_TYPE_TRACING: { if (entry->link) { bpf_link__destroy(entry->link); - printf("Fentry/fexit program detached from: %s\n", entry->target); + printf("Fentry/fexit program detached from: %%s\n", entry->target); } else { - fprintf(stderr, "Invalid tracing program link for program fd %d\n", prog_fd); + fprintf(stderr, "Invalid tracing program link for program fd %%d\n", prog_fd); } break; } case BPF_PROG_TYPE_TRACEPOINT: { if (entry->link) { bpf_link__destroy(entry->link); - printf("Tracepoint detached from: %s\n", entry->target); + printf("Tracepoint detached from: %%s\n", entry->target); } else { - fprintf(stderr, "Invalid tracepoint link for program fd %d\n", prog_fd); + fprintf(stderr, "Invalid tracepoint link for program fd %%d\n", prog_fd); } break; } case BPF_PROG_TYPE_SCHED_CLS: { if (entry->link) { bpf_link__destroy(entry->link); - printf("TC program detached from interface: %s\n", entry->target); + printf("TC program detached from interface: %%s\n", entry->target); } else { - fprintf(stderr, "Invalid TC program link for program fd %d\n", prog_fd); + fprintf(stderr, "Invalid TC program link for program fd %%d\n", prog_fd); } break; } - default: - fprintf(stderr, "Unsupported program type for detachment: %d\n", entry->type); +%s default: + fprintf(stderr, "Unsupported program type for detachment: %%d\n", entry->type); break; } - // Remove from tracking - remove_attachment(prog_fd); -}|} + /* Phase 2: teardown is complete; remove entry from tracking list and free. 
*/ + pthread_mutex_lock(&attachment_mutex); + struct attachment_entry **cur2 = &attached_programs; + while (*cur2) { + if (*cur2 == entry) { + *cur2 = entry->next; + break; + } + cur2 = &(*cur2)->next; + } + pthread_mutex_unlock(&attachment_mutex); + free(entry); +}|} detach_perf_case else "" in let bpf_obj_decl = "" in (* Skeleton now handles the BPF object *) @@ -4464,7 +4592,164 @@ static int ensure_bpf_dir(const char *path) { }|} else "" in - let functions_list = List.filter (fun s -> s <> "") [mkdir_helper_function; attachment_storage; load_function; attach_function; detach_function; daemon_function; exec_function] in + let perf_attach_function = if all_usage.uses_attach_perf then + {|int ks_open_perf_event(ks_perf_options ks_attr) { + /* Fill the BTF-derived struct perf_event_attr from KernelScript fields */ + ks_attr.attr.type = (__u32)ks_attr.perf_type; + ks_attr.attr.size = sizeof(struct perf_event_attr); + ks_attr.attr.config = (__u64)ks_attr.perf_config; + ks_attr.attr.sample_type = 0; + ks_attr.attr.sample_period = ks_attr.period > 0 ? ks_attr.period : 1000000; + ks_attr.attr.wakeup_events = ks_attr.wakeup > 0 ? ks_attr.wakeup : 1; + ks_attr.attr.inherit = ks_attr.inherit ? 1 : 0; + ks_attr.attr.exclude_kernel = ks_attr.exclude_kernel ? 1 : 0; + ks_attr.attr.exclude_user = ks_attr.exclude_user ? 
1 : 0; + ks_attr.attr.disabled = 1; + + int cpu = ks_attr.cpu; + int pid = ks_attr.pid; + + if (pid < -1) { + fprintf(stderr, "ks_open_perf_event: invalid pid %d (expected >= -1)\n", pid); + return -1; + } + if (cpu < -1) { + fprintf(stderr, "ks_open_perf_event: invalid cpu %d (expected >= -1)\n", cpu); + return -1; + } + if (pid == -1 && cpu == -1) { + fprintf(stderr, "ks_open_perf_event: system-wide perf events require an explicit cpu >= 0\n"); + return -1; + } + + int perf_fd = (int)syscall(SYS_perf_event_open, &ks_attr.attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC); + if (perf_fd < 0) { + fprintf(stderr, "ks_open_perf_event: perf_event_open failed: %s\n", strerror(errno)); + return -1; + } + return perf_fd; +} + +/* Attach a perf_event BPF program using a ks_perf_options config. + * Opens the perf fd, resets, attaches, and enables counting in one step. */ +int ks_attach_perf_event(int prog_fd, ks_perf_options opts, int flags) { + (void)flags; /* reserved for future use */ + + if (prog_fd < 0) { + fprintf(stderr, "Invalid program file descriptor: %d\n", prog_fd); + return -1; + } + /* Verify the program is actually a @perf_event program */ + struct bpf_prog_info prog_info = {}; + uint32_t info_len = sizeof(prog_info); + if (bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len) == 0 && + prog_info.type != BPF_PROG_TYPE_PERF_EVENT) { + fprintf(stderr, "ks_attach_perf_event: fd %d is not a @perf_event program (type=%u)\n", + prog_fd, prog_info.type); + return -1; + } + + int perf_fd = ks_open_perf_event(opts); + if (perf_fd < 0) return perf_fd; + + struct bpf_program *prog = find_prog_by_fd(prog_fd); + if (!prog) { + fprintf(stderr, "Failed to find bpf_program for fd %d\n", prog_fd); + close(perf_fd); + return -1; + } + + if (ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0) != 0) { + fprintf(stderr, "Failed to reset perf event fd %d: %s\n", perf_fd, strerror(errno)); + close(perf_fd); + return -1; + } + + struct bpf_link *link = bpf_program__attach_perf_event(prog, perf_fd); + 
long link_err = libbpf_get_error(link); + if (link_err) { + fprintf(stderr, "Failed to attach perf_event program to perf_fd %d: %s\n", perf_fd, strerror((int)-link_err)); + close(perf_fd); + return -1; + } + + if (ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, 0) != 0) { + fprintf(stderr, "Failed to enable perf event fd %d: %s\n", perf_fd, strerror(errno)); + bpf_link__destroy(link); + close(perf_fd); + return -1; + } + + if (add_attachment(prog_fd, "perf_event", (uint32_t)flags, link, 0, perf_fd, BPF_PROG_TYPE_PERF_EVENT) != 0) { + ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, 0); + bpf_link__destroy(link); + close(perf_fd); + return -1; + } + + printf("Perf event program attached\n"); + return 0; +} +|} + else "" in + + let perf_read_function = if all_usage.uses_perf_read then + {|/* Read the current hardware counter value from an open perf_fd. + * Returns the raw 64-bit count, or -1 on error. */ +int64_t ks_read_perf_count(int perf_fd) { + if (perf_fd < 0) { + fprintf(stderr, "ks_read_perf_count: invalid perf_fd %d\n", perf_fd); + return -1; + } + uint64_t count = 0; + ssize_t n = read(perf_fd, &count, sizeof(count)); + if (n < 0) { + fprintf(stderr, "ks_read_perf_count: read failed on perf_fd %d: %s\n", + perf_fd, strerror(errno)); + return -1; + } + if (n != sizeof(count)) { + fprintf(stderr, "ks_read_perf_count: short read (%zd bytes) on perf_fd %d\n", + n, perf_fd); + return -1; + } + return (int64_t)count; +} + +/* Read the counter for the perf_event program bound to prog_fd. + * Looks up the perf_fd from the attachment table and calls ks_read_perf_count. 
*/ +int64_t ks_perf_read(int prog_fd) { + pthread_mutex_lock(&attachment_mutex); + int found = 0; + int dup_fd = -1; + struct attachment_entry *cur = attached_programs; + while (cur) { + if (cur->prog_fd == prog_fd) { + if (!cur->detaching && cur->perf_fd >= 0) { + found = 1; + dup_fd = dup(cur->perf_fd); + } + break; + } + cur = cur->next; + } + pthread_mutex_unlock(&attachment_mutex); + if (!found) { + fprintf(stderr, "ks_perf_read: no active attachment for program fd %d\n", prog_fd); + return -1; + } + if (dup_fd < 0) { + fprintf(stderr, "ks_perf_read: dup(perf_fd) failed for program fd %d: %s\n", prog_fd, strerror(errno)); + return -1; + } + int64_t result = ks_read_perf_count(dup_fd); + close(dup_fd); + return result; +} +|} + else "" in + + let functions_list = List.filter (fun s -> s <> "") [mkdir_helper_function; attachment_storage; load_function; attach_function; detach_function; perf_attach_function; perf_read_function; daemon_function; exec_function] in if functions_list = [] && bpf_obj_decl = "" then "" else sprintf "\n/* BPF Helper Functions (generated only when used) */\n%s\n\n%s" diff --git a/tests/dune b/tests/dune index 25142e2..5112613 100644 --- a/tests/dune +++ b/tests/dune @@ -411,6 +411,11 @@ (modules test_detach_api) (libraries kernelscript alcotest test_utils str)) +(executable + (name test_perf_event_attach) + (modules test_perf_event_attach) + (libraries kernelscript alcotest str)) + (executable (name test_tc) (modules test_tc) @@ -516,6 +521,7 @@ test_tracepoint.exe test_probe.exe test_detach_api.exe + test_perf_event_attach.exe test_tc.exe test_exec.exe test_void_functions.exe @@ -838,6 +844,10 @@ (alias runtest) (action (run ./test_detach_api.exe))) +(rule + (alias runtest) + (action (run ./test_perf_event_attach.exe))) + (rule (alias runtest) (action (run ./test_tc.exe))) diff --git a/tests/test_ir.ml b/tests/test_ir.ml index d2726d9..746323c 100644 --- a/tests/test_ir.ml +++ b/tests/test_ir.ml @@ -32,6 +32,7 @@ module Program_type = 
struct | Probe Kprobe -> Format.fprintf fmt "Kprobe" | Probe Fprobe -> Format.fprintf fmt "Fprobe" | StructOps -> Format.fprintf fmt "StructOps" + | PerfEvent -> Format.fprintf fmt "PerfEvent" end (** Helper functions for creating test AST nodes *) diff --git a/tests/test_perf_event_attach.ml b/tests/test_perf_event_attach.ml new file mode 100644 index 0000000..ac054d1 --- /dev/null +++ b/tests/test_perf_event_attach.ml @@ -0,0 +1,412 @@ +open Alcotest +open Kernelscript.Ast +open Kernelscript.Ir +open Kernelscript.Userspace_codegen +open Kernelscript.Parse +open Kernelscript.Type_checker + +let contains_substr str substr = + try + let _ = Str.search_forward (Str.regexp_string substr) str 0 in + true + with Not_found -> false + +let count_substr str substr = + let regexp = Str.regexp_string substr in + let rec loop start count = + try + let index = Str.search_forward regexp str start in + loop (index + String.length substr) (count + 1) + with Not_found -> count + in + loop 0 0 + +let test_pos = { line = 1; column = 1; filename = "test.ks" } + +let int32_value value = + make_ir_value (IRLiteral (IntLit (Signed64 value, None))) IRI32 test_pos + +let uint32_value value = + make_ir_value (IRLiteral (IntLit (Signed64 value, None))) IRU32 test_pos + +let uint64_value value = + make_ir_value (IRLiteral (IntLit (Signed64 value, None))) IRU64 test_pos + +let bool_value value = + make_ir_value (IRLiteral (BoolLit value)) IRBool test_pos + +let int64_value value = + make_ir_value (IRLiteral (IntLit (Signed64 value, None))) IRI64 test_pos + +let perf_type_value name raw_value = + make_ir_value + (IREnumConstant ("perf_type", name, Signed64 raw_value)) + (IREnum ("perf_type", [])) + test_pos + +let perf_config_value enum_name name raw_value = + make_ir_value + (IREnumConstant (enum_name, name, Signed64 raw_value)) + (IREnum (enum_name, [])) + test_pos + +let perf_attr_expr ~pid ~cpu = + make_ir_expr + (IRStructLiteral ("perf_options", [ + ("perf_type", perf_type_value 
"perf_type_hardware" 0L); + ("perf_config", perf_config_value "perf_hw_config" "branch_misses" 5L); + ("pid", int32_value pid); + ("cpu", int32_value cpu); + ("period", uint64_value 1000000L); + ("wakeup", uint32_value 1L); + ("inherit", bool_value false); + ("exclude_kernel", bool_value false); + ("exclude_user", bool_value false); + ])) + (IRStruct ("perf_options", [])) + test_pos + +let make_generated_code instructions = + let entry_block = make_ir_basic_block "entry" instructions 0 in + let main_func = make_ir_function "main" [] (Some IRI32) [entry_block] ~is_main:true test_pos in + let userspace_prog = + make_ir_userspace_program + [main_func] + [] + (make_ir_coordinator_logic [] [] [] (make_ir_config_management [] [] [])) + test_pos + in + let ir_multi_prog = make_ir_multi_program "test" ~userspace_program:userspace_prog test_pos in + generate_complete_userspace_program_from_ir userspace_prog [] ir_multi_prog "test.ks" + +let test_perf_event_codegen_enforces_pid_cpu_rules () = + let prog_handle = make_ir_value (IRVariable "prog") IRI32 test_pos in + let attr_value = make_ir_value (IRVariable "attr") (IRStruct ("perf_options", [])) test_pos in + let flags_value = uint32_value 0L in + let attr_decl = + make_ir_instruction + (IRVariableDecl (attr_value, IRStruct ("perf_options", []), Some (perf_attr_expr ~pid:(-1L) ~cpu:(-1L)))) + test_pos + in + let attach_call = + make_ir_instruction + (IRCall (DirectCall "attach", [prog_handle; attr_value; flags_value], None)) + test_pos + in + let generated_code = make_generated_code [attr_decl; attach_call] in + + check bool "preserve raw cpu value" true + (contains_substr generated_code "int cpu = ks_attr.cpu;"); + check bool "reject invalid pid below -1" true + (contains_substr generated_code "if (pid < -1)"); + check bool "reject invalid cpu below -1" true + (contains_substr generated_code "if (cpu < -1)"); + check bool "reject system-wide attach without explicit cpu" true + (contains_substr generated_code "if (pid == -1 
&& cpu == -1)"); + check bool "remove old cpu normalization" false + (contains_substr generated_code "int cpu = ks_attr.cpu >= 0 ? ks_attr.cpu : 0;"); + check bool "perf detach disables event" true + (contains_substr generated_code "PERF_EVENT_IOC_DISABLE"); + check bool "perf detach closes event fd" true + (contains_substr generated_code "close(entry->perf_fd);"); + (* Attach success detection *) + check bool "perf attach emits IOC_ENABLE on success" true + (contains_substr generated_code "PERF_EVENT_IOC_ENABLE"); + check bool "perf attach prints success message" true + (contains_substr generated_code "Perf event program attached"); + (* Detach success detection *) + check bool "perf detach prints success message" true + (contains_substr generated_code "Perf event program detached"); + (* Duplicate attach protection and invalid fd guard *) + check bool "perf attach rejects duplicate prog_fd" true + (contains_substr generated_code "already attached. Use detach() first."); + check bool "perf attach rejects invalid prog_fd" true + (contains_substr generated_code "Invalid program file descriptor:") + +let find_substr_pos str substr = + try Some (Str.search_forward (Str.regexp_string substr) str 0) + with Not_found -> None + +(* Verify A appears before B in the generated code string *) +let appears_before str a b = + match find_substr_pos str a, find_substr_pos str b with + | Some pa, Some pb -> pa < pb + | _ -> false + +let perf_attr_expr_with ~period ~wakeup = + make_ir_expr + (IRStructLiteral ("perf_options", [ + ("perf_type", perf_type_value "perf_type_hardware" 0L); + ("perf_config", perf_config_value "perf_hw_config" "branch_misses" 5L); + ("pid", int32_value 1234L); + ("cpu", int32_value 0L); + ("period", uint64_value period); + ("wakeup", uint32_value wakeup); + ("inherit", bool_value false); + ("exclude_kernel", bool_value false); + ("exclude_user", bool_value false); + ])) + (IRStruct ("perf_options", [])) + test_pos + +(* Generate code that attaches a 
perf_event program via 3-arg attach(prog, opts, flags) *) +let make_perf_code_with ~period ~wakeup = + let prog_handle = make_ir_value (IRVariable "prog") IRI32 test_pos in + let attr_value = make_ir_value (IRVariable "attr") (IRStruct ("perf_options", [])) test_pos in + let flags_value = uint32_value 0L in + let attr_decl = + make_ir_instruction + (IRVariableDecl (attr_value, IRStruct ("perf_options", []), + Some (perf_attr_expr_with ~period ~wakeup))) + test_pos + in + let attach_call = + make_ir_instruction + (IRCall (DirectCall "attach", [prog_handle; attr_value; flags_value], None)) + test_pos + in + make_generated_code [attr_decl; attach_call] + +let test_perf_event_counting_starts_correctly () = + let code = make_perf_code_with ~period:1000000L ~wakeup:1L in + + (* 1. Counter starts disabled: perf_event_open is called with disabled=1 so the + kernel won't fire events before we are ready. *) + check bool "attr.disabled set to 1 before perf_event_open" true + (contains_substr code "ks_attr.attr.disabled = 1;"); + + (* 2. The fd-close-on-exec flag is passed to perf_event_open for fd safety. *) + check bool "PERF_FLAG_FD_CLOEXEC passed to perf_event_open" true + (contains_substr code "PERF_FLAG_FD_CLOEXEC"); + + (* 3. Counter is zeroed before the BPF program is attached and enabled, + so the first sample starts from 0. *) + check bool "IOC_RESET issued before enabling" true + (contains_substr code "PERF_EVENT_IOC_RESET"); + + (* 4. Ordering guarantee: RESET must appear before ENABLE in the generated source. *) + check bool "IOC_RESET precedes IOC_ENABLE in source" true + (appears_before code "PERF_EVENT_IOC_RESET" "PERF_EVENT_IOC_ENABLE"); + + (* 5. BPF program is linked to the perf fd before enabling (attach before enable). *) + check bool "attach_perf_event called before IOC_ENABLE" true + (appears_before code "bpf_program__attach_perf_event" "PERF_EVENT_IOC_ENABLE"); + + (* 6. Counting truly kicks off: IOC_ENABLE is the last step and must be present. 
*) + check bool "IOC_ENABLE present to start counting" true + (contains_substr code "PERF_EVENT_IOC_ENABLE") + +let test_perf_event_period_and_wakeup_defaults () = + (* When period=0 and wakeup=0 the codegen must substitute safe defaults so that + the kernel actually delivers samples. *) + let code = make_perf_code_with ~period:0L ~wakeup:0L in + + check bool "default sample_period 1000000 used when period=0" true + (contains_substr code "ks_attr.period > 0 ? ks_attr.period : 1000000"); + check bool "default wakeup_events 1 used when wakeup=0" true + (contains_substr code "ks_attr.wakeup > 0 ? ks_attr.wakeup : 1") + +let test_perf_event_period_and_wakeup_custom () = + (* Custom period/wakeup values are substituted at runtime by the generated + conditional, so this test verifies the expression is emitted, not its value. *) + let code = make_perf_code_with ~period:500000L ~wakeup:4L in + + (* The conditional expression is still present - values are resolved at runtime *) + check bool "runtime period expression present for custom period" true + (contains_substr code "ks_attr.period > 0 ? ks_attr.period : 1000000"); + check bool "runtime wakeup expression present for custom wakeup" true + (contains_substr code "ks_attr.wakeup > 0 ? ks_attr.wakeup : 1") + +let test_standard_attach_uses_libbpf_error_checks () = + let prog_handle = make_ir_value (IRVariable "prog") IRI32 test_pos in + let target = make_ir_value (IRLiteral (StringLit "eth0")) (IRStr 16) test_pos in + let flags = uint32_value 0L in + let attach_call = + make_ir_instruction + (IRCall (DirectCall "attach", [prog_handle; target; flags], None)) + test_pos + in + let generated_code = make_generated_code [attach_call] in + + (* After removing the dead PERF_EVENT case from attach_bpf_program_by_fd, only + the four non-XDP program types (kprobe, tracing, tracepoint, TC) have a + libbpf_get_error check; XDP uses bpf_xdp_attach which returns a plain errno.
*) + check int "standard attach branches use libbpf_get_error" 4 + (count_substr generated_code "libbpf_get_error(link)"); + check bool "old null-link checks removed" false + (contains_substr generated_code "if (!link)"); + check bool "kprobe reports libbpf error string" true + (contains_substr generated_code "Failed to attach kprobe to function '%s': %s"); + check bool "tracepoint reports libbpf error string" true + (contains_substr generated_code "Failed to attach tracepoint to '%s:%s': %s"); + check bool "tc reports libbpf error string" true + (contains_substr generated_code "Failed to attach TC program to interface '%s': %s") + +let test_perf_read_helpers_not_generated () = + (* perf_event attach alone should not emit read helpers when they are unused. *) + let code = make_perf_code_with ~period:1000000L ~wakeup:1L in + + check bool "ks_read_perf_count helper omitted" false + (contains_substr code "ks_read_perf_count"); + check bool "ks_perf_read helper omitted" false + (contains_substr code "ks_perf_read"); + check bool "perf counter read syscall omitted" false + (contains_substr code "read(perf_fd, &count, sizeof(count))") + +let test_perf_read_helpers_generated_when_used () = + let prog_handle = make_ir_value (IRVariable "prog") IRI32 test_pos in + let attr_value = make_ir_value (IRVariable "attr") (IRStruct ("perf_options", [])) test_pos in + let flags_value = uint32_value 0L in + let count_value = make_ir_value (IRVariable "count") IRI64 test_pos in + let attr_decl = + make_ir_instruction + (IRVariableDecl (attr_value, IRStruct ("perf_options", []), + Some (perf_attr_expr_with ~period:1000000L ~wakeup:1L))) + test_pos + in + let attach_call = + make_ir_instruction + (IRCall (DirectCall "attach", [prog_handle; attr_value; flags_value], None)) + test_pos + in + let read_call = + make_ir_instruction + (IRCall (DirectCall "perf_read", [prog_handle], Some count_value)) + test_pos + in + let code = make_generated_code [attr_decl; attach_call; read_call] in + 
check bool "ks_read_perf_count helper generated when perf_read is used" true + (contains_substr code "ks_read_perf_count"); + check bool "ks_perf_read helper generated when perf_read is used" true + (contains_substr code "ks_perf_read"); + check bool "perf_read duplicates perf fd under the lock" true + (contains_substr code "dup_fd = dup(cur->perf_fd)"); + check bool "perf_read closes duplicate fd after reading" true + (contains_substr code "close(dup_fd)") + +let test_perf_attach_event_function_generated () = + (* attach(prog, perf_options{...}, 0) must generate ks_attach_perf_event which + owns the full open-reset-attach-enable lifecycle in a single C function. *) + let code = make_perf_code_with ~period:1000000L ~wakeup:1L in + + check bool "ks_attach_perf_event function generated" true + (contains_substr code "ks_attach_perf_event"); + check bool "ks_attach_perf_event calls ks_open_perf_event" true + (contains_substr code "ks_open_perf_event"); + check bool "counter reset before attach" true + (contains_substr code "PERF_EVENT_IOC_RESET"); + check bool "bpf_program__attach_perf_event used for linking" true + (contains_substr code "bpf_program__attach_perf_event"); + check bool "IOC_ENABLE used to start counting" true + (contains_substr code "PERF_EVENT_IOC_ENABLE"); + (* The old __PERF_RAW_EMIT__ sentinel and snprintf string hack must be gone *) + check bool "no __PERF_RAW_EMIT__ sentinel in generated code" false + (contains_substr code "__PERF_RAW_EMIT__"); + check bool "no snprintf perf_fd string hack" false + (contains_substr code "snprintf(%s, sizeof(%s),"); + check bool "perf attr type copied directly from perf_options" true + (contains_substr code "ks_attr.attr.type = (__u32)ks_attr.perf_type;"); + check bool "perf attr config copied directly from perf_options" true + (contains_substr code "ks_attr.attr.config = (__u64)ks_attr.perf_config;"); + check bool "old perf_counter switch removed" false + (contains_substr code "switch (ks_attr.counter)"); + check 
bool "find_prog_by_fd helper used for program lookup" true + (contains_substr code "find_prog_by_fd"); + check bool "perf attach rejects wrong program type at runtime" true + (contains_substr code "is not a @perf_event program"); + check bool "add_attachment performs atomic duplicate check" true + (contains_substr code "Reject duplicate insertions atomically") + +let test_detach_attach_concurrent_window () = + (* During a detach, the entry stays in the list but is marked detaching=1. + * A concurrent attach for the same prog_fd must succeed (not be blocked by + * the still-present but detaching entry). *) + let code = make_perf_code_with ~period:1000000L ~wakeup:1L in + check bool "attachment_entry has detaching field" true + (contains_substr code "int detaching;"); + check bool "add_attachment skips detaching entries in duplicate check" true + (contains_substr code "!existing->detaching"); + check bool "detach marks entry as detaching before teardown" true + (contains_substr code "entry->detaching = 1"); + check bool "detach re-locks to unlink and free entry after teardown" true + (contains_substr code "Phase 2: teardown is complete") + +(* ── Type-checking regression tests ───────────────────────────────────── *) + +let parse_and_check source = + let ast = parse_string source in + type_check_ast ast + +(* A well-formed @perf_event function must pass the type checker end-to-end. *) +let test_perf_event_valid_signature () = + let source = + "@perf_event\nfn on_event(ctx: *bpf_perf_event_data) -> i32 {\n return 0\n}" in + (match parse_and_check source with + | [_] -> () + | _ -> fail "Valid @perf_event signature should pass type checking") + +(* Using the wrong context type (e.g. *xdp_md) must be rejected. 
*) +let test_perf_event_wrong_ctx_type () = + let source = + "@perf_event\nfn on_event(ctx: *xdp_md) -> i32 {\n return 0\n}" in + (try + let _ = parse_and_check source in + fail "Wrong context type should have been rejected by type checker" + with _ -> ()) + +(* Zero parameters must be rejected. *) +let test_perf_event_no_params () = + let source = + "@perf_event\nfn on_event() -> i32 {\n return 0\n}" in + (try + let _ = parse_and_check source in + fail "Zero parameters should have been rejected by type checker" + with _ -> ()) + +(* More than one parameter must be rejected. *) +let test_perf_event_too_many_params () = + let source = + "@perf_event\nfn on_event(ctx: *bpf_perf_event_data, extra: u32) -> i32 {\n return 0\n}" in + (try + let _ = parse_and_check source in + fail "Two parameters should have been rejected by type checker" + with _ -> ()) + +(* Non-i32 return types (u32, void, bool) must be rejected. *) +let test_perf_event_wrong_return_type () = + let invalid_cases = [ + ("u32", "@perf_event\nfn on_event(ctx: *bpf_perf_event_data) -> u32 { return 0 }"); + ("void", "@perf_event\nfn on_event(ctx: *bpf_perf_event_data) -> void { }"); + ("bool", "@perf_event\nfn on_event(ctx: *bpf_perf_event_data) -> bool { return false }"); + ] in + List.iter (fun (label, source) -> + (try + let _ = parse_and_check source in + fail (Printf.sprintf "Return type '%s' should have been rejected by type checker" label) + with _ -> ()) + ) invalid_cases + +let type_checking_tests = [ + test_case "perf_event_valid_signature" `Quick test_perf_event_valid_signature; + test_case "perf_event_wrong_ctx_type" `Quick test_perf_event_wrong_ctx_type; + test_case "perf_event_no_params" `Quick test_perf_event_no_params; + test_case "perf_event_too_many_params" `Quick test_perf_event_too_many_params; + test_case "perf_event_wrong_return_type"`Quick test_perf_event_wrong_return_type; +] + +let tests = [ + test_case "perf_event_codegen_enforces_pid_cpu_rules" `Quick 
test_perf_event_codegen_enforces_pid_cpu_rules; + test_case "perf_event_counting_starts_correctly" `Quick test_perf_event_counting_starts_correctly; + test_case "perf_event_period_and_wakeup_defaults" `Quick test_perf_event_period_and_wakeup_defaults; + test_case "perf_event_period_and_wakeup_custom" `Quick test_perf_event_period_and_wakeup_custom; + test_case "perf_read_helpers_not_generated" `Quick test_perf_read_helpers_not_generated; + test_case "perf_read_helpers_generated_when_used" `Quick test_perf_read_helpers_generated_when_used; + test_case "perf_attach_event_function_generated" `Quick test_perf_attach_event_function_generated; + test_case "detach_attach_concurrent_window" `Quick test_detach_attach_concurrent_window; + test_case "standard_attach_uses_libbpf_error_checks" `Quick test_standard_attach_uses_libbpf_error_checks; +] + +let () = run "Perf Event Attach Tests" [ + ("perf_event_attach", tests); + ("perf_event_type_checking", type_checking_tests); +] \ No newline at end of file diff --git a/tests/test_program_ref.ml b/tests/test_program_ref.ml index 0a63731..720220b 100644 --- a/tests/test_program_ref.ml +++ b/tests/test_program_ref.ml @@ -143,13 +143,17 @@ let test_stdlib_integration () = (match Kernelscript.Stdlib.get_builtin_function_signature "attach" with | Some (params, return_type) -> - check int "attach parameter count" 3 (List.length params); - (match params with - | first_param :: _ -> - check bool "attach first parameter is ProgramHandle" true (first_param = Kernelscript.Ast.ProgramHandle) - | [] -> check bool "attach should have parameters" false true); + (* attach uses custom validation (param_types = []), so count is 0 *) + check int "attach parameter count" 0 (List.length params); check bool "attach return type is U32" true (return_type = Kernelscript.Ast.U32) - | None -> check bool "attach function signature should exist" false true) + | None -> check bool "attach function signature should exist" false true); + + (* Verify that the 
custom validation function is wired up on the attach entry *) + (match Kernelscript.Stdlib.get_builtin_function "attach" with + | Some func -> + check bool "attach has custom validation wired up" true + (match func.validate with Some _ -> true | None -> false) + | None -> check bool "attach builtin should exist" false true) (** Test that calling attach without load fails *) let test_attach_without_load_fails () = @@ -169,10 +173,12 @@ fn main() -> i32 { let (_, _) = Kernelscript.Type_checker.type_check_and_annotate_ast ast in check bool "should fail when attach called with program reference" false true with - | Type_error (msg, _) -> + | Type_error (msg, _) -> check bool "should fail with type error" true (String.length msg > 0); - check bool "error should mention type mismatch" true (String.contains msg 'm') - | _ -> + (* Error message is: "attach() requires (handle, target, flags) — ..." *) + check bool "error message starts with attach()" true + (String.length msg >= 8 && String.sub msg 0 8 = "attach()") + | _ -> check bool "should fail when attach called with program reference" false true (** Test multiple program handles with proper resource management *)