<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
  <channel>
    <title>GitHub Cuda Monthly Trending</title>
    <description>Monthly Trending of Cuda in GitHub</description>
    <pubDate>Sun, 17 May 2026 01:52:24 GMT</pubDate>
    <link>http://mshibanami.github.io/GitHubTrendingRSS</link>
    
    <item>
      <title>deepseek-ai/DeepGEMM</title>
      <link>https://github.com/deepseek-ai/DeepGEMM</link>
      <description>&lt;p&gt;DeepGEMM: clean and efficient FP8 GEMM kernels with fine-grained scaling&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/6c75edffe2c35e743f757d2a03befba88ca7416b427e23172c93f9105db83d87/deepseek-ai/DeepGEMM" medium="image" />
      
    </item>
    
    <item>
      <title>deepseek-ai/DeepEP</title>
      <link>https://github.com/deepseek-ai/DeepEP</link>
      <description>&lt;p&gt;DeepEP: an efficient expert-parallel communication library&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/64b05f0f428892ba46cd80443ff80ea5459d0dfbe4fd54eb9d8244d24eb3afa8/deepseek-ai/DeepEP" medium="image" />
      
    </item>
    
    <item>
      <title>alibaba/rtp-llm</title>
      <link>https://github.com/alibaba/rtp-llm</link>
      <description>&lt;p&gt;RTP-LLM: Alibaba&#39;s high-performance LLM inference engine for diverse applications.&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/46bfd4842df5171c655d00ebde16a01318b5845ef451c9d3ccaa5108b6c8895e/alibaba/rtp-llm" medium="image" />
      
    </item>
    
    <item>
      <title>karpathy/llm.c</title>
      <link>https://github.com/karpathy/llm.c</link>
      <description>&lt;p&gt;LLM training in simple, raw C/CUDA&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/1548bac35ac84f0bc09a4718a8cfacb45c40c9b865425d59bf8375944d1fd3e0/karpathy/llm.c" medium="image" />
      
    </item>
    
    <item>
      <title>NVIDIA/AMGX</title>
      <link>https://github.com/NVIDIA/AMGX</link>
      <description>&lt;p&gt;Distributed multigrid linear solver library on GPU&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/88a74fc574c730404b02f1550e582de03a96cf3c6844552ec6883a50530d7c0b/NVIDIA/AMGX" medium="image" />
      
    </item>
    
    <item>
      <title>NVIDIA/nvbench</title>
      <link>https://github.com/NVIDIA/nvbench</link>
      <description>&lt;p&gt;CUDA Kernel Benchmarking Library&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/b3e9c4f2c66740850426d67d2dafa0a2d9b5377d89ce03b22d2598f395369072/NVIDIA/nvbench" medium="image" />
      
    </item>
    
    <item>
      <title>NVIDIA/CUDALibrarySamples</title>
      <link>https://github.com/NVIDIA/CUDALibrarySamples</link>
      <description>&lt;p&gt;CUDA Library Samples&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/f95daf04e01ff2828923af4b60766e620c655bf75ffe4a98ef0639fa9877a84a/NVIDIA/CUDALibrarySamples" medium="image" />
      
    </item>
    
    <item>
      <title>NVIDIA/cub</title>
      <link>https://github.com/NVIDIA/cub</link>
      <description>&lt;p&gt;[ARCHIVED] Cooperative primitives for CUDA C++. See https://github.com/NVIDIA/cccl&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://repository-images.githubusercontent.com/8225159/68a74e00-557d-11eb-8f63-2cdf2ea55052" medium="image" />
      
    </item>
    
    <item>
      <title>NVIDIA/nccl-tests</title>
      <link>https://github.com/NVIDIA/nccl-tests</link>
      <description>&lt;p&gt;NCCL Tests&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/4835913642c0e3082afa2b0e2fd7fb5d1e48a75c5fa9c12d0ab6997f923d8f32/NVIDIA/nccl-tests" medium="image" />
      
    </item>
    
    <item>
      <title>princeton-vl/lietorch</title>
      <link>https://github.com/princeton-vl/lietorch</link>
      <description>&lt;p style=&quot;color:#586069;&quot;&gt;&lt;em&gt;No description/README provided.&lt;/em&gt;&lt;/p&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/5c85d10f9441df5950a0640adfd01ea503107b8bb42dfa4b77e3122295711254/princeton-vl/lietorch" medium="image" />
      
    </item>
    
    <item>
      <title>mirage-project/mirage</title>
      <link>https://github.com/mirage-project/mirage</link>
      <description>&lt;p&gt;Mirage Persistent Kernel: Compiling LLMs into a MegaKernel&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/0d0a7346ab7617bb9db60dbc70bea3252a03680eba6e72e4cafef91b0a164d7c/mirage-project/mirage" medium="image" />
      
    </item>
    
    <item>
      <title>NVlabs/instant-ngp</title>
      <link>https://github.com/NVlabs/instant-ngp</link>
      <description>&lt;p&gt;Instant neural graphics primitives: lightning fast NeRF and more&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://repository-images.githubusercontent.com/444886996/0874cd2d-cff7-4707-9bf4-8caf0ab433bb" medium="image" />
      
    </item>
    
    <item>
      <title>ashawkey/diff-gaussian-rasterization</title>
      <link>https://github.com/ashawkey/diff-gaussian-rasterization</link>
      <description>&lt;p style=&quot;color:#586069;&quot;&gt;&lt;em&gt;No description/README provided.&lt;/em&gt;&lt;/p&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/e0ffaa7dba8f49df4904a8ace8a277e89ef85bdd66bb2570f8c1f3d0f44acd27/ashawkey/diff-gaussian-rasterization" medium="image" />
      
    </item>
    
    <item>
      <title>NVIDIA/cuopt</title>
      <link>https://github.com/NVIDIA/cuopt</link>
      <description>&lt;p&gt;GPU accelerated decision optimization&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/28f5f7de65972e824e3a92ee23b1c53304b88e56de858fc943b451119c45c530/NVIDIA/cuopt" medium="image" />
      
    </item>
    
    <item>
      <title>BBuf/how-to-optim-algorithm-in-cuda</title>
      <link>https://github.com/BBuf/how-to-optim-algorithm-in-cuda</link>
      <description>&lt;p&gt;how to optimize some algorithm in cuda.&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/3b4531cb3f73dbbbf81e127e54481f1c99d882894145d109de55af428fd348b0/BBuf/how-to-optim-algorithm-in-cuda" medium="image" />
      
    </item>
    
    <item>
      <title>rahul-goel/fused-ssim</title>
      <link>https://github.com/rahul-goel/fused-ssim</link>
      <description>&lt;p&gt;Lightning fast differentiable SSIM.&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/a9159bd0d24feb030aab572a2eac121539a1cb73d6b2edac119d16995550ba1f/rahul-goel/fused-ssim" medium="image" />
      
    </item>
    
    <item>
      <title>thu-ml/SageAttention</title>
      <link>https://github.com/thu-ml/SageAttention</link>
      <description>&lt;p&gt;[ICLR2025, ICML2025, NeurIPS2025 Spotlight] Quantized Attention achieves speedup of 2-5x compared to FlashAttention, without losing end-to-end metrics across language, image, and video models.&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/572769917a36b6ea85fafd7c3df66908c8c895e7d3b7f79524b535ea75aefcb6/thu-ml/SageAttention" medium="image" />
      
    </item>
    
    <item>
      <title>HazyResearch/ThunderKittens</title>
      <link>https://github.com/HazyResearch/ThunderKittens</link>
      <description>&lt;p&gt;Tile primitives for speedy kernels&lt;/p&gt;&lt;hr&gt;</description>
      
      <media:content url="https://opengraph.githubassets.com/2ff2afb11cc9d42357ae9aaee25e7fb70a59ba04c3ee1537fb3e4f94a91d5ffd/HazyResearch/ThunderKittens" medium="image" />
      
    </item>
    
  </channel>
</rss>
