From 66a20b64546b90f6a18a72f07d9fffd63749abfe Mon Sep 17 00:00:00 2001
From: Obie Fernandez <obiefernandez@gmail.com>
Date: Fri, 18 Oct 2024 20:00:23 -0600
Subject: [PATCH] 0.4.0 Anthropic prompt caching support

---
 .rubocop.yml                            |   2 +-
 CHANGELOG.md                            |   7 +
 README.md                               |  24 ++++
 lib/raix.rb                             |   5 +
 lib/raix/chat_completion.rb             |  64 +++++----
 lib/raix/message_adapters/base.rb       |  50 +++++++
 lib/raix/version.rb                     |   2 +-
 spec/files/getting_real.md              | 173 ++++++++++++++++++++++++
 spec/raix/message_adapters/base_spec.rb |  39 ++++++
 spec/raix/prompt_caching_spec.rb        |  47 +++++++
 10 files changed, 381 insertions(+), 32 deletions(-)
 create mode 100644 lib/raix/message_adapters/base.rb
 create mode 100644 spec/files/getting_real.md
 create mode 100644 spec/raix/message_adapters/base_spec.rb
 create mode 100644 spec/raix/prompt_caching_spec.rb

diff --git a/.rubocop.yml b/.rubocop.yml
index 483b84a..886a44a 100644
--- a/.rubocop.yml
+++ b/.rubocop.yml
@@ -11,7 +11,7 @@ Style/StringLiteralsInInterpolation:
   EnforcedStyle: double_quotes
 
 Layout/LineLength:
-  Max: 120
+  Max: 180
 
 Metrics/BlockLength:
   Enabled: false
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 01155c6..a7c0bd8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,3 +8,10 @@
 - adds `ChatCompletion` module
 - adds `PromptDeclarations` module
 - adds `FunctionDispatch` module
+
+## [0.3.2] - 2024-06-29
+- adds support for streaming
+
+## [0.4.0] - 2024-10-18
+- adds support for Anthropic-style prompt caching
+- defaults to `max_completion_tokens` when using OpenAI directly
diff --git a/README.md b/README.md
index 2681526..0e6aa35 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,30 @@ transcript << { role: "user", content: "What is the meaning of life?" }
 
 One of the advantages of OpenRouter and the reason that it is used by default by this library is that it handles mapping message formats from the OpenAI standard to whatever other model you're wanting to use (Anthropic, Cohere, etc.)
 
+### Prompt Caching
+
+Raix supports [Anthropic-style prompt caching](https://openrouter.ai/docs/prompt-caching#anthropic-claude) when using Anthropic's Claude family of models. You can specify a `cache_at` parameter when doing a chat completion. If the character count for the content of a particular message is longer than the `cache_at` parameter, it will be sent to Anthropic as a multipart message with a cache control "breakpoint" set to "ephemeral".
+
+Note that there is a limit of four breakpoints, and the cache will expire within five minutes. Therefore, it is recommended to reserve the cache breakpoints for large bodies of text, such as character cards, CSV data, RAG data, book chapters, etc. Raix does not enforce a limit on the number of breakpoints, which means that you might get an error if you try to cache too many messages.
+
+```ruby
+>> my_class.chat_completion(params: { cache_at: 1000 })
+=> {
+  "messages": [
+    {
+      "role": "system",
+      "content": [
+        {
+          "type": "text",
+          "text": "HUGE TEXT BODY LONGER THAN 1000 CHARACTERS",
+          "cache_control": {
+            "type": "ephemeral"
+          }
+        }
+      ]
+    },
+```
+
 ### Use of Tools/Functions
 
 The second (optional) module that you can add to your Ruby classes after `ChatCompletion` is `FunctionDispatch`. It lets you declare and implement functions to be called at the AI's discretion as part of a chat completion "loop" in a declarative, Rails-like "DSL" fashion.
diff --git a/lib/raix.rb b/lib/raix.rb
index bdda83f..843342f 100644
--- a/lib/raix.rb
+++ b/lib/raix.rb
@@ -16,6 +16,9 @@ class Configuration
     # The max_tokens option determines the maximum number of tokens to generate.
     attr_accessor :max_tokens
 
+    # The max_completion_tokens option is the token limit sent instead of max_tokens when calling OpenAI directly (it is removed from OpenRouter requests).
+    attr_accessor :max_completion_tokens
+
     # The model option determines the model to use for text generation. This option
     # is normally set in each class that includes the ChatCompletion module.
     attr_accessor :model
@@ -27,12 +30,14 @@ class Configuration
     attr_accessor :openai_client
 
     DEFAULT_MAX_TOKENS = 1000
+    DEFAULT_MAX_COMPLETION_TOKENS = 16_384
     DEFAULT_MODEL = "meta-llama/llama-3-8b-instruct:free"
     DEFAULT_TEMPERATURE = 0.0
 
     # Initializes a new instance of the Configuration class with default values.
     def initialize
       self.temperature = DEFAULT_TEMPERATURE
+      self.max_completion_tokens = DEFAULT_MAX_COMPLETION_TOKENS
       self.max_tokens = DEFAULT_MAX_TOKENS
       self.model = DEFAULT_MODEL
     end
diff --git a/lib/raix/chat_completion.rb b/lib/raix/chat_completion.rb
index 1993672..946ff13 100644
--- a/lib/raix/chat_completion.rb
+++ b/lib/raix/chat_completion.rb
@@ -2,6 +2,7 @@
 
 require "active_support/concern"
 require "active_support/core_ext/object/blank"
+require "raix/message_adapters/base"
 require "open_router"
 require "openai"
 
@@ -17,9 +18,9 @@ module Raix
   module ChatCompletion
     extend ActiveSupport::Concern
 
-    attr_accessor :frequency_penalty, :logit_bias, :logprobs, :loop, :min_p, :model, :presence_penalty,
-                  :repetition_penalty, :response_format, :stream, :temperature, :max_tokens, :seed, :stop, :top_a,
-                  :top_k, :top_logprobs, :top_p, :tools, :tool_choice, :provider
+    attr_accessor :cache_at, :frequency_penalty, :logit_bias, :logprobs, :loop, :min_p, :model, :presence_penalty,
+                  :repetition_penalty, :response_format, :stream, :temperature, :max_completion_tokens,
+                  :max_tokens, :seed, :stop, :top_a, :top_k, :top_logprobs, :top_p, :tools, :tool_choice, :provider
 
     # This method performs chat completion based on the provided transcript and parameters.
     #
@@ -30,16 +31,12 @@ module ChatCompletion
     # @option params [Boolean] :raw (false) Whether to return the raw response or dig the text content.
     # @return [String|Hash] The completed chat response.
     def chat_completion(params: {}, loop: false, json: false, raw: false, openai: false)
-      messages = transcript.flatten.compact.map { |msg| transform_message_format(msg) }
-      raise "Can't complete an empty transcript" if messages.blank?
-
-      # used by FunctionDispatch
-      self.loop = loop
-
       # set params to default values if not provided
+      params[:cache_at] ||= cache_at.presence
       params[:frequency_penalty] ||= frequency_penalty.presence
       params[:logit_bias] ||= logit_bias.presence
       params[:logprobs] ||= logprobs.presence
+      params[:max_completion_tokens] ||= max_completion_tokens.presence || Raix.configuration.max_completion_tokens
       params[:max_tokens] ||= max_tokens.presence || Raix.configuration.max_tokens
       params[:min_p] ||= min_p.presence
       params[:presence_penalty] ||= presence_penalty.presence
@@ -57,23 +54,29 @@ def chat_completion(params: {}, loop: false, json: false, raw: false, openai: fa
       params[:top_p] ||= top_p.presence
 
       if json
-        params[:provider] ||= {}
-        params[:provider][:require_parameters] = true
+        unless openai
+          params[:provider] ||= {}
+          params[:provider][:require_parameters] = true
+        end
         params[:response_format] ||= {}
         params[:response_format][:type] = "json_object"
       end
 
+      # used by FunctionDispatch
+      self.loop = loop
+
       # set the model to the default if not provided
       self.model ||= Raix.configuration.model
 
+      adapter = MessageAdapters::Base.new(self)
+      messages = transcript.flatten.compact.map { |msg| adapter.transform(msg) }
+      raise "Can't complete an empty transcript" if messages.blank?
+
       begin
         response = if openai
-                     openai_request(params:, model: openai,
-                                    messages:)
+                     openai_request(params:, model: openai, messages:)
                    else
-                     openrouter_request(
-                       params:, model:, messages:
-                     )
+                     openrouter_request(params:, model:, messages:)
                    end
         retry_count = 0
         content = nil
@@ -115,8 +118,8 @@ def chat_completion(params: {}, loop: false, json: false, raw: false, openai: fa
           raise e # just fail if we can't get content after 3 attempts
         end
 
-        # attempt to fix the JSON
-        JsonFixer.new.call(content, e.message)
+        puts "Bad JSON received!!!!!!: #{content}"
+        raise e
       rescue Faraday::BadRequestError => e
         # make sure we see the actual error message on console or Honeybadger
         puts "Chat completion failed!!!!!!!!!!!!!!!!: #{e.response[:body]}"
@@ -132,6 +135,9 @@ def chat_completion(params: {}, loop: false, json: false, raw: false, openai: fa
     # { user: "Hey what time is it?" },
     # { assistant: "Sorry, pumpkins do not wear watches" }
     #
+    # to add a function call use the following format:
+    # { function: { name: 'fancy_pants_function', arguments: { param: 'value' } } }
+    #
     # to add a function result use the following format:
     # { function: result, name: 'fancy_pants_function' }
     #
@@ -143,11 +149,21 @@ def transcript
     private
 
     def openai_request(params:, model:, messages:)
+      # deprecated in favor of max_completion_tokens
+      params.delete(:max_tokens)
+
       params[:stream] ||= stream.presence
+      params[:stream_options] = { include_usage: true } if params[:stream]
+
+      params.delete(:temperature) if model == "o1-preview"
+
       Raix.configuration.openai_client.chat(parameters: params.compact.merge(model:, messages:))
     end
 
     def openrouter_request(params:, model:, messages:)
+      # max_completion_tokens is not supported by OpenRouter
+      params.delete(:max_completion_tokens)
+
       retry_count = 0
 
       begin
@@ -163,17 +179,5 @@ def openrouter_request(params:, model:, messages:)
         raise e
       end
     end
-
-    def transform_message_format(message)
-      return message if message[:role].present?
-
-      if message[:function].present?
-        { role: "assistant", name: message.dig(:function, :name), content: message.dig(:function, :arguments).to_json }
-      elsif message[:result].present?
-        { role: "function", name: message[:name], content: message[:result] }
-      else
-        { role: message.first.first, content: message.first.last }
-      end
-    end
   end
 end
diff --git a/lib/raix/message_adapters/base.rb b/lib/raix/message_adapters/base.rb
new file mode 100644
index 0000000..0e13b71
--- /dev/null
+++ b/lib/raix/message_adapters/base.rb
@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+require "active_support/core_ext/module/delegation"
+
+module Raix
+  module MessageAdapters
+    # Transforms messages into the format expected by the OpenAI API
+    class Base
+      attr_accessor :context
+
+      delegate :cache_at, :model, to: :context
+
+      def initialize(context)
+        @context = context
+      end
+
+      def transform(message)
+        return message if message[:role].present?
+
+        if message[:function].present?
+          { role: "assistant", name: message.dig(:function, :name), content: message.dig(:function, :arguments).to_json }
+        elsif message[:result].present?
+          { role: "function", name: message[:name], content: message[:result] }
+        else
+          content(message)
+        end
+      end
+
+      protected
+
+      def content(message)
+        case message
+        in { system: content }
+          { role: "system", content: }
+        in { user: content }
+          { role: "user", content: }
+        in { assistant: content }
+          { role: "assistant", content: }
+        else
+          raise ArgumentError, "Invalid message format: #{message.inspect}"
+        end.tap do |msg|
+          # convert to anthropic multipart format if model is claude-3 and cache_at is set
+          if model["anthropic/claude-3"] && cache_at && msg[:content].length > cache_at.to_i
+            msg[:content] = [{ type: "text", text: msg[:content], cache_control: { type: "ephemeral" } }]
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/raix/version.rb b/lib/raix/version.rb
index 21cb115..9bfa559 100644
--- a/lib/raix/version.rb
+++ b/lib/raix/version.rb
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Raix
-  VERSION = "0.3.2"
+  VERSION = "0.4.0"
 end
diff --git a/spec/files/getting_real.md b/spec/files/getting_real.md
new file mode 100644
index 0000000..1f04802
--- /dev/null
+++ b/spec/files/getting_real.md
@@ -0,0 +1,173 @@
+Introduction
+What is Getting Real?
+About 37signals
+Caveats, disclaimers, and other preemptive strikes
+
+
+ What is Getting Real?
+Want to build a successful web app? Then it’s time to Get Real. Getting Real is a smaller, faster, better way to build software.
+Getting Real is about skipping all the stuff that represents real (charts, graphs, boxes, arrows, schematics, wireframes, etc.) and actually building the real thing.
+Getting real is less. Less mass, less software, less features, less paperwork, less of everything that’s not essential (and most of what you think is essential actually isn’t).
+Getting Real is staying small and being agile.
+Getting Real starts with the interface, the real screens that people are going to use. It begins with what the customer actually experiences and builds backwards from there.This lets you get the interface right before you get the software wrong.
+Getting Real is about iterations and lowering the cost of change. Getting Real is all about launching, tweaking, and constantly improving which makes it a perfect approach for web-based software.
+Getting Real delivers just what customers need and eliminates anything they don’t.
+The benefits of Getting Real
+Getting Real delivers better results because it forces you to deal with the actual problems you’re trying to solve instead of your ideas about those problems. It forces you to deal with reality.
+
+ Getting Real foregoes functional specs and other transitory documentation in favor of building real screens. A functional spec is make-believe, an illusion of agreement, while an actual web page is reality. That’s what your customers are going to see and use. That’s what matters. Getting Real gets you there faster.
+And that means you’re making software decisions based on the real thing instead of abstract notions.
+Finally, Getting Real is an approach ideally suited to web-based software. The old school model of shipping software in a box and then waiting a year or two to deliver an update is fading away. Unlike installed software, web apps can constantly evolve on a day-to-day basis. Getting Real leverages this advantage for all its worth.
+How To Write Vigorous Software
+Vigorous writing is concise.A sentence should contain no unnecessary words, a paragraph no unnecessary sentences, for the same reason that a drawing should have no unnecessary lines and a machine no unnecessary parts.This requires not that the writer make all sentences short or avoid all detail and treat subjects only in outline, but that every word tell.
+From “The Elements of Style” by William Strunk Jr.
+No more bloat
+The old way: a lengthy, bureaucratic, we’re-doing-this-to-cover- our-asses process. The typical result: bloated, forgettable soft- ware dripping with mediocrity. Blech.
+Getting Real gets rid of...
+Timelines that take months or even years Pie-in-the-sky functional specs Scalability debates
+
+ Interminable staff meetings
+The “need” to hire dozens of employees Meaningless version numbers
+Pristine roadmaps that predict the perfect future Endless preference options
+Outsourced support
+Unrealistic user testing
+Useless paperwork
+Top-down hierarchy
+You don’t need tons of money or a huge team or a lengthy development cycle to build great software. Those things are the ingredients for slow, murky, changeless applications. Getting real takes the opposite approach.
+In this book we’ll show you...
+The importance of having a philosophy Why staying small is a good thing
+How to build less
+How to get from idea to reality quickly How to staff your team
+Why you should design from the inside out Why writing is so crucial
+Why you should underdo your competition
+
+ How to promote your app and spread the word Secrets to successful support
+Tips on keeping momentum going after launch
+...and lots more
+The focus is on big-picture ideas. We won’t bog you down with detailed code snippets or css tricks. We’ll stick to the major ideas and philosophies that drive the Getting Real process.
+Is this book for you?
+You’re an entrepreneur, designer, programmer, or marketer working on a big idea.
+You realize the old rules don’t apply anymore. Distribute your software on cd-roms every year? How 2002. Version numbers? Out the window. You need to build, launch, and tweak. Then rinse and repeat.
+Or maybe you’re not yet on board with agile development and business structures, but you’re eager to learn more.
+If this sounds like you, then this book is for you.
+Note: While this book’s emphasis is on building a web app, a lot of these ideas are applicable to non-software activities too. The suggestions about small teams, rapid prototyping, expect- ing iterations, and many others presented here can serve as a guide whether you’re starting a business, writing a book, designing a web site, recording an album, or doing a variety
+of other endeavors. Once you start Getting Real in one area of your life, you’ll see how these concepts can apply to a wide range of activities.
+
+ About 37signals
+What we do
+37signals is a small team that creates simple, focused software. Our products help you collaborate and get organized. More than 350,000 people and small businesses use our web-apps to get things done. Jeremy Wagstaff, of the Wall Street Journal, wrote, “37signals products are beautifully simple, elegant and intuitive tools that make an Outlook screen look like the soft- ware equivalent of a torture chamber.” Our apps never put you on the rack.
+Our modus operandi
+We believe software is too complex. Too many features, too many buttons, too much to learn. Our products do less than the competition – intentionally. We build products that work smarter, feel better, allow you to do things your way, and are easier to use.
+Our products
+As of the publishing date of this book, we have five commercial products and one open source web application framework.
+Basecamp turns project management on its head. Instead of Gantt charts, fancy graphs, and stats-heavy spreadsheets, Base- camp offers message boards, to-do lists, simple scheduling, col- laborative writing, and file sharing. So far, hundreds of thou- sands agree it’s a better way. Farhad Manjoo of Salon.com said
+“Basecamp represents the future of software on the Web.”
+
+ Campfire brings simple group chat to the business setting. Businesses in the know understand how valuable real-time persistent group chat can be. Conventional instant messaging is great for quick 1-on-1 chats, but it’s miserable for 3 or more people at once. Campfire solves that problem and plenty more.
+Backpack is the alternative to those confusing, complex, “orga- nize your life in 25 simple steps” personal information managers. Backpack’s simple take on pages, notes, to-dos, and cellphone/ email-based reminders is a novel idea in a product category that suffers from status-quo-itis. Thomas Weber of the Wall Street Journal said it’s the best product in its class and David Pogue of the New York Times called it a “very cool” organization tool.
+Writeboard lets you write, share, revise, and compare text
+solo or with others. It’s the refreshing alternative to bloated word processors that are overkill for 95% of what you write. John Gruber of Daring Fireball said, “Writeboard might be the clearest, simplest web application I’ve ever seen.” Web-guru Jeffrey Zeldman said, “The brilliant minds at 37signals have done it again.”
+Ta-da List keeps all your to-do lists together and organized online. Keep the lists to yourself or share them with others for easy collaboration. There’s no easier way to get things done. Over 100,000 lists with nearly 1,000,000 items have been created so far.
+Ruby on Rails, for developers, is a full-stack, open-source web framework in Ruby for writing real-world applications quickly and easily. Rails takes care of the busy work so you can focus on your idea. Nathan Torkington of the O’Reilly publish- ing empire said “Ruby on Rails is astounding. Using it is like watching a kung-fu movie, where a dozen bad-ass frameworks prepare to beat up the little newcomer only to be handed their asses in a variety of imaginative ways.” Gotta love that quote.
+
+ Caveats, disclaimers, and other preemptive strikes
+Just to get it out of the way, here are our responses to some com- plaints we hear every now and again:
+“These techniques won’t work for me.”
+Getting real is a system that’s worked terrifically for us. That said, the ideas in this book won’t apply to every project under the sun. If you are building a weapons system, a nuclear control plant, a banking system for millions of customers, or some other life/finance-critical system, you’re going to balk at some of our laissez-faire attitude. Go ahead and take additional precautions.
+And it doesn’t have to be an all or nothing proposition. Even if you can’t embrace Getting Real fully, there are bound to be at least a few ideas in here you can sneak past the powers that be.
+“You didn’t invent that idea.”
+We’re not claiming to have invented these techniques. Many of these concepts have been around in one form or another for a long time. Don’t get huffy if you read some
+of our advice and it reminds you of something you read about already on so and so’s weblog or in some book pub- lished 20 years ago. It’s definitely possible. These tech- niques are not at all exclusive to 37signals. We’re just telling you how we work and what’s been successful for us.
+
+ “You take too much of a black and white view.”
+If our tone seems too know-it-allish, bear with us. We think it’s better to present ideas in bold strokes than to be wishy-washy about it. If that comes off as cocky or arrogant, so be it. We’d rather be provocative than water everything down with “it depends...” Of course there will be times when these rules need to be stretched or broken. And some of these tactics may not apply to your situation. Use your judgement and imagination.
+“This won’t work inside my company.”
+Think you’re too big to Get Real? Even Microsoft is Getting Real (and we doubt you’re bigger than them).
+Even if your company typically runs on long-term schedules with big teams, there are still ways to get real.The first step is
+to break up into smaller units. When there’s too many people involved, nothing gets done. The leaner you are, the faster – and better – things get done.
+Granted, it may take some salesmanship. Pitch your company on the Getting Real process. Show them this book. Show them the real results you can achieve in less time and with a smaller team.
+Explain that Getting Real is a low-risk, low-investment way to test new concepts. See if you can split off from the mothership on a smaller project as a proof of concept. Demonstrate results.
+Or, if you really want to be ballsy, go stealth. Fly under the radar and demonstrate real results. That’s the approach the Start.com team has used while Getting Real at Microsoft. “I’ve watched the Start.com team work. They don’t ask permission,” says Robert Scoble, Technical Evangelist at Microsoft. “They have a boss that provides air cover. And they bite off a little bit at a time and do that and respond to feedback.”
+
+   Shipping Microsoft’s Start.com
+In big companies, processes and meetings are the norm. Many months are spent on planning features and arguing details with the goal of everyone reaching an agreement on what is the “right” thing for the customer.
+That may be the right approach for shrink-wrapped software, but with the web we have an incredible advantage. Just ship it! Let the user tell you if it’s the right thing and if it’s not, hey you can fix it and ship it to the web the same day if you want! There is no word stronger than the customer’s – resist the urge to engage in long-winded meetings and arguments. Just ship it and prove a point.
+Much easier said than done – this implies:
+Months of planning are not necessary.
+Months of writing specs are not necessary – specs should have the foundations nailed and details figured out and refined during the development phase. Don’t try to close all open issues and nail every single detail before development starts.
+Ship less features, but quality features.
+You don’t need a big bang approach with a whole new release and bunch of features. Give the users byte-size pieces that they can digest.
+If there are minor bugs, ship it as soon you have the core scenarios nailed and ship the bug fixes to web gradually after that.The faster you get the user feedback the better. Ideas can sound great on paper but in practice turn out to be suboptimal.The sooner you find out about fundamental issues that are wrong with an idea, the better.
+Once you iterate quickly and react on customer feedback, you will establish a customer connection. Remember the goal is to win the customer by building what they want.
+-Sanaz Ahari, Program Manager of Start.com, Microsoft
+
+
+  The Starting Line
+Build Less
+What’s Your Problem?
+Fund Yourself
+Fix Time and Budget, Flex Scope Have an Enemy
+It Shouldn’t be a Chore
+
+
+ Build Less
+Underdo your competition
+Conventional wisdom says that to beat your competitors you need to one-up them. If they have four features, you need five (or 15, or 25). If they’re spending x, you need to spend xx. If they have 20, you need 30.
+This sort of one-upping Cold War mentality is a dead-end. It’s an expensive, defensive, and paranoid way of building products. Defensive, paranoid companies can’t think ahead, they can only think behind. They don’t lead, they follow.
+If you want to build a company that follows, you might as well put down this book now.
+So what to do then? The answer is less. Do less than your com- petitors to beat them. Solve the simple problems and leave the hairy, difficult, nasty problems to everyone else. Instead of one- upping, try one-downing. Instead of outdoing, try underdoing.
+We’ll cover the concept of less throughout this book, but for starters, less means:
+Less features
+Less options/preferences
+Less people and corporate structure Less meetings and abstractions
+Less promises
+
+
+ What’s Your Problem?
+Build software for yourself
+A great way to build software is to start out by solving your own problems. You’ll be the target audience and you’ll know what’s important and what’s not. That gives you a great head start on delivering a breakout product.
+The key here is understanding that you’re not alone. If you’re having this problem, it’s likely hundreds of thousands of others are in the same boat. There’s your market. Wasn’t that easy?
+Basecamp originated in a problem: As a design firm we needed a simple way to communicate with our clients about projects. We started out doing this via client ex- tranets which we would update manually. But changing the html by hand every time a project needed to be updated just wasn’t working. These project sites always seemed to go stale and eventually were abandoned. It was frustrating because it left us disorganized and left clients in the dark.
+So we started looking at other options. Yet every tool we found either 1) didn’t do what we needed or 2) was bloated with fea- tures we didn’t need – like billing, strict access controls, charts, graphs, etc. We knew there had to be a better way so we decided to build our own.
+When you solve your own problem, you create a tool that you’re passionate about. And passion is key. Passion means you’ll truly use it and care about it. And that’s the best way to get others to feel passionate about it too.
+
+   Scratching your own itch
+The Open Source world embraced this mantra a long time ago – they call it “scratching your own itch.” For the open source developers, it means they get the tools they want, delivered the way they want them. But the benefit goes much deeper.
+As the designer or developer of a new application, you’re faced with hundreds of micro-decisions each and every day: blue or green? One table or two? Static or dynamic? Abort or recover? How do we make these decisions? If it’s something we recognize as being important, we might ask.The rest, we guess.And all that guessing builds up a kind of debt in our applications – an interconnected web of assumptions.
+As a developer, I hate this.The knowledge of all these small-scale timebombs in the applications I write adds to my stress. Open Source developers, scratching their own itches, don’t suffer this. Because they are their own users, they know the correct answers to 90% of the decisions they have to make. I think this is one of the reasons folks come home after a hard day of coding and then work on open source: It’s relaxing.
+–Dave Thomas, The Pragmatic Programmers
+
+Born out of necessity
+Campaign Monitor really was born out of necessity. For years we’d been frustrated by the quality of the email marketing options out there. One tool would do x and y but never z, the next had y
+and z nailed but just couldn’t get x right.We couldn’t win.
+We decided to clear our schedule and have a go at building our dream email marketing tool.We consciously decided not to look at what everyone else was doing and instead build something that would make ours and our customer’s lives a little easier.
+As it turned out, we weren’t the only ones who were unhappy with the options out there.We made a few modifications to the software so any design firm could use it and started spreading the word. In less than six months, thousands of designers were using Campaign Monitor to send email newsletters for themselves and their clients.
+–David Greiner, founder, Campaign Monitor
+
+
+   You need to care about it
+When you write a book, you need to have more than an interesting story. You need to have a desire to tell the story.You need to be personally invested in some way. If you’re going to live with something for two years, three years, the rest of your life, you need to care about it.
+–Malcolm Gladwell, author (from A Few Thin Slices of Malcolm Gladwell)
+
+
+ Fund Yourself
+Outside money is plan B
+The first priority of many startups is acquiring funding from investors. But remember, if you turn to outsiders for funding, you’ll have to answer to them too. Expectations are raised. Investors want their money back – and quickly. The sad fact is cashing in often begins to trump building a quality product.
+These days it doesn’t take much to get rolling. Hardware
+is cheap and plenty of great infrastructure software is open source and free. And passion doesn’t come with a price tag.
+So do what you can with the cash on hand. Think hard and determine what’s really essential and what you can do without. What can you do with three people instead of ten? What can you do with $20k instead of $100k? What can you do in three months instead of six? What can you do if you keep your day job and build your app on the side?
+Constraints force creativity
+Run on limited resources and you’ll be forced to reckon with constraints earlier and more intensely. And that’s a good thing. Constraints drive innovation.
+
+
+ Constraints also force you to get your idea out in the wild sooner rather than later – another good thing. A month or two out of the gates you should have a pretty good idea of whether you’re onto something or not. If you are, you’ll be self-sustain- able shortly and won’t need external cash. If your idea’s a lemon, it’s time to go back to the drawing board. At least you know now as opposed to months (or years) down the road. And at least you can back out easily. Exit plans get a lot trickier once inves- tors are involved.
+If you’re creating software just to make a quick buck, it will show. Truth is a quick payout is pretty unlikely. So focus on building a quality tool that you and your customers can live with for a long time.
+
+Two paths
+[Jake Walker started one company with investor money (Disclive) and one without (The Show). Here he discusses the differences between the two paths.]
+
+The root of all the problems wasn’t raising money itself, but everything that came along with it.The expectations are simply higher. People start taking salary, and the motivation is to build it up and sell it, or find some other way for the initial investors to make their money back. In the case of the first company,
+we simply started acting much bigger than we were – out of necessity...
+[With The Show] we realized that we could deliver a much better product with less costs, only with more time. And we gambled with a bit of our own money that people would be willing to wait for quality over speed. But the company has stayed (and will likely continue to be) a small operation.And ever since that first project, we’ve been fully self funded.With just a bit of creative terms from our vendors, we’ve never really need to put much of our own money into the operation at all.And the expectation isn’t to grow and sell,but to grow for the sake of growth and to continue to benefit from it financially.
+–A comment from Signal vs. Noise
+
diff --git a/spec/raix/message_adapters/base_spec.rb b/spec/raix/message_adapters/base_spec.rb
new file mode 100644
index 0000000..245ba19
--- /dev/null
+++ b/spec/raix/message_adapters/base_spec.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+require "spec_helper"
+
+RSpec.describe Raix::MessageAdapters::Base do # specs for how #transform normalizes transcript messages
+  let(:context) { double("Context", model: "anthropic/claude-3", cache_at: 10) } # stubbed context; cache_at presumably a content-length threshold — confirm in Base
+  let(:adapter) { described_class.new(context) }
+
+  describe "#transform" do
+    it "returns the message if it already has a role" do
+      message = { role: "user", content: "Hello" }
+      expect(adapter.transform(message)).to eq(message) # expected to come back unchanged
+    end
+
+    it "transforms a function call message" do
+      message = { function: { name: "my_function", arguments: { param: "value" } } }
+      expected = { role: "assistant", name: "my_function", content: { param: "value" }.to_json } # arguments hash is expected as a JSON string
+      expect(adapter.transform(message)).to eq(expected)
+    end
+
+    it "transforms a result message" do
+      message = { result: "Hello", name: "my_function" }
+      expected = { role: "function", name: "my_function", content: "Hello" } # result value is expected under :content
+      expect(adapter.transform(message)).to eq(expected)
+    end
+
+    it "transforms a message with a single key-value pair" do
+      message = { user: "Hello" } # 5 characters, under the stubbed cache_at of 10
+      expected = { role: "user", content: "Hello" } # key is expected as the role, value as plain string content
+      expect(adapter.transform(message)).to eq(expected)
+    end
+
+    it "transforms a message with a large content" do
+      message = { user: "Hello" * 5 } # 25 characters, over the stubbed cache_at of 10
+      expected = { role: "user", content: [{ type: "text", text: "Hello" * 5, cache_control: { type: "ephemeral" } }] }
+      expect(adapter.transform(message)).to eq(expected)
+    end
+  end
+end
diff --git a/spec/raix/prompt_caching_spec.rb b/spec/raix/prompt_caching_spec.rb
new file mode 100644
index 0000000..fac8544
--- /dev/null
+++ b/spec/raix/prompt_caching_spec.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+class GettingRealAnthropic # spec helper: builds a transcript whose system message embeds a large fixture file
+  include Raix::ChatCompletion
+
+  def initialize
+    self.model = "anthropic/claude-3-haiku"
+    transcript << { # system message made of two text parts; only the second carries cache_control
+      "role": "system",
+      "content": [
+        {
+          "type": "text",
+          "text": "You are a modern historian studying trends in modern business. You know the following book callsed 'Getting Real' very well:"
+        },
+        {
+          "type": "text",
+          "text": File.read("spec/files/getting_real.md"), # relative path: resolved against the current working directory
+          "cache_control": { # NOTE(review): presumably marks this part for Anthropic-style prompt caching — confirm with provider docs
+            "type": "ephemeral"
+          }
+        }
+      ]
+    }
+    transcript << { user: "What is the meaning of Getting Real according to the book? Begin your response with According to the book," }
+  end
+end
+
+RSpec.describe GettingRealAnthropic do
+  subject { described_class.new }
+
+  it "does a completion with prompt caching" do
+    subject.chat_completion.tap do |response|
+      expect(response).to include("According to the book")
+    end
+
+    # run the completion a second time on the same subject; presumably this request can hit the provider's prompt cache
+    subject.chat_completion
+
+    # pause to let OpenRouter's usage event system catch up
+    sleep 2
+
+    # check the cache discount reported for the response stored in Thread.current[:chat_completion_response] (presumably set by chat_completion)
+    OpenRouter::Client.new.query_generation_stats(Thread.current[:chat_completion_response]["id"]).then do |response|
+      expect(response["cache_discount"]).to be > 0
+    end
+  end
+end