languagemodels
````python
import requests
import datetime
import json
import re
from typing import overload

from languagemodels.config import config
from languagemodels.preprocess import get_html_paragraphs
from languagemodels.inference import (
    generate,
    rank_instruct,
    parse_chat,
    list_tokens,
)
from languagemodels import embeddings

docs = embeddings.RetrievalContext()


def complete(prompt: str) -> str:
    """Provide one completion for a given open-ended prompt

    :param prompt: Prompt to use as input to the model
    :return: Completion returned from the language model

    Examples:

    >>> complete("Luke thought that he") #doctest: +SKIP
    'was going to be a doctor.'

    >>> complete("There are many mythical creatures who") #doctest: +SKIP
    'are able to fly'

    >>> complete("She hid in her room until") #doctest: +SKIP
    'she was sure she was safe'
    """

    result = generate(
        ["Write a sentence"],
        prefix=prompt,
        max_tokens=config["max_tokens"],
        temperature=0.7,
        topk=40,
    )[0]

    if result.startswith(prompt):
        prefix_length = len(prompt)
        return result[prefix_length:]
    else:
        return result


@overload
def do(prompt: list) -> list:
    ...


@overload
def do(prompt: str) -> str:
    ...


def do(prompt, choices=None):
    """Follow a single-turn instructional prompt

    :param prompt: Instructional prompt(s) to follow
    :param choices: If provided, outputs are restricted to values in choices
    :return: Completion returned from the language model

    Note that this function is overloaded to return a list of results if
    a list of prompts is provided and a single string if a single
    prompt is provided as a string

    Examples:

    >>> do("Translate Spanish to English: Hola mundo!") #doctest: +SKIP
    'Hello world!'

    >>> do("Pick the planet from the list: baseball, Texas, Saturn")
    '...Saturn...'

    >>> do(["Pick the planet from the list: baseball, Texas, Saturn"] * 2)
    ['...Saturn...', '...Saturn...']

    >>> do(["Say red", "Say blue"], choices=["red", "blue"])
    ['red', 'blue']

    >>> do("Classify as positive or negative: LLMs are bad",
    ... choices=["Positive", "Negative"])
    'Negative'

    >>> do("Classify as positive or negative: LLMs are great",
    ... choices=["Positive", "Negative"])
    'Positive'
    """

    prompts = [prompt] if isinstance(prompt, str) else prompt

    if choices:
        results = [r[0] for r in rank_instruct(prompts, choices)]
    else:
        results = generate(prompts, max_tokens=config["max_tokens"], topk=1)

    return results[0] if isinstance(prompt, str) else results


@overload
def embed(doc: list) -> list:
    ...


@overload
def embed(doc: str) -> list:
    ...


def embed(doc):
    """Create embedding for a document

    :param doc: Document(s) to embed
    :return: Embedding

    Note that this function is overloaded to return a list of embeddings if
    a list of docs is provided and a single embedding if a single
    doc is provided as a string

    Examples:

    >>> embed("Hello, world")
    [-0.0...]

    >>> embed(["Hello", "world"])
    [[-0.0...]]
    """

    docs = [doc] if isinstance(doc, str) else doc

    # Create embeddings and convert to lists of floats
    emb = [[float(n) for n in e] for e in embeddings.embed(docs)]

    return emb[0] if isinstance(doc, str) else emb


def chat(prompt: str) -> str:
    """Get new message from chat-optimized language model

    The `prompt` for this model is provided as a series of messages in a
    single plain-text string. Several special tokens are used to delineate
    chat messages.

    - `system:` - Indicates the start of a system message providing
      instructions about how the assistant should behave.
    - `user:` - Indicates the start of a prompter (typically user)
      message.
    - `assistant:` - Indicates the start of an assistant message.

    A complete prompt may look something like this:

    ```
    Assistant is helpful and harmless

    User: What is the capital of Germany?

    Assistant: The capital of Germany is Berlin.

    User: How many people live there?

    Assistant:
    ```

    The completion from the language model is returned.

    :param prompt: Prompt using formatting described above
    :return: Completion returned from the language model

    Examples:

    >>> response = chat('''
    ... System: Respond as a helpful assistant. It is 5:00pm.
    ...
    ... User: What time is it?
    ...
    ... Assistant:
    ... ''') # doctest: +SKIP
    "It's 5:00pm."
    """

    messages = parse_chat(prompt)

    # Suppress the starts of previous assistant and user messages
    # to avoid repeat generation
    suppress = [
        "Assistant: " + m["content"].split(" ")[0]
        for m in messages
        if m["role"] in ["assistant", "user"]
    ]

    # Suppress all user messages to avoid repeating them
    suppress += [m["content"] for m in messages if m["role"] == "user"]

    system_msgs = [m for m in messages if m["role"] == "system"]
    assistant_msgs = [m for m in messages if m["role"] == "assistant"]
    user_msgs = [m for m in messages if m["role"] == "user"]

    # The current model is tuned on instructions and tends to get
    # lost if it sees too many questions.
    # Use only the most recent user and assistant message for context,
    # but keep all system messages.
    messages = system_msgs + assistant_msgs[-1:] + user_msgs[-1:]

    rolemap = {
        "system": "System",
        "user": "Question",
        "assistant": "Assistant",
    }

    messages = [f"{rolemap[m['role']]}: {m['content']}" for m in messages]

    prompt = "\n\n".join(messages) + "\n\n" + "Assistant:"

    if prompt.startswith("System:"):
        prompt = prompt[len("System:"):].strip()

    response = generate(
        [prompt],
        max_tokens=config["max_tokens"],
        temperature=0.3,
        topk=40,
        prefix="Assistant:",
        suppress=suppress,
    )[0]

    # Remove a duplicated "Assistant:" prefix if one was generated
    if response.startswith("Assistant:"):
        response = response[len("Assistant:"):]

    return response.strip()


def code(prompt: str) -> str:
    """Complete a code prompt

    This assumes that users are expecting Python completions. Default models
    are fine-tuned on Python where applicable.

    :param prompt: Code context to complete
    :return: Completion returned from the language model

    Examples:

    >>> code("# Print Hello, world!\\n")
    'print("Hello, world!")\\n'

    >>> code("def return_4():")
    '...return 4...'
    """
    return generate([prompt], max_tokens=config["max_tokens"], topk=1, model="code")[0]


def extract_answer(question: str, context: str) -> str:
    """Extract an answer to a `question` from a provided `context`

    :param question: A question to answer using knowledge from context
    :param context: Knowledge used to answer the question
    :return: Answer to the question.

    Examples:

    >>> context = "There is a green ball and a red box"
    >>> extract_answer("What color is the ball?", context).lower()
    '...green...'

    >>> extract_answer("Who created Python?", get_wiki('Python')) #doctest: +SKIP
    '...Guido van Rossum...'
    """

    return generate([f"{context}\n\n{question}"])[0]


def classify(doc: str, label1: str, label2: str) -> str:
    """Performs binary classification on an input

    :param doc: A plain text input document to classify
    :param label1: The first label to classify against
    :param label2: The second label to classify against
    :return: The closest matching class. The return value will always be
        `label1` or `label2`

    Examples:

    >>> classify("That book was good.","positive","negative")
    'positive'
    >>> classify("That movie was terrible.","positive","negative")
    'negative'
    """

    return do(
        f"Classify as {label1} or {label2}: {doc}\n\nClassification:",
        choices=[label1, label2],
    )


def store_doc(doc: str, name: str = "") -> None:
    """Store document for later retrieval

    :param doc: A plain text document to store.
    :param name: Optional name for the document. This is used as a chunk prefix.

    Examples:

    >>> store_doc("The sky is blue.")
    """
    docs.store(doc, name)


def load_doc(query: str) -> str:
    """Load a matching document

    A single document that best matches `query` will be returned.

    :param query: Query to compare to stored documents
    :return: Content of the closest matching document

    Examples:

    >>> store_doc("Paris is in France.")
    >>> store_doc("The sky is blue.")
    >>> load_doc("Where is Paris?")
    'Paris is in France.'
    """
    return docs.get_match(query)


def get_doc_context(query: str) -> str:
    """Loads context from documents

    A string representing the most relevant content from all stored documents
    will be returned. This may be a blend of chunks from multiple documents.

    :param query: Query to compare to stored documents
    :return: Up to 128 tokens of context

    Examples:

    >>> store_doc("Paris is in France.")
    >>> store_doc("Paris is nice.")
    >>> store_doc("The sky is blue.")
    >>> get_doc_context("Where is Paris?")
    'Paris is in France.\\n\\nParis is nice.'
    """
    return docs.get_context(query)


def get_web(url: str) -> str:
    """
    Return the text of paragraphs from a web page

    :param url: The URL to load
    :return: Plain text content from the URL

    Note that it is difficult to return only the human-readable
    content from an HTML page. This function takes a basic and quick
    approach. It will not work perfectly on all sites, but it will
    often do a reasonable job of returning the plain text content
    of a page.

    If the `url` points to a plain text page, the page content
    will be returned verbatim.
    """

    res = requests.get(
        url, headers={"User-Agent": "Mozilla/5.0 (compatible; languagemodels)"}
    )

    content_type = res.headers.get("content-type", "")

    if "text/plain" in content_type:
        return res.text
    elif "text/html" in content_type:
        return get_html_paragraphs(res.text)

    return ""


def get_wiki(topic: str) -> str:
    """
    Return Wikipedia summary for a topic

    This function ignores the complexity of disambiguation pages and simply
    returns the first result that is not a disambiguation page

    :param topic: Topic to search for on Wikipedia
    :return: Text content of the lead section of the most popular matching article

    Examples:

    >>> get_wiki('Python language')
    'Python is a high-level...'

    >>> get_wiki('Chemistry')
    'Chemistry is the scientific study...'
    """

    url = "https://api.wikimedia.org/core/v1/wikipedia/en/search/title"
    response = requests.get(url, params={"q": topic, "limit": 5})
    response = json.loads(response.text)

    for page in response["pages"]:
        wiki_result = requests.get(
            f"https://en.wikipedia.org/w/api.php?action=query&prop=extracts|pageprops&"
            f"exintro&redirects=1&titles={page['title']}&format=json"
        ).json()

        first = wiki_result["query"]["pages"].popitem()[1]
        if "disambiguation" in first.get("pageprops", {}):
            continue

        summary = first["extract"]

        cutoffs = [
            "See_also",
            "Notes",
            "References",
            "Further_reading",
            "External_links",
        ]

        for cutoff in cutoffs:
            summary = summary.split(f'<span id="{cutoff}">', 1)[0]

        summary = re.sub(r"<p>", "\n\n", summary, flags=re.I)
        summary = re.sub(r"<!--.*?-->", "", summary, flags=re.I | re.DOTALL)
        summary = re.sub(r"<.*?>", "", summary, flags=re.I)
        summary = re.sub(r"\s*[\n\r]+\s*[\r\n]+[\s\r\n]*", "\n\n", summary, flags=re.I)
        summary = summary.strip()
        return summary
    else:
        return "No matching wiki page found."


def get_weather(latitude, longitude):
    """Fetch the current weather for a supplied latitude and longitude

    Weather is provided by the US government and this function only supports
    locations in the United States.

    :param latitude: Latitude value representing this location
    :param longitude: Longitude value representing this location
    :return: Plain text description of the current weather forecast

    Examples:

    >>> get_weather(41.8, -87.6) # doctest: +SKIP
    'Scattered showers and thunderstorms before 1pm with a high of 73.'
    """

    res = requests.get(f"https://api.weather.gov/points/{latitude},{longitude}")
    points = json.loads(res.text)
    forecast_url = points["properties"]["forecast"]

    res = requests.get(forecast_url)
    forecast = json.loads(res.text)
    current = forecast["properties"]["periods"][0]

    return current["detailedForecast"]


def get_date() -> str:
    """Returns the current date and time in natural language

    >>> get_date() # doctest: +SKIP
    'Friday, May 12, 2023 at 09:27AM'
    """

    now = datetime.datetime.now()

    return now.strftime("%A, %B %d, %Y at %I:%M%p")


def print_tokens(prompt: str) -> None:
    """Prints a list of tokens in a prompt

    :param prompt: Prompt to use as input to tokenizer
    :return: Nothing

    Examples:

    >>> print_tokens("Hello world")
    ' Hello' (token 8774)
    ' world' (token 296)

    >>> print_tokens("Hola mundo")
    ' Hol' (token 5838)
    'a' (token 9)
    ' mun' (token 13844)
    'd' (token 26)
    'o' (token 32)
    """

    tokens = list_tokens(prompt)

    for token in tokens:
        print(f"'{token[0].replace('▁', ' ')}' (token {token[1]})")


def count_tokens(prompt: str) -> int:
    """Counts tokens in a prompt

    :param prompt: Prompt to use as input to tokenizer
    :return: Number of tokens in the prompt

    Examples:

    >>> count_tokens("Hello world")
    2

    >>> count_tokens("Hola mundo")
    5
    """

    return len(list_tokens(prompt))


def set_max_ram(value):
    """Sets max allowed RAM

    This value takes priority over environment variables

    Returns the numeric value set in GB

    >>> set_max_ram(16)
    16.0

    >>> set_max_ram('512mb')
    0.5
    """

    config["max_ram"] = value

    return config["max_ram"]


def require_model_license(match_re):
    """Require models to match supplied regex

    This can be used to enforce certain licensing constraints when using this
    package.
    """
    config["model_license"] = match_re
````
def complete(prompt: str) -> str
Provide one completion for a given open-ended prompt
Parameters
- prompt: Prompt to use as input to the model
Returns
Completion returned from the language model
Examples:
>>> complete("Luke thought that he") #doctest: +SKIP
'was going to be a doctor.'
>>> complete("There are many mythical creatures who") #doctest: +SKIP
'are able to fly'
>>> complete("She hid in her room until") #doctest: +SKIP
'she was sure she was safe'
def do(prompt, choices=None)
Follow a single-turn instructional prompt
Parameters
- prompt: Instructional prompt(s) to follow
- choices: If provided, outputs are restricted to values in choices
Returns
Completion returned from the language model
Note that this function is overloaded: it returns a list of results when a list of prompts is provided and a single string when a single prompt is provided as a string.
Examples:
>>> do("Translate Spanish to English: Hola mundo!") #doctest: +SKIP
'Hello world!'
>>> do("Pick the planet from the list: baseball, Texas, Saturn")
'...Saturn...'
>>> do(["Pick the planet from the list: baseball, Texas, Saturn"] * 2)
['...Saturn...', '...Saturn...']
>>> do(["Say red", "Say blue"], choices=["red", "blue"])
['red', 'blue']
>>> do("Classify as positive or negative: LLMs are bad",
... choices=["Positive", "Negative"])
'Negative'
>>> do("Classify as positive or negative: LLMs are great",
... choices=["Positive", "Negative"])
'Positive'
def embed(doc)
Create embedding for a document
Parameters
- doc: Document(s) to embed
Returns
Embedding
Note that this function is overloaded: it returns a list of embeddings when a list of docs is provided and a single embedding when a single doc is provided as a string.
Examples:
>>> embed("Hello, world")
[-0.0...]
>>> embed(["Hello", "world"])
[[-0.0...]]
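Because embeddings are returned as plain lists of floats, they can be compared directly for semantic similarity. A minimal sketch; `cosine_similarity` is a hypothetical helper, not part of the package API, and the sample sentences are arbitrary:

```python
import math

import languagemodels as lm

def cosine_similarity(a, b):
    # Hypothetical helper: 1.0 means the vectors point in the same direction
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

query = lm.embed("Where should I go sailing?")
candidates = ["The lake is calm today", "The stock market fell 2% today"]
scores = [cosine_similarity(query, e) for e in lm.embed(candidates)]
print(candidates[scores.index(max(scores))])  # likely "The lake is calm today"
```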
def chat(prompt: str) -> str
Get new message from chat-optimized language model
The `prompt` for this model is provided as a series of messages in a single
plain-text string. Several special tokens are used to delineate chat
messages.
- `system:` - Indicates the start of a system message providing instructions about how the assistant should behave.
- `user:` - Indicates the start of a prompter (typically user) message.
- `assistant:` - Indicates the start of an assistant message.
A complete prompt may look something like this:
Assistant is helpful and harmless
User: What is the capital of Germany?
Assistant: The capital of Germany is Berlin.
User: How many people live there?
Assistant:
The completion from the language model is returned.
Parameters
- prompt: Prompt using formatting described above
Returns
Completion returned from the language model
Examples:
>>> response = chat('''
... System: Respond as a helpful assistant. It is 5:00pm.
...
... User: What time is it?
...
... Assistant:
... ''') # doctest: +SKIP
"It's 5:00pm."
def code(prompt: str) -> str
Complete a code prompt
This assumes that users are expecting Python completions. Default models are fine-tuned on Python where applicable.
Parameters
- prompt: Code context to complete
Returns
Completion returned from the language model
Examples:
>>> code("# Print Hello, world!\n")
'print("Hello, world!")\n'
>>> code("def return_4():")
'...return 4...'
def extract_answer(question: str, context: str) -> str
Extract an answer to a `question` from a provided `context`
Parameters
- question: A question to answer using knowledge from context
- context: Knowledge used to answer the question
Returns
Answer to the question.
Examples:
>>> context = "There is a green ball and a red box"
>>> extract_answer("What color is the ball?", context).lower()
'...green...'
>>> extract_answer("Who created Python?", get_wiki('Python')) #doctest: +SKIP
'...Guido van Rossum...'
def classify(doc: str, label1: str, label2: str) -> str
Performs binary classification on an input
Parameters
- doc: A plain text input document to classify
- label1: The first label to classify against
- label2: The second label to classify against
Returns
The closest matching class. The return value will always be `label1` or `label2`
Examples:
>>> classify("That book was good.","positive","negative")
'positive'
>>> classify("That movie was terrible.","positive","negative")
'negative'
def store_doc(doc: str, name: str = "") -> None
Store document for later retrieval
Parameters
- doc: A plain text document to store.
- name: Optional name for the document. This is used as a chunk prefix.
Examples:
>>> store_doc("The sky is blue.")
def load_doc(query: str) -> str
Load a matching document
A single document that best matches `query` will be returned.
Parameters
- query: Query to compare to stored documents
Returns
Content of the closest matching document
Examples:
>>> store_doc("Paris is in France.")
>>> store_doc("The sky is blue.")
>>> load_doc("Where is Paris?")
'Paris is in France.'
def get_doc_context(query: str) -> str
Loads context from documents
A string representing the most relevant content from all stored documents will be returned. This may be a blend of chunks from multiple documents.
Parameters
- query: Query to compare to stored documents
Returns
Up to 128 tokens of context
Examples:
>>> store_doc("Paris is in France.")
>>> store_doc("Paris is nice.")
>>> store_doc("The sky is blue.")
>>> get_doc_context("Where is Paris?")
'Paris is in France.\n\nParis is nice.'
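Taken together, `store_doc`, `get_doc_context`, and `extract_answer` support a simple retrieval-augmented question-answering pattern. A minimal sketch using made-up documents:

```python
import languagemodels as lm

# Illustrative documents; any plain text can be stored
lm.store_doc("Paris is the capital of France.", "geography")
lm.store_doc("The Eiffel Tower is 330 metres tall.", "landmarks")

question = "How tall is the Eiffel Tower?"
context = lm.get_doc_context(question)       # most relevant stored chunks
print(lm.extract_answer(question, context))  # answer grounded in the chunks
```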
def get_web(url: str) -> str
Return the text of paragraphs from a web page
Parameters
- url: The URL to load
Returns
Plain text content from the URL
Note that it is difficult to return only the human-readable content from an HTML page. This function takes a basic and quick approach. It will not work perfectly on all sites, but will often do a reasonable job of returning the plain text content of a page.
If the `url` points to a plain text page, the page content will be returned verbatim.
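Pages fetched with `get_web` can be fed straight into the document store for later retrieval. A sketch, assuming a hypothetical URL:

```python
import languagemodels as lm

# Hypothetical URL; substitute any mostly-textual page
text = lm.get_web("https://example.com/article.html")
lm.store_doc(text, "article")
```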
def get_wiki(topic: str) -> str
Return Wikipedia summary for a topic
This function ignores the complexity of disambiguation pages and simply returns the first result that is not a disambiguation page
Parameters
- topic: Topic to search for on Wikipedia
Returns
Text content of the lead section of the most popular matching article
Examples:
>>> get_wiki('Python language')
'Python is a high-level...'
>>> get_wiki('Chemistry')
'Chemistry is the scientific study...'
def get_weather(latitude, longitude)
Fetch the current weather for a supplied latitude and longitude
Weather is provided by the US government and this function only supports locations in the United States.
Parameters
- latitude: Latitude value representing this location
- longitude: Longitude value representing this location
Returns
Plain text description of the current weather forecast
Examples:
>>> get_weather(41.8, -87.6) # doctest: +SKIP
'Scattered showers and thunderstorms before 1pm with a high of 73.'
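The forecast text can serve as grounding context for the instruction-following functions. A sketch reusing the coordinates from the example above (roughly Chicago); the prompt wording is illustrative:

```python
import languagemodels as lm

forecast = lm.get_weather(41.8, -87.6)  # coordinates from the example above
print(lm.do(f"Using this forecast, say whether an umbrella is needed: {forecast}"))
```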
def get_date() -> str
Returns the current date and time in natural language
>>> get_date() # doctest: +SKIP
'Friday, May 12, 2023 at 09:27AM'
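Since the model itself has no reliable notion of the current time, `get_date` is handy for injecting it into a system message, mirroring the `chat` example above:

```python
import languagemodels as lm

# Build a chat prompt whose system message carries the current date and time
print(lm.chat(f"""
System: Respond as a helpful assistant. It is {lm.get_date()}.

User: What day of the week is it?

Assistant:
"""))
```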
def print_tokens(prompt: str) -> None
Prints a list of tokens in a prompt
Parameters
- prompt: Prompt to use as input to tokenizer
Returns
Nothing
Examples:
>>> print_tokens("Hello world")
' Hello' (token 8774)
' world' (token 296)
>>> print_tokens("Hola mundo")
' Hol' (token 5838)
'a' (token 9)
' mun' (token 13844)
'd' (token 26)
'o' (token 32)
def count_tokens(prompt: str) -> int
Counts tokens in a prompt
Parameters
- prompt: Prompt to use as input to tokenizer
Returns
Number of tokens in the prompt
Examples:
>>> count_tokens("Hello world")
2
>>> count_tokens("Hola mundo")
5
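`count_tokens` makes it possible to check a prompt against a token budget before calling the model. A minimal sketch; the 512-token budget and the `fits_budget` helper are assumptions, not package constants:

```python
import languagemodels as lm

def fits_budget(prompt, budget=512):
    # Assumed budget; the real context limit depends on the selected model
    return lm.count_tokens(prompt) <= budget

prompt = "Translate Spanish to English: Hola mundo!"
if fits_budget(prompt):
    print(lm.do(prompt))
```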
def set_max_ram(value)
Sets max allowed RAM
This value takes priority over environment variables
Returns the numeric value set in GB
>>> set_max_ram(16)
16.0
>>> set_max_ram('512mb')
0.5
def require_model_license(match_re)
Require models to match supplied regex
This can be used to enforce certain licensing constraints when using this package.
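For example, a deployment could be restricted to permissively licensed models. The pattern below is an assumption about how license strings are named (e.g. "mit" or "apache-2.0"); adjust it to the actual license metadata in use:

```python
import languagemodels as lm

# Allow only models whose license metadata matches this regex
lm.require_model_license("mit|apache|bsd")
```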