Initial commit

This commit is contained in:
2026-05-08 21:48:19 +02:00
commit db259093e4
3 changed files with 546998 additions and 0 deletions

16
README.md Normal file
View File

@@ -0,0 +1,16 @@
# Scraping data from Archives of Nethys
## How to scrape all creatures
1. Use Firefox.
2. Go to `about:config` and raise `devtools.netmonitor.responseBodyLimit` to the maximum value, so that large response bodies are not truncated in the export.
3. Open network dev tools
4. Visit https://2e.aonprd.com/Creatures.aspx
5. Scroll down and click the button to load all
6. In the network dev tools, click the cogwheel and export everything as a HAR file.
let jsons = (open '2e.aonprd.com_Archive [26-05-08 21-11-24].har' | from json | get log.entries.response | where headers.value has "application/json" | get content.text)
⤷open '2e.aonprd.com_Archive [26-05-08 21-11-24].har' | from json | get log.entries.response | where headers.value has "application/json" | get content.text

546967
data/creatures.json Normal file

File diff suppressed because one or more lines are too long

15
extract-json-from-har.nu Executable file
View File

@@ -0,0 +1,15 @@
#!/usr/bin/env nu

# Extract the JSON response bodies from a HAR export and print their rows as JSON.
#
# Only responses that carry a header whose value is exactly "application/json"
# are considered. Each body is parsed in memory and the resulting rows are
# concatenated in the order the responses appear in the HAR file.
def main [
    har_file: path,  # HAR file exported from the browser's network tools
    --type: string   # Keep only rows whose `type` field equals this value; omit to keep every row
] {
    # HAR files are JSON, but the `.har` extension is not known to `open`,
    # so read the file raw and parse it explicitly.
    let bodies = (
        open --raw $har_file
        | from json
        | get log.entries.response
        | where headers.value has "application/json"
        | get content.text
    )

    # Parse every body directly instead of round-tripping through files in a
    # temporary directory: nothing is left behind on disk, and an export with
    # no matching responses yields an empty list rather than a failed glob.
    # `append` splices list bodies into the accumulator and keeps a non-list
    # body as a single row.
    let rows = (
        $bodies
        | each {|body| $body | from json }
        | reduce --fold [] {|parsed, acc| $acc | append $parsed }
    )

    # Without `--type`, `$type` is null; comparing against it would keep only
    # rows that lack a `type` field, so skip the filter entirely in that case.
    if $type == null {
        $rows | to json
    } else {
        $rows | where type? == $type | to json
    }
}