Experiments into markov chains, n-grams, and text generation.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

81 lines
4.2 KiB

#!/usr/bin/env bash
### MediaWiki helpers ###
# Iterates over all the pages in a category, printing one page title per line
# to stdout. Progress and error messages are written to stderr.
# Handles continues (multi-request page lists) correctly.
# $1 - Wiki api.php root
# $2 - Category name (inserted into the request URL as given)
# Returns 0 once the last batch has been printed; non-zero if the temporary
# file cannot be created, a request fails, or a response cannot be parsed.
function iterate_category() {
  local initial_url next_url temp_file continue_code
  local status=0

  # Example url: https://starbounder.org/mediawiki/api.php?action=query&format=json&list=categorymembers&cmlimit=max&cmtitle=Category:Craftables
  initial_url="$1?action=query&format=json&list=categorymembers&cmlimit=max&cmtitle=$2"
  next_url="${initial_url}"
  temp_file="$(mktemp --suffix .CategoryDownloaded.json)" || return 1

  echo "Iterating over category $2 at $1" >&2
  echo "Temporary file is at ${temp_file}" >&2

  while :; do
    echo "Fetching ${next_url}" >&2

    # Stop on a failed download (--fail also turns HTTP errors into a
    # non-zero exit). Carrying on with an empty or stale file would never
    # produce a "no more results" answer, so the loop would not terminate.
    if ! curl -sS --fail -o "${temp_file}" "${next_url}"; then
      echo "Failed to fetch ${next_url}" >&2
      status=1
      break
    fi

    if ! jq --raw-output '.query.categorymembers[].title' <"${temp_file}"; then
      echo "Unexpected response from ${next_url}" >&2
      status=1
      break
    fi

    # Prints nothing when the response carries no continue token, otherwise
    # the token percent-encoded (@uri) so it is safe to place in a URL.
    if ! continue_code="$(jq --raw-output '.continue.cmcontinue // empty | @uri' <"${temp_file}")"; then
      echo "Unexpected response from ${next_url}" >&2
      status=1
      break
    fi

    # If there's no continue token in the response, then we must be done
    if [[ -z "${continue_code}" ]]; then
      echo "Category iteration complete" >&2
      break
    fi

    next_url="${initial_url}&cmcontinue=${continue_code}"
  done

  # Every exit path from the loop lands here, so the download never lingers.
  rm -f -- "${temp_file}"
  return "${status}"
}
### CrossCode ###
# Fetch the CrossCode wiki's Items page, select the `a` elements found inside
# tables, keep only lines that contain at least one field (i.e. drop blank
# lines) and write the sorted result to Cross-Code-Items.txt.
curl 'https://crosscode.gamepedia.com/Items' \
  | xidel --data - --css 'table a' \
  | awk 'NF > 0' \
  | sort >Cross-Code-Items.txt
### Final Fantasy XV ###
# Fetches one page of the Final Fantasy wiki and runs xidel over it.
# $1 - Page name (the part of the URL after /wiki/)
# $2 - CSS selector handed to xidel
function ffxv_scrape() {
  curl "http://finalfantasy.wikia.com/wiki/$1" | xidel --data - --css "$2"
}
# Filter: cuts each line of stdin from its first "(" to the end of the line.
function ffxv_strip_parens() {
  sed -e 's/(.*$//g'
}
# All pages are gathered into a single stream and sorted once; sorting each
# page separately is unnecessary because the complete list is sorted anyway.
{
  ffxv_scrape 'Treasures_(Final_Fantasy_XV)' 'table th.b[rowspan=4]'
  ffxv_scrape 'Ingredients' 'table th.b[rowspan=4]' | ffxv_strip_parens
  ffxv_scrape 'Auto_Parts' 'table th.b[rowspan=2]'
  ffxv_scrape 'Leisure_Goods' 'table.article-table tr:not(.a) th.b'
  ffxv_scrape 'Key_Items_(Final_Fantasy_XV)' 'table.article-table tr:not(.a) th.b'
  ffxv_scrape 'List_of_Final_Fantasy_XV_items' 'table.article-table tr:not(.a) th.b' | ffxv_strip_parens
  ffxv_scrape 'List_of_Final_Fantasy_XV_accessories' 'table.article-table tr:not(.a) th.b' | ffxv_strip_parens
} | sort >Final-Fantasy-15-Items.txt
### No Man's Sky ###
# Fetch the orcz.com item list, select the links in the first cell of each
# table row plus the top-level bullet items of the page content, cut each line
# at the em dash (together with any whitespace in front of it) and write the
# sorted result to No-Mans-Sky-Items.txt.
curl "http://orcz.com/No_Man's_Sky:_Items_List" \
  | xidel --data - --css 'table td:first-child a, #mw-content-text > ul > li' \
  | sed -e 's/\s*—.*$//g' \
  | sort >No-Mans-Sky-Items.txt
### Stardew Valley ###
# Fetch stardewids.com, select the `a` elements inside `td.ts` cells and write
# the sorted result to Stardew-Valley-Items.txt.
curl 'https://stardewids.com/' \
  | xidel --data - --css 'td.ts a' \
  | sort >Stardew-Valley-Items.txt
# --no-split --lowercase --order 4 --length 12
# --no-split --start-uppercase --order 4 --length 12
# --no-split --start-uppercase --order 3 --length 12
### Recipes Wikia ###
# Builds Dishes.txt from the recipes wiki sitemap:
#   1. read the sitemap index and keep the sub-sitemap URLs containing "NS_0";
#   2. download each sub-sitemap and list the URLs in its <loc> elements;
#   3. reduce each URL to its last path segment and turn "_" into spaces;
#   4. percent-decode the names;
#   5. drop names containing "Nutrient" and sort what is left.
# Each sub-sitemap URL is passed to the inner shell as the positional argument
# $1 rather than being pasted into the command text, so characters in a URL
# cannot be interpreted as shell syntax.
# The decoder works on raw bytes so that multi-byte characters pass through
# unchanged whatever the locale; it needs Python 3.
curl http://recipes.wikia.com/sitemap-newsitemapxml-index.xml \
  | xidel --data - --css "loc" \
  | grep -i NS_0 \
  | xargs -I{} sh -c 'curl "$1" | xidel --data - --css "loc"' _ {} \
  | sed -e 's/^.*\///g' -e 's/_/ /g' \
  | python3 -c 'import sys, urllib.parse; sys.stdout.buffer.write(urllib.parse.unquote_to_bytes(sys.stdin.buffer.read()))' \
  | grep -iv "Nutrient" \
  | sort >Dishes.txt
### Rise of Berk Dragons list ###
# Prints the titles of the members of a Rise of Berk wiki category that are
# not themselves categories, one per line.
# Makes a single API request and does not follow continues.
# $1 - Category name, including the "Category:" prefix
function list_pages() {
  # The misspelled "cllimt" parameter that used to be part of this URL has been
  # dropped. The match is anchored so that only titles that start with
  # "Category:" are filtered out, not titles that merely contain it.
  curl "http://riseofberk.wikia.com/api.php?action=query&generator=categorymembers&gcmtitle=${1}&gcmlimit=max&format=json" \
    | jq --raw-output '.query.pages[].title' \
    | grep -iv '^Category:'
}
# Prints the titles of the members of a Rise of Berk wiki category that are
# themselves categories, one per line.
# Makes a single API request and does not follow continues.
# $1 - Category name, including the "Category:" prefix
function list_categories() {
  # The misspelled "cllimt" parameter that used to be part of this URL has been
  # dropped. The match is anchored so that only titles that start with
  # "Category:" are reported, not titles that merely contain it.
  curl "http://riseofberk.wikia.com/api.php?action=query&generator=categorymembers&gcmtitle=${1}&gcmlimit=max&format=json" \
    | jq --raw-output '.query.pages[].title' \
    | grep -i '^Category:'
}
# Store the titles that list_pages prints for Category:Dragons in Dragons.txt.
list_pages 'Category:Dragons' \
  >Dragons.txt
### Starbound ###
# Block names: fetch the Blocks page, select the `a` elements inside
# `.gametable`, keep only lines containing at least one character other than
# ".", then sort and de-duplicate them into Starbound.txt.
curl 'https://starbounder.org/Blocks' \
  | xidel --data - --css '.gametable a' \
  | awk '/[^.].*/' \
  | sort \
  | uniq >Starbound.txt
# Craftables: the titles from the wiki category are appended below the block
# names exactly as iterate_category prints them (no further sorting).
iterate_category 'https://starbounder.org/mediawiki/api.php' 'Category:Craftables' \
  >>Starbound.txt
### Shakespeare's Complete Works ###
# Save Project Gutenberg's plain-text file 100-0.txt as Shakespeares-Works.txt.
curl 'https://www.gutenberg.org/files/100/100-0.txt' \
  >Shakespeares-Works.txt