diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..18c4027 --- /dev/null +++ b/.gitignore @@ -0,0 +1,166 @@ +.direnv +result + +# https://github.com/github/gitignore/blob/main/Python.gitignore +# Basic .gitignore for a python repo. + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git a/background.mp4 b/figs/background.mp4 similarity index 100% rename from background.mp4 rename to figs/background.mp4 diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..af9faf5 --- /dev/null +++ b/flake.lock @@ -0,0 +1,64 @@ +{ + "nodes": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1701473968, + "narHash": "sha256-YcVE5emp1qQ8ieHUnxt1wCZCC3ZfAS+SRRWZ2TMda7E=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "34fed993f1674c8d06d58b37ce1e0fe5eebcb9f5", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1702312524, + "narHash": "sha256-gkZJRDBUCpTPBvQk25G0B7vfbpEYM5s5OZqghkjZsnE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "a9bf124c46ef298113270b1f84a164865987a91c", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-lib": { + "locked": { + "dir": "lib", + "lastModified": 1701253981, + "narHash": "sha256-ztaDIyZ7HrTAfEEUt9AtTDNoCYxUdSd6NrRHaYOIxtk=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e92039b55bcd58469325ded85d4f58dd5a4eaf58", + "type": "github" + }, + "original": { + "dir": "lib", + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-parts": "flake-parts", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..8c01cb5 --- /dev/null +++ b/flake.nix @@ -0,0 +1,35 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-parts.url = "github:hercules-ci/flake-parts"; + }; + + outputs = inputs@{ nixpkgs, flake-parts, ... 
}: + flake-parts.lib.mkFlake { inherit inputs; } { + systems = [ "x86_64-linux" "aarch64-linux" ]; + + perSystem = { pkgs, system, ... }: { + devShells.default = + pkgs.mkShell { packages = with pkgs; [ marp-cli ]; }; + + packages.slides = pkgs.stdenvNoCC.mkDerivation { + name = "slides"; + + src = ./.; + dontUnpack = true; + buildInputs = with pkgs; [ marp-cli ]; + + buildPhase = '' + marp --html $src/slides.md --allow-local-files -o slides.html + ''; + + installPhase = '' + mkdir -p $out + cp -r $src/figs $out/figs + cp slides.html $out/index.html + ''; + }; + }; + }; +} + diff --git a/slides.html b/slides.html deleted file mode 100644 index 68a16f0..0000000 --- a/slides.html +++ /dev/null @@ -1,190 +0,0 @@ -CAPTCHA
-

-

CAPTCHAs

-
-
-
-

What is a CAPTCHA ?

-
-

Definition:

-
    -
  • Completely Automated Public Turing test to tell Computers and Humans Apart
  • -
  • commonly, a third party software installed on the web pages
  • -
  • /kæp.tʃə/
  • -
- -A bit of history: - -
-
-
-

What is a CAPTCHA ?

-
-

Definition:

-
    -
  • Completely Automated Public Turing test to tell Computers and Humans Apart.
  • -
  • commonly, a third party software installed on the web pages.
  • -
  • /kæp.tʃə/
  • -
-

A bit of history:

- -
-
-
-

What are CAPTCHAs for ?

-
-

They filter out the real humans !

-

What is a non real human ?

- -
-
-
-

Why are CAPTCHAs needed ?

-
-

-

Source: Imperva

-
-
-
-

some CAPTCHA examples

-
- -
-
-
-

some CAPTCHA examples

-
- -
-
-
-

some CAPTCHA examples

-
- -
-
-
-

some CAPTCHA examples

-
- -
-
-
-

some exotic CAPTCHA examples

-
-
-
-
-

some exotic CAPTCHA examples

-
-
-
-
-

some exotic CAPTCHA examples

-
-
-
-
-

some exotic CAPTCHA examples

-
-
-
-
-

Possible attacks on CAPTCHAs ?

-
-

Quite difficult and costly:

- -
-
-
-

Alternatives to CAPTCHAs ?

-
-

Not much:

- -
-
-
-

Drawbacks ?

-
-
    -
  • Annoying
  • -
  • Accessibility
  • -
  • Privacy
  • -
-
-
- -
-🎉 Thank you for your attention 🎉 -
-
-

Welcome dear fellow humans to our scientific presentation on CAPTCHAs

Dire plein de trucs bonus en cliquant en live sur les liens (en bleu) des slides

On en faisant le con sur les tests des captchas

Faire planer le doute tout du long si Clément est réellement un humain

So first of all, what is a captcha ?

By definition, CAPTCHAs are a completely automated...

So they are simply a tool for categorizing humans and non-humans

Turing was a brilliant and famous mathematician of the last century; he is well known as a founder of modern computing (Turing machine...)

CAPTCHAs are nowadays mostly present in your web browser (pretty much the only place where you encounter them).

They are what's called third-party software, meaning that 99% of the time they are not developed by the owner of the site but by another organisation. This is due to the requirements that such a tool has. We'll talk a bit more about that in a few seconds!

And they are pronounced /kæp.tʃə/.

Let's see where captchas come from

Introduced by AltaVista, a search engine company, when they wanted to prevent unwanted additions by nefarious users to their search engine. Because at the time, if you wanted your website to be referenced in a search engine, so that it could be found easily, you had to manually add it to their system.

At the time, this preventive system was unnamed. the term captcha was coined by four mathematicians / computer scientists in 2003, namely Luis...

It's based on a reverse Turing test! First of all, a Turing test is a method for determining whether a computer is capable of human-like thinking. So a reverse Turing test is a method for testing whether or not something is a human.

They are conceived so that they are practically impossible for current computers to decipher, but they must be easy enough for real humans to do.

So CAPTCHAs filter out non-humans; this includes

bots, software applications that run automated tasks (scripts), usually with the intent to emulate human activity. They are fairly easy to code, and generally astonishingly cheap. Precisely who we want to restrict.

crawlers, internet bots that browse the World Wide Web for the purpose of web indexing. They are most of the time used by search engines to better their search results; they mostly look at the metadata of pages (title, date, author, thumbnail, description, language, icons...), but they can also be used for more nefarious reasons, combined with scrapers for example.

scrapers, the automated extraction of data from websites via bots and crawlers; not just metadata anymore, they are designed to gather a lot more data: phone numbers, emails, passwords (?), addresses, any precious info. They are generally badly viewed since they generally cause a lot of traffic on sites.

Dogs/cat KEKW

spammers, you don't want your contact form to be unprotected, or you'll soon receive email for special pills..

hackers, they actually are humans, but they generally use all the tools from above (except cat/dog) and you want to at least slow them down.

clément ? 😳

Why all the trouble, are bots really that common ? yes

a study from Imperva in 2020 estimates human traffic to be only about 60%; some other studies are even more aggressive (less than 45% sometimes).

good bots: search engines, monitoring bots, commercial crawlers, feed fetchers...

bad bots: every tool that we saw before, hackers, state spies...

You may understand why one may want to protect some areas of his website

In a way this type of challenge is relatively easy for computers to do nowadays; the difficulty of this CAPTCHA comes from the fact that attackers don't have the dataset that Google has. (If you didn't know, these come from Google Street View.)

The dataset comes from companies or individuals that need data to be classified: if you pay them and give them 100 million images, they will classify them for you (at a price).

simpler test, can still be effective, but will be surpassed very easily

same, simpler test

these types of CAPTCHAs are generally uncommon, but are generally insanely effective at stopping bots. They are cheap to create and manage/evolve.

Though they aren't well suited for any platform other than a desktop computer. I don't want to solve that using my phone.

these types of CAPTCHAs are generally uncommon, but are generally insanely effective at stopping bots. They are cheap to create and manage/evolve.

Though they aren't well suited for any platform other than a desktop computer. I don't want to solve that using my phone.

these types of CAPTCHAs are generally uncommon, but are generally insanely effective at stopping bots. They are cheap to create and manage/evolve.

Though they aren't well suited for any platform other than a desktop computer. I don't want to solve that using my phone.

audio is interesting, for blind people

Human farms, sounds like The Matrix... but you can actually pay people, in third-world countries, to click on your CAPTCHAs.

Flying under the radar: you could try to optimize your techniques to be as unsuspicious as possible, and you'll get a bit further

praying ?

It's an arms race: people are building deep learning models to try to solve these CAPTCHAs

MITM, simply infecting the machines of normal people and making internet requests on their behalf, basically a botnet

Honeypot, not a real alternative, but more a mindset, you want to trick bots into doing useless stuff

two-factor authentication, your bank for example doesn't want you to be a robot

Centralized sign-on, the famous "connect with google/facebook/france connect" button, this way you don't actually do the process yourself, but trust a third party to filter out the bots for you. (spoiler: not that effective)

force human interaction, example des procurations lors des présidentielles

motion tracking: CAPTCHAs are actually observing you even though you are not actively solving them; they look at your mouse movements, your keystrokes, and categorize you. For example, when you initially click the "I'm not a robot" box, the algorithm will observe this click and compare it to previous clicks to detect if there is a pattern (did you click perfectly in the center each time?)

\ No newline at end of file diff --git a/slides.md b/slides.md index 9574e77..dd5cbd8 100644 --- a/slides.md +++ b/slides.md @@ -28,7 +28,7 @@ h1 { } - + # CAPTCHAs