{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Entity Annotation" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gendertitlefirst_namelast_nameemailname_partnergender_partnerpostcodecitystreetfull_address
0MaleMrImanolKirlinimanol_kirlin@faulkner.comMila WeissnatFemaleAB10 1ABAberdeenBroad StreetBroad Street, AB10 1AB Aberdeen
1FemaleMsClaudieRodriguezclaudierodriguez91@haas.comJorja SchusterFemaleIM1 1AGIsle of ManCircular RoadCircular Road, IM1 1AG Isle of Man
2MaleMrIsmaelZemlakismael-zemlak45@jackson-campbell.infoJalon GloverMaleTN34 2EZHastingsBaldslow RoadBaldslow Road, TN34 2EZ Hastings
3Non-BinaryMxJesusRutherfordjesus-rutherford61@nunez.comMartin KihnMaleLA22 9HAAmblesideKirkfieldKirkfield, LA22 9HA Ambleside
4FemaleMrsLesleeBrownleslee_brown42@mendez.orgDerrell KeeblerMaleW9 2BTLondonShirland RoadShirland Road, W9 2BT London
....................................
6068FemaleMsLouettaO'Connerlouetta_o'conner@gallagher.comObed TerryMaleHG4 2QNRiponBishopton LaneBishopton Lane, HG4 2QN Ripon
6069Non-BinaryMxFleetThompsonfleet_thompson@thompson.comLeeann StoltenbergNon-BinaryEH10 4ANEdinburghFalcon AvenueFalcon Avenue, EH10 4AN Edinburgh
6070MaleMrPleasantKshlerinpleasant.kshlerin69@leonard.orgEvelyne BernierFemaleCM8 1SXWithamHolst AvenueHolst Avenue, CM8 1SX Witham
6071Non-BinaryMxTildenDickenstilden.dickens@alvarez.orgSavion JohnsMaleHA1 2RZHarrowRosslyn CrescentRosslyn Crescent, HA1 2RZ Harrow
6072MaleMrLenaKilbacklena.kilback19@lowe.comRosanne TurnerFemaleLN13 0ABAlfordChristopher RoadChristopher Road, LN13 0AB Alford
\n", "

6073 rows × 11 columns

\n", "
" ], "text/plain": [ " gender title first_name last_name \\\n", "0 Male Mr Imanol Kirlin \n", "1 Female Ms Claudie Rodriguez \n", "2 Male Mr Ismael Zemlak \n", "3 Non-Binary Mx Jesus Rutherford \n", "4 Female Mrs Leslee Brown \n", "... ... ... ... ... \n", "6068 Female Ms Louetta O'Conner \n", "6069 Non-Binary Mx Fleet Thompson \n", "6070 Male Mr Pleasant Kshlerin \n", "6071 Non-Binary Mx Tilden Dickens \n", "6072 Male Mr Lena Kilback \n", "\n", " email name_partner \\\n", "0 imanol_kirlin@faulkner.com Mila Weissnat \n", "1 claudierodriguez91@haas.com Jorja Schuster \n", "2 ismael-zemlak45@jackson-campbell.info Jalon Glover \n", "3 jesus-rutherford61@nunez.com Martin Kihn \n", "4 leslee_brown42@mendez.org Derrell Keebler \n", "... ... ... \n", "6068 louetta_o'conner@gallagher.com Obed Terry \n", "6069 fleet_thompson@thompson.com Leeann Stoltenberg \n", "6070 pleasant.kshlerin69@leonard.org Evelyne Bernier \n", "6071 tilden.dickens@alvarez.org Savion Johns \n", "6072 lena.kilback19@lowe.com Rosanne Turner \n", "\n", " gender_partner postcode city street \\\n", "0 Female AB10 1AB Aberdeen Broad Street \n", "1 Female IM1 1AG Isle of Man Circular Road \n", "2 Male TN34 2EZ Hastings Baldslow Road \n", "3 Male LA22 9HA Ambleside Kirkfield \n", "4 Male W9 2BT London Shirland Road \n", "... ... ... ... ... \n", "6068 Male HG4 2QN Ripon Bishopton Lane \n", "6069 Non-Binary EH10 4AN Edinburgh Falcon Avenue \n", "6070 Female CM8 1SX Witham Holst Avenue \n", "6071 Male HA1 2RZ Harrow Rosslyn Crescent \n", "6072 Female LN13 0AB Alford Christopher Road \n", "\n", " full_address \n", "0 Broad Street, AB10 1AB Aberdeen \n", "1 Circular Road, IM1 1AG Isle of Man \n", "2 Baldslow Road, TN34 2EZ Hastings \n", "3 Kirkfield, LA22 9HA Ambleside \n", "4 Shirland Road, W9 2BT London \n", "... ... 
\n", "6068 Bishopton Lane, HG4 2QN Ripon \n", "6069 Falcon Avenue, EH10 4AN Edinburgh \n", "6070 Holst Avenue, CM8 1SX Witham \n", "6071 Rosslyn Crescent, HA1 2RZ Harrow \n", "6072 Christopher Road, LN13 0AB Alford \n", "\n", "[6073 rows x 11 columns]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "# Read the CSV once; the bare `df` on the last line is what renders the table output.\n", "df = pd.read_csv(\"pii_dataset.csv\")\n", "df" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[, , ]\n" ] } ], "source": [ "from synthesized import MetaExtractor\n", "from synthesized.config import AddressLabels, PersonLabels\n", "from synthesized.metadata.value import Address, Person\n", "\n", "# Map each annotation's label fields to the DataFrame columns that hold them.\n", "address_labels = AddressLabels(\n", "    postcode=\"postcode\",\n", "    street=\"street\",\n", "    city=\"city\",\n", "    full_address=\"full_address\",\n", ")\n", "person_labels = PersonLabels(\n", "    firstname=\"first_name\",\n", "    lastname=\"last_name\",\n", "    title=\"title\",\n", "    gender=\"gender\",\n", "    email=\"email\",\n", ")\n", "# The partner is described by a full-name column and a gender column only.\n", "partner_labels = PersonLabels(fullname=\"name_partner\", gender=\"gender_partner\")\n", "\n", "# One annotation per entity: the address plus two separate people.\n", "address = Address(name=\"address\", labels=address_labels)\n", "person = Person(name=\"person\", labels=person_labels)\n", "person_partner = Person(name=\"person_partner\", labels=partner_labels)\n", "\n", "df_meta = MetaExtractor.extract(df, annotations=[address, person, person_partner])\n", "print(list(df_meta.children))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[AddressModel(meta=),\n", " PersonModel(meta=),\n", " PersonModel(meta=)]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from synthesized import HighDimSynthesizer\n", "from synthesized.config import HighDimConfig\n", "\n", "config = HighDimConfig(sample_addresses=False, address_locale=\"en_GB\")\n", "synth = HighDimSynthesizer(df_meta, config=config)\n", "\n", "# `_df_model` is a private attribute; it is read here only to display the child models.\n", "synth._df_model.children" ] }, { "cell_type": 
"code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training \u001b[38;5;47m╠████████████████████╣\u001b[39m Done.\n" ] } ], "source": [ "synth.learn(df)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gendertitlefirst_namelast_nameemailname_partnergender_partnerpostcodecitystreetfull_address
0FemaleMrsAmandaDaviesamandadavies@hopkins-roberts.comJosh RogersMaleN3B 4XJAndersonshireBarton manor3 Barton manor, N3B 4XJ Andersonshire
1FemaleMrsCharleneSmithcharlene-smith@simpson-mitchell.bizAbdul BuckleyNon-BinaryWA8 1YHRobinfurtRees islands927 Rees islands, WA8 1YH Robinfurt
2MaleMrBryanThompsonbryanthompson@moss.comChristine ScottFemaleBH44 2JUEast SamDavies mission4 Davies mission, BH44 2JU East Sam
3MaleMrRobertSuttonrobert_sutton79@evans-baker.comRobert JonesMaleB3 6XDSouth MarianmouthJones coves28 Jones coves, B3 6XD South Marianmouth
4FemaleMsJasmineNorthjasminenorth@jackson.comGail RogersNon-BinaryB5A 0YDShannonmouthGreen islands83 Green islands, B5A 0YD Shannonmouth
....................................
95FemaleMrsLorraineMcKenzielorraine.mckenzie@brennan-watkins.orgReece SmithMaleDN24 6XZNorth BenjaminGeoffrey groves0 Geoffrey groves, DN24 6XZ North Benjamin
96MaleMrJohnStephensjohn_stephens@parry.orgSean IqbalNon-BinaryN3K 0ZRTalbottonChristine springs20 Christine springs, N3K 0ZR Talbotton
97FemaleMsMichelleHowardmichelle_howard42@johnson.comCraig CoatesMaleNR8 3ZFEast ClareportGregory prairie6 Gregory prairie, NR8 3ZF East Clareport
98FemaleMsValerieClarkevalerie_clarke@ross-benson.co.ukMathew AllanNon-BinaryM65 8GENorth GeorgiaMiller walk088 Miller walk, M65 8GE North Georgia
99MaleMrMichaelFostermichaelfoster@murphy-patel.comJenna BurkeFemaleUB3A 1SLPort HilaryJarvis islands73 Jarvis islands, UB3A 1SL Port Hilary
\n", "

100 rows × 11 columns

\n", "
" ], "text/plain": [ " gender title first_name last_name email \\\n", "0 Female Mrs Amanda Davies amandadavies@hopkins-roberts.com \n", "1 Female Mrs Charlene Smith charlene-smith@simpson-mitchell.biz \n", "2 Male Mr Bryan Thompson bryanthompson@moss.com \n", "3 Male Mr Robert Sutton robert_sutton79@evans-baker.com \n", "4 Female Ms Jasmine North jasminenorth@jackson.com \n", ".. ... ... ... ... ... \n", "95 Female Mrs Lorraine McKenzie lorraine.mckenzie@brennan-watkins.org \n", "96 Male Mr John Stephens john_stephens@parry.org \n", "97 Female Ms Michelle Howard michelle_howard42@johnson.com \n", "98 Female Ms Valerie Clarke valerie_clarke@ross-benson.co.uk \n", "99 Male Mr Michael Foster michaelfoster@murphy-patel.com \n", "\n", " name_partner gender_partner postcode city \\\n", "0 Josh Rogers Male N3B 4XJ Andersonshire \n", "1 Abdul Buckley Non-Binary WA8 1YH Robinfurt \n", "2 Christine Scott Female BH44 2JU East Sam \n", "3 Robert Jones Male B3 6XD South Marianmouth \n", "4 Gail Rogers Non-Binary B5A 0YD Shannonmouth \n", ".. ... ... ... ... \n", "95 Reece Smith Male DN24 6XZ North Benjamin \n", "96 Sean Iqbal Non-Binary N3K 0ZR Talbotton \n", "97 Craig Coates Male NR8 3ZF East Clareport \n", "98 Mathew Allan Non-Binary M65 8GE North Georgia \n", "99 Jenna Burke Female UB3A 1SL Port Hilary \n", "\n", " street full_address \n", "0 Barton manor 3 Barton manor, N3B 4XJ Andersonshire \n", "1 Rees islands 927 Rees islands, WA8 1YH Robinfurt \n", "2 Davies mission 4 Davies mission, BH44 2JU East Sam \n", "3 Jones coves 28 Jones coves, B3 6XD South Marianmouth \n", "4 Green islands 83 Green islands, B5A 0YD Shannonmouth \n", ".. ... ... 
\n", "95 Geoffrey groves 0 Geoffrey groves, DN24 6XZ North Benjamin \n", "96 Christine springs 20 Christine springs, N3K 0ZR Talbotton \n", "97 Gregory prairie 6 Gregory prairie, NR8 3ZF East Clareport \n", "98 Miller walk 088 Miller walk, M65 8GE North Georgia \n", "99 Jarvis islands 73 Jarvis islands, UB3A 1SL Port Hilary \n", "\n", "[100 rows x 11 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_synth = synth.synthesize(100)\n", "df_synth" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Generating fuzzed postcodes" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "from synthesized.config import HighDimConfig\n", "\n", "config = HighDimConfig(\n", " learn_postcodes=True,\n", " address_locale=\"en_GB\",\n", " postcode_level=1,\n", ")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training Step 50 of 50 \u001b[38;5;156m╠████████████████████╣\u001b[39m \n", "Done.\n" ] } ], "source": [ "synth = HighDimSynthesizer(df_meta, config=config)\n", "synth.learn(df, num_iterations=1)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gendertitlefirst_namelast_nameemailname_partnergender_partnerpostcodecitystreetfull_address
0MaleMrLukeRichardsonluke_richardson@wallace.comKarl PughMaleEN9 9ZDNew LukevilleRichardson rapids83 Richardson rapids, EN9 9ZD New Lukeville
1FemaleMsHeatherStevensheather_stevens30@kerr-smith.comOliver GardinerNon-BinaryPA10 8TLNew OliviaKathleen crossing0 Kathleen crossing, PA10 8TL New Olivia
2Non-BinaryMxStaceyNashstaceynash97@dobson.orgCaroline VincentNon-BinaryIP5 6DXJohnsonportThomas gateway955 Thomas gateway, IP5 6DX Johnsonport
3Non-BinaryMxMarcMurphymarc-murphy@chapman.comStacey WilliamsNon-BinaryCH1 0RNEast HollyfortShirley views845 Shirley views, CH1 0RN East Hollyfort
4MaleMrBenjaminAtkinsonbenjaminatkinson@middleton-wright.comAshley NorrisMaleLE3 9RELake EllieMarian harbors2 Marian harbors, LE3 9RE Lake Ellie
....................................
95MaleMrMarcLyonsmarclyons@barker.comBruce ClarkeMaleDY10 2SAHamiltonfortLucy lake788 Lucy lake, DY10 2SA Hamiltonfort
96MaleMrDarrenRobinsondarren-robinson11@thompson.comElliott ReesNon-BinaryLA18 8GBWallshireTimothy skyway61 Timothy skyway, LA18 8GB Wallshire
97MaleMrEdwardMilesedwardmiles50@storey-hussain.co.ukHugh PowellMaleLE3 6DXPort JacquelinestadRobinson rapid078 Robinson rapid, LE3 6DX Port Jacquelinestad
98FemaleMsWendyPhillipswendyphillips@coleman.bizBryan HaleMaleZE1 5JNLaurenboroughBennett branch885 Bennett branch, ZE1 5JN Laurenborough
99MaleMrGavinAdamsgavin.adams@brown.comJulian WilliamsMaleKY10 3YLFionachesterAntony point38 Antony point, KY10 3YL Fionachester
\n", "

100 rows × 11 columns

\n", "
" ], "text/plain": [ " gender title first_name last_name \\\n", "0 Male Mr Luke Richardson \n", "1 Female Ms Heather Stevens \n", "2 Non-Binary Mx Stacey Nash \n", "3 Non-Binary Mx Marc Murphy \n", "4 Male Mr Benjamin Atkinson \n", ".. ... ... ... ... \n", "95 Male Mr Marc Lyons \n", "96 Male Mr Darren Robinson \n", "97 Male Mr Edward Miles \n", "98 Female Ms Wendy Phillips \n", "99 Male Mr Gavin Adams \n", "\n", " email name_partner gender_partner \\\n", "0 luke_richardson@wallace.com Karl Pugh Male \n", "1 heather_stevens30@kerr-smith.com Oliver Gardiner Non-Binary \n", "2 staceynash97@dobson.org Caroline Vincent Non-Binary \n", "3 marc-murphy@chapman.com Stacey Williams Non-Binary \n", "4 benjaminatkinson@middleton-wright.com Ashley Norris Male \n", ".. ... ... ... \n", "95 marclyons@barker.com Bruce Clarke Male \n", "96 darren-robinson11@thompson.com Elliott Rees Non-Binary \n", "97 edwardmiles50@storey-hussain.co.uk Hugh Powell Male \n", "98 wendyphillips@coleman.biz Bryan Hale Male \n", "99 gavin.adams@brown.com Julian Williams Male \n", "\n", " postcode city street \\\n", "0 EN9 9ZD New Lukeville Richardson rapids \n", "1 PA10 8TL New Olivia Kathleen crossing \n", "2 IP5 6DX Johnsonport Thomas gateway \n", "3 CH1 0RN East Hollyfort Shirley views \n", "4 LE3 9RE Lake Ellie Marian harbors \n", ".. ... ... ... \n", "95 DY10 2SA Hamiltonfort Lucy lake \n", "96 LA18 8GB Wallshire Timothy skyway \n", "97 LE3 6DX Port Jacquelinestad Robinson rapid \n", "98 ZE1 5JN Laurenborough Bennett branch \n", "99 KY10 3YL Fionachester Antony point \n", "\n", " full_address \n", "0 83 Richardson rapids, EN9 9ZD New Lukeville \n", "1 0 Kathleen crossing, PA10 8TL New Olivia \n", "2 955 Thomas gateway, IP5 6DX Johnsonport \n", "3 845 Shirley views, CH1 0RN East Hollyfort \n", "4 2 Marian harbors, LE3 9RE Lake Ellie \n", ".. ... 
\n", "95 788 Lucy lake, DY10 2SA Hamiltonfort \n", "96 61 Timothy skyway, LA18 8GB Wallshire \n", "97 078 Robinson rapid, LE3 6DX Port Jacquelinestad \n", "98 885 Bennett branch, ZE1 5JN Laurenborough \n", "99 38 Antony point, KY10 3YL Fionachester \n", "\n", "[100 rows x 11 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_synth = synth.synthesize(100)\n", "df_synth" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
postcodecity
0AB10 1ABAberdeen
1IM1 1AGIsle of Man
2TN34 2EZHastings
3LA22 9HAAmbleside
\n", "
" ], "text/plain": [ " postcode city\n", "0 AB10 1AB Aberdeen\n", "1 IM1 1AG Isle of Man\n", "2 TN34 2EZ Hastings\n", "3 LA22 9HA Ambleside" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# First four real postcode/city pairs; the next cell passes the same rows as sampling conditions.\n", "df[[\"postcode\", \"city\"]].head(4)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
postcodecitystreetfull_address
0LA22 8FEAmblesidePatel river978 Patel river, LA22 8FE Ambleside
1AB10 4ZUAberdeenAtkins ridge79 Atkins ridge, AB10 4ZU Aberdeen
2IM1 6BEIsle of ManBrian coves45 Brian coves, IM1 6BE Isle of Man
3TN34 6SLHastingsFrancesca forks3 Francesca forks, TN34 6SL Hastings
\n", "
" ], "text/plain": [ " postcode city street \\\n", "0 LA22 8FE Ambleside Patel river \n", "1 AB10 4ZU Aberdeen Atkins ridge \n", "2 IM1 6BE Isle of Man Brian coves \n", "3 TN34 6SL Hastings Francesca forks \n", "\n", " full_address \n", "0 978 Patel river, LA22 8FE Ambleside \n", "1 79 Atkins ridge, AB10 4ZU Aberdeen \n", "2 45 Brian coves, IM1 6BE Isle of Man \n", "3 3 Francesca forks, TN34 6SL Hastings " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sample addresses conditioned on the first four real postcode/city pairs.\n", "conditions = df[[\"postcode\", \"city\"]].head(4)\n", "\n", "# children[0] was listed as the AddressModel when the children were displayed earlier;\n", "# assumed to hold for this synthesizer too. Note `_df_model` is a private attribute.\n", "address_model = synth._df_model.children[0]\n", "df_address = address_model.sample(conditions=conditions)\n", "\n", "# The return value is unused, so this call presumably updates df_address in place.\n", "address_model.meta.convert_df_for_children(df_address)\n", "df_address" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Extending to different Countries" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training \u001b[38;5;47m╠████████████████████╣\u001b[39m Done.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gendertitlefirst_namelast_nameemailname_partnergender_partnerpostcodecitystreetfull_address
0MaleMrLukeWillisluke_willis35@cole.infoTracey CrawfordFemale36685LambsideTracey Ridge63295 Tracey Ridge, 36685 Lambside
1FemaleMsKimberleyHardingkimberley_harding85@george.comDanielle RobinsonFemale11445South ElizabethWoods Square5876 Woods Square, 11445 South Elizabeth
2MaleMrIanEvansianevans31@lane.co.ukLeigh WilliamsMale35950Lake StephenMichelle Underpass517 Michelle Underpass, 35950 Lake Stephen
3MaleMrStephenHillstephen_hill15@boyle.comTeresa BarnettFemale17186Lake TimothyMichelle Bridge03930 Michelle Bridge, 17186 Lake Timothy
4MaleMrScottDeanscott-dean@parker.infoLorraine JohnsonFemale95593DiaztownRobert Islands236 Robert Islands, 95593 Diaztown
....................................
95Non-BinaryMxAmeliaWinteramelia-winter@gordon-gilbert.comMitchell BurkeMale89957Port DenisefortGarcia Streets28536 Garcia Streets, 89957 Port Denisefort
96FemaleMrsKimRowleykim-rowley@jones-jones.co.ukGavin HammondMale91960North SavannahburyKari Tunnel985 Kari Tunnel, 91960 North Savannahbury
97MaleMrJoeBelljoe-bell@hartley-evans.comStanley MarshNon-Binary69347CrystallandAaron Islands506 Aaron Islands, 69347 Crystalland
98MaleMrAshleyAllenashleyallen15@greenwood.comLorraine WardFemale23153PinedavilleHunter Parks557 Hunter Parks, 23153 Pinedaville
99FemaleMrsGailDaviesgail_davies@rees.comDiane GreenFemale63773ClarkeviewMichele Radial0812 Michele Radial, 63773 Clarkeview
\n", "

100 rows × 11 columns

\n", "
" ], "text/plain": [ " gender title first_name last_name email \\\n", "0 Male Mr Luke Willis luke_willis35@cole.info \n", "1 Female Ms Kimberley Harding kimberley_harding85@george.com \n", "2 Male Mr Ian Evans ianevans31@lane.co.uk \n", "3 Male Mr Stephen Hill stephen_hill15@boyle.com \n", "4 Male Mr Scott Dean scott-dean@parker.info \n", ".. ... ... ... ... ... \n", "95 Non-Binary Mx Amelia Winter amelia-winter@gordon-gilbert.com \n", "96 Female Mrs Kim Rowley kim-rowley@jones-jones.co.uk \n", "97 Male Mr Joe Bell joe-bell@hartley-evans.com \n", "98 Male Mr Ashley Allen ashleyallen15@greenwood.com \n", "99 Female Mrs Gail Davies gail_davies@rees.com \n", "\n", " name_partner gender_partner postcode city \\\n", "0 Tracey Crawford Female 36685 Lambside \n", "1 Danielle Robinson Female 11445 South Elizabeth \n", "2 Leigh Williams Male 35950 Lake Stephen \n", "3 Teresa Barnett Female 17186 Lake Timothy \n", "4 Lorraine Johnson Female 95593 Diaztown \n", ".. ... ... ... ... \n", "95 Mitchell Burke Male 89957 Port Denisefort \n", "96 Gavin Hammond Male 91960 North Savannahbury \n", "97 Stanley Marsh Non-Binary 69347 Crystalland \n", "98 Lorraine Ward Female 23153 Pinedaville \n", "99 Diane Green Female 63773 Clarkeview \n", "\n", " street full_address \n", "0 Tracey Ridge 63295 Tracey Ridge, 36685 Lambside \n", "1 Woods Square 5876 Woods Square, 11445 South Elizabeth \n", "2 Michelle Underpass 517 Michelle Underpass, 35950 Lake Stephen \n", "3 Michelle Bridge 03930 Michelle Bridge, 17186 Lake Timothy \n", "4 Robert Islands 236 Robert Islands, 95593 Diaztown \n", ".. ... ... 
\n", "95 Garcia Streets 28536 Garcia Streets, 89957 Port Denisefort \n", "96 Kari Tunnel 985 Kari Tunnel, 91960 North Savannahbury \n", "97 Aaron Islands 506 Aaron Islands, 69347 Crystalland \n", "98 Hunter Parks 557 Hunter Parks, 23153 Pinedaville \n", "99 Michele Radial 0812 Michele Radial, 63773 Clarkeview \n", "\n", "[100 rows x 11 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rebuild and retrain the synthesizer with address_locale set to en_US.\n", "config = HighDimConfig(address_locale=\"en_US\")\n", "synth = HighDimSynthesizer(df_meta, config=config)\n", "synth.learn(df)\n", "synth.synthesize(100)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['az_AZ',\n", " 'bn_BD',\n", " 'cs_CZ',\n", " 'da_DK',\n", " 'de',\n", " 'de_AT',\n", " 'de_CH',\n", " 'de_DE',\n", " 'el_GR',\n", " 'en',\n", " 'en_AU',\n", " 'en_BD',\n", " 'en_CA',\n", " 'en_GB',\n", " 'en_IE',\n", " 'en_IN',\n", " 'en_NZ',\n", " 'en_PH',\n", " 'en_US',\n", " 'es',\n", " 'es_AR',\n", " 'es_CL',\n", " 'es_CO',\n", " 'es_ES',\n", " 'es_MX',\n", " 'fa_IR',\n", " 'fi_FI',\n", " 'fil_PH',\n", " 'fr_CA',\n", " 'fr_CH',\n", " 'fr_FR',\n", " 'he_IL',\n", " 'hi_IN',\n", " 'hr_HR',\n", " 'hu_HU',\n", " 'hy_AM',\n", " 'id_ID',\n", " 'it_IT',\n", " 'ja_JP',\n", " 'ka_GE',\n", " 'ko_KR',\n", " 'ne_NP',\n", " 'nl_BE',\n", " 'nl_NL',\n", " 'no_NO',\n", " 'pl_PL',\n", " 'pt_BR',\n", " 'pt_PT',\n", " 'ro_RO',\n", " 'ru_RU',\n", " 'sk_SK',\n", " 'sl_SI',\n", " 'sv_SE',\n", " 'ta_IN',\n", " 'th',\n", " 'th_TH',\n", " 'tl_PH',\n", " 'uk_UA',\n", " 'zh_CN',\n", " 'zh_TW']" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from synthesized.model.models.address import all_supported_locales\n", "\n", "all_supported_locales()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['az_AZ',\n", " 'cs_CZ',\n", " 'da_DK',\n", " 'de_AT',\n", " 'de_CH',\n", " 'de_DE',\n", " 'el_GR',\n", 
" 'en_AU',\n", " 'en_CA',\n", " 'en_GB',\n", " 'en_IE',\n", " 'en_IN',\n", " 'en_NZ',\n", " 'en_PH',\n", " 'en_US',\n", " 'es_AR',\n", " 'es_CL',\n", " 'es_CO',\n", " 'es_ES',\n", " 'es_MX',\n", " 'fi_FI',\n", " 'fil_PH',\n", " 'fr_CA',\n", " 'fr_CH',\n", " 'fr_FR',\n", " 'he_IL',\n", " 'hi_IN',\n", " 'hr_HR',\n", " 'hu_HU',\n", " 'hy_AM',\n", " 'id_ID',\n", " 'it_IT',\n", " 'ja_JP',\n", " 'ka_GE',\n", " 'ko_KR',\n", " 'ne_NP',\n", " 'nl_BE',\n", " 'nl_NL',\n", " 'no_NO',\n", " 'pl_PL',\n", " 'pt_BR',\n", " 'pt_PT',\n", " 'ro_RO',\n", " 'ru_RU',\n", " 'sk_SK',\n", " 'ta_IN',\n", " 'th_TH',\n", " 'tl_PH',\n", " 'uk_UA',\n", " 'zh_CN',\n", " 'zh_TW']" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from synthesized.model.models.address import all_supported_postcode_locales\n", "\n", "all_supported_postcode_locales()" ] } ], "metadata": { "kernelspec": { "display_name": "py39-sdk-2", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 2 }