asked on

logic to make unique abbreviations of a column

could you suggest code logic that would work down on a column to make it abbreviated into specific characters, but keep it unique?

25112

ASKER

here is an example..

declare @test table (column1 varchar(100))

insert into @test  
select 	'A   NOT/AV'	union
select 	'A AMERICA'	union
select 	'A B ART'	union
select 	'A NOT/AVL'	union
select 	'A O SMITH'	union
select 	'AAMER'	union
select 	'ABAKKA'	union
select 	'ACCENT'	union
select 	'ACCENTTREA'	union
select 	'ACORN TRAI'	union
select 	'ADJUST'	union
select 	'ADMIRAL'	union
select 	'AIR TEMP'	union
select 	'ALBION'	union
select 	'ALEXANDER'	union
select 	'AMANA'	union
select 	'AMER HEIR'	union
select 	'AMERICAN'	union
select 	'AMES'	union
select 	'AMETEK'	union
select 	'AMISCO'	union
select 	'AOG'	union
select 	'AQUA MATIC'	union
select 	'ARTAGE'	union
select 	'ASEA'	union
select 	'ASHLEN'	union
select 	'ASHLEY'	union
select 	'ASKO'	union
select 	'ASPEN'	union
select 	'ASSURANT'	union
select 	'AUTOTROL'	union
select 	'AVANTI'	union
select 	'B&B CHLOR'	union
select 	'BARCALOUNG'	union
select 	'BARD'	union
select 	'BAROQUE'	union
select 	'BEANPOD'	union
select 	'BEECHDALE'	union
select 	'BELLECRAFT'	union
select 	'BEMIS'	union
select 	'BERRYHILL'	union
select 	'BERTAZZONI'	union
select 	'BEST'	union
select 	'BFGINC'	union
select 	'BLANCO'	union
select 	'BLOMBERG'	union
select 	'BLUESTAR'	union
select 	'BOSCH'	union
select 	'BOYDS'	union
select 	'BRADFORD'	union
select 	'BRADINGTON'	union
select 	'BREEZYACRE'	union
select 	'BRIDGEWATE'	union
select 	'BROAN'	union
select 	'BROOKS'	union
select 	'BROWN'	union
select 	'BUTLER'	union
select 	'CALORIC'	union
select 	'CANDLEBARN'	union
select 	'CANDYM'	union
select 	'CAPEL'	union
select 	'CAPITAL'	union
select 	'CAROLINA'	union
select 	'CARPENTER'	union
select 	'CARPENTREE'	union
select 	'CARRIER'	union
select 	'CATNAPPER'	union
select 	'CBK'	union
select 	'CDM OUTLET'	union
select 	'CELADON'	union
select 	'CHEM-TECH'	union
select 	'CHEST/CF'	union
select 	'CHESTS UNL'	union
select 	'CHRISTOPHE'	union
select 	'CHROM'	union
select 	'CLASSIC'	union
select 	'CLAYTON'	union
select 	'COLONIAL H'	union
select 	'COLORSHOP'	union
select 	'COMFDESIGN'	union
select 	'COMPLEMENT'	union
select 	'CONNER'	union
select 	'COUNDESIGN'	union
select 	'COUNTRY'	union
select 	'COUNTRYV'	union
select 	'CRAFTMAST'	union
select 	'CRESCENT'	union
select 	'CRESTVIEW'	union
select 	'CROSLEY'	union
select 	'CRYSTAL CL'	union
select 	'CULLIGAN'	union
select 	'D&C CORP'	union
select 	'DACOR'	union
select 	'DALYN'	union
select 	'DANBY'	union
select 	'DANIELLAMP'	union
select 	'DCS'	union
select 	'DELONGHI'	union
select 	'DIMPLEX'	union
select 	'DIMPLEXE'	union
select 	'DIVA'	union
select 	'DONIQUE'	union
select 	'DUCANE'	union
select 	'DURAFLAME'	union
select 	'EASTERNACC'	union
select 	'ECHOES'	union
select 	'EDEN PURE'	union
select 	'EDRICH'	union
select 	'ELECTRICAL'	union
select 	'ELECTROLUX'	union
select 	'ELITE'	union
select 	'ELMCREST'	union
select 	'ELMIRA'	union
select 	'ELRAN'	union
select 	'EMERSON'	union
select 	'ENGLAND'	union
select 	'ENGLANDER'	union
select 	'ERIE'	union
select 	'EUROTECH'	union
select 	'EVERPURE'	union
select 	'EXPRESSION'	union
select 	'EZGLIDECO'	union
select 	'FABER'	union
select 	'FAIRFIELD'	union
select 	'FAMILY'	union
select 	'FEDDERS'	union
select 	'FISHERPAYK'	union
select 	'FIVE STAR'	union
select 	'FLECK'	union
select 	'FLEXBED'	union
select 	'FRANKE'	union
select 	'FRANKLIN'	union
select 	'FRIEDRICH'	union
select 	'FRIGIDAIRE'	union
select 	'FRIGO'	union
select 	'FUJITSU'	union
select 	'GAGGENAU'	union
select 	'GARLAND'	union
select 	'GARMIN'	union
select 	'GAYLA''S'	union
select 	'GE'	union
select 	'GEM'	union
select 	'GENERAL'	union
select 	'GIBSON'	union
select 	'GIFTCRAFT'	union
select 	'GOLDSTAR'	union
select 	'GOODMAN'	union
select 	'GOULDS'	union
select 	'GRAHAMDUNN'	union
select 	'GRANDHALL'	union
select 	'GRILLI'	union
select 	'GRISTMILL'	union
select 	'GROFF'	union
select 	'GUARDSMAN'	union
select 	'H'	union
select 	'HACH'	union
select 	'HAIER'	union
select 	'HALIANT'	union
select 	'HALSEY TAY'	union
select 	'HAMILTON'	union
select 	'HARDEN'	union
select 	'HARDWICK'	union
select 	'HARMSCO'	union
select 	'HE ANDERSO'	union
select 	'HERITAGE'	union
select 	'HERITAGE L'	union
select 	'HERITAGE W'	union
select 	'HERITAGEDI'	union
select 	'HOLLAND'	union
select 	'HOLTON'	union
select 	'HOMESPICE'	union
select 	'HOMESTEAD'	union
select 	'HOOKER'	union
select 	'HOOVER'	union
select 	'HORNING'	union
select 	'HOTPOINT'	union
select 	'HUBBARTON'	union
select 	'HUEBSCH'	union
select 	'HWCHAIR'	union
select 	'IDEAL HORI'	union
select 	'IMAX'	union
select 	'INSINKERAT'	union
select 	'INSTALL'	union
select 	'INTERACTIV'	union
select 	'INTERIOREX'	union
select 	'IRON-A-WAY'	union
select 	'ISTA'	union
select 	'J C PENNY'	union
select 	'J EDWARD'	union
select 	'JACKFURN'	union
select 	'JANDAL'	union
select 	'JENN-AIR'	union
select 	'JWEAVERQU'	union
select 	'KALALOU'	union
select 	'KELLER'	union
select 	'KELVINATOR'	union
select 	'KENMORE'	union
select 	'KENNEBUNK'	union
select 	'KEYSTONE'	union
select 	'KEYTON'	union
select 	'KICHLER'	union
select 	'KINCAID'	union
select 	'KINETICO'	union
select 	'KITCHENAID'	union
select 	'KLAUSSNER'	union
select 	'KOHLER'	union
select 	'LAMOTTE'	union
select 	'LANCASTER'	union
select 	'LANE'	union
select 	'LANG'	union
select 	'LATEX'	union
select 	'LDBEAN'	union
select 	'LEA'	union
select 	'LEGGETT'	union
select 	'LG'	union
select 	'LIEBHERR'	union
select 	'LINDSAY'	union
select 	'LITTLE VAL'	union
select 	'LITTON'	union
select 	'LIVINGSTON'	union
select 	'LMI'	union
select 	'LYNX'	union
select 	'MADISON'	union
select 	'MAGICCHEF'	union
select 	'MAGNATROL'	union
select 	'MAGNUSSEN'	union
select 	'MANUEL'	union
select 	'MAPP'	union
select 	'MARATHON'	union
select 	'MARFURN'	union
select 	'MARVEL'	union
select 	'MASTER'	union
select 	'MAXWELL WO'	union
select 	'MAYTAG'	union
select 	'MAZZEI'	union
select 	'MEDALLION'	union
select 	'MIAMI CARE'	union
select 	'MIDWEST'	union
select 	'MIELE'	union
select 	'MISC'	union
select 	'MOBEL'	union
select 	'MOBEL/CF'	union
select 	'MODERN MAI'	union
select 	'MOEN'	union
select 	'MONTGOMERY'	union
select 	'NATURES'	union
select 	'NEEDLE'	union
select 	'NEWCENTURY'	union
select 	'NORCOLD'	union
select 	'NORGE'	union
select 	'NORWESCO'	union
select 	'NULLFURN'	union
select 	'NW'	union
select 	'OASIS'	union
select 	'OCI'	union
select 	'OHIO TABLE'	union
select 	'PACIFICRIM'	union
select 	'PANASONIC'	union
select 	'PARK DES'	union
select 	'PARKER SO'	union
select 	'PAUL B ZIM'	union
select 	'PEERLESS'	union
select 	'PELL-CHLOR'	union
select 	'PENTEK'	union
select 	'PEOPLOUNGE'	union
select 	'PERFORMANC'	union
select 	'PERLICK'	union
select 	'PETERSREV'	union
select 	'PICTUREIT'	union
select 	'PIERI'	union
select 	'PILGRIMSPR'	union
select 	'PLUMBING'	union
select 	'POMROY'	union
select 	'POULAN'	union
select 	'POWELL'	union
select 	'PREMIER'	union
select 	'PRESTIGE'	union
select 	'PRICEPFIST'	union
select 	'PROMINENT'	union
select 	'PROPAC'	union
select 	'PULASKI'	union
select 	'PURETEST'	union
select 	'QUASAR'	union
select 	'QUOIZEL'	union
select 	'RAINSOFT'	union
select 	'RANGE AIRE'	union
select 	'RANGEAIRE'	union
select 	'RCA'	union
select 	'REFERRAL'	union
select 	'RELAXOLOUN'	union
select 	'RENWIL'	union
select 	'RESTONIC'	union
select 	'REVCO'	union
select 	'REVERE'	union
select 	'ROPER'	union
select 	'ROWEFURN'	union
select 	'RUSCO'	union
select 	'RUSSELL'	union
select 	'SADEK'	union
select 	'SALEM'	union
select 	'SAMMOORE'	union
select 	'SAMSUNG'	union
select 	'SANDERS'	union
select 	'SANITRON'	union
select 	'SANYO'	union
select 	'SCOTSMAN'	union
select 	'SEARS'	union
select 	'SERTA'	union
select 	'SERVICESOF'	union
select 	'SHARP'	union
select 	'SHAW'	union
select 	'SHURFLO'	union
select 	'SIEMENS'	union
select 	'SIMMONS'	union
select 	'SJ ELECTRO'	union
select 	'SLIGH'	union
select 	'SOMERSET'	union
select 	'SOUTH TEX'	union
select 	'SPEED QUEE'	union
select 	'STABER'	union
select 	'STANLEY'	union
select 	'STARITE'	union
select 	'STATE'	union
select 	'STEINWORLD'	union
select 	'STENNER'	union
select 	'STERLING'	union
select 	'STRUCTURAL'	union
select 	'STUARTFURN'	union
select 	'STYLECRAFT'	union
select 	'SUB-ZERO'	union
select 	'SULLINS'	union
select 	'SUMMIT'	union
select 	'SUN RAY'	union
select 	'SUNBEAM'	union
select 	'SUNNYDESIG'	union
select 	'SUNROC'	union
select 	'SUPER/CF'	union
select 	'SUPERIOR'	union
select 	'TACHING'	union
select 	'TADIRAN'	union
select 	'TAPPAN'	union
select 	'TECHNETIC'	union
select 	'TEI'	union
select 	'TEMPLE'	union
select 	'TEMPO'	union
select 	'TENDER HRT'	union
select 	'THERMADOR'	union
select 	'TOMKINKADE'	union
select 	'TOWN HALL'	union
select 	'TOWN SQUAR'	union
select 	'TOWNES/CF'	union
select 	'TRADE'	union
select 	'TRAEGER'	union
select 	'TREEMASTER'	union
select 	'TRIP'	union
select 	'TROJAN'	union
select 	'TWINSTAR'	union
select 	'U-LINE'	union
select 	'ULTRASHIEL'	union
select 	'UMA ENTERP'	union
select 	'UNIVERSAL'	union
select 	'USED'	union
select 	'VALLEYVIEW'	union
select 	'VAUGHAN'	union
select 	'VAUGHN'	union
select 	'VENMAR'	union
select 	'VENT A HOO'	union
select 	'VESTA'	union
select 	'VIKING'	union
select 	'VINEYARD'	union
select 	'VORNADO'	union
select 	'WALKER'	union
select 	'WARRANTY'	union
select 	'WASTE KING'	union
select 	'WATERFORD'	union
select 	'WATR FACT'	union
select 	'WAVERLY'	union
select 	'WEBER'	union
select 	'WELBUILT'	union
select 	'WELL MATE'	union
select 	'WELLXTROL'	union
select 	'WESLEYALL'	union
select 	'WESTINGHSE'	union
select 	'WHIRLPOOL'	union
select 	'WINDCREST'	union
select 	'WOLF'	union
select 	'WOODCRAFT'	union
select 	'WOODMARC'	union
select 	'WOODS'	union
select 	'Y&TWOODCR'	union
select 	'ZEPHYR'	

select COUNT(*) from @test
select count(distinct column1) from @test
select count(distinct left(column1,4)) from @test

Open in new window

25112

ASKER

there are 391 entries.. but when you put distinct on left (column1,4), it becomes down to 356. how you would work around to make sure all 391 have unique abbreviations? (preferably take out the spaces)

25112

ASKER

the goal is to keep the abbreviation close to the original but there is no strict rules how it should be done.. and that entries should have a unique abbreviation.

chwong67

select count(distinct replace(column1,' ', '')) from @test

25112

ASKER

we need to abbreviate to 4 characters..

so
select count(distinct left(replace(column1,' ', ''),4)) from @test
is only 353.

25112

ASKER

these are the records where there is duplicates.. is there a logic to make it iterate and find a unique abbreviation from the source?

column1	column2
AAMER	AAME
A AMERICA	AAME
ACCENT	ACCE
ACCENTTREA	ACCE
AMER HEIR	AMER
AMERICAN	AMER
A NOT/AVL	ANOT
A   NOT/AV	ANOT
ASHLEN	ASHL
ASHLEY	ASHL
BRADFORD	BRAD
BRADINGTON	BRAD
CANDLEBARN	CAND
CANDYM	CAND
CARPENTER	CARP
CARPENTREE	CARP
CHEST/CF	CHES
CHESTS UNL	CHES
COLONIAL H	COLO
COLORSHOP	COLO
COUNDESIGN	COUN
COUNTRY	COUN
COUNTRYV	COUN
CRESCENT	CRES
CRESTVIEW	CRES
DIMPLEX	DIMP
DIMPLEXE	DIMP
ELECTRICAL	ELEC
ELECTROLUX	ELEC
ENGLAND	ENGL
ENGLANDER	ENGL
FRANKE	FRAN
FRANKLIN	FRAN
FRIGIDAIRE	FRIG
FRIGO	FRIG
HARDEN	HARD
HARDWICK	HARD
HERITAGE	HERI
HERITAGE L	HERI
HERITAGE W	HERI
HERITAGEDI	HERI
HOMESPICE	HOME
HOMESTEAD	HOME
INTERACTIV	INTE
INTERIOREX	INTE
LITTLE VAL	LITT
LITTON	LITT
MAGNATROL	MAGN
MAGNUSSEN	MAGN
MOBEL	MOBE
MOBEL/CF	MOBE
PARK DES	PARK
PARKER SO	PARK
RANGE AIRE	RANG
RANGEAIRE	RANG
SUNROC	SUNR
SUN RAY	SUNR
SUPER/CF	SUPE
SUPERIOR	SUPE
TEMPLE	TEMP
TEMPO	TEMP
TOWN HALL	TOWN
TOWN SQUAR	TOWN
TOWNES/CF	TOWN
VAUGHAN	VAUG
VAUGHN	VAUG
WELL MATE	WELL
WELLXTROL	WELL
WOODCRAFT	WOOD
WOODMARC	WOOD
WOODS	WOOD

Open in new window

Kevin Cross

I would maybe go with first initial of each word, then if not enough words are found then use the first and last of the last word. It may eliminate some of the conflicts.

25112

ASKER

can you give an example with the below:

FRANKE FRAN
FRANKLIN FRAN

how does your logic work for the above?

ASKER CERTIFIED SOLUTION

Kevin Cross

membership

This solution is only available to members.

To access this solution, you must be a member of Experts Exchange.

Start Free Trial

Kevin Cross

With your two test cases, the above algorithm returns:

FRANKE = FRAE
FRANKLIN = FRNN

Kevin

Kevin Cross

As an enhancement, you can strip out special characters like the '/' and '&' and make those spaces which gives you more words.

dbo.fn_DelimitedToTable(replace(replace(column1, '/', ' '), '&', ''), ' ')

Also, if you data may just contain one letter, this is a problem:
substring(w1.word, 2, 1)

I tried a variation with replacing '/' and '&' as well as adding another join as #words w3 -- I can get to 388 distinct values, so again if you play with this based on other rules about your data you know to be true, i.e., if always X length guaranteed, then more substring(word, n, 1) formulas can be used without causing errors in nonexistent index passed to substring.

Hope that all makes sense.

Kevin

25112

ASKER

truly helped.. thanks Kevin.